From 5d5f02cd86cf885be486427a450b999086c6b1ec Mon Sep 17 00:00:00 2001 From: rich Date: Thu, 6 Sep 2007 21:02:31 +0000 Subject: [PATCH] Added FORTH interpreter and older versions. --- jonesforth.S | 984 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ jonesforth.S.1 | 217 +++++++++++++ jonesforth.S.10 | 673 ++++++++++++++++++++++++++++++++++++++ jonesforth.S.11 | 914 ++++++++++++++++++++++++++++++++++++++++++++++++++++ jonesforth.S.2 | 222 +++++++++++++ jonesforth.S.3 | 250 ++++++++++++++ jonesforth.S.4 | 310 ++++++++++++++++++ jonesforth.S.5 | 370 +++++++++++++++++++++ jonesforth.S.6 | 435 +++++++++++++++++++++++++ jonesforth.S.7 | 475 +++++++++++++++++++++++++++ jonesforth.S.8 | 598 ++++++++++++++++++++++++++++++++++ jonesforth.S.9 | 630 ++++++++++++++++++++++++++++++++++++ 12 files changed, 6078 insertions(+) create mode 100644 jonesforth.S create mode 100644 jonesforth.S.1 create mode 100644 jonesforth.S.10 create mode 100644 jonesforth.S.11 create mode 100644 jonesforth.S.2 create mode 100644 jonesforth.S.3 create mode 100644 jonesforth.S.4 create mode 100644 jonesforth.S.5 create mode 100644 jonesforth.S.6 create mode 100644 jonesforth.S.7 create mode 100644 jonesforth.S.8 create mode 100644 jonesforth.S.9 diff --git a/jonesforth.S b/jonesforth.S new file mode 100644 index 0000000..11b0cfd --- /dev/null +++ b/jonesforth.S @@ -0,0 +1,984 @@ +/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- + * By Richard W.M. Jones + * + * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S + */ + +#include + +/* NOTES------------------------------------------------------------------------------------------------------------------- + +Need to say something about $ before constants. + +And about je/jne/ja/jb/jbe/etc + + + + +*/ + +/* NEXT macro. */ + .macro NEXT + lodsl + jmp *(%eax) + .endm + +/* Macros to deal with the return stack. */ + .macro PUSHRSP reg + lea -4(%ebp),%ebp // push reg on to return stack + movl \reg,(%ebp) + .endm + + .macro POPRSP reg + mov (%ebp),\reg // pop top of return stack to reg + lea 4(%ebp),%ebp + .endm + +/* ELF entry point. */ + .text + .globl _start +_start: + cld + mov %esp,var_S0 // Store the initial data stack pointer. + mov $return_stack,%ebp // Initialise the return stack. + + mov $cold_start,%esi // Initialise interpreter. + NEXT // Run interpreter! + + .section .rodata +cold_start: // High-level code without a codeword. + .int COLD + +/* DOCOL - the interpreter! */ + .text + .align 4 +DOCOL: + PUSHRSP %esi // push %esi on to the return stack + addl $4,%eax // %eax points to codeword, so make + movl %eax,%esi // %esi point to first data word + NEXT + +/*---------------------------------------------------------------------- + * Fixed sized buffers for everything. + */ + .bss + +/* FORTH return stack. */ +#define RETURN_STACK_SIZE 8192 + .align 4096 + .space RETURN_STACK_SIZE +return_stack: + +/* Space for user-defined words. */ +#define USER_DEFS_SIZE 16384 + .align 4096 +user_defs_start: + .space USER_DEFS_SIZE + + + + + + +/*---------------------------------------------------------------------- + * Built-in words defined the long way. + */ +#define F_IMMED 0x80 +#define F_HIDDEN 0x20 + + // Store the chain of links. + .set link,0 + + .macro defcode name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int code_\label // codeword + .text + .align 4 + .globl code_\label +code_\label : // assembler code follows + .endm + + .macro defword name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int DOCOL // codeword - the interpreter + // list of word pointers follow + .endm + + .macro defvar name, namelen, flags=0, label, initial=0 + defcode \name,\namelen,\flags,\label + push $var_\name + NEXT + .data + .align 4 +var_\name : + .int \initial + .endm + + // Some easy ones, written in assembly for speed + defcode "DROP",4,,DROP + pop %eax // drop top of stack + NEXT + + defcode "DUP",3,,DUP + pop %eax // duplicate top of stack + push %eax + push %eax + NEXT + + defcode "SWAP",4,,SWAP + pop %eax // swap top of stack + pop %ebx + push %eax + push %ebx + NEXT + + defcode "OVER",4,,OVER + mov 4(%esp),%eax // get the second element of stack + push %eax // and push it on top + NEXT + + defcode "ROT",3,,ROT + pop %eax + pop %ebx + pop %ecx + push %eax + push %ecx + push %ebx + NEXT + + defcode "-ROT",4,,NROT + pop %eax + pop %ebx + pop %ecx + push %ebx + push %eax + push %ecx + NEXT + + defcode "1+",2,,INCR + incl (%esp) // increment top of stack + NEXT + + defcode "1-",2,,DECR + decl (%esp) // decrement top of stack + NEXT + + defcode "4+",2,,INCR4 + addl $4,(%esp) // increment top of stack + NEXT + + defcode "4-",2,,DECR4 + subl $4,(%esp) // decrement top of stack + NEXT + + defcode "+",1,,ADD + pop %eax + addl %eax,(%esp) + NEXT + + defcode "-",1,,SUB + pop %eax + subl %eax,(%esp) + NEXT + + defcode "*",1,,MUL + pop %eax + pop %ebx + imull %ebx,%eax + push %eax // ignore overflow + NEXT + + defcode "/",1,,DIV + xor %edx,%edx + pop %ebx + pop %eax + idivl %ebx + push %eax // push quotient + NEXT + + defcode "MOD",3,,MOD + xor %edx,%edx + pop %ebx + pop %eax + idivl %ebx + push %edx // push remainder + NEXT + + defcode "=",1,,EQU // top two words are equal? + pop %eax + pop %ebx + cmp %ebx,%eax + je 1f + pushl $0 + NEXT +1: pushl $1 + NEXT + + defcode "<>",2,,NEQU // top two words are not equal? + pop %eax + pop %ebx + cmp %ebx,%eax + je 1f + pushl $1 + NEXT +1: pushl $0 + NEXT + + defcode "0=",2,,ZEQU // top of stack equals 0? + pop %eax + test %eax,%eax + jz 1f + pushl $0 + NEXT +1: pushl $1 + NEXT + + defcode "AND",3,,AND + pop %eax + andl %eax,(%esp) + NEXT + + defcode "OR",2,,OR + pop %eax + orl %eax,(%esp) + NEXT + + defcode "INVERT",6,,INVERT + notl (%esp) + NEXT + + // COLD must not return (ie. must not call EXIT). + defword "COLD",4,,COLD + // XXX reinitialisation of the interpreter + .int INTERPRETER // call the interpreter loop (never returns) + .int LIT,1,SYSEXIT // hmmm, but in case it does, exit(1). + + defcode "EXIT",4,,EXIT + POPRSP %esi // pop return stack into %esi + NEXT + + defcode "LIT",3,,LIT + // %esi points to the next command, but in this case it points to the next + // literal 32 bit integer. Get that literal into %eax and increment %esi. + // On x86, it's a convenient single byte instruction! (cf. NEXT macro) + lodsl + push %eax // push the literal number on to stack + NEXT + + defcode "LITSTRING",9,,LITSTRING + lodsl // get the length of the string + push %eax // push it on the stack + push %esi // push the address of the start of the string + addl %eax,%esi // skip past the string + addl $3,%esi // but round up to next 4 byte boundary + andl $~3,%esi + NEXT + + defcode "BRANCH",6,,BRANCH + add (%esi),%esi // add the offset to the instruction pointer + NEXT + + defcode "0BRANCH",7,,ZBRANCH + pop %eax + test %eax,%eax // top of stack is zero? + jz code_BRANCH // if so, jump back to the branch function above + lodsl // otherwise we need to skip the offset + NEXT + + defcode "!",1,,STORE + pop %ebx // address to store at + pop %eax // data to store there + mov %eax,(%ebx) // store it + NEXT + + defcode "@",1,,FETCH + pop %ebx // address to fetch + mov (%ebx),%eax // fetch it + push %eax // push value onto stack + NEXT + +/* ! and @ (STORE and FETCH) store 32-bit words. It's also useful to be able to read and write bytes. + * I don't know whether FORTH has these words, so I invented my own, called !b and @b. + * Byte-oriented operations only work on architectures which permit them (i386 is one of those). + */ + defcode "!b",2,,STOREBYTE + pop %ebx // address to store at + pop %eax // data to store there + movb %al,(%ebx) // store it + NEXT + + defcode "@b",2,,FETCHBYTE + pop %ebx // address to fetch + xor %eax,%eax + movb (%ebx),%al // fetch it + push %eax // push value onto stack + NEXT + + // The STATE variable is 0 for execute mode, != 0 for compile mode + defvar "STATE",5,,STATE + + // This points to where compiled words go. + defvar "HERE",4,,HERE,user_defs_start + + // This is the last definition in the dictionary. + defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary + + // _X, _Y and _Z are scratch variables used by standard words. + defvar "_X",2,,TX + defvar "_Y",2,,TY + defvar "_Z",2,,TZ + + // This stores the top of the data stack. + defvar "S0",2,,SZ + + // This stores the top of the return stack. + defvar "R0",2,,RZ,return_stack + + defcode "DSP@",4,,DSPFETCH + mov %esp,%eax + push %eax + NEXT + + defcode "DSP!",4,,DSPSTORE + pop %esp + NEXT + + defcode ">R",2,,TOR + pop %eax // pop parameter stack into %eax + PUSHRSP %eax // push it on to the return stack + NEXT + + defcode "R>",2,,FROMR + POPRSP %eax // pop return stack on to %eax + push %eax // and push on to parameter stack + NEXT + + defcode "RSP@",4,,RSPFETCH + push %ebp + NEXT + + defcode "RSP!",4,,RSPSTORE + pop %ebp + NEXT + + defcode "RDROP",5,,RDROP + lea 4(%ebp),%ebp // pop return stack and throw away + NEXT + + defcode "KEY",3,,KEY + call _KEY + push %eax // push return value on stack + NEXT +_KEY: + mov (currkey),%ebx + cmp (bufftop),%ebx + jge 1f + xor %eax,%eax + mov (%ebx),%al + inc %ebx + mov %ebx,(currkey) + ret + +1: // out of input; use read(2) to fetch more input from stdin + xor %ebx,%ebx // 1st param: stdin + mov $buffer,%ecx // 2nd param: buffer + mov %ecx,currkey + mov $buffend-buffer,%edx // 3rd param: max length + mov $__NR_read,%eax // syscall: read + int $0x80 + test %eax,%eax // If %eax <= 0, then exit. + jbe 2f + addl %eax,%ecx // buffer+%eax = bufftop + mov %ecx,bufftop + jmp _KEY + +2: // error or out of input: exit + xor %ebx,%ebx + mov $__NR_exit,%eax // syscall: exit + int $0x80 + + defcode "EMIT",4,,EMIT + pop %eax + call _EMIT + NEXT +_EMIT: + mov $1,%ebx // 1st param: stdout + + // write needs the address of the byte to write + mov %al,(2f) + mov $2f,%ecx // 2nd param: address + + mov $1,%edx // 3rd param: nbytes = 1 + + mov $__NR_write,%eax // write syscall + int $0x80 + ret + + .bss +2: .space 1 // scratch used by EMIT + + defcode "WORD",4,,WORD + call _WORD + push %ecx // push length + push %edi // push base address + NEXT + +_WORD: + /* Search for first non-blank character. Also skip \ comments. */ +1: + call _KEY // get next key, returned in %eax + cmpb $'\\',%al // start of a comment? + je 3f // if so, skip the comment + cmpb $' ',%al + jbe 1b // if so, keep looking + + /* Search for the end of the word, storing chars as we go. */ + mov $5f,%edi // pointer to return buffer +2: + stosb // add character to return buffer + call _KEY // get next key, returned in %al + cmpb $' ',%al // is blank? + ja 2b // if not, keep looping + + /* Return the word (well, the static buffer) and length. */ + sub $5f,%edi + mov %edi,%ecx // return length of the word + mov $5f,%edi // return address of the word + ret + + /* Code to skip \ comments to end of the current line. */ +3: + call _KEY + cmpb $'\n',%al // end of line yet? + jne 3b + jmp 1b + + .bss + // A static buffer where WORD returns. Subsequent calls + // overwrite this buffer. Maximum word length is 32 chars. +5: .space 32 + + defcode "EMITSTRING",10,,EMITSTRING + mov $1,%ebx // 1st param: stdout + pop %ecx // 2nd param: address of string + pop %edx // 3rd param: length of string + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + defcode ".",1,,DOT + pop %eax // Get the number to print into %eax + call _DOT // Easier to do this recursively ... + NEXT +_DOT: + mov $10,%ecx // Base 10 +1: + cmp %ecx,%eax + jb 2f + xor %edx,%edx // %edx:%eax / %ecx -> quotient %eax, remainder %edx + idivl %ecx + pushl %edx + call _DOT + popl %eax + jmp 1b +2: + xor %ah,%ah + aam $10 + cwde + addl $'0',%eax + call _EMIT + ret + + // Parse a number from a string on the stack -- almost the opposite of . (DOT) + // Note that there is absolutely no error checking. In particular the length of the + // string must be >= 1 bytes. + defcode "SNUMBER",7,,SNUMBER + pop %edi + pop %ecx + call _SNUMBER + push %eax + NEXT +_SNUMBER: + xor %eax,%eax + xor %ebx,%ebx +1: + imull $10,%eax // %eax *= 10 + movb (%edi),%bl + inc %edi + subb $'0',%bl // ASCII -> digit + add %ebx,%eax + dec %ecx + jnz 1b + ret + + defcode "FIND",4,,FIND + pop %edi // %edi = address + pop %ecx // %ecx = length + call _FIND + push %eax + NEXT + +_FIND: + push %esi // Save %esi so we can use it in string comparison. + + // Now we start searching backwards through the dictionary for this word. + mov var_LATEST,%edx // LATEST points to name header of the latest word in the dictionary +1: + test %edx,%edx // NULL pointer? (end of the linked list) + je 4f + + // Compare the length expected and the length of the word. + // Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery + // this won't pick the word (the length will appear to be wrong). + xor %eax,%eax + movb 4(%edx),%al // %al = flags+length field + andb $(F_HIDDEN|0x1f),%al // %al = name length + cmpb %cl,%al // Length is the same? + jne 2f + + // Compare the strings in detail. + push %ecx // Save the length + push %edi // Save the address (repe cmpsb will move this pointer) + lea 5(%edx),%esi // Dictionary string we are checking against. + repe cmpsb // Compare the strings. + pop %edi + pop %ecx + jne 2f // Not the same. + + // The strings are the same - return the header pointer in %eax + pop %esi + mov %edx,%eax + ret + +2: + mov (%edx),%edx // Move back through the link field to the previous word + jmp 1b // .. and loop. + +4: // Not found. + pop %esi + xor %eax,%eax // Return zero to indicate not found. + ret + + defcode ">CFA",4,,TCFA // DEA -> Codeword address + pop %edi + call _TCFA + push %edi + NEXT +_TCFA: + xor %eax,%eax + add $4,%edi // Skip link pointer. + movb (%edi),%al // Load flags+len into %al. + inc %edi // Skip flags+len byte. + andb $0x1f,%al // Just the length, not the flags. + add %eax,%edi // Skip the name. + addl $3,%edi // The codeword is 4-byte aligned. + andl $~3,%edi + ret + + defcode "CHAR",4,,CHAR + call _WORD // Returns %ecx = length, %edi = pointer to word. + xor %eax,%eax + movb (%edi),%al // Get the first character of the word. + push %eax // Push it onto the stack. + NEXT + + defcode ":",1,,COLON + + // Get the word and create a dictionary entry header for it. + call _WORD // Returns %ecx = length, %edi = pointer to word. + mov %edi,%ebx // %ebx = address of the word + + movl var_HERE,%edi // %edi is the address of the header + movl var_LATEST,%eax // Get link pointer + stosl // and store it in the header. + + mov %cl,%al // Get the length. + orb $F_HIDDEN,%al // Set the HIDDEN flag on this entry. + stosb // Store the length/flags byte. + push %esi + mov %ebx,%esi // %esi = word + rep movsb // Copy the word + pop %esi + addl $3,%edi // Align to next 4 byte boundary. + andl $~3,%edi + + movl $DOCOL,%eax // The codeword for user-created words is always DOCOL (the interpreter) + stosl + + // Header built, so now update LATEST and HERE. + // We'll be compiling words and putting them HERE. + movl var_HERE,%eax + movl %eax,var_LATEST + movl %edi,var_HERE + + // And go into compile mode by setting STATE to 1. + movl $1,var_STATE + NEXT + + defcode ",",1,,COMMA + pop %eax // Code pointer to store. + call _COMMA + NEXT +_COMMA: + movl var_HERE,%edi // HERE + stosl // Store it. + movl %edi,var_HERE // Update HERE (incremented) + ret + + defcode "HIDDEN",6,,HIDDEN + call _HIDDEN + NEXT +_HIDDEN: + movl var_LATEST,%edi // LATEST word. + addl $4,%edi // Point to name/flags byte. + xorb $F_HIDDEN,(%edi) // Toggle the HIDDEN bit. + ret + + defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE + call _IMMEDIATE + NEXT +_IMMEDIATE: + movl var_LATEST,%edi // LATEST word. + addl $4,%edi // Point to name/flags byte. + xorb $F_IMMED,(%edi) // Toggle the IMMED bit. + ret + + defcode ";",1,F_IMMED,SEMICOLON + movl $EXIT,%eax // EXIT is the final codeword in compiled words. + call _COMMA // Store it. + call _HIDDEN // Toggle the HIDDEN flag (unhides the new word). + xor %eax,%eax // Set STATE to 0 (back to execute mode). + movl %eax,var_STATE + NEXT + +/* This definiton of ' (TICK) is strictly cheating - it also only works in compiled code. */ + defcode "'",1,,TICK + lodsl // Get the address of the next word and skip it. + pushl %eax // Push it on the stack. + NEXT + +/* This interpreter is pretty simple, but remember that in FORTH you can always override + * it later with a more powerful one! + */ + defword "INTERPRETER",11,,INTERPRETER + .int INTERPRET,RDROP,INTERPRETER + + defcode "INTERPRET",9,,INTERPRET + call _WORD // Returns %ecx = length, %edi = pointer to word. + + // Is it in the dictionary? + xor %eax,%eax + movl %eax,interpret_is_lit // Not a literal number (not yet anyway ...) + call _FIND // Returns %eax = pointer to header or 0 if not found. + test %eax,%eax // Found? + jz 1f + + // In the dictionary. Is it an IMMEDIATE codeword? + mov %eax,%edi // %edi = dictionary entry + movb 4(%edi),%al // Get name+flags. + push %ax // Just save it for now. + call _TCFA // Convert dictionary entry (in %edi) to codeword pointer. + pop %ax + andb $F_IMMED,%al // Is IMMED flag set? + mov %edi,%eax + jnz 4f // If IMMED, jump straight to executing. + + jmp 2f + +1: // Not in the dictionary (not a word) so assume it's a literal number. + incl interpret_is_lit + call _SNUMBER // Returns the parsed number in %eax + mov %eax,%ebx + mov $LIT,%eax // The word is LIT + +2: // Are we compiling or executing? + movl var_STATE,%edx + test %edx,%edx + jz 4f // Jump if executing. + + // Compiling - just append the word to the current dictionary definition. + call _COMMA + mov interpret_is_lit,%ecx // Was it a literal? + test %ecx,%ecx + jz 3f + mov %ebx,%eax // Yes, so LIT is followed by a number. + call _COMMA +3: NEXT + +4: // Executing - run it! + mov interpret_is_lit,%ecx // Literal? + test %ecx,%ecx // Literal? + jnz 5f + + // Not a literal, execute it now. This never returns, but the codeword will + // eventually call NEXT which will reenter the loop in INTERPRETER. + jmp *(%eax) + +5: // Executing a literal, which means push it on the stack. + push %ebx + NEXT + + .data + .align 4 +interpret_is_lit: + .int 0 // Flag used to record if reading a literal + + // NB: SYSEXIT must be the last entry in the built-in dictionary. + defcode SYSEXIT,7,,SYSEXIT + pop %ebx + mov $__NR_exit,%eax + int $0x80 + +/*---------------------------------------------------------------------- + * Input buffer & initial input. + */ + .data + .align 4096 +buffer: + // XXX gives 'Warning: unterminated string; newline inserted' messages which you can ignore + .ascii "\ +\\ Define some character constants +: '\\n' 10 ; +: 'SPACE' 32 ; +: '\"' 34 ; +: ':' 58 ; + +\\ CR prints a carriage return +: CR '\\n' EMIT ; + +\\ SPACE prints a space +: SPACE 'SPACE' EMIT ; + +\\ Primitive . (DOT) function doesn't follow with a blank, so redefine it to behave like FORTH. +\\ Notice how we can trivially redefine existing functions. +: . . SPACE ; + +\\ DUP, DROP are defined in assembly for speed, but this is how you might define them +\\ in FORTH. Notice use of the scratch variables _X and _Y. +\\ : DUP _X ! _X @ _X @ ; +\\ : DROP _X ! ; + +\\ The 2... versions of the standard operators work on pairs of stack entries. They're not used +\\ very commonly so not really worth writing in assembler. Here is how they are defined in FORTH. +: 2DUP OVER OVER ; +: 2DROP DROP DROP ; + +\\ More standard FORTH words. +: 2* 2 * ; +: 2/ 2 / ; + +\\ [ and ] allow you to break into immediate mode while compiling a word. +: [ IMMEDIATE \\ define [ as an immediate word + 0 STATE ! \\ go into immediate mode + ; + +: ] + 1 STATE ! \\ go back to compile mode + ; + +\\ LITERAL takes whatever is on the stack and compiles LIT +: LITERAL IMMEDIATE + ' LIT , \\ compile LIT + , \\ compile the literal itself (from the stack) + ; + +\\ condition IF true-part THEN rest +\\ compiles to: +\\ condition 0BRANCH OFFSET true-part rest +\\ where OFFSET is the offset of 'rest' +\\ condition IF true-part ELSE false-part THEN +\\ compiles to: +\\ condition 0BRANCH OFFSET true-part BRANCH OFFSET2 false-part rest +\\ where OFFSET if the offset of false-part and OFFSET2 is the offset of rest + +\\ IF is an IMMEDIATE word which compiles 0BRANCH followed by a dummy offset, and places +\\ the address of the 0BRANCH on the stack. Later when we see THEN, we pop that address +\\ off the stack, calculate the offset, and back-fill the offset. +: IF IMMEDIATE + ' 0BRANCH , \\ compile 0BRANCH + HERE @ \\ save location of the offset on the stack + 0 , \\ compile a dummy offset +; + +: THEN IMMEDIATE + DUP + HERE @ SWAP - \\ calculate the offset from the address saved on the stack + SWAP ! \\ store the offset in the back-filled location +; + +: ELSE IMMEDIATE + ' BRANCH , \\ definite branch to just over the false-part + HERE @ \\ save location of the offset on the stack + 0 , \\ compile a dummy offset + SWAP \\ now back-fill the original (IF) offset + DUP \\ same as for THEN word above + HERE @ SWAP - + SWAP ! +; + +\\ BEGIN loop-part condition UNTIL +\\ compiles to: +\\ loop-part condition 0BRANCH OFFSET +\\ where OFFSET points back to the loop-part +\\ This is like do { loop-part } while (condition) in the C language +: BEGIN IMMEDIATE + HERE @ \\ save location on the stack +; + +: UNTIL IMMEDIATE + ' 0BRANCH , \\ compile 0BRANCH + HERE @ - \\ calculate the offset from the address saved on the stack + , \\ compile the offset here +; + +\\ BEGIN loop-part AGAIN +\\ compiles to: +\\ loop-part BRANCH OFFSET +\\ where OFFSET points back to the loop-part +\\ In other words, an infinite loop which can only be returned from with EXIT +: AGAIN IMMEDIATE + ' BRANCH , \\ compile BRANCH + HERE @ - \\ calculate the offset back + , \\ compile the offset here +; + +\\ BEGIN condition WHILE loop-part REPEAT +\\ compiles to: +\\ condition 0BRANCH OFFSET2 loop-part BRANCH OFFSET +\\ where OFFSET points back to condition (the beginning) and OFFSET2 points to after the whole piece of code +\\ So this is like a while (condition) { loop-part } loop in the C language +: WHILE IMMEDIATE + ' 0BRANCH , \\ compile 0BRANCH + HERE @ \\ save location of the offset2 on the stack + 0 , \\ compile a dummy offset2 +; + +: REPEAT IMMEDIATE + ' BRANCH , \\ compile BRANCH + SWAP \\ get the original offset (from BEGIN) + HERE @ - , \\ and compile it after BRANCH + DUP + HERE @ SWAP - \\ calculate the offset2 + SWAP ! \\ and back-fill it in the original location +; + +\\ With the looping constructs, we can now write SPACES, which writes n spaces to stdout. +: SPACES + BEGIN + SPACE \\ print a space + 1- \\ until we count down to 0 + DUP 0= + UNTIL +; + +\\ .S prints the contents of the stack. Very useful for debugging. +: .S + DSP@ \\ get current stack pointer + BEGIN + DUP @ . \\ print the stack element + 4+ \\ move up + DUP S0 @ 4- = \\ stop when we get to the top + UNTIL + DROP +; + +\\ DEPTH returns the depth of the stack. +: DEPTH S0 @ DSP@ - ; + +\\ .\" is the print string operator in FORTH. Example: .\" Something to print\" +\\ The space after the operator is the ordinary space required between words. +\\ This is tricky to define because it has to do different things depending on whether +\\ we are compiling or in immediate mode. (Thus the word is marked IMMEDIATE so it can +\\ detect this and do different things). +\\ In immediate mode we just keep reading characters and printing them until we get to +\\ the next double quote. +\\ In compile mode we have the problem of where we're going to store the string (remember +\\ that the input buffer where the string comes from may be overwritten by the time we +\\ come round to running the function). We store the string in the compiled function +\\ like this: +\\ LITSTRING, string length, string rounded up to 4 bytes, EMITSTRING, ... +: .\" IMMEDIATE + STATE @ \\ compiling? + IF + ' LITSTRING , \\ compile LITSTRING + HERE @ \\ save the address of the length word on the stack + 0 , \\ dummy length - we don't know what it is yet + BEGIN + KEY \\ get next character of the string + DUP '\"' <> + WHILE + HERE @ !b \\ store the character in the compiled image + HERE @ 1+ HERE ! \\ increment HERE pointer by 1 byte + REPEAT + DROP \\ drop the double quote character at the end + DUP \\ get the saved address of the length word + HERE @ SWAP - \\ calculate the length + 4- \\ subtract 4 (because we measured from the start of the length word) + SWAP ! \\ and back-fill the length location + HERE @ \\ round up to next multiple of 4 bytes for the remaining code + 3 + + 3 INVERT AND + HERE ! + ' EMITSTRING , \\ compile the final EMITSTRING + ELSE + \\ In immediate mode, just read characters and print them until we get + \\ to the ending double quote. Much simpler than the above code! + BEGIN + KEY + DUP '\"' = IF EXIT THEN + EMIT + AGAIN + THEN +; + +\\ While compiling, [COMPILE] WORD compiles WORD if it would otherwise be IMMEDIATE. +: [COMPILE] IMMEDIATE + WORD \\ get the next word + FIND \\ find it in the dictionary + >CFA \\ get its codeword + , \\ and compile that +; + +\\ RECURSE makes a recursive call to the current word that is being compiled. +\\ Normally while a word is being compiled, it is marked HIDDEN so that references to the +\\ same word within are calls to the previous definition of the word. +: RECURSE IMMEDIATE + LATEST @ >CFA \\ LATEST points to the word being compiled at the moment + , \\ compile it +; + + +\\ Finally print the welcome prompt. +.\" OK \" +" + +_initbufftop: + .align 4096 +buffend: + +currkey: + .int buffer +bufftop: + .int _initbufftop diff --git a/jonesforth.S.1 b/jonesforth.S.1 new file mode 100644 index 0000000..76fd353 --- /dev/null +++ b/jonesforth.S.1 @@ -0,0 +1,217 @@ +/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- + * By Richard W.M. Jones + * + * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S + */ + +#include + +/* NEXT macro. */ + .macro NEXT + lodsl + jmp *(%eax) + .endm + +/* ELF entry point. */ + .text + .globl _start +_start: + cld + mov $return_stack,%ebp // Initialise the return stack. + + mov $cold_start,%esi // Initialise interpreter. + NEXT // Run interpreter! + + .section .rodata +cold_start: // High-level code without a codeword. + .int COLD + +/* DOCOL - the interpreter! */ + .text +DOCOL: + lea -4(%ebp),%ebp // push %esi on to the return stack + movl %esi,(%ebp) + + addl $4,%eax // %eax points to codeword, so make + movl %eax,%esi // %esi point to first data word + NEXT + + + +/*---------------------------------------------------------------------- + * Fixed sized buffers for everything. + */ + .bss + +/* FORTH return stack. */ +#define RETURN_STACK_SIZE 8192 + .align 4096 + .space RETURN_STACK_SIZE +return_stack: + +/* Space for user-defined words. */ +#define USER_DEFS_SIZE 16384 + .align 4096 +user_defs_start: + .space USER_DEFS_SIZE + + + + + + +/*---------------------------------------------------------------------- + * Built-in words defined the long way. + */ +#define F_IMMED 0x80 +#define F_HIDDEN 0x40 + + .macro defcode name, namelen, flags=0, label, link=0 + .section .rodata +N_\label : + .align 4 + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .int \link // link +\label : + .int 1f // codeword + .text +1: // assembler code follows + .endm + + .macro defword name, namelen, flags=0, label, link=0 + .section .rodata +N_\label : + .align 4 + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .int \link // link +\label : + .int DOCOL // codeword - the interpreter + // list of word pointers follow + .endm + + defword "COLD",4,,COLD, + .int KEY,ECHO,RDROP,COLD + + defcode "KEY",3,,KEY,N_COLD + mov (currkey),%ebx + cmp (bufftop),%ebx + jge 1f + xor %eax,%eax + mov (%ebx),%al + push %ax + inc %ebx + mov %ebx,(currkey) + NEXT +1: + mov $0,%ebx // out of input, exit (0) + mov $__NR_exit,%eax + int $0x80 + + defcode "ECHO",4,,ECHO,N_KEY + mov $1,%ebx // 1st param: stdout + + // write needs the address of the byte to write + pop %eax + mov %al,(_echo_tmp) + mov $_echo_tmp,%ecx // 2nd param: address + + mov $1,%edx // 3rd param: nbytes = 1 + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + .bss +_echo_tmp: .space 1 + + defcode "RDROP",5,,RDROP,N_ECHO + lea 4(%ebp),%ebp // pop the return stack + NEXT + +#if 0 + defcode "R>",2,,FROMR,N_TAIL + + defcode ">R",2,,TOR,N_FROMR + + defcode ":",1,,COLON, + call nextword // get next word, the procedure name + // The next word is returned in %ebx and has length %ecx bytes. + + // Save the current value of VOCAB. + mov v_vocab,%eax + push %eax + + // Change VOCAB to point to our new word's header (at LATEST). + mov v_latest,%edi + mov %edi,v_vocab + + // We'll start by writing the word's header at LATEST; the header + // is just length byte, the word itself, link pointer. + mov %ecx,(%edi) // Length byte + inc %edi + mov %ebx,%esi // Copy the string. + rep movsb + // Round up to the next multiple of 4 so that the link pointer + // is aligned. + or $3,%edi + inc %edi + pop %eax // Link pointer, points to old VOCAB. + mov %eax,(%edi) + add $4,%edi + // Write the codeword, which for user-defined words is always a + // pointer to the FORTH indirect threaded interpreter. + movl $DOCOL,(%edi) + add $4,%edi + + // Finally, update LATEST. As we go along compiling, we'll be + // writing compiled codewords to the LATEST pointer (and moving + // it along each time). + mov %edi,v_latest + + movl $1,v_state // go into compiling mode + ret + + defcode ";",1,F_IMMED,SEMICOLON,N_COLON + // XXX + +#endif + + defcode SYSEXIT,7,,SYSEXIT, //N_COLON + pop %ebx + mov $__NR_exit,%eax + int $0x80 + +/*---------------------------------------------------------------------- + * Variables containing the interpreter's state. + */ + .data + + .align 4 +v_state: + .int 0 // 0 = immediate, 1 = compiling +v_vocab: + .int N_SYSEXIT // last word in the dictionary +v_latest: + .int user_defs_start // pointer to next space for user definition or current compiled def + +/*---------------------------------------------------------------------- + * Input buffer & initial input. + */ + .data + .align 4096 +buffer: + .ascii "TEST OF READING WORDS 1 2 3" + +_initbufftop: + .align 4096 +buffend: + +currkey: + .int buffer +bufftop: + .int _initbufftop diff --git a/jonesforth.S.10 b/jonesforth.S.10 new file mode 100644 index 0000000..0a40b05 --- /dev/null +++ b/jonesforth.S.10 @@ -0,0 +1,673 @@ +/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- + * By Richard W.M. Jones + * + * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S + */ + +#include + +/* NOTES------------------------------------------------------------------------------------------------------------------- + +Need to say something about $ before constants. + +And about je/jne/ja/jb/jbe/etc + + + + +*/ + +/* NEXT macro. */ + .macro NEXT + lodsl + jmp *(%eax) + .endm + +/* Macros to deal with the return stack. */ + .macro PUSHRSP reg + lea -4(%ebp),%ebp // push reg on to return stack + movl \reg,(%ebp) + .endm + + .macro POPRSP reg + mov (%ebp),\reg // pop top of return stack to reg + lea 4(%ebp),%ebp + .endm + +/* ELF entry point. */ + .text + .globl _start +_start: + cld + mov $return_stack,%ebp // Initialise the return stack. + + mov $cold_start,%esi // Initialise interpreter. + NEXT // Run interpreter! + + .section .rodata +cold_start: // High-level code without a codeword. + .int COLD + +/* DOCOL - the interpreter! */ + .text + .align 4 +DOCOL: + PUSHRSP %esi // push %esi on to the return stack + addl $4,%eax // %eax points to codeword, so make + movl %eax,%esi // %esi point to first data word + NEXT + +/*---------------------------------------------------------------------- + * Fixed sized buffers for everything. + */ + .bss + +/* FORTH return stack. */ +#define RETURN_STACK_SIZE 8192 + .align 4096 + .space RETURN_STACK_SIZE +return_stack: + +/* Space for user-defined words. */ +#define USER_DEFS_SIZE 16384 + .align 4096 +user_defs_start: + .space USER_DEFS_SIZE + + + + + + +/*---------------------------------------------------------------------- + * Built-in words defined the long way. + */ +#define F_IMMED 0x80 +#define F_HIDDEN 0x20 + + // Store the chain of links. + .set link,0 + + .macro defcode name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int code_\label // codeword + .text + .align 4 + .globl code_\label +code_\label : // assembler code follows + .endm + + .macro defword name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int DOCOL // codeword - the interpreter + // list of word pointers follow + .endm + + .macro defvar name, namelen, flags=0, label, initial=0 + defcode \name,\namelen,\flags,\label + push $var_\name + NEXT + .data + .align 4 +var_\name : + .int \initial + .endm + + // Some easy ones, written in assembly for speed + defcode "DROP",4,,DROP + pop %eax // drop top of stack + NEXT + + defcode "DUP",3,,DUP + pop %eax // duplicate top of stack + push %eax + push %eax + NEXT + + defcode "SWAP",4,,SWAP + pop %eax // swap top of stack + pop %ebx + push %eax + push %ebx + NEXT + + defcode "OVER",4,,OVER + mov 4(%esp),%eax // get the second element of stack + push %eax // and push it on top + NEXT + + defcode "1+",2,,INCR + incl (%esp) // increment top of stack + NEXT + + defcode "1-",2,,DECR + decl (%esp) // decrement top of stack + NEXT + + defcode "+",1,,ADD + pop %eax + addl %eax,(%esp) + NEXT + + defcode "-",1,,SUB + pop %eax + subl %eax,(%esp) + NEXT + + defcode "*",1,,MUL + pop %eax + pop %ebx + imull %ebx,%eax + push %eax // ignore overflow + NEXT + + defcode "/",1,,DIV + xor %edx,%edx + pop %ebx + pop %eax + idivl %ebx + push %eax // push quotient + NEXT + + defcode "MOD",3,,MOD + xor %edx,%edx + pop %ebx + pop %eax + idivl %ebx + push %edx // push remainder + NEXT + + // COLD must not return (ie. must not call EXIT). + defword "COLD",4,,COLD + // XXX reinitialisation of the interpreter + .int INTERPRETER // call the interpreter loop (never returns) + .int LIT,1,SYSEXIT // hmmm, but in case it does, exit(1). + + defcode "EXIT",4,,EXIT + POPRSP %esi // pop return stack into %esi + NEXT + + defcode "LIT",3,,LIT + // %esi points to the next command, but in this case it points to the next + // literal 32 bit integer. Get that literal into %eax and increment %esi. + // On x86, it's a convenient single byte instruction! (cf. NEXT macro) + lodsl + push %eax // push the literal number on to stack + NEXT + +#if 0 + defcode "0SKIP",5,,ZSKIP + // If the top of stack is zero, skip the next instruction. + pop %eax + test %eax,%eax + jnz 1f + lodsl // this does the skip +1: NEXT +#endif + + defcode "!",1,,STORE + pop %ebx // address to store at + pop %eax // data to store there + mov %eax,(%ebx) // store it + NEXT + + defcode "@",1,,FETCH + pop %ebx // address to fetch + mov (%ebx),%eax // fetch it + push %eax // push value onto stack + NEXT + + // The STATE variable is 0 for execute mode, != 0 for compile mode + defvar "STATE",5,,STATE + + // This points to where compiled words go. + defvar "HERE",4,,HERE,user_defs_start + + // This is the last definition in the dictionary. + defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary + + // _X, _Y and _Z are scratch variables used by standard words. + defvar "_X",2,,TX + defvar "_Y",2,,TY + defvar "_Z",2,,TZ + + defcode ">R",2,,TOR + pop %eax // pop parameter stack into %eax + PUSHRSP %eax // push it on to the return stack + NEXT + + defcode "R>",2,,FROMR + POPRSP %eax // pop return stack on to %eax + push %eax // and push on to parameter stack + NEXT + + defcode "RSP@",4,,RSPFETCH + push %ebp + NEXT + + defcode "RSP!",4,,RSPSTORE + pop %ebp + NEXT + + defcode "RDROP",5,,RDROP + lea 4(%ebp),%ebp // pop return stack and throw away + NEXT + + defcode "KEY",3,,KEY + call _KEY + push %eax // push return value on stack + NEXT +_KEY: + mov (currkey),%ebx + cmp (bufftop),%ebx + jge 1f + xor %eax,%eax + mov (%ebx),%al + inc %ebx + mov %ebx,(currkey) + ret + +1: // out of input; use read(2) to fetch more input from stdin + xor %ebx,%ebx // 1st param: stdin + mov $buffer,%ecx // 2nd param: buffer + mov %ecx,currkey + mov $buffend-buffer,%edx // 3rd param: max length + mov $__NR_read,%eax // syscall: read + int $0x80 + test %eax,%eax // If %eax <= 0, then exit. + jbe 2f + addl %eax,%ecx // buffer+%eax = bufftop + mov %ecx,bufftop + jmp _KEY + +2: // error or out of input: exit + xor %ebx,%ebx + mov $__NR_exit,%eax // syscall: exit + int $0x80 + + defcode "EMIT",4,,EMIT + pop %eax + call _EMIT + NEXT +_EMIT: + mov $1,%ebx // 1st param: stdout + + // write needs the address of the byte to write + mov %al,(2f) + mov $2f,%ecx // 2nd param: address + + mov $1,%edx // 3rd param: nbytes = 1 + + mov $__NR_write,%eax // write syscall + int $0x80 + ret + + .bss +2: .space 1 // scratch used by EMIT + + defcode "WORD",4,,WORD + call _WORD + push %ecx // push length + push %edi // push base address + NEXT + +_WORD: + /* Search for first non-blank character. Also skip \ comments. */ +1: + call _KEY // get next key, returned in %eax + cmpb $'\\',%al // start of a comment? + je 3f // if so, skip the comment + cmpb $' ',%al + jbe 1b // if so, keep looking + + /* Search for the end of the word, storing chars as we go. */ + mov $5f,%edi // pointer to return buffer +2: + stosb // add character to return buffer + call _KEY // get next key, returned in %al + cmpb $' ',%al // is blank? + ja 2b // if not, keep looping + + /* Return the word (well, the static buffer) and length. */ + sub $5f,%edi + mov %edi,%ecx // return length of the word + mov $5f,%edi // return address of the word + ret + + /* Code to skip \ comments to end of the current line. */ +3: + call _KEY + cmpb $'\n',%al // end of line yet? + jne 3b + jmp 1b + + .bss + // A static buffer where WORD returns. Subsequent calls + // overwrite this buffer. Maximum word length is 32 chars. +5: .space 32 + + defcode "EMITWORD",8,,EMITWORD + mov $1,%ebx // 1st param: stdout + pop %ecx // 2nd param: address of string + pop %edx // 3rd param: length of string + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + defcode ".",1,,DOT + pop %eax // Get the number to print into %eax + call _DOT // Easier to do this recursively ... + NEXT +_DOT: + mov $10,%ecx // Base 10 +1: + cmp %ecx,%eax + jb 2f + xor %edx,%edx // %edx:%eax / %ecx -> quotient %eax, remainder %edx + idivl %ecx + pushl %edx + call _DOT + popl %eax + jmp 1b +2: + xor %ah,%ah + aam $10 + cwde + addl $'0',%eax + call _EMIT + ret + + // Parse a number from a string on the stack -- almost the opposite of . (DOT) + // Note that there is absolutely no error checking. In particular the length of the + // string must be >= 1 bytes. + defcode "SNUMBER",7,,SNUMBER + pop %edi + pop %ecx + call _SNUMBER + push %eax + NEXT +_SNUMBER: + xor %eax,%eax + xor %ebx,%ebx +1: + imull $10,%eax // %eax *= 10 + movb (%edi),%bl + inc %edi + subb $'0',%bl // ASCII -> digit + add %ebx,%eax + dec %ecx + jnz 1b + ret + + defcode "FIND",4,,FIND + pop %edi // %edi = address + pop %ecx // %ecx = length + call _FIND + push %eax + NEXT + +_FIND: + push %esi // Save %esi so we can use it in string comparison. + + // Now we start searching backwards through the dictionary for this word. + mov var_LATEST,%edx // LATEST points to name header of the latest word in the dictionary +1: + test %edx,%edx // NULL pointer? (end of the linked list) + je 4f + + // Compare the length expected and the length of the word. + // Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery + // this won't pick the word (the length will appear to be wrong). + xor %eax,%eax + movb 4(%edx),%al // %al = flags+length field + andb $(F_HIDDEN|0x1f),%al // %al = name length + cmpb %cl,%al // Length is the same? + jne 2f + + // Compare the strings in detail. + push %ecx // Save the length + push %edi // Save the address (repe cmpsb will move this pointer) + lea 5(%edx),%esi // Dictionary string we are checking against. + repe cmpsb // Compare the strings. + pop %edi + pop %ecx + jne 2f // Not the same. + + // The strings are the same - return the header pointer in %eax + pop %esi + mov %edx,%eax + ret + +2: + mov (%edx),%edx // Move back through the link field to the previous word + jmp 1b // .. and loop. + +4: // Not found. + pop %esi + xor %eax,%eax // Return zero to indicate not found. + ret + + defcode ">CFA",4,,TCFA // DEA -> Codeword address + pop %edi + call _TCFA + push %edi + NEXT +_TCFA: + xor %eax,%eax + add $4,%edi // Skip link pointer. + movb (%edi),%al // Load flags+len into %al. + inc %edi // Skip flags+len byte. + andb $0x1f,%al // Just the length, not the flags. + add %eax,%edi // Skip the name. + addl $3,%edi // The codeword is 4-byte aligned. + andl $~3,%edi + ret + +#if 0 + defword "'",1,,TICK + .int WORD // Get the following word. + .int FIND // Look it up in the dictionary. + .int DUP // If not found, skip >CFA (TCFA) instruction. + .int ZSKIP + .int TCFA // Convert to a codeword pointer. + .int EXIT // Return. +#endif + + defcode ":",1,,COLON + + // Get the word and create a dictionary entry header for it. + call _WORD // Returns %ecx = length, %edi = pointer to word. + mov %edi,%ebx // %ebx = address of the word + + movl var_HERE,%edi // %edi is the address of the header + movl var_LATEST,%eax // Get link pointer + stosl // and store it in the header. + + mov %cl,%al // Get the length. + orb $F_HIDDEN,%al // Set the HIDDEN flag on this entry. + stosb // Store the length/flags byte. + push %esi + mov %ebx,%esi // %esi = word + rep movsb // Copy the word + pop %esi + addl $3,%edi // Align to next 4 byte boundary. + andl $~3,%edi + + movl $DOCOL,%eax // The codeword for user-created words is always DOCOL (the interpreter) + stosl + + // Header built, so now update LATEST and HERE. + // We'll be compiling words and putting them HERE. + movl var_HERE,%eax + movl %eax,var_LATEST + movl %edi,var_HERE + + // And go into compile mode by setting STATE to 1. + xor %eax,%eax + inc %eax + mov %eax,var_STATE + NEXT + + defcode ",",1,,COMMA + pop %eax // Code pointer to store. + call _COMMA + NEXT +_COMMA: + movl var_HERE,%edi // HERE + stosl // Store it. + movl %edi,var_HERE // Update HERE (incremented) + ret + + defcode "HIDDEN",6,,HIDDEN + call _HIDDEN + NEXT +_HIDDEN: + movl var_LATEST,%edi // LATEST word. + addl $4,%edi // Point to name/flags byte. + xorb $F_HIDDEN,(%edi) // Toggle the HIDDEN bit. + ret + + defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE + call _IMMEDIATE + NEXT +_IMMEDIATE: + movl var_LATEST,%edi // LATEST word. + addl $4,%edi // Point to name/flags byte. + xorb $F_IMMED,(%edi) // Toggle the IMMED bit. + ret + + defcode ";",1,F_IMMED,SEMICOLON + movl $EXIT,%eax // EXIT is the final codeword in compiled words. + call _COMMA // Store it. + call _HIDDEN // Toggle the HIDDEN flag (unhides the new word). + xor %eax,%eax // Set STATE to 0 (back to execute mode). + movl %eax,var_STATE + NEXT + +/* This interpreter is pretty simple, but remember that in FORTH you can always override + * it later with a more powerful one! + */ + defword "INTERPRETER",11,,INTERPRETER + .int INTERPRET,RDROP,INTERPRETER + + defcode "INTERPRET",9,,INTERPRET + call _WORD // Returns %ecx = length, %edi = pointer to word. + + // Is it in the dictionary? + call _FIND // Returns %eax = pointer to header or 0 if not found. + test %eax,%eax // Found? + jz 1f + + // In the dictionary. Is it an IMMEDIATE codeword? + mov %eax,%edi // %edi = dictionary entry + movb 4(%edi),%al // Get name+flags. + push %ax // Just save it for now. + call _TCFA // Convert dictionary entry (in %edi) to codeword pointer. + pop %ax + andb $F_IMMED,%al // Is IMMED flag set? + mov %edi,%eax + jnz 3f // If IMMED, jump straight to executing. + + jmp 2f + +1: // Not in the dictionary (not a word) so assume it's a number. + call _SNUMBER // Returns the parsed number in %eax + mov %eax,%ebx + mov $LIT,%eax // The word is LIT + +2: // Are we compiling or executing? + movl var_STATE,%edx + test %edx,%edx + jz 3f // Jump if executing. + + // Compiling - just append the word to the current dictionary definition. + call _COMMA + cmp $LIT,%eax // Was it LIT? + jne 4f + mov %ebx,%eax // Yes, so LIT is followed by a number. + call _COMMA + NEXT + +3: // Executing - run it! + cmp $LIT,%eax // Literal? + je 4f + // Not a literal, execute it now. This never returns, but the codeword will + // eventually call NEXT which will reenter the loop in INTERPRETER. + jmp *(%eax) + +4: // Executing a literal, which means push it on the stack. + push %ebx + NEXT + + // NB: SYSEXIT must be the last entry in the built-in dictionary. + defcode SYSEXIT,7,,SYSEXIT + pop %ebx + mov $__NR_exit,%eax + int $0x80 + +/*---------------------------------------------------------------------- + * Input buffer & initial input. + */ + .data + .align 4096 +buffer: + // XXX gives 'Warning: unterminated string; newline inserted' messages which you can ignore + .ascii "\ +\\ Define some character constants +: '\\n' 10 ; +: 'SPACE' 32 ; +: '\"' 34 ; + +\\ CR prints a carriage return +: CR '\\n' EMIT ; + +\\ SPACE prints a space +: SPACE 'SPACE' EMIT ; + +\\ Primitive . (DOT) function doesn't follow with a blank, so redefine it to behave like FORTH. +\\ Notice how we can trivially redefine existing functions. +: . . SPACE ; + +\\ XXX SPACES + +\\ DUP, DROP are defined in assembly for speed, but this is how you might define them +\\ in FORTH. Notice use of the scratch variables _X and _Y. +\\ : DUP _X ! _X @ _X @ ; +\\ : DROP _X ! ; + + + +\\ Finally print the welcome prompt. +79 EMIT 75 EMIT 'SPACE' EMIT +" + +_initbufftop: + .align 4096 +buffend: + +currkey: + .int buffer +bufftop: + .int _initbufftop diff --git a/jonesforth.S.11 b/jonesforth.S.11 new file mode 100644 index 0000000..3f87d36 --- /dev/null +++ b/jonesforth.S.11 @@ -0,0 +1,914 @@ +/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- + * By Richard W.M. Jones + * + * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S + */ + +#include + +/* NOTES------------------------------------------------------------------------------------------------------------------- + +Need to say something about $ before constants. + +And about je/jne/ja/jb/jbe/etc + + + + +*/ + +/* NEXT macro. */ + .macro NEXT + lodsl + jmp *(%eax) + .endm + +/* Macros to deal with the return stack. */ + .macro PUSHRSP reg + lea -4(%ebp),%ebp // push reg on to return stack + movl \reg,(%ebp) + .endm + + .macro POPRSP reg + mov (%ebp),\reg // pop top of return stack to reg + lea 4(%ebp),%ebp + .endm + +/* ELF entry point. */ + .text + .globl _start +_start: + cld + mov $return_stack,%ebp // Initialise the return stack. + + mov $cold_start,%esi // Initialise interpreter. + NEXT // Run interpreter! + + .section .rodata +cold_start: // High-level code without a codeword. + .int COLD + +/* DOCOL - the interpreter! */ + .text + .align 4 +DOCOL: + PUSHRSP %esi // push %esi on to the return stack + addl $4,%eax // %eax points to codeword, so make + movl %eax,%esi // %esi point to first data word + NEXT + +/*---------------------------------------------------------------------- + * Fixed sized buffers for everything. + */ + .bss + +/* FORTH return stack. */ +#define RETURN_STACK_SIZE 8192 + .align 4096 + .space RETURN_STACK_SIZE +return_stack: + +/* Space for user-defined words. */ +#define USER_DEFS_SIZE 16384 + .align 4096 +user_defs_start: + .space USER_DEFS_SIZE + + + + + + +/*---------------------------------------------------------------------- + * Built-in words defined the long way. + */ +#define F_IMMED 0x80 +#define F_HIDDEN 0x20 + + // Store the chain of links. + .set link,0 + + .macro defcode name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int code_\label // codeword + .text + .align 4 + .globl code_\label +code_\label : // assembler code follows + .endm + + .macro defword name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int DOCOL // codeword - the interpreter + // list of word pointers follow + .endm + + .macro defvar name, namelen, flags=0, label, initial=0 + defcode \name,\namelen,\flags,\label + push $var_\name + NEXT + .data + .align 4 +var_\name : + .int \initial + .endm + + // Some easy ones, written in assembly for speed + defcode "DROP",4,,DROP + pop %eax // drop top of stack + NEXT + + defcode "DUP",3,,DUP + pop %eax // duplicate top of stack + push %eax + push %eax + NEXT + + defcode "SWAP",4,,SWAP + pop %eax // swap top of stack + pop %ebx + push %eax + push %ebx + NEXT + + defcode "OVER",4,,OVER + mov 4(%esp),%eax // get the second element of stack + push %eax // and push it on top + NEXT + + defcode "1+",2,,INCR + incl (%esp) // increment top of stack + NEXT + + defcode "1-",2,,DECR + decl (%esp) // decrement top of stack + NEXT + + defcode "+",1,,ADD + pop %eax + addl %eax,(%esp) + NEXT + + defcode "-",1,,SUB + pop %eax + subl %eax,(%esp) + NEXT + + defcode "*",1,,MUL + pop %eax + pop %ebx + imull %ebx,%eax + push %eax // ignore overflow + NEXT + + defcode "/",1,,DIV + xor %edx,%edx + pop %ebx + pop %eax + idivl %ebx + push %eax // push quotient + NEXT + + defcode "MOD",3,,MOD + xor %edx,%edx + pop %ebx + pop %eax + idivl %ebx + push %edx // push remainder + NEXT + + defcode "=",1,,EQU // top two words are equal? + pop %eax + pop %ebx + cmp %ebx,%eax + je 1f + pushl $0 + NEXT +1: pushl $1 + NEXT + + defcode "<>",2,,NEQU // top two words are not equal? + pop %eax + pop %ebx + cmp %ebx,%eax + je 1f + pushl $1 + NEXT +1: pushl $0 + NEXT + + defcode "0=",2,,ZEQU // top of stack equals 0? + pop %eax + test %eax,%eax + jz 1f + pushl $0 + NEXT +1: pushl $1 + NEXT + + defcode "AND",3,,AND + pop %eax + andl %eax,(%esp) + NEXT + + defcode "OR",2,,OR + pop %eax + orl %eax,(%esp) + NEXT + + defcode "INVERT",6,,INVERT + notl (%esp) + NEXT + + // COLD must not return (ie. must not call EXIT). + defword "COLD",4,,COLD + // XXX reinitialisation of the interpreter + .int INTERPRETER // call the interpreter loop (never returns) + .int LIT,1,SYSEXIT // hmmm, but in case it does, exit(1). + + defcode "EXIT",4,,EXIT + POPRSP %esi // pop return stack into %esi + NEXT + + defcode "LIT",3,,LIT + // %esi points to the next command, but in this case it points to the next + // literal 32 bit integer. Get that literal into %eax and increment %esi. + // On x86, it's a convenient single byte instruction! (cf. NEXT macro) + lodsl + push %eax // push the literal number on to stack + NEXT + + defcode "LITSTRING",9,,LITSTRING + lodsl // get the length of the string + push %eax // push it on the stack + push %esi // push the address of the start of the string + addl %eax,%esi // skip past the string + addl $3,%esi // but round up to next 4 byte boundary + andl $~3,%esi + NEXT + + defcode "BRANCH",6,,BRANCH + add (%esi),%esi // add the offset to the instruction pointer + NEXT + + defcode "0BRANCH",7,,ZBRANCH + pop %eax + test %eax,%eax // top of stack is zero? + jz code_BRANCH // if so, jump back to the branch function above + lodsl // otherwise we need to skip the offset + NEXT + + defcode "!",1,,STORE + pop %ebx // address to store at + pop %eax // data to store there + mov %eax,(%ebx) // store it + NEXT + + defcode "@",1,,FETCH + pop %ebx // address to fetch + mov (%ebx),%eax // fetch it + push %eax // push value onto stack + NEXT + +/* ! and @ (STORE and FETCH) store 32-bit words. It's also useful to be able to read and write bytes. + * I don't know whether FORTH has these words, so I invented my own, called !b and @b. + * Byte-oriented operations only work on architectures which permit them (i386 is one of those). + */ + defcode "!b",2,,STOREBYTE + pop %ebx // address to store at + pop %eax // data to store there + movb %al,(%ebx) // store it + NEXT + + defcode "@b",2,,FETCHBYTE + pop %ebx // address to fetch + xor %eax,%eax + movb (%ebx),%al // fetch it + push %eax // push value onto stack + NEXT + + // The STATE variable is 0 for execute mode, != 0 for compile mode + defvar "STATE",5,,STATE + + // This points to where compiled words go. + defvar "HERE",4,,HERE,user_defs_start + + // This is the last definition in the dictionary. + defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary + + // _X, _Y and _Z are scratch variables used by standard words. + defvar "_X",2,,TX + defvar "_Y",2,,TY + defvar "_Z",2,,TZ + + defcode "DSP@",4,,DSPFETCH + mov %esp,%eax + push %eax + NEXT + + defcode "DSP!",4,,DSPSTORE + pop %esp + NEXT + + defcode ">R",2,,TOR + pop %eax // pop parameter stack into %eax + PUSHRSP %eax // push it on to the return stack + NEXT + + defcode "R>",2,,FROMR + POPRSP %eax // pop return stack on to %eax + push %eax // and push on to parameter stack + NEXT + + defcode "RSP@",4,,RSPFETCH + push %ebp + NEXT + + defcode "RSP!",4,,RSPSTORE + pop %ebp + NEXT + + defcode "RDROP",5,,RDROP + lea 4(%ebp),%ebp // pop return stack and throw away + NEXT + + defcode "KEY",3,,KEY + call _KEY + push %eax // push return value on stack + NEXT +_KEY: + mov (currkey),%ebx + cmp (bufftop),%ebx + jge 1f + xor %eax,%eax + mov (%ebx),%al + inc %ebx + mov %ebx,(currkey) + ret + +1: // out of input; use read(2) to fetch more input from stdin + xor %ebx,%ebx // 1st param: stdin + mov $buffer,%ecx // 2nd param: buffer + mov %ecx,currkey + mov $buffend-buffer,%edx // 3rd param: max length + mov $__NR_read,%eax // syscall: read + int $0x80 + test %eax,%eax // If %eax <= 0, then exit. + jbe 2f + addl %eax,%ecx // buffer+%eax = bufftop + mov %ecx,bufftop + jmp _KEY + +2: // error or out of input: exit + xor %ebx,%ebx + mov $__NR_exit,%eax // syscall: exit + int $0x80 + + defcode "EMIT",4,,EMIT + pop %eax + call _EMIT + NEXT +_EMIT: + mov $1,%ebx // 1st param: stdout + + // write needs the address of the byte to write + mov %al,(2f) + mov $2f,%ecx // 2nd param: address + + mov $1,%edx // 3rd param: nbytes = 1 + + mov $__NR_write,%eax // write syscall + int $0x80 + ret + + .bss +2: .space 1 // scratch used by EMIT + + defcode "WORD",4,,WORD + call _WORD + push %ecx // push length + push %edi // push base address + NEXT + +_WORD: + /* Search for first non-blank character. Also skip \ comments. */ +1: + call _KEY // get next key, returned in %eax + cmpb $'\\',%al // start of a comment? + je 3f // if so, skip the comment + cmpb $' ',%al + jbe 1b // if so, keep looking + + /* Search for the end of the word, storing chars as we go. */ + mov $5f,%edi // pointer to return buffer +2: + stosb // add character to return buffer + call _KEY // get next key, returned in %al + cmpb $' ',%al // is blank? + ja 2b // if not, keep looping + + /* Return the word (well, the static buffer) and length. */ + sub $5f,%edi + mov %edi,%ecx // return length of the word + mov $5f,%edi // return address of the word + ret + + /* Code to skip \ comments to end of the current line. */ +3: + call _KEY + cmpb $'\n',%al // end of line yet? + jne 3b + jmp 1b + + .bss + // A static buffer where WORD returns. Subsequent calls + // overwrite this buffer. Maximum word length is 32 chars. +5: .space 32 + + defcode "EMITSTRING",10,,EMITSTRING + mov $1,%ebx // 1st param: stdout + pop %ecx // 2nd param: address of string + pop %edx // 3rd param: length of string + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + defcode ".",1,,DOT + pop %eax // Get the number to print into %eax + call _DOT // Easier to do this recursively ... + NEXT +_DOT: + mov $10,%ecx // Base 10 +1: + cmp %ecx,%eax + jb 2f + xor %edx,%edx // %edx:%eax / %ecx -> quotient %eax, remainder %edx + idivl %ecx + pushl %edx + call _DOT + popl %eax + jmp 1b +2: + xor %ah,%ah + aam $10 + cwde + addl $'0',%eax + call _EMIT + ret + + // Parse a number from a string on the stack -- almost the opposite of . (DOT) + // Note that there is absolutely no error checking. In particular the length of the + // string must be >= 1 bytes. + defcode "SNUMBER",7,,SNUMBER + pop %edi + pop %ecx + call _SNUMBER + push %eax + NEXT +_SNUMBER: + xor %eax,%eax + xor %ebx,%ebx +1: + imull $10,%eax // %eax *= 10 + movb (%edi),%bl + inc %edi + subb $'0',%bl // ASCII -> digit + add %ebx,%eax + dec %ecx + jnz 1b + ret + + defcode "FIND",4,,FIND + pop %edi // %edi = address + pop %ecx // %ecx = length + call _FIND + push %eax + NEXT + +_FIND: + push %esi // Save %esi so we can use it in string comparison. + + // Now we start searching backwards through the dictionary for this word. + mov var_LATEST,%edx // LATEST points to name header of the latest word in the dictionary +1: + test %edx,%edx // NULL pointer? (end of the linked list) + je 4f + + // Compare the length expected and the length of the word. + // Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery + // this won't pick the word (the length will appear to be wrong). + xor %eax,%eax + movb 4(%edx),%al // %al = flags+length field + andb $(F_HIDDEN|0x1f),%al // %al = name length + cmpb %cl,%al // Length is the same? + jne 2f + + // Compare the strings in detail. + push %ecx // Save the length + push %edi // Save the address (repe cmpsb will move this pointer) + lea 5(%edx),%esi // Dictionary string we are checking against. + repe cmpsb // Compare the strings. + pop %edi + pop %ecx + jne 2f // Not the same. + + // The strings are the same - return the header pointer in %eax + pop %esi + mov %edx,%eax + ret + +2: + mov (%edx),%edx // Move back through the link field to the previous word + jmp 1b // .. and loop. + +4: // Not found. + pop %esi + xor %eax,%eax // Return zero to indicate not found. + ret + + defcode ">CFA",4,,TCFA // DEA -> Codeword address + pop %edi + call _TCFA + push %edi + NEXT +_TCFA: + xor %eax,%eax + add $4,%edi // Skip link pointer. + movb (%edi),%al // Load flags+len into %al. + inc %edi // Skip flags+len byte. + andb $0x1f,%al // Just the length, not the flags. + add %eax,%edi // Skip the name. + addl $3,%edi // The codeword is 4-byte aligned. + andl $~3,%edi + ret + + defcode "CHAR",4,,CHAR + call _WORD // Returns %ecx = length, %edi = pointer to word. + xor %eax,%eax + movb (%edi),%al // Get the first character of the word. + push %eax // Push it onto the stack. + NEXT + + defcode ":",1,,COLON + + // Get the word and create a dictionary entry header for it. + call _WORD // Returns %ecx = length, %edi = pointer to word. + mov %edi,%ebx // %ebx = address of the word + + movl var_HERE,%edi // %edi is the address of the header + movl var_LATEST,%eax // Get link pointer + stosl // and store it in the header. + + mov %cl,%al // Get the length. + orb $F_HIDDEN,%al // Set the HIDDEN flag on this entry. + stosb // Store the length/flags byte. + push %esi + mov %ebx,%esi // %esi = word + rep movsb // Copy the word + pop %esi + addl $3,%edi // Align to next 4 byte boundary. + andl $~3,%edi + + movl $DOCOL,%eax // The codeword for user-created words is always DOCOL (the interpreter) + stosl + + // Header built, so now update LATEST and HERE. + // We'll be compiling words and putting them HERE. + movl var_HERE,%eax + movl %eax,var_LATEST + movl %edi,var_HERE + + // And go into compile mode by setting STATE to 1. + movl $1,var_STATE + NEXT + + defcode ",",1,,COMMA + pop %eax // Code pointer to store. + call _COMMA + NEXT +_COMMA: + movl var_HERE,%edi // HERE + stosl // Store it. + movl %edi,var_HERE // Update HERE (incremented) + ret + + defcode "HIDDEN",6,,HIDDEN + call _HIDDEN + NEXT +_HIDDEN: + movl var_LATEST,%edi // LATEST word. + addl $4,%edi // Point to name/flags byte. + xorb $F_HIDDEN,(%edi) // Toggle the HIDDEN bit. + ret + + defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE + call _IMMEDIATE + NEXT +_IMMEDIATE: + movl var_LATEST,%edi // LATEST word. + addl $4,%edi // Point to name/flags byte. + xorb $F_IMMED,(%edi) // Toggle the IMMED bit. + ret + + defcode ";",1,F_IMMED,SEMICOLON + movl $EXIT,%eax // EXIT is the final codeword in compiled words. + call _COMMA // Store it. + call _HIDDEN // Toggle the HIDDEN flag (unhides the new word). + xor %eax,%eax // Set STATE to 0 (back to execute mode). + movl %eax,var_STATE + NEXT + +/* This definiton of ' (TICK) is strictly cheating - it also only works in compiled code. */ + defcode "'",1,,TICK + lodsl // Get the address of the next word and skip it. + pushl %eax // Push it on the stack. + NEXT + +/* This interpreter is pretty simple, but remember that in FORTH you can always override + * it later with a more powerful one! + */ + defword "INTERPRETER",11,,INTERPRETER + .int INTERPRET,RDROP,INTERPRETER + + defcode "INTERPRET",9,,INTERPRET + call _WORD // Returns %ecx = length, %edi = pointer to word. + + // Is it in the dictionary? + xor %eax,%eax + movl %eax,interpret_is_lit // Not a literal number (not yet anyway ...) + call _FIND // Returns %eax = pointer to header or 0 if not found. + test %eax,%eax // Found? + jz 1f + + // In the dictionary. Is it an IMMEDIATE codeword? + mov %eax,%edi // %edi = dictionary entry + movb 4(%edi),%al // Get name+flags. + push %ax // Just save it for now. + call _TCFA // Convert dictionary entry (in %edi) to codeword pointer. + pop %ax + andb $F_IMMED,%al // Is IMMED flag set? + mov %edi,%eax + jnz 4f // If IMMED, jump straight to executing. + + jmp 2f + +1: // Not in the dictionary (not a word) so assume it's a literal number. + incl interpret_is_lit + call _SNUMBER // Returns the parsed number in %eax + mov %eax,%ebx + mov $LIT,%eax // The word is LIT + +2: // Are we compiling or executing? + movl var_STATE,%edx + test %edx,%edx + jz 4f // Jump if executing. + + // Compiling - just append the word to the current dictionary definition. + call _COMMA + mov interpret_is_lit,%ecx // Was it a literal? + test %ecx,%ecx + jz 3f + mov %ebx,%eax // Yes, so LIT is followed by a number. + call _COMMA +3: NEXT + +4: // Executing - run it! + mov interpret_is_lit,%ecx // Literal? + test %ecx,%ecx // Literal? + jnz 5f + + // Not a literal, execute it now. This never returns, but the codeword will + // eventually call NEXT which will reenter the loop in INTERPRETER. + jmp *(%eax) + +5: // Executing a literal, which means push it on the stack. + push %ebx + NEXT + + .data + .align 4 +interpret_is_lit: + .int 0 // Flag used to record if reading a literal + + // NB: SYSEXIT must be the last entry in the built-in dictionary. + defcode SYSEXIT,7,,SYSEXIT + pop %ebx + mov $__NR_exit,%eax + int $0x80 + +/*---------------------------------------------------------------------- + * Input buffer & initial input. + */ + .data + .align 4096 +buffer: + // XXX gives 'Warning: unterminated string; newline inserted' messages which you can ignore + .ascii "\ +\\ Define some character constants +: '\\n' 10 ; +: 'SPACE' 32 ; +: '\"' 34 ; +: ':' 58 ; + +\\ CR prints a carriage return +: CR '\\n' EMIT ; + +\\ SPACE prints a space +: SPACE 'SPACE' EMIT ; + +\\ Primitive . (DOT) function doesn't follow with a blank, so redefine it to behave like FORTH. +\\ Notice how we can trivially redefine existing functions. +: . . SPACE ; + +\\ DUP, DROP are defined in assembly for speed, but this is how you might define them +\\ in FORTH. Notice use of the scratch variables _X and _Y. +\\ : DUP _X ! _X @ _X @ ; +\\ : DROP _X ! ; + +\\ [ and ] allow you to break into immediate mode while compiling a word. +: [ IMMEDIATE \\ define [ as an immediate word + 0 STATE ! \\ go into immediate mode + ; + +: ] + 1 STATE ! \\ go back to compile mode + ; + +\\ LITERAL takes whatever is on the stack and compiles LIT +: LITERAL IMMEDIATE + ' LIT , \\ compile LIT + , \\ compile the literal itself (from the stack) + ; + +\\ condition IF true-part THEN rest +\\ compiles to: +\\ condition 0BRANCH OFFSET true-part rest +\\ where OFFSET is the offset of 'rest' +\\ condition IF true-part ELSE false-part THEN +\\ compiles to: +\\ condition 0BRANCH OFFSET true-part BRANCH OFFSET2 false-part rest +\\ where OFFSET if the offset of false-part and OFFSET2 is the offset of rest + +\\ IF is an IMMEDIATE word which compiles 0BRANCH followed by a dummy offset, and places +\\ the address of the 0BRANCH on the stack. Later when we see THEN, we pop that address +\\ off the stack, calculate the offset, and back-fill the offset. +: IF IMMEDIATE + ' 0BRANCH , \\ compile 0BRANCH + HERE @ \\ save location of the offset on the stack + 0 , \\ compile a dummy offset +; + +: THEN IMMEDIATE + DUP + HERE @ SWAP - \\ calculate the offset from the address saved on the stack + SWAP ! \\ store the offset in the back-filled location +; + +: ELSE IMMEDIATE + ' BRANCH , \\ definite branch to just over the false-part + HERE @ \\ save location of the offset on the stack + 0 , \\ compile a dummy offset + SWAP \\ now back-fill the original (IF) offset + DUP \\ same as for THEN word above + HERE @ SWAP - + SWAP ! +; + +\\ BEGIN loop-part condition UNTIL +\\ compiles to: +\\ loop-part condition 0BRANCH OFFSET +\\ where OFFSET points back to the loop-part +\\ This is like do { loop-part } while (condition) in the C language +: BEGIN IMMEDIATE + HERE @ \\ save location on the stack +; + +: UNTIL IMMEDIATE + ' 0BRANCH , \\ compile 0BRANCH + HERE @ - \\ calculate the offset from the address saved on the stack + , \\ compile the offset here +; + +\\ BEGIN loop-part AGAIN +\\ compiles to: +\\ loop-part BRANCH OFFSET +\\ where OFFSET points back to the loop-part +\\ In other words, an infinite loop which can only be returned from with EXIT +: AGAIN IMMEDIATE + ' BRANCH , \\ compile BRANCH + HERE @ - \\ calculate the offset back + , \\ compile the offset here +; + +\\ BEGIN condition WHILE loop-part REPEAT +\\ compiles to: +\\ condition 0BRANCH OFFSET2 loop-part BRANCH OFFSET +\\ where OFFSET points back to condition (the beginning) and OFFSET2 points to after the whole piece of code +\\ So this is like a while (condition) { loop-part } loop in the C language +: WHILE IMMEDIATE + ' 0BRANCH , \\ compile 0BRANCH + HERE @ \\ save location of the offset2 on the stack + 0 , \\ compile a dummy offset2 +; + +: REPEAT IMMEDIATE + ' BRANCH , \\ compile BRANCH + SWAP \\ get the original offset (from BEGIN) + HERE @ - , \\ and compile it after BRANCH + DUP + HERE @ SWAP - \\ calculate the offset2 + SWAP ! \\ and back-fill it in the original location +; + +\\ With the looping constructs, we can now write SPACES, which writes n spaces to stdout. +: SPACES + BEGIN + SPACE \\ print a space + 1- \\ until we count down to 0 + DUP 0= + UNTIL +; + +\\ .\" is the print string operator in FORTH. Example: .\" Something to print\" +\\ The space after the operator is the ordinary space required between words. +\\ This is tricky to define because it has to do different things depending on whether +\\ we are compiling or in immediate mode. (Thus the word is marked IMMEDIATE so it can +\\ detect this and do different things). +\\ In immediate mode we just keep reading characters and printing them until we get to +\\ the next double quote. +\\ In compile mode we have the problem of where we're going to store the string (remember +\\ that the input buffer where the string comes from may be overwritten by the time we +\\ come round to running the function). We store the string in the compiled function +\\ like this: +\\ LITSTRING, string length, string rounded up to 4 bytes, EMITSTRING, ... +: .\" IMMEDIATE + STATE @ \\ compiling? + IF + ' LITSTRING , \\ compile LITSTRING + HERE @ \\ save the address of the length word on the stack + 0 , \\ dummy length - we don't know what it is yet + BEGIN + KEY \\ get next character of the string + DUP '\"' <> + WHILE + HERE @ !b \\ store the character in the compiled image + HERE @ 1+ HERE ! \\ increment HERE pointer by 1 byte + REPEAT + DROP \\ drop the double quote character at the end + DUP \\ get the saved address of the length word + HERE @ SWAP - \\ calculate the length + 4 - \\ subtract 4 (because we measured from the start of the length word) + SWAP ! \\ and back-fill the length location + HERE @ \\ round up to next multiple of 4 bytes for the remaining code + 3 + + 3 INVERT AND + HERE ! + ' EMITSTRING , \\ compile the final EMITSTRING + ELSE + \\ In immediate mode, just read characters and print them until we get + \\ to the ending double quote. Much simpler! + BEGIN + KEY + DUP '\"' = IF EXIT THEN + EMIT + AGAIN + THEN +; + +: TEST .\" hello, world..!\" CR ; + + +\\ Finally print the welcome prompt. +.\" OK \" +" + +_initbufftop: + .align 4096 +buffend: + +currkey: + .int buffer +bufftop: + .int _initbufftop diff --git a/jonesforth.S.2 b/jonesforth.S.2 new file mode 100644 index 0000000..f7b8d17 --- /dev/null +++ b/jonesforth.S.2 @@ -0,0 +1,222 @@ +/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- + * By Richard W.M. Jones + * + * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S + */ + +#include + +/* NEXT macro. */ + .macro NEXT + lodsl + jmp *(%eax) + .endm + +/* ELF entry point. */ + .text + .globl _start +_start: + cld + mov $return_stack,%ebp // Initialise the return stack. + + mov $cold_start,%esi // Initialise interpreter. + NEXT // Run interpreter! + + .section .rodata +cold_start: // High-level code without a codeword. + .int COLD + +/* DOCOL - the interpreter! */ + .text +DOCOL: + lea -4(%ebp),%ebp // push %esi on to the return stack + movl %esi,(%ebp) + + addl $4,%eax // %eax points to codeword, so make + movl %eax,%esi // %esi point to first data word + NEXT + + + +/*---------------------------------------------------------------------- + * Fixed sized buffers for everything. + */ + .bss + +/* FORTH return stack. */ +#define RETURN_STACK_SIZE 8192 + .align 4096 + .space RETURN_STACK_SIZE +return_stack: + +/* Space for user-defined words. */ +#define USER_DEFS_SIZE 16384 + .align 4096 +user_defs_start: + .space USER_DEFS_SIZE + + + + + + +/*---------------------------------------------------------------------- + * Built-in words defined the long way. + */ +#define F_IMMED 0x80 +#define F_HIDDEN 0x40 + + // Store the chain of links. + .set link,0 + + .macro defcode name, namelen, flags=0, label + .section .rodata +N_\label : + .align 4 + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .int link // link + .set link,N_\label +\label : + .int 1f // codeword + .text +1: // assembler code follows + .endm + + .macro defword name, namelen, flags=0, label + .section .rodata +N_\label : + .align 4 + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .int link // link + .set link,N_\label +\label : + .int DOCOL // codeword - the interpreter + // list of word pointers follow + .endm + + defword "COLD",4,,COLD + .int KEY,ECHO,RDROP,COLD + + defcode "KEY",3,,KEY + mov (currkey),%ebx + cmp (bufftop),%ebx + jge 1f + xor %eax,%eax + mov (%ebx),%al + push %ax + inc %ebx + mov %ebx,(currkey) + NEXT +1: + mov $0,%ebx // out of input, exit (0) + mov $__NR_exit,%eax + int $0x80 + + defcode "ECHO",4,,ECHO + mov $1,%ebx // 1st param: stdout + + // write needs the address of the byte to write + pop %eax + mov %al,(_echo_tmp) + mov $_echo_tmp,%ecx // 2nd param: address + + mov $1,%edx // 3rd param: nbytes = 1 + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + .bss +_echo_tmp: .space 1 + + defcode "RDROP",5,,RDROP + lea 4(%ebp),%ebp // pop the return stack + NEXT + +#if 0 + defcode "R>",2,,FROMR + + defcode ">R",2,,TOR + + defcode ":",1,,COLON + call nextword // get next word, the procedure name + // The next word is returned in %ebx and has length %ecx bytes. + + // Save the current value of VOCAB. + mov v_vocab,%eax + push %eax + + // Change VOCAB to point to our new word's header (at LATEST). + mov v_latest,%edi + mov %edi,v_vocab + + // We'll start by writing the word's header at LATEST; the header + // is just length byte, the word itself, link pointer. + mov %ecx,(%edi) // Length byte + inc %edi + mov %ebx,%esi // Copy the string. + rep movsb + // Round up to the next multiple of 4 so that the link pointer + // is aligned. + or $3,%edi + inc %edi + pop %eax // Link pointer, points to old VOCAB. + mov %eax,(%edi) + add $4,%edi + // Write the codeword, which for user-defined words is always a + // pointer to the FORTH indirect threaded interpreter. + movl $DOCOL,(%edi) + add $4,%edi + + // Finally, update LATEST. As we go along compiling, we'll be + // writing compiled codewords to the LATEST pointer (and moving + // it along each time). + mov %edi,v_latest + + movl $1,v_state // go into compiling mode + ret + + defcode ";",1,F_IMMED,SEMICOLON + // XXX + +#endif + + defcode SYSEXIT,7,,SYSEXIT + pop %ebx + mov $__NR_exit,%eax + int $0x80 + +/*---------------------------------------------------------------------- + * Variables containing the interpreter's state. + */ + .data + + .align 4 +v_state: + .int 0 // 0 = immediate, 1 = compiling +v_vocab: + .int N_SYSEXIT // last word in the dictionary +v_latest: + .int user_defs_start // pointer to next space for user definition or current compiled def + +/*---------------------------------------------------------------------- + * Input buffer & initial input. + */ + .data + .align 4096 +buffer: + .ascii "TEST OF READING WORDS 1 2 3" + +_initbufftop: + .align 4096 +buffend: + +currkey: + .int buffer +bufftop: + .int _initbufftop diff --git a/jonesforth.S.3 b/jonesforth.S.3 new file mode 100644 index 0000000..29e31b0 --- /dev/null +++ b/jonesforth.S.3 @@ -0,0 +1,250 @@ +/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- + * By Richard W.M. Jones + * + * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S + */ + +#include + +/* NEXT macro. */ + .macro NEXT + lodsl + jmp *(%eax) + .endm + +/* ELF entry point. */ + .text + .globl _start +_start: + cld + mov $return_stack,%ebp // Initialise the return stack. + + mov $cold_start,%esi // Initialise interpreter. + NEXT // Run interpreter! + + .section .rodata +cold_start: // High-level code without a codeword. + .int COLD + +/* DOCOL - the interpreter! */ + .text +DOCOL: + lea -4(%ebp),%ebp // push %esi on to the return stack + movl %esi,(%ebp) + + addl $4,%eax // %eax points to codeword, so make + movl %eax,%esi // %esi point to first data word + NEXT + + + +/*---------------------------------------------------------------------- + * Fixed sized buffers for everything. + */ + .bss + +/* FORTH return stack. */ +#define RETURN_STACK_SIZE 8192 + .align 4096 + .space RETURN_STACK_SIZE +return_stack: + +/* Space for user-defined words. */ +#define USER_DEFS_SIZE 16384 + .align 4096 +user_defs_start: + .space USER_DEFS_SIZE + + + + + + +/*---------------------------------------------------------------------- + * Built-in words defined the long way. + */ +#define F_IMMED 0x80 +#define F_HIDDEN 0x40 + + // Store the chain of links. + .set link,0 + + .macro defcode name, namelen, flags=0, label + .section .rodata +N_\label : + .align 4 + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .int link // link + .set link,N_\label +\label : + .int 1f // codeword + .text +1: // assembler code follows + .endm + + .macro defword name, namelen, flags=0, label + .section .rodata +N_\label : + .align 4 + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .int link // link + .set link,N_\label +\label : + .int DOCOL // codeword - the interpreter + // list of word pointers follow + .endm + + defword "COLD",4,,COLD + .int KEY,KEY,SWAP,ECHO,ECHO,RDROP,COLD + + defcode "DUP",3,,DUP + pop %eax // duplicate top of stack + push %eax + push %eax + NEXT + + defcode "DROP",3,,DROP + pop %eax // drop top of stack + NEXT + + defcode "SWAP",4,,SWAP + pop %eax // swap top of stack + pop %ebx + push %eax + push %ebx + NEXT + + defcode "KEY",3,,KEY + mov (currkey),%ebx + cmp (bufftop),%ebx + jge 1f + xor %eax,%eax + mov (%ebx),%al + push %eax + inc %ebx + mov %ebx,(currkey) + NEXT +1: + mov $0,%ebx // out of input, exit (0) + mov $__NR_exit,%eax + int $0x80 + + defcode "ECHO",4,,ECHO + mov $1,%ebx // 1st param: stdout + + // write needs the address of the byte to write + pop %eax + mov %al,(2f) + mov $2f,%ecx // 2nd param: address + + mov $1,%edx // 3rd param: nbytes = 1 + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + .bss +2: .space 1 // scratch used by ECHO + + defcode "RDROP",5,,RDROP + lea 4(%ebp),%ebp // pop the return stack + NEXT + +#if 0 + defcode "R>",2,,FROMR + + defcode ">R",2,,TOR + + defcode ":",1,,COLON + call nextword // get next word, the procedure name + // The next word is returned in %ebx and has length %ecx bytes. + + // Save the current value of VOCAB. + mov v_vocab,%eax + push %eax + + // Change VOCAB to point to our new word's header (at LATEST). + mov v_latest,%edi + mov %edi,v_vocab + + // We'll start by writing the word's header at LATEST; the header + // is just length byte, the word itself, link pointer. + mov %ecx,(%edi) // Length byte + inc %edi + mov %ebx,%esi // Copy the string. + rep movsb + // Round up to the next multiple of 4 so that the link pointer + // is aligned. + or $3,%edi + inc %edi + pop %eax // Link pointer, points to old VOCAB. + mov %eax,(%edi) + add $4,%edi + // Write the codeword, which for user-defined words is always a + // pointer to the FORTH indirect threaded interpreter. + movl $DOCOL,(%edi) + add $4,%edi + + // Finally, update LATEST. As we go along compiling, we'll be + // writing compiled codewords to the LATEST pointer (and moving + // it along each time). + mov %edi,v_latest + + movl $1,v_state // go into compiling mode + ret + + defcode ";",1,F_IMMED,SEMICOLON + // XXX + +#endif + + defcode SYSEXIT,7,,SYSEXIT + pop %ebx + mov $__NR_exit,%eax + int $0x80 + +/*---------------------------------------------------------------------- + * Variables containing the interpreter's state. + */ + .data + + .align 4 +v_state: + .int 0 // 0 = immediate, 1 = compiling +v_vocab: + .int N_SYSEXIT // last word in the dictionary +v_latest: + .int user_defs_start // pointer to next space for user definition or current compiled def + +/*---------------------------------------------------------------------- + * Input buffer & initial input. + */ + .data + .align 4096 +buffer: + .ascii " \n\ +\\ Define some constants \n\ +: '\\n' 10 ; \n\ +: ')' 41 ; \n\ +: 'space' 32 ; \n\ +: '\"' 34 ; \n\ +: '-' 45 ; \n\ +: '0' 48 ; \n\ + \n\ +\\ CR command \n\ +: CR '\\n' ECHO ; \n\ +" + +_initbufftop: + .align 4096 +buffend: + +currkey: + .int buffer +bufftop: + .int _initbufftop diff --git a/jonesforth.S.4 b/jonesforth.S.4 new file mode 100644 index 0000000..839d52c --- /dev/null +++ b/jonesforth.S.4 @@ -0,0 +1,310 @@ +/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- + * By Richard W.M. Jones + * + * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S + */ + +#include + +/* NEXT macro. */ + .macro NEXT + lodsl + jmp *(%eax) + .endm + +/* Macros to deal with the return stack. */ + .macro PUSHRSP reg + .endm + + .macro POPRSP reg + .endm + +/* ELF entry point. */ + .text + .globl _start +_start: + cld + mov $return_stack,%ebp // Initialise the return stack. + + mov $cold_start,%esi // Initialise interpreter. + NEXT // Run interpreter! + + .section .rodata +cold_start: // High-level code without a codeword. + .int COLD + +/* DOCOL - the interpreter! */ + .text + .align 4 +DOCOL: + lea -4(%ebp),%ebp // push %esi on to the return stack + movl %esi,(%ebp) + + addl $4,%eax // %eax points to codeword, so make + movl %eax,%esi // %esi point to first data word + NEXT + +/*---------------------------------------------------------------------- + * Fixed sized buffers for everything. + */ + .bss + +/* FORTH return stack. */ +#define RETURN_STACK_SIZE 8192 + .align 4096 + .space RETURN_STACK_SIZE +return_stack: + +/* Space for user-defined words. */ +#define USER_DEFS_SIZE 16384 + .align 4096 +user_defs_start: + .space USER_DEFS_SIZE + + + + + + +/*---------------------------------------------------------------------- + * Built-in words defined the long way. + */ +#define F_IMMED 0x80 +#define F_HIDDEN 0x40 + + // Store the chain of links. + .set link,0 + + .macro defcode name, namelen, flags=0, label + .section .rodata +N_\label : + .align 4 + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .int link // link + .set link,N_\label +\label : + .int 1f // codeword + .text + .align 4 +1: // assembler code follows + .endm + + .macro defword name, namelen, flags=0, label + .section .rodata +N_\label : + .align 4 + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .int link // link + .set link,N_\label +\label : + .int DOCOL // codeword - the interpreter + // list of word pointers follow + .endm + + /* COLD must not return (ie. must not call EXIT). */ + defword "COLD",4,,COLD + .int KEY,ECHO,RDROP,COLD + + defcode "EXIT",4,,EXIT + movl (%ebp),%esi // pop return stack into %esi + lea 4(%ebp),%ebp + NEXT + + defcode "!",1,,STORE + pop %ebx // address to store at + pop %eax // data to store there + mov %eax,(%ebx) // store it + NEXT + + defcode "@",1,,FETCH + pop %ebx // address to fetch + mov (%ebx),%eax // fetch it + push %eax // push value onto stack + NEXT + + defcode "STATE",5,,STATE + push $v_state + NEXT + + defcode "HERE",4,,HERE + push $v_here + NEXT + + defcode "LATEST",6,,LATEST + push $v_latest + NEXT + + defcode ">R",2,,TOR + pop %eax // pop parameter stack into %eax + lea -4(%ebp),%ebp // push %eax on to return stack + movl %eax,(%ebp) + NEXT + + defcode "R>",2,,FROMR + mov (%ebp),%eax // pop top of return stack to %eax + lea 4(%ebp),%ebp + push %eax // and push on to parameter stack + NEXT + +#if 0 /* This definition is wrong. */ + defcode "R",1,,R + mov %(ebp),%eax // copy (don't pop) top of return stack to %eax + push %eax // and push on to parameter stack + NEXT +#endif + + defcode "RSP@",4,,RSPFETCH + push %ebp + NEXT + + defcode "RSP!",4,,RSPSTORE + pop %ebp + NEXT + + defcode "RDROP",5,,RDROP + lea 4(%ebp),%ebp // pop the return stack + NEXT + + defcode "KEY",3,,KEY + mov (currkey),%ebx + cmp (bufftop),%ebx + jge 1f + xor %eax,%eax + mov (%ebx),%al + push %eax + inc %ebx + mov %ebx,(currkey) + NEXT +1: + mov $0,%ebx // out of input, exit (0) + mov $__NR_exit,%eax + int $0x80 + + defcode "ECHO",4,,ECHO + mov $1,%ebx // 1st param: stdout + + // write needs the address of the byte to write + pop %eax + mov %al,(2f) + mov $2f,%ecx // 2nd param: address + + mov $1,%edx // 3rd param: nbytes = 1 + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + .bss +2: .space 1 // scratch used by ECHO + + defcode "DUP",3,,DUP + pop %eax // duplicate top of stack + push %eax + push %eax + NEXT + + defcode "DROP",3,,DROP + pop %eax // drop top of stack + NEXT + + defcode "SWAP",4,,SWAP + pop %eax // swap top of stack + pop %ebx + push %eax + push %ebx + NEXT + +#if 0 + defcode ":",1,,COLON + call nextword // get next word, the procedure name + // The next word is returned in %ebx and has length %ecx bytes. + + // Save the current value of VOCAB. + mov v_vocab,%eax + push %eax + + // Change VOCAB to point to our new word's header (at LATEST). + mov v_latest,%edi + mov %edi,v_vocab + + // We'll start by writing the word's header at LATEST; the header + // is just length byte, the word itself, link pointer. + mov %ecx,(%edi) // Length byte + inc %edi + mov %ebx,%esi // Copy the string. + rep movsb + // Round up to the next multiple of 4 so that the link pointer + // is aligned. + or $3,%edi + inc %edi + pop %eax // Link pointer, points to old VOCAB. + mov %eax,(%edi) + add $4,%edi + // Write the codeword, which for user-defined words is always a + // pointer to the FORTH indirect threaded interpreter. + movl $DOCOL,(%edi) + add $4,%edi + + // Finally, update LATEST. As we go along compiling, we'll be + // writing compiled codewords to the LATEST pointer (and moving + // it along each time). + mov %edi,v_latest + + movl $1,v_state // go into compiling mode + ret + + defcode ";",1,F_IMMED,SEMICOLON + // XXX + +#endif + + defcode SYSEXIT,7,,SYSEXIT + pop %ebx + mov $__NR_exit,%eax + int $0x80 + +/*---------------------------------------------------------------------- + * Variables containing the interpreter's state. + */ + .data + + .align 4 +v_state: + .int 0 // 0 = immediate, non-zero = compiling +v_latest: + .int N_SYSEXIT // last word in the dictionary +v_here: + .int user_defs_start // pointer to next space for user definition or current compiled def + +/*---------------------------------------------------------------------- + * Input buffer & initial input. + */ + .data + .align 4096 +buffer: + .ascii " \n\ +\\ Define some constants \n\ +: '\\n' 10 ; \n\ +: ')' 41 ; \n\ +: 'space' 32 ; \n\ +: '\"' 34 ; \n\ +: '-' 45 ; \n\ +: '0' 48 ; \n\ + \n\ +\\ CR command \n\ +: CR '\\n' ECHO ; \n\ +" + +_initbufftop: + .align 4096 +buffend: + +currkey: + .int buffer +bufftop: + .int _initbufftop diff --git a/jonesforth.S.5 b/jonesforth.S.5 new file mode 100644 index 0000000..c5c61d9 --- /dev/null +++ b/jonesforth.S.5 @@ -0,0 +1,370 @@ +/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- + * By Richard W.M. Jones + * + * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S + */ + +#include + +/* NEXT macro. */ + .macro NEXT + lodsl + jmp *(%eax) + .endm + +/* Macros to deal with the return stack. */ + .macro PUSHRSP reg + lea -4(%ebp),%ebp // push reg on to return stack + movl \reg,(%ebp) + .endm + + .macro POPRSP reg + mov (%ebp),\reg // pop top of return stack to reg + lea 4(%ebp),%ebp + .endm + +/* ELF entry point. */ + .text + .globl _start +_start: + cld + mov $return_stack,%ebp // Initialise the return stack. + + mov $cold_start,%esi // Initialise interpreter. + NEXT // Run interpreter! + + .section .rodata +cold_start: // High-level code without a codeword. + .int COLD + +/* DOCOL - the interpreter! */ + .text + .align 4 +DOCOL: + PUSHRSP %esi // push %esi on to the return stack + addl $4,%eax // %eax points to codeword, so make + movl %eax,%esi // %esi point to first data word + NEXT + +/*---------------------------------------------------------------------- + * Fixed sized buffers for everything. + */ + .bss + +/* FORTH return stack. */ +#define RETURN_STACK_SIZE 8192 + .align 4096 + .space RETURN_STACK_SIZE +return_stack: + +/* Space for user-defined words. */ +#define USER_DEFS_SIZE 16384 + .align 4096 +user_defs_start: + .space USER_DEFS_SIZE + + + + + + +/*---------------------------------------------------------------------- + * Built-in words defined the long way. + */ +#define F_IMMED 0x80 +#define F_HIDDEN 0x40 + + // Store the chain of links. + .set link,0 + + .macro defcode name, namelen, flags=0, label + .section .rodata + .globl name_\label +name_\label : + .align 4 + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .int link // link + .set link,name_\label + .globl \label +\label : + .int code_\label // codeword + .text + .align 4 + .globl code_\label +code_\label : // assembler code follows + .endm + + .macro defword name, namelen, flags=0, label + .section .rodata + .globl name_\label +name_\label : + .align 4 + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .int link // link + .set link,name_\label + .globl \label +\label : + .int DOCOL // codeword - the interpreter + // list of word pointers follow + .endm + + /* COLD must not return (ie. must not call EXIT). */ + defword "COLD",4,,COLD + .int LIT,'<',ECHO,WORD,ECHOWORD,LIT,'>',ECHO,LIT,10,ECHO,RDROP,COLD + + defcode "EXIT",4,,EXIT + POPRSP %esi // pop return stack into %esi + NEXT + + defcode "LIT",3,,LIT + // %esi points to the next command, but in this case it points to the next + // literal 32 bit integer. Get that literal into %eax and increment %esi. + // On x86, it's a convenient single byte instruction! (cf. NEXT macro) + lodsl + push %eax // push %eax on to stack + NEXT + + defcode "!",1,,STORE + pop %ebx // address to store at + pop %eax // data to store there + mov %eax,(%ebx) // store it + NEXT + + defcode "@",1,,FETCH + pop %ebx // address to fetch + mov (%ebx),%eax // fetch it + push %eax // push value onto stack + NEXT + + defcode "STATE",5,,STATE + push $v_state + NEXT + + defcode "HERE",4,,HERE + push $v_here + NEXT + + defcode "LATEST",6,,LATEST + push $v_latest + NEXT + + defcode ">R",2,,TOR + pop %eax // pop parameter stack into %eax + PUSHRSP %eax // push it on to the return stack + NEXT + + defcode "R>",2,,FROMR + POPRSP %eax // pop return stack on to %eax + push %eax // and push on to parameter stack + NEXT + +#if 0 /* This definition is wrong. */ + defcode "R",1,,R + mov %(ebp),%eax // copy (don't pop) top of return stack to %eax + push %eax // and push on to parameter stack + NEXT +#endif + + defcode "RSP@",4,,RSPFETCH + push %ebp + NEXT + + defcode "RSP!",4,,RSPSTORE + pop %ebp + NEXT + + defcode "RDROP",5,,RDROP + lea 4(%ebp),%ebp // pop return stack and throw away + NEXT + + defcode "KEY",3,,KEY + call _KEY + push %eax // push return value on stack + NEXT +_KEY: + mov (currkey),%ebx + cmp (bufftop),%ebx + jge 1f + xor %eax,%eax + mov (%ebx),%al + inc %ebx + mov %ebx,(currkey) + ret +1: + mov $0,%ebx // out of input, exit (0) + mov $__NR_exit,%eax + int $0x80 + + defcode "WORD",4,,WORD + /* Search for first non-blank character. Also skip \ comments. */ +1: + call _KEY // get next key, returned in %eax + cmpb $'\\',%al // start of a comment? + je 3f // if so, skip the comment + cmpb $' ',%al + jbe 1b // if so, keep looking + + /* Search for the end of the word, storing chars as we go. */ + mov $5f,%edi // pointer to return buffer +2: + stosb // add character to return buffer + call _KEY // get next key, returned in %al + cmpb $' ',%al // is blank? + ja 2b // if not, keep looping + + /* Return the word (well, the static buffer) and length. */ + sub $5f,%edi + push %edi // push length + push $5f // push base address + NEXT + + /* Code to skip \ comments to end of the current line. */ +3: + call _KEY + cmpb $'\n',%al // end of line yet? + jne 3b + jmp 1b + + .bss + // A static buffer where WORD returns. Subsequent calls + // overwrite this buffer. Maximum word length is 32 chars. +5: .space 32 + + defcode "ECHO",4,,ECHO + mov $1,%ebx // 1st param: stdout + + // write needs the address of the byte to write + pop %eax + mov %al,(2f) + mov $2f,%ecx // 2nd param: address + + mov $1,%edx // 3rd param: nbytes = 1 + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + .bss +2: .space 1 // scratch used by ECHO + + defcode "ECHOWORD",8,,ECHOWORD + mov $1,%ebx // 1st param: stdout + pop %ecx // 2nd param: address of string + pop %edx // 3rd param: length of string + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + defcode "DUP",3,,DUP + pop %eax // duplicate top of stack + push %eax + push %eax + NEXT + + defcode "DROP",3,,DROP + pop %eax // drop top of stack + NEXT + + defcode "SWAP",4,,SWAP + pop %eax // swap top of stack + pop %ebx + push %eax + push %ebx + NEXT + +#if 0 + defcode ":",1,,COLON + call nextword // get next word, the procedure name + // The next word is returned in %ebx and has length %ecx bytes. + + // Save the current value of VOCAB. + mov v_vocab,%eax + push %eax + + // Change VOCAB to point to our new word's header (at LATEST). + mov v_latest,%edi + mov %edi,v_vocab + + // We'll start by writing the word's header at LATEST; the header + // is just length byte, the word itself, link pointer. + mov %ecx,(%edi) // Length byte + inc %edi + mov %ebx,%esi // Copy the string. + rep movsb + // Round up to the next multiple of 4 so that the link pointer + // is aligned. + or $3,%edi + inc %edi + pop %eax // Link pointer, points to old VOCAB. + mov %eax,(%edi) + add $4,%edi + // Write the codeword, which for user-defined words is always a + // pointer to the FORTH indirect threaded interpreter. + movl $DOCOL,(%edi) + add $4,%edi + + // Finally, update LATEST. As we go along compiling, we'll be + // writing compiled codewords to the LATEST pointer (and moving + // it along each time). + mov %edi,v_latest + + movl $1,v_state // go into compiling mode + ret + + defcode ";",1,F_IMMED,SEMICOLON + // XXX + +#endif + + defcode SYSEXIT,7,,SYSEXIT + pop %ebx + mov $__NR_exit,%eax + int $0x80 + +/*---------------------------------------------------------------------- + * Variables containing the interpreter's state. + */ + .data + + .align 4 +v_state: + .int 0 // 0 = immediate, non-zero = compiling +v_latest: + .int name_SYSEXIT // last word in the dictionary +v_here: + .int user_defs_start // pointer to next space for user definition or current compiled def + +/*---------------------------------------------------------------------- + * Input buffer & initial input. + */ + .data + .align 4096 +buffer: + .ascii " \n\ +\\ Define some constants \n\ +: '\\n' 10 ; \n\ +: ')' 41 ; \n\ +: 'space' 32 ; \n\ +: '\"' 34 ; \n\ +: '-' 45 ; \n\ +: '0' 48 ; \n\ + \n\ +\\ CR command \n\ +: CR '\\n' ECHO ; \n\ +" + +_initbufftop: + .align 4096 +buffend: + +currkey: + .int buffer +bufftop: + .int _initbufftop diff --git a/jonesforth.S.6 b/jonesforth.S.6 new file mode 100644 index 0000000..d3e02fc --- /dev/null +++ b/jonesforth.S.6 @@ -0,0 +1,435 @@ +/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- + * By Richard W.M. Jones + * + * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S + */ + +#include + +/* NOTES---------------------------------------------------------------------- + +Need to say something about $ before constants. + +And about je/jne/ja/jb/jbe/etc + + + + +*/ + +/* NEXT macro. */ + .macro NEXT + lodsl + jmp *(%eax) + .endm + +/* Macros to deal with the return stack. */ + .macro PUSHRSP reg + lea -4(%ebp),%ebp // push reg on to return stack + movl \reg,(%ebp) + .endm + + .macro POPRSP reg + mov (%ebp),\reg // pop top of return stack to reg + lea 4(%ebp),%ebp + .endm + +/* ELF entry point. */ + .text + .globl _start +_start: + cld + mov $return_stack,%ebp // Initialise the return stack. + + mov $cold_start,%esi // Initialise interpreter. + NEXT // Run interpreter! + + .section .rodata +cold_start: // High-level code without a codeword. + .int COLD + +/* DOCOL - the interpreter! */ + .text + .align 4 +DOCOL: + PUSHRSP %esi // push %esi on to the return stack + addl $4,%eax // %eax points to codeword, so make + movl %eax,%esi // %esi point to first data word + NEXT + +/*---------------------------------------------------------------------- + * Fixed sized buffers for everything. + */ + .bss + +/* FORTH return stack. */ +#define RETURN_STACK_SIZE 8192 + .align 4096 + .space RETURN_STACK_SIZE +return_stack: + +/* Space for user-defined words. */ +#define USER_DEFS_SIZE 16384 + .align 4096 +user_defs_start: + .space USER_DEFS_SIZE + + + + + + +/*---------------------------------------------------------------------- + * Built-in words defined the long way. + */ +#define F_IMMED 0x80 +#define F_HIDDEN 0x40 + + // Store the chain of links. + .set link,0 + + .macro defcode name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int code_\label // codeword + .text + .align 4 + .globl code_\label +code_\label : // assembler code follows + .endm + + .macro defword name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int DOCOL // codeword - the interpreter + // list of word pointers follow + .endm + + /* Some easy ones .... */ + defcode "DUP",3,,DUP + pop %eax // duplicate top of stack + push %eax + push %eax + NEXT + + defcode "DROP",3,,DROP + pop %eax // drop top of stack + NEXT + + defcode "SWAP",4,,SWAP + pop %eax // swap top of stack + pop %ebx + push %eax + push %ebx + NEXT + + /* COLD must not return (ie. must not call EXIT). */ + defword "COLD",4,,COLD + .int LIT,'<',ECHO,WORD,ECHOWORD,LIT,'>',ECHO,LIT,10,ECHO,RDROP,COLD + +/* +This prints out each word in the input as \n + defword "COLD",4,,COLD + .int LIT,'<',ECHO,WORD,ECHOWORD,LIT,'>',ECHO,LIT,10,ECHO,RDROP,COLD +*/ + + defcode "EXIT",4,,EXIT + POPRSP %esi // pop return stack into %esi + NEXT + + defcode "LIT",3,,LIT + // %esi points to the next command, but in this case it points to the next + // literal 32 bit integer. Get that literal into %eax and increment %esi. + // On x86, it's a convenient single byte instruction! (cf. NEXT macro) + lodsl + push %eax // push %eax on to stack + NEXT + + defcode "!",1,,STORE + pop %ebx // address to store at + pop %eax // data to store there + mov %eax,(%ebx) // store it + NEXT + + defcode "@",1,,FETCH + pop %ebx // address to fetch + mov (%ebx),%eax // fetch it + push %eax // push value onto stack + NEXT + + defcode "STATE",5,,STATE + push $v_state + NEXT + + defcode "HERE",4,,HERE + push $v_here + NEXT + + defcode "LATEST",6,,LATEST + push $v_latest + NEXT + + defcode ">R",2,,TOR + pop %eax // pop parameter stack into %eax + PUSHRSP %eax // push it on to the return stack + NEXT + + defcode "R>",2,,FROMR + POPRSP %eax // pop return stack on to %eax + push %eax // and push on to parameter stack + NEXT + +#if 0 /* This definition is wrong. */ + defcode "R",1,,R + mov %(ebp),%eax // copy (don't pop) top of return stack to %eax + push %eax // and push on to parameter stack + NEXT +#endif + + defcode "RSP@",4,,RSPFETCH + push %ebp + NEXT + + defcode "RSP!",4,,RSPSTORE + pop %ebp + NEXT + + defcode "RDROP",5,,RDROP + lea 4(%ebp),%ebp // pop return stack and throw away + NEXT + + defcode "KEY",3,,KEY + call _KEY + push %eax // push return value on stack + NEXT +_KEY: + mov (currkey),%ebx + cmp (bufftop),%ebx + jge 1f + xor %eax,%eax + mov (%ebx),%al + inc %ebx + mov %ebx,(currkey) + ret +1: + mov $0,%ebx // out of input, exit (0) + mov $__NR_exit,%eax + int $0x80 + + defcode "ECHO",4,,ECHO + mov $1,%ebx // 1st param: stdout + + // write needs the address of the byte to write + pop %eax + mov %al,(2f) + mov $2f,%ecx // 2nd param: address + + mov $1,%edx // 3rd param: nbytes = 1 + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + .bss +2: .space 1 // scratch used by ECHO + + defcode "WORD",4,,WORD + call _WORD + push %eax // push length + push %ebx // push base address + NEXT + +_WORD: + /* Search for first non-blank character. Also skip \ comments. */ +1: + call _KEY // get next key, returned in %eax + cmpb $'\\',%al // start of a comment? + je 3f // if so, skip the comment + cmpb $' ',%al + jbe 1b // if so, keep looking + + /* Search for the end of the word, storing chars as we go. */ + mov $5f,%edi // pointer to return buffer +2: + stosb // add character to return buffer + call _KEY // get next key, returned in %al + cmpb $' ',%al // is blank? + ja 2b // if not, keep looping + + /* Return the word (well, the static buffer) and length. */ + sub $5f,%edi + mov %edi,%eax // return length of the word + mov $5f,%ebx // return address of the word + ret + + /* Code to skip \ comments to end of the current line. */ +3: + call _KEY + cmpb $'\n',%al // end of line yet? + jne 3b + jmp 1b + + .bss + // A static buffer where WORD returns. Subsequent calls + // overwrite this buffer. Maximum word length is 32 chars. +5: .space 32 + + defcode "ECHOWORD",8,,ECHOWORD + mov $1,%ebx // 1st param: stdout + pop %ecx // 2nd param: address of string + pop %edx // 3rd param: length of string + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + defcode "'",1,,TICK + call _WORD // returns %ebx = address of next word, %eax = length in bytes + mov %ebx,%edi // %edi = address + mov %eax,%ecx // %ecx = length + + push %esi // Save %esi so we can use it in string comparison. + + // Now we start searching backwards through the dictionary for this word. + mov v_latest,%edx // LATEST points to name header of the latest word in the dictionary +1: + cmp %edx,%edx // NULL pointer? (end of the linked list) + je 4f + + xor %eax,%eax + movb 4(%edx),%al // %al = flags+length field + andb $0x1f,%al // %al = name length + cmpb %cl,%al // Length is the same? + jne 2f + + // Compare the strings in detail. + push %ecx // Save the length + lea 5(%edx),%esi // Dictionary string we are checking against. + repe cmpsb // Compare the strings. + pop %ecx + jne 2f // Not the same. + + // The strings are the same - return the header pointer on the stack. + pop %esi + push %edx + NEXT + +2: + mov (%edx),%edx // Move back through the link field to the previous word + jmp 1b // .. and loop. + +4: // Not found. + pop %esi + xor %eax,%eax // Push zero on to the stack to indicate not found. + push %eax + NEXT + +#if 0 + defcode ":",1,,COLON + call nextword // get next word, the procedure name + // The next word is returned in %ebx and has length %ecx bytes. + + // Save the current value of VOCAB. + mov v_vocab,%eax + push %eax + + // Change VOCAB to point to our new word's header (at LATEST). + mov v_latest,%edi + mov %edi,v_vocab + + // We'll start by writing the word's header at LATEST; the header + // is just length byte, the word itself, link pointer. + mov %ecx,(%edi) // Length byte + inc %edi + mov %ebx,%esi // Copy the string. + rep movsb + // Round up to the next multiple of 4 so that the link pointer + // is aligned. + or $3,%edi + inc %edi + pop %eax // Link pointer, points to old VOCAB. + mov %eax,(%edi) + add $4,%edi + // Write the codeword, which for user-defined words is always a + // pointer to the FORTH indirect threaded interpreter. + movl $DOCOL,(%edi) + add $4,%edi + + // Finally, update LATEST. As we go along compiling, we'll be + // writing compiled codewords to the LATEST pointer (and moving + // it along each time). + mov %edi,v_latest + + movl $1,v_state // go into compiling mode + ret + + defcode ";",1,F_IMMED,SEMICOLON + // XXX + +#endif + + defcode SYSEXIT,7,,SYSEXIT + pop %ebx + mov $__NR_exit,%eax + int $0x80 + +/*---------------------------------------------------------------------- + * Variables containing the interpreter's state. + */ + .data + + .align 4 +v_state: + .int 0 // 0 = immediate, non-zero = compiling +v_latest: + .int name_SYSEXIT // last word in the dictionary +v_here: + .int user_defs_start // pointer to next space for user definition or current compiled def + +/*---------------------------------------------------------------------- + * Input buffer & initial input. + */ + .data + .align 4096 +buffer: + .ascii " \n\ +\\ Define some constants \n\ +: '\\n' 10 ; \n\ +: ')' 41 ; \n\ +: 'space' 32 ; \n\ +: '\"' 34 ; \n\ +: '-' 45 ; \n\ +: '0' 48 ; \n\ + \n\ +\\ CR command \n\ +: CR '\\n' ECHO ; \n\ +" + +_initbufftop: + .align 4096 +buffend: + +currkey: + .int buffer +bufftop: + .int _initbufftop diff --git a/jonesforth.S.7 b/jonesforth.S.7 new file mode 100644 index 0000000..ede6edd --- /dev/null +++ b/jonesforth.S.7 @@ -0,0 +1,475 @@ +/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- + * By Richard W.M. Jones + * + * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S + */ + +#include + +/* NOTES---------------------------------------------------------------------- + +Need to say something about $ before constants. + +And about je/jne/ja/jb/jbe/etc + + + + +*/ + +/* NEXT macro. */ + .macro NEXT + lodsl + jmp *(%eax) + .endm + +/* Macros to deal with the return stack. */ + .macro PUSHRSP reg + lea -4(%ebp),%ebp // push reg on to return stack + movl \reg,(%ebp) + .endm + + .macro POPRSP reg + mov (%ebp),\reg // pop top of return stack to reg + lea 4(%ebp),%ebp + .endm + +/* ELF entry point. */ + .text + .globl _start +_start: + cld + mov $return_stack,%ebp // Initialise the return stack. + + mov $cold_start,%esi // Initialise interpreter. + NEXT // Run interpreter! + + .section .rodata +cold_start: // High-level code without a codeword. + .int COLD + +/* DOCOL - the interpreter! */ + .text + .align 4 +DOCOL: + PUSHRSP %esi // push %esi on to the return stack + addl $4,%eax // %eax points to codeword, so make + movl %eax,%esi // %esi point to first data word + NEXT + +/*---------------------------------------------------------------------- + * Fixed sized buffers for everything. + */ + .bss + +/* FORTH return stack. */ +#define RETURN_STACK_SIZE 8192 + .align 4096 + .space RETURN_STACK_SIZE +return_stack: + +/* Space for user-defined words. */ +#define USER_DEFS_SIZE 16384 + .align 4096 +user_defs_start: + .space USER_DEFS_SIZE + + + + + + +/*---------------------------------------------------------------------- + * Built-in words defined the long way. + */ +#define F_IMMED 0x80 +#define F_HIDDEN 0x40 + + // Store the chain of links. + .set link,0 + + .macro defcode name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int code_\label // codeword + .text + .align 4 + .globl code_\label +code_\label : // assembler code follows + .endm + + .macro defword name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int DOCOL // codeword - the interpreter + // list of word pointers follow + .endm + + /* Some easy ones .... */ + defcode "DUP",3,,DUP + pop %eax // duplicate top of stack + push %eax + push %eax + NEXT + + defcode "DROP",4,,DROP + pop %eax // drop top of stack + NEXT + + defcode "SWAP",4,,SWAP + pop %eax // swap top of stack + pop %ebx + push %eax + push %ebx + NEXT + + defcode "OVER",4,,OVER + mov 4(%esp),%eax // get the second element of stack + push %eax // and push it on top + NEXT + + /* COLD must not return (ie. must not call EXIT). */ + defword "COLD",4,,COLD + .int LIT,'<',ECHO + .int WORD,OVER,OVER,ECHOWORD,LIT,'=',ECHO,FIND,DOT + .int LIT,'>',ECHO + .int LIT,10,ECHO + .int RDROP,COLD + +/* +This prints out each word in the input as \n + defword "COLD",4,,COLD + .int LIT,'<',ECHO,WORD,ECHOWORD,LIT,'>',ECHO,LIT,10,ECHO,RDROP,COLD +*/ + + defcode "EXIT",4,,EXIT + POPRSP %esi // pop return stack into %esi + NEXT + + defcode "LIT",3,,LIT + // %esi points to the next command, but in this case it points to the next + // literal 32 bit integer. Get that literal into %eax and increment %esi. + // On x86, it's a convenient single byte instruction! (cf. NEXT macro) + lodsl + push %eax // push %eax on to stack + NEXT + + defcode "!",1,,STORE + pop %ebx // address to store at + pop %eax // data to store there + mov %eax,(%ebx) // store it + NEXT + + defcode "@",1,,FETCH + pop %ebx // address to fetch + mov (%ebx),%eax // fetch it + push %eax // push value onto stack + NEXT + + defcode "STATE",5,,STATE + push $v_state + NEXT + + defcode "HERE",4,,HERE + push $v_here + NEXT + + defcode "LATEST",6,,LATEST + push $v_latest + NEXT + + defcode ">R",2,,TOR + pop %eax // pop parameter stack into %eax + PUSHRSP %eax // push it on to the return stack + NEXT + + defcode "R>",2,,FROMR + POPRSP %eax // pop return stack on to %eax + push %eax // and push on to parameter stack + NEXT + +#if 0 /* This definition is wrong. */ + defcode "R",1,,R + mov %(ebp),%eax // copy (don't pop) top of return stack to %eax + push %eax // and push on to parameter stack + NEXT +#endif + + defcode "RSP@",4,,RSPFETCH + push %ebp + NEXT + + defcode "RSP!",4,,RSPSTORE + pop %ebp + NEXT + + defcode "RDROP",5,,RDROP + lea 4(%ebp),%ebp // pop return stack and throw away + NEXT + + defcode "KEY",3,,KEY + call _KEY + push %eax // push return value on stack + NEXT +_KEY: + mov (currkey),%ebx + cmp (bufftop),%ebx + jge 1f + xor %eax,%eax + mov (%ebx),%al + inc %ebx + mov %ebx,(currkey) + ret +1: + mov $0,%ebx // out of input, exit (0) + mov $__NR_exit,%eax + int $0x80 + + defcode "ECHO",4,,ECHO + pop %eax + call _ECHO + NEXT +_ECHO: + mov $1,%ebx // 1st param: stdout + + // write needs the address of the byte to write + mov %al,(2f) + mov $2f,%ecx // 2nd param: address + + mov $1,%edx // 3rd param: nbytes = 1 + + mov $__NR_write,%eax // write syscall + int $0x80 + ret + + .bss +2: .space 1 // scratch used by ECHO + + defcode "WORD",4,,WORD + call _WORD + push %eax // push length + push %ebx // push base address + NEXT + +_WORD: + /* Search for first non-blank character. Also skip \ comments. */ +1: + call _KEY // get next key, returned in %eax + cmpb $'\\',%al // start of a comment? + je 3f // if so, skip the comment + cmpb $' ',%al + jbe 1b // if so, keep looking + + /* Search for the end of the word, storing chars as we go. */ + mov $5f,%edi // pointer to return buffer +2: + stosb // add character to return buffer + call _KEY // get next key, returned in %al + cmpb $' ',%al // is blank? + ja 2b // if not, keep looping + + /* Return the word (well, the static buffer) and length. */ + sub $5f,%edi + mov %edi,%eax // return length of the word + mov $5f,%ebx // return address of the word + ret + + /* Code to skip \ comments to end of the current line. */ +3: + call _KEY + cmpb $'\n',%al // end of line yet? + jne 3b + jmp 1b + + .bss + // A static buffer where WORD returns. Subsequent calls + // overwrite this buffer. Maximum word length is 32 chars. +5: .space 32 + + defcode "ECHOWORD",8,,ECHOWORD + mov $1,%ebx // 1st param: stdout + pop %ecx // 2nd param: address of string + pop %edx // 3rd param: length of string + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + defcode ".",1,,DOT + pop %eax // Get the number to print into %eax + call _DOT // Easier to do this recursively ... + NEXT + +/* if eax >= 10 then print (eax / 10) + r = eax mod 10 + echo r */ +_DOT: + mov $10,%ecx // Base 10 + cmp %ecx,%eax + jb 1f + pushl %eax + xor %edx,%edx + idivl %ecx + call _DOT + popl %eax +1: + aam $10 + cbw + cwde + addl $'0',%eax + call _ECHO + ret + + defcode "FIND",4,,FIND + pop %edi // %edi = address + pop %ecx // %ecx = length + + push %esi // Save %esi so we can use it in string comparison. + + // Now we start searching backwards through the dictionary for this word. + mov v_latest,%edx // LATEST points to name header of the latest word in the dictionary +1: + test %edx,%edx // NULL pointer? (end of the linked list) + je 4f + + xor %eax,%eax + movb 4(%edx),%al // %al = flags+length field + andb $0x1f,%al // %al = name length + cmpb %cl,%al // Length is the same? + jne 2f + + // Compare the strings in detail. + push %ecx // Save the length + push %edi // Save the address (repe cmpsb will move this pointer) + lea 5(%edx),%esi // Dictionary string we are checking against. + repe cmpsb // Compare the strings. + pop %edi + pop %ecx + jne 2f // Not the same. + + // The strings are the same - return the header pointer on the stack. + pop %esi + push %edx + NEXT + +2: + mov (%edx),%edx // Move back through the link field to the previous word + jmp 1b // .. and loop. + +4: // Not found. + pop %esi + xor %eax,%eax // Push zero on to the stack to indicate not found. + push %eax + NEXT + +#if 0 + defcode ":",1,,COLON + call nextword // get next word, the procedure name + // The next word is returned in %ebx and has length %ecx bytes. + + // Save the current value of VOCAB. + mov v_vocab,%eax + push %eax + + // Change VOCAB to point to our new word's header (at LATEST). + mov v_latest,%edi + mov %edi,v_vocab + + // We'll start by writing the word's header at LATEST; the header + // is just length byte, the word itself, link pointer. + mov %ecx,(%edi) // Length byte + inc %edi + mov %ebx,%esi // Copy the string. + rep movsb + // Round up to the next multiple of 4 so that the link pointer + // is aligned. + or $3,%edi + inc %edi + pop %eax // Link pointer, points to old VOCAB. + mov %eax,(%edi) + add $4,%edi + // Write the codeword, which for user-defined words is always a + // pointer to the FORTH indirect threaded interpreter. + movl $DOCOL,(%edi) + add $4,%edi + + // Finally, update LATEST. As we go along compiling, we'll be + // writing compiled codewords to the LATEST pointer (and moving + // it along each time). + mov %edi,v_latest + + movl $1,v_state // go into compiling mode + ret + + defcode ";",1,F_IMMED,SEMICOLON + // XXX + +#endif + + defcode SYSEXIT,7,,SYSEXIT + pop %ebx + mov $__NR_exit,%eax + int $0x80 + +/*---------------------------------------------------------------------- + * Variables containing the interpreter's state. + */ + .data + + .align 4 +v_state: + .int 0 // 0 = immediate, non-zero = compiling +v_latest: + // XXX should use 'link', but how to join it with name_? + .int name_SYSEXIT // last word in the dictionary +v_here: + .int user_defs_start // pointer to next space for user definition or current compiled def + +/*---------------------------------------------------------------------- + * Input buffer & initial input. + */ + .data + .align 4096 +buffer: + .ascii " \n\ +\\ Define some constants \n\ +: '\\n' 10 ; \n\ +: ')' 41 ; \n\ +: 'space' 32 ; \n\ +: '\"' 34 ; \n\ +: '-' 45 ; \n\ +: '0' 48 ; \n\ + \n\ +\\ CR command \n\ +: CR '\\n' ECHO ; \n\ + \n\ +ECHO DUP DROP OVER \n\ +" + +_initbufftop: + .align 4096 +buffend: + +currkey: + .int buffer +bufftop: + .int _initbufftop diff --git a/jonesforth.S.8 b/jonesforth.S.8 new file mode 100644 index 0000000..c894342 --- /dev/null +++ b/jonesforth.S.8 @@ -0,0 +1,598 @@ +/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- + * By Richard W.M. Jones + * + * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S + */ + +#include + +/* NOTES------------------------------------------------------------------------------------------------------------------- + +Need to say something about $ before constants. + +And about je/jne/ja/jb/jbe/etc + + + + +*/ + +/* NEXT macro. */ + .macro NEXT + lodsl + jmp *(%eax) + .endm + +/* Macros to deal with the return stack. */ + .macro PUSHRSP reg + lea -4(%ebp),%ebp // push reg on to return stack + movl \reg,(%ebp) + .endm + + .macro POPRSP reg + mov (%ebp),\reg // pop top of return stack to reg + lea 4(%ebp),%ebp + .endm + +/* ELF entry point. */ + .text + .globl _start +_start: + cld + mov $return_stack,%ebp // Initialise the return stack. + + mov $cold_start,%esi // Initialise interpreter. + NEXT // Run interpreter! + + .section .rodata +cold_start: // High-level code without a codeword. + .int COLD + +/* DOCOL - the interpreter! */ + .text + .align 4 +DOCOL: + PUSHRSP %esi // push %esi on to the return stack + addl $4,%eax // %eax points to codeword, so make + movl %eax,%esi // %esi point to first data word + NEXT + +/*---------------------------------------------------------------------- + * Fixed sized buffers for everything. + */ + .bss + +/* FORTH return stack. */ +#define RETURN_STACK_SIZE 8192 + .align 4096 + .space RETURN_STACK_SIZE +return_stack: + +/* Space for user-defined words. */ +#define USER_DEFS_SIZE 16384 + .align 4096 +user_defs_start: + .space USER_DEFS_SIZE + + + + + + +/*---------------------------------------------------------------------- + * Built-in words defined the long way. + */ +#define F_IMMED 0x80 +#define F_HIDDEN 0x20 + + // Store the chain of links. + .set link,0 + + .macro defcode name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int code_\label // codeword + .text + .align 4 + .globl code_\label +code_\label : // assembler code follows + .endm + + .macro defword name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int DOCOL // codeword - the interpreter + // list of word pointers follow + .endm + + .macro defvar name, namelen, flags=0, label, initial=0 + defcode \name,\namelen,\flags,\label + push $var_\name + NEXT + .data + .align 4 +var_\name : + .int \initial + .endm + + /* Some easy ones .... */ + defcode "DUP",3,,DUP + pop %eax // duplicate top of stack + push %eax + push %eax + NEXT + + defcode "DROP",4,,DROP + pop %eax // drop top of stack + NEXT + + defcode "SWAP",4,,SWAP + pop %eax // swap top of stack + pop %ebx + push %eax + push %ebx + NEXT + + defcode "OVER",4,,OVER + mov 4(%esp),%eax // get the second element of stack + push %eax // and push it on top + NEXT + + // COLD must not return (ie. must not call EXIT). + defword "COLD",4,,COLD + // XXX reinitialisation of the interpreter + .int INTERPRETER // call the interpreter loop (never returns) + .int LIT,1,SYSEXIT // hmmm, but in case it does, exit(1). + + defcode "EXIT",4,,EXIT + POPRSP %esi // pop return stack into %esi + NEXT + + defcode "LIT",3,,LIT + // %esi points to the next command, but in this case it points to the next + // literal 32 bit integer. Get that literal into %eax and increment %esi. + // On x86, it's a convenient single byte instruction! (cf. NEXT macro) + lodsl + push %eax // push %eax on to stack + NEXT + + defcode "0SKIP",5,,ZSKIP + // If the top of stack is zero, skip the next instruction. + pop %eax + test %eax,%eax + jnz 1f + lodsl // this does the skip +1: NEXT + + defcode "!",1,,STORE + pop %ebx // address to store at + pop %eax // data to store there + mov %eax,(%ebx) // store it + NEXT + + defcode "@",1,,FETCH + pop %ebx // address to fetch + mov (%ebx),%eax // fetch it + push %eax // push value onto stack + NEXT + + // The STATE variable is 0 for execute mode, != 0 for compile mode + defvar "STATE",5,,STATE + + // This points to where compiled words go. + defvar "HERE",4,,HERE,user_defs_start + + // This is the last definition in the dictionary. + defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary + + defcode ">R",2,,TOR + pop %eax // pop parameter stack into %eax + PUSHRSP %eax // push it on to the return stack + NEXT + + defcode "R>",2,,FROMR + POPRSP %eax // pop return stack on to %eax + push %eax // and push on to parameter stack + NEXT + +#if 0 /* This definition is wrong. */ + defcode "R",1,,R + mov %(ebp),%eax // copy (don't pop) top of return stack to %eax + push %eax // and push on to parameter stack + NEXT +#endif + + defcode "RSP@",4,,RSPFETCH + push %ebp + NEXT + + defcode "RSP!",4,,RSPSTORE + pop %ebp + NEXT + + defcode "RDROP",5,,RDROP + lea 4(%ebp),%ebp // pop return stack and throw away + NEXT + + defcode "KEY",3,,KEY + call _KEY + push %eax // push return value on stack + NEXT +_KEY: + mov (currkey),%ebx + cmp (bufftop),%ebx + jge 1f + xor %eax,%eax + mov (%ebx),%al + inc %ebx + mov %ebx,(currkey) + ret +1: + mov $0,%ebx // out of input, exit (0) + mov $__NR_exit,%eax + int $0x80 + + defcode "EMIT",4,,EMIT + pop %eax + call _EMIT + NEXT +_EMIT: + mov $1,%ebx // 1st param: stdout + + // write needs the address of the byte to write + mov %al,(2f) + mov $2f,%ecx // 2nd param: address + + mov $1,%edx // 3rd param: nbytes = 1 + + mov $__NR_write,%eax // write syscall + int $0x80 + ret + + .bss +2: .space 1 // scratch used by EMIT + + defcode "WORD",4,,WORD + call _WORD + push %ecx // push length + push %edi // push base address + NEXT + +_WORD: + /* Search for first non-blank character. Also skip \ comments. */ +1: + call _KEY // get next key, returned in %eax + cmpb $'\\',%al // start of a comment? + je 3f // if so, skip the comment + cmpb $' ',%al + jbe 1b // if so, keep looking + + /* Search for the end of the word, storing chars as we go. */ + mov $5f,%edi // pointer to return buffer +2: + stosb // add character to return buffer + call _KEY // get next key, returned in %al + cmpb $' ',%al // is blank? + ja 2b // if not, keep looping + + /* Return the word (well, the static buffer) and length. */ + sub $5f,%edi + mov %edi,%ecx // return length of the word + mov $5f,%edi // return address of the word + ret + + /* Code to skip \ comments to end of the current line. */ +3: + call _KEY + cmpb $'\n',%al // end of line yet? + jne 3b + jmp 1b + + .bss + // A static buffer where WORD returns. Subsequent calls + // overwrite this buffer. Maximum word length is 32 chars. +5: .space 32 + + defcode "EMITWORD",8,,EMITWORD + mov $1,%ebx // 1st param: stdout + pop %ecx // 2nd param: address of string + pop %edx // 3rd param: length of string + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + defcode ".",1,,DOT + pop %eax // Get the number to print into %eax + call _DOT // Easier to do this recursively ... + NEXT +_DOT: + mov $10,%ecx // Base 10 + cmp %ecx,%eax + jb 1f + pushl %eax + xor %edx,%edx + idivl %ecx + call _DOT + popl %eax +1: + aam $10 + cbw + cwde + addl $'0',%eax + call _EMIT + ret + + // Parse a number from a string on the stack -- almost the opposite of . (DOT) + // Note that there is absolutely no error checking. In particular the length of the + // string must be >= 1 bytes. + defcode "SNUMBER",7,,SNUMBER + pop %edi + pop %ecx + call _SNUMBER + push %eax + NEXT +_SNUMBER: + xor %eax,%eax + xor %ebx,%ebx +1: + imull $10,%eax // %eax *= 10 + movb (%edi),%bl + inc %edi + subb $'0',%bl // ASCII -> digit + add %ebx,%eax + dec %ecx + jnz 1b + ret + + defcode "FIND",4,,FIND + pop %edi // %edi = address + pop %ecx // %ecx = length + call _FIND + push %eax + NEXT + +_FIND: + push %esi // Save %esi so we can use it in string comparison. + + // Now we start searching backwards through the dictionary for this word. + mov var_LATEST,%edx // LATEST points to name header of the latest word in the dictionary +1: + test %edx,%edx // NULL pointer? (end of the linked list) + je 4f + + // Compare the length expected and the length of the word. + // Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery + // this won't pick the word (the length will appear to be wrong). + xor %eax,%eax + movb 4(%edx),%al // %al = flags+length field + andb $(F_HIDDEN|0x1f),%al // %al = name length + cmpb %cl,%al // Length is the same? + jne 2f + + // Compare the strings in detail. + push %ecx // Save the length + push %edi // Save the address (repe cmpsb will move this pointer) + lea 5(%edx),%esi // Dictionary string we are checking against. + repe cmpsb // Compare the strings. + pop %edi + pop %ecx + jne 2f // Not the same. + + // The strings are the same - return the header pointer in %eax + pop %esi + mov %edx,%eax + ret + +2: + mov (%edx),%edx // Move back through the link field to the previous word + jmp 1b // .. and loop. + +4: // Not found. + pop %esi + xor %eax,%eax // Return zero to indicate not found. + ret + + defcode ">CFA",4,,TCFA // DEA -> Codeword address + pop %edi + call _TCFA + push %edi + NEXT +_TCFA: + xor %eax,%eax + add $4,%edi // Skip link pointer. + movb (%edi),%al // Load flags+len into %al. + inc %edi // Skip flags+len byte. + andb $0x1f,%al // Just the length, not the flags. + add %eax,%edi // Skip the name. + orl $3,%edi // The codeword is 4-byte aligned. + inc %edi + ret + + defword "'",1,,TICK + .int WORD // Get the following word. + .int FIND // Look it up in the dictionary. + .int DUP // If not found, skip >CFA (TCFA) instruction. + .int ZSKIP + .int TCFA // Convert to a codeword pointer. + .int EXIT // Return. + + defcode ":",1,,COLON + + // Get the word and create a dictionary entry header for it. + call _WORD // Returns %ecx = length, %edi = pointer to word. + mov %edi,%ebx // %ebx = address of the word + + movl var_HERE,%edi // %edi is the address of the header + movl var_LATEST,%eax // Get link pointer + stosl // and store it in the header. + + mov %cl,%al // Get the length. + orb $F_HIDDEN,%al // Set the HIDDEN flag on this entry. + stosb // Store the length/flags byte. + push %esi + mov %ebx,%esi // %esi = word + rep movsb // Copy the word + pop %esi + orl $3,%edi // Align to next 4 byte boundary. + inc %edi + + movl $DOCOL,%eax // The codeword for user-created words is always DOCOL (the interpreter) + stosl + + // Header built, so now update LATEST and HERE. + // We'll be compiling words and putting them HERE. + movl var_HERE,%eax + movl %eax,var_LATEST + movl %edi,var_HERE + + // And go into compile mode by setting STATE to 1. + xor %eax,%eax + inc %eax + mov %eax,var_STATE + NEXT + + defcode ",",1,,COMMA + pop %eax // Code pointer to store. + call _COMMA + NEXT +_COMMA: + movl var_HERE,%edi // HERE + stosl // Store it. + movl %edi,var_HERE // Update HERE (incremented) + ret + + defcode "HIDDEN",6,,HIDDEN + call _HIDDEN + NEXT +_HIDDEN: + movl var_LATEST,%edi // LATEST word. + addl $4,%edi // Point to name/flags byte. + xorb $F_HIDDEN,(%edi) // Toggle the HIDDEN bit. + ret + + defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE + call _IMMEDIATE + NEXT +_IMMEDIATE: + movl var_LATEST,%edi // LATEST word. + addl $4,%edi // Point to name/flags byte. + xorb $F_IMMED,(%edi) // Toggle the IMMED bit. + ret + + defcode ";",1,F_IMMED,SEMICOLON + movl $EXIT,%eax // EXIT is the final codeword in compiled words. + call _COMMA // Store it. + call _HIDDEN // Toggle the HIDDEN flag (unhides the new word). + xor %eax,%eax // Set STATE to 0 (back to execute mode). + movl %eax,var_STATE + NEXT + +/* This interpreter is pretty simple, but remember that in FORTH you can always override + * it later with a more powerful one! + */ + defword "INTERPRETER",11,,INTERPRETER + .int INTERPRET,RDROP,INTERPRETER + + defcode "INTERPRET",9,,INTERPRET + call _WORD // Returns %ecx = length, %edi = pointer to word. + + // Is it in the dictionary? + call _FIND // Returns %eax = pointer to header or 0 if not found. + test %eax,%eax // Found? + jz 1f + + // In the dictionary. Is it an IMMEDIATE codeword? + mov %eax,%edi // %edi = dictionary entry + movb 4(%edi),%al // Get name+flags. + push %ax // Just save it for now. + call _TCFA // Convert dictionary entry (in %edi) to codeword pointer. + pop %ax + andb $F_IMMED,%al // Is IMMED flag set? + mov %edi,%eax + jnz 3f // If IMMED, jump straight to executing. + + jmp 2f + +1: // Not in the dictionary (not a word) so assume it's a number. + call _SNUMBER // Returns the parsed number in %eax + mov %eax,%ebx + mov $LIT,%eax // The word is LIT + +2: // Are we compiling or executing? + movl var_STATE,%edx + test %edx,%edx + jz 3f // Jump if executing. + + // Compiling - just append the word to the current dictionary definition. + call _COMMA + cmp $LIT,%eax // Was it LIT? + jne 4f + mov %ebx,%eax // Yes, so LIT is followed by a number. + call _COMMA + NEXT + +3: // Executing - run it! + cmp $LIT,%eax // Literal? + je 4f + // Not a literal, execute it now. This never returns, but the codeword will + // eventually call NEXT which will reenter the loop in INTERPRETER. + jmp *(%eax) + +4: // Executing a literal, which means push it on the stack. + push %ebx + NEXT + + // NB: SYSEXIT must be the last entry in the built-in dictionary. + defcode SYSEXIT,7,,SYSEXIT + pop %ebx + mov $__NR_exit,%eax + int $0x80 + +/*---------------------------------------------------------------------- + * Input buffer & initial input. + */ + .data + .align 4096 +buffer: + .ascii "\n\ +: '\\n' 10 ; \n\ +: ')' 41 ; \n\ +: 'space' 32 ; \n\ +: '\"' 34 ; \n\ +: '-' 45 ; \n\ +: '0' 48 ; \n\ + \n\ +\\ CR command \n\ +: CR '\\n' EMIT ; \n\ + \n\ +CR CR '0' EMIT CR CR \n\ +" + +_initbufftop: + .align 4096 +buffend: + +currkey: + .int buffer +bufftop: + .int _initbufftop diff --git a/jonesforth.S.9 b/jonesforth.S.9 new file mode 100644 index 0000000..5267bb9 --- /dev/null +++ b/jonesforth.S.9 @@ -0,0 +1,630 @@ +/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- + * By Richard W.M. Jones + * + * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S + */ + +#include + +/* NOTES------------------------------------------------------------------------------------------------------------------- + +Need to say something about $ before constants. + +And about je/jne/ja/jb/jbe/etc + + + + +*/ + +/* NEXT macro. */ + .macro NEXT + lodsl + jmp *(%eax) + .endm + +/* Macros to deal with the return stack. */ + .macro PUSHRSP reg + lea -4(%ebp),%ebp // push reg on to return stack + movl \reg,(%ebp) + .endm + + .macro POPRSP reg + mov (%ebp),\reg // pop top of return stack to reg + lea 4(%ebp),%ebp + .endm + +/* ELF entry point. */ + .text + .globl _start +_start: + cld + mov $return_stack,%ebp // Initialise the return stack. + + mov $cold_start,%esi // Initialise interpreter. + NEXT // Run interpreter! + + .section .rodata +cold_start: // High-level code without a codeword. + .int COLD + +/* DOCOL - the interpreter! */ + .text + .align 4 +DOCOL: + PUSHRSP %esi // push %esi on to the return stack + addl $4,%eax // %eax points to codeword, so make + movl %eax,%esi // %esi point to first data word + NEXT + +/*---------------------------------------------------------------------- + * Fixed sized buffers for everything. + */ + .bss + +/* FORTH return stack. */ +#define RETURN_STACK_SIZE 8192 + .align 4096 + .space RETURN_STACK_SIZE +return_stack: + +/* Space for user-defined words. */ +#define USER_DEFS_SIZE 16384 + .align 4096 +user_defs_start: + .space USER_DEFS_SIZE + + + + + + +/*---------------------------------------------------------------------- + * Built-in words defined the long way. + */ +#define F_IMMED 0x80 +#define F_HIDDEN 0x20 + + // Store the chain of links. + .set link,0 + + .macro defcode name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int code_\label // codeword + .text + .align 4 + .globl code_\label +code_\label : // assembler code follows + .endm + + .macro defword name, namelen, flags=0, label + .section .rodata + .align 4 + .globl name_\label +name_\label : + .int link // link + .set link,name_\label + .byte \flags+\namelen // flags + length byte + .ascii "\name" // the name + .align 4 + .globl \label +\label : + .int DOCOL // codeword - the interpreter + // list of word pointers follow + .endm + + .macro defvar name, namelen, flags=0, label, initial=0 + defcode \name,\namelen,\flags,\label + push $var_\name + NEXT + .data + .align 4 +var_\name : + .int \initial + .endm + + // Some easy ones, written in assembly for speed + defcode "DROP",4,,DROP + pop %eax // drop top of stack + NEXT + + defcode "DUP",3,,DUP + pop %eax // duplicate top of stack + push %eax + push %eax + NEXT + + defcode "SWAP",4,,SWAP + pop %eax // swap top of stack + pop %ebx + push %eax + push %ebx + NEXT + + defcode "OVER",4,,OVER + mov 4(%esp),%eax // get the second element of stack + push %eax // and push it on top + NEXT + + // COLD must not return (ie. must not call EXIT). + defword "COLD",4,,COLD + // XXX reinitialisation of the interpreter + .int INTERPRETER // call the interpreter loop (never returns) + .int LIT,1,SYSEXIT // hmmm, but in case it does, exit(1). + + defcode "EXIT",4,,EXIT + POPRSP %esi // pop return stack into %esi + NEXT + + defcode "LIT",3,,LIT + // %esi points to the next command, but in this case it points to the next + // literal 32 bit integer. Get that literal into %eax and increment %esi. + // On x86, it's a convenient single byte instruction! (cf. NEXT macro) + lodsl + push %eax // push the literal number on to stack + NEXT + +#if 0 + defcode "0SKIP",5,,ZSKIP + // If the top of stack is zero, skip the next instruction. + pop %eax + test %eax,%eax + jnz 1f + lodsl // this does the skip +1: NEXT +#endif + + defcode "!",1,,STORE + pop %ebx // address to store at + pop %eax // data to store there + mov %eax,(%ebx) // store it + NEXT + + defcode "@",1,,FETCH + pop %ebx // address to fetch + mov (%ebx),%eax // fetch it + push %eax // push value onto stack + NEXT + + // The STATE variable is 0 for execute mode, != 0 for compile mode + defvar "STATE",5,,STATE + + // This points to where compiled words go. + defvar "HERE",4,,HERE,user_defs_start + + // This is the last definition in the dictionary. + defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary + + // _X, _Y and _Z are scratch variables used by standard words. + defvar "_X",2,,TX + defvar "_Y",2,,TY + defvar "_Z",2,,TZ + + defcode ">R",2,,TOR + pop %eax // pop parameter stack into %eax + PUSHRSP %eax // push it on to the return stack + NEXT + + defcode "R>",2,,FROMR + POPRSP %eax // pop return stack on to %eax + push %eax // and push on to parameter stack + NEXT + + defcode "RSP@",4,,RSPFETCH + push %ebp + NEXT + + defcode "RSP!",4,,RSPSTORE + pop %ebp + NEXT + + defcode "RDROP",5,,RDROP + lea 4(%ebp),%ebp // pop return stack and throw away + NEXT + + defcode "KEY",3,,KEY + call _KEY + push %eax // push return value on stack + NEXT +_KEY: + mov (currkey),%ebx + cmp (bufftop),%ebx + jge 1f + xor %eax,%eax + mov (%ebx),%al + inc %ebx + mov %ebx,(currkey) + ret + +1: // out of input; use read(2) to fetch more input from stdin + xor %ebx,%ebx // 1st param: stdin + mov $buffer,%ecx // 2nd param: buffer + mov %ecx,currkey + mov $buffend-buffer,%edx // 3rd param: max length + mov $__NR_read,%eax // syscall: read + int $0x80 + test %eax,%eax // If %eax <= 0, then exit. + jbe 2f + addl %eax,%ecx // buffer+%eax = bufftop + mov %ecx,bufftop + jmp _KEY + +2: // error or out of input: exit + xor %ebx,%ebx + mov $__NR_exit,%eax // syscall: exit + int $0x80 + + defcode "EMIT",4,,EMIT + pop %eax + call _EMIT + NEXT +_EMIT: + mov $1,%ebx // 1st param: stdout + + // write needs the address of the byte to write + mov %al,(2f) + mov $2f,%ecx // 2nd param: address + + mov $1,%edx // 3rd param: nbytes = 1 + + mov $__NR_write,%eax // write syscall + int $0x80 + ret + + .bss +2: .space 1 // scratch used by EMIT + + defcode "WORD",4,,WORD + call _WORD + push %ecx // push length + push %edi // push base address + NEXT + +_WORD: + /* Search for first non-blank character. Also skip \ comments. */ +1: + call _KEY // get next key, returned in %eax + cmpb $'\\',%al // start of a comment? + je 3f // if so, skip the comment + cmpb $' ',%al + jbe 1b // if so, keep looking + + /* Search for the end of the word, storing chars as we go. */ + mov $5f,%edi // pointer to return buffer +2: + stosb // add character to return buffer + call _KEY // get next key, returned in %al + cmpb $' ',%al // is blank? + ja 2b // if not, keep looping + + /* Return the word (well, the static buffer) and length. */ + sub $5f,%edi + mov %edi,%ecx // return length of the word + mov $5f,%edi // return address of the word + ret + + /* Code to skip \ comments to end of the current line. */ +3: + call _KEY + cmpb $'\n',%al // end of line yet? + jne 3b + jmp 1b + + .bss + // A static buffer where WORD returns. Subsequent calls + // overwrite this buffer. Maximum word length is 32 chars. +5: .space 32 + + defcode "EMITWORD",8,,EMITWORD + mov $1,%ebx // 1st param: stdout + pop %ecx // 2nd param: address of string + pop %edx // 3rd param: length of string + + mov $__NR_write,%eax // write syscall + int $0x80 + + NEXT + + defcode ".",1,,DOT + pop %eax // Get the number to print into %eax + call _DOT // Easier to do this recursively ... + NEXT +_DOT: + mov $10,%ecx // Base 10 + cmp %ecx,%eax + jb 1f + pushl %eax + xor %edx,%edx + idivl %ecx + call _DOT + popl %eax +1: + aam $10 + cbw + cwde + addl $'0',%eax + call _EMIT + ret + + // Parse a number from a string on the stack -- almost the opposite of . (DOT) + // Note that there is absolutely no error checking. In particular the length of the + // string must be >= 1 bytes. + defcode "SNUMBER",7,,SNUMBER + pop %edi + pop %ecx + call _SNUMBER + push %eax + NEXT +_SNUMBER: + xor %eax,%eax + xor %ebx,%ebx +1: + imull $10,%eax // %eax *= 10 + movb (%edi),%bl + inc %edi + subb $'0',%bl // ASCII -> digit + add %ebx,%eax + dec %ecx + jnz 1b + ret + + defcode "FIND",4,,FIND + pop %edi // %edi = address + pop %ecx // %ecx = length + call _FIND + push %eax + NEXT + +_FIND: + push %esi // Save %esi so we can use it in string comparison. + + // Now we start searching backwards through the dictionary for this word. + mov var_LATEST,%edx // LATEST points to name header of the latest word in the dictionary +1: + test %edx,%edx // NULL pointer? (end of the linked list) + je 4f + + // Compare the length expected and the length of the word. + // Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery + // this won't pick the word (the length will appear to be wrong). + xor %eax,%eax + movb 4(%edx),%al // %al = flags+length field + andb $(F_HIDDEN|0x1f),%al // %al = name length + cmpb %cl,%al // Length is the same? + jne 2f + + // Compare the strings in detail. + push %ecx // Save the length + push %edi // Save the address (repe cmpsb will move this pointer) + lea 5(%edx),%esi // Dictionary string we are checking against. + repe cmpsb // Compare the strings. + pop %edi + pop %ecx + jne 2f // Not the same. + + // The strings are the same - return the header pointer in %eax + pop %esi + mov %edx,%eax + ret + +2: + mov (%edx),%edx // Move back through the link field to the previous word + jmp 1b // .. and loop. + +4: // Not found. + pop %esi + xor %eax,%eax // Return zero to indicate not found. + ret + + defcode ">CFA",4,,TCFA // DEA -> Codeword address + pop %edi + call _TCFA + push %edi + NEXT +_TCFA: + xor %eax,%eax + add $4,%edi // Skip link pointer. + movb (%edi),%al // Load flags+len into %al. + inc %edi // Skip flags+len byte. + andb $0x1f,%al // Just the length, not the flags. + add %eax,%edi // Skip the name. + addl $3,%edi // The codeword is 4-byte aligned. + andl $~3,%edi + ret + +#if 0 + defword "'",1,,TICK + .int WORD // Get the following word. + .int FIND // Look it up in the dictionary. + .int DUP // If not found, skip >CFA (TCFA) instruction. + .int ZSKIP + .int TCFA // Convert to a codeword pointer. + .int EXIT // Return. +#endif + + defcode ":",1,,COLON + + // Get the word and create a dictionary entry header for it. + call _WORD // Returns %ecx = length, %edi = pointer to word. + mov %edi,%ebx // %ebx = address of the word + + movl var_HERE,%edi // %edi is the address of the header + movl var_LATEST,%eax // Get link pointer + stosl // and store it in the header. + + mov %cl,%al // Get the length. + orb $F_HIDDEN,%al // Set the HIDDEN flag on this entry. + stosb // Store the length/flags byte. + push %esi + mov %ebx,%esi // %esi = word + rep movsb // Copy the word + pop %esi + addl $3,%edi // Align to next 4 byte boundary. + andl $~3,%edi + + movl $DOCOL,%eax // The codeword for user-created words is always DOCOL (the interpreter) + stosl + + // Header built, so now update LATEST and HERE. + // We'll be compiling words and putting them HERE. + movl var_HERE,%eax + movl %eax,var_LATEST + movl %edi,var_HERE + + // And go into compile mode by setting STATE to 1. + xor %eax,%eax + inc %eax + mov %eax,var_STATE + NEXT + + defcode ",",1,,COMMA + pop %eax // Code pointer to store. + call _COMMA + NEXT +_COMMA: + movl var_HERE,%edi // HERE + stosl // Store it. + movl %edi,var_HERE // Update HERE (incremented) + ret + + defcode "HIDDEN",6,,HIDDEN + call _HIDDEN + NEXT +_HIDDEN: + movl var_LATEST,%edi // LATEST word. + addl $4,%edi // Point to name/flags byte. + xorb $F_HIDDEN,(%edi) // Toggle the HIDDEN bit. + ret + + defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE + call _IMMEDIATE + NEXT +_IMMEDIATE: + movl var_LATEST,%edi // LATEST word. + addl $4,%edi // Point to name/flags byte. + xorb $F_IMMED,(%edi) // Toggle the IMMED bit. + ret + + defcode ";",1,F_IMMED,SEMICOLON + movl $EXIT,%eax // EXIT is the final codeword in compiled words. + call _COMMA // Store it. + call _HIDDEN // Toggle the HIDDEN flag (unhides the new word). + xor %eax,%eax // Set STATE to 0 (back to execute mode). + movl %eax,var_STATE + NEXT + +/* This interpreter is pretty simple, but remember that in FORTH you can always override + * it later with a more powerful one! + */ + defword "INTERPRETER",11,,INTERPRETER + .int INTERPRET,RDROP,INTERPRETER + + defcode "INTERPRET",9,,INTERPRET + call _WORD // Returns %ecx = length, %edi = pointer to word. + + // Is it in the dictionary? + call _FIND // Returns %eax = pointer to header or 0 if not found. + test %eax,%eax // Found? + jz 1f + + // In the dictionary. Is it an IMMEDIATE codeword? + mov %eax,%edi // %edi = dictionary entry + movb 4(%edi),%al // Get name+flags. + push %ax // Just save it for now. + call _TCFA // Convert dictionary entry (in %edi) to codeword pointer. + pop %ax + andb $F_IMMED,%al // Is IMMED flag set? + mov %edi,%eax + jnz 3f // If IMMED, jump straight to executing. + + jmp 2f + +1: // Not in the dictionary (not a word) so assume it's a number. + call _SNUMBER // Returns the parsed number in %eax + mov %eax,%ebx + mov $LIT,%eax // The word is LIT + +2: // Are we compiling or executing? + movl var_STATE,%edx + test %edx,%edx + jz 3f // Jump if executing. + + // Compiling - just append the word to the current dictionary definition. + call _COMMA + cmp $LIT,%eax // Was it LIT? + jne 4f + mov %ebx,%eax // Yes, so LIT is followed by a number. + call _COMMA + NEXT + +3: // Executing - run it! + cmp $LIT,%eax // Literal? + je 4f + // Not a literal, execute it now. This never returns, but the codeword will + // eventually call NEXT which will reenter the loop in INTERPRETER. + jmp *(%eax) + +4: // Executing a literal, which means push it on the stack. + push %ebx + NEXT + + // NB: SYSEXIT must be the last entry in the built-in dictionary. + defcode SYSEXIT,7,,SYSEXIT + pop %ebx + mov $__NR_exit,%eax + int $0x80 + +/*---------------------------------------------------------------------- + * Input buffer & initial input. + */ + .data + .align 4096 +buffer: + // XXX gives 'Warning: unterminated string; newline inserted' messages which you can ignore + .ascii "\ +\\ Define some character constants +: '\\n' 10 ; +: 'SPACE' 32 ; +: '\"' 34 ; + +\\ CR prints a carriage return +: CR '\\n' EMIT ; + +\\ SPACE prints a space +: SPACE 'SPACE' EMIT ; + +\\ Primitive . (DOT) function doesn't follow with a blank, so redefine it to behave like FORTH. +\\ Notice how we can trivially redefine existing functions. +: . . SPACE ; + +\\ XXX SPACES + +\\ DUP, DROP are defined in assembly for speed, but this is how you might define them +\\ in FORTH. Notice use of the scratch variables _X and _Y. +\\ : DUP _X ! _X @ _X @ ; +\\ : DROP _X ! ; + + + +\\ Finally print the welcome prompt. +79 EMIT 75 EMIT 'SPACE' EMIT +" + +_initbufftop: + .align 4096 +buffend: + +currkey: + .int buffer +bufftop: + .int _initbufftop -- 1.8.3.1