/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- * By Richard W.M. Jones * * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S */ #include /* NOTES------------------------------------------------------------------------------------------------------------------- Need to say something about $ before constants. And about je/jne/ja/jb/jbe/etc */ /* NEXT macro. */ .macro NEXT lodsl jmp *(%eax) .endm /* Macros to deal with the return stack. */ .macro PUSHRSP reg lea -4(%ebp),%ebp // push reg on to return stack movl \reg,(%ebp) .endm .macro POPRSP reg mov (%ebp),\reg // pop top of return stack to reg lea 4(%ebp),%ebp .endm /* ELF entry point. */ .text .globl _start _start: cld mov $return_stack,%ebp // Initialise the return stack. mov $cold_start,%esi // Initialise interpreter. NEXT // Run interpreter! .section .rodata cold_start: // High-level code without a codeword. .int COLD /* DOCOL - the interpreter! */ .text .align 4 DOCOL: PUSHRSP %esi // push %esi on to the return stack addl $4,%eax // %eax points to codeword, so make movl %eax,%esi // %esi point to first data word NEXT /*---------------------------------------------------------------------- * Fixed sized buffers for everything. */ .bss /* FORTH return stack. */ #define RETURN_STACK_SIZE 8192 .align 4096 .space RETURN_STACK_SIZE return_stack: /* Space for user-defined words. */ #define USER_DEFS_SIZE 16384 .align 4096 user_defs_start: .space USER_DEFS_SIZE /*---------------------------------------------------------------------- * Built-in words defined the long way. */ #define F_IMMED 0x80 #define F_HIDDEN 0x20 // Store the chain of links. .set link,0 .macro defcode name, namelen, flags=0, label .section .rodata .align 4 .globl name_\label name_\label : .int link // link .set link,name_\label .byte \flags+\namelen // flags + length byte .ascii "\name" // the name .align 4 .globl \label \label : .int code_\label // codeword .text .align 4 .globl code_\label code_\label : // assembler code follows .endm .macro defword name, namelen, flags=0, label .section .rodata .align 4 .globl name_\label name_\label : .int link // link .set link,name_\label .byte \flags+\namelen // flags + length byte .ascii "\name" // the name .align 4 .globl \label \label : .int DOCOL // codeword - the interpreter // list of word pointers follow .endm .macro defvar name, namelen, flags=0, label, initial=0 defcode \name,\namelen,\flags,\label push $var_\name NEXT .data .align 4 var_\name : .int \initial .endm // Some easy ones, written in assembly for speed defcode "DROP",4,,DROP pop %eax // drop top of stack NEXT defcode "DUP",3,,DUP pop %eax // duplicate top of stack push %eax push %eax NEXT defcode "SWAP",4,,SWAP pop %eax // swap top of stack pop %ebx push %eax push %ebx NEXT defcode "OVER",4,,OVER mov 4(%esp),%eax // get the second element of stack push %eax // and push it on top NEXT defcode "1+",2,,INCR incl (%esp) // increment top of stack NEXT defcode "1-",2,,DECR decl (%esp) // decrement top of stack NEXT defcode "+",1,,ADD pop %eax addl %eax,(%esp) NEXT defcode "-",1,,SUB pop %eax subl %eax,(%esp) NEXT defcode "*",1,,MUL pop %eax pop %ebx imull %ebx,%eax push %eax // ignore overflow NEXT defcode "/",1,,DIV xor %edx,%edx pop %ebx pop %eax idivl %ebx push %eax // push quotient NEXT defcode "MOD",3,,MOD xor %edx,%edx pop %ebx pop %eax idivl %ebx push %edx // push remainder NEXT // COLD must not return (ie. must not call EXIT). defword "COLD",4,,COLD // XXX reinitialisation of the interpreter .int INTERPRETER // call the interpreter loop (never returns) .int LIT,1,SYSEXIT // hmmm, but in case it does, exit(1). defcode "EXIT",4,,EXIT POPRSP %esi // pop return stack into %esi NEXT defcode "LIT",3,,LIT // %esi points to the next command, but in this case it points to the next // literal 32 bit integer. Get that literal into %eax and increment %esi. // On x86, it's a convenient single byte instruction! (cf. NEXT macro) lodsl push %eax // push the literal number on to stack NEXT #if 0 defcode "0SKIP",5,,ZSKIP // If the top of stack is zero, skip the next instruction. pop %eax test %eax,%eax jnz 1f lodsl // this does the skip 1: NEXT #endif defcode "!",1,,STORE pop %ebx // address to store at pop %eax // data to store there mov %eax,(%ebx) // store it NEXT defcode "@",1,,FETCH pop %ebx // address to fetch mov (%ebx),%eax // fetch it push %eax // push value onto stack NEXT // The STATE variable is 0 for execute mode, != 0 for compile mode defvar "STATE",5,,STATE // This points to where compiled words go. defvar "HERE",4,,HERE,user_defs_start // This is the last definition in the dictionary. defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary // _X, _Y and _Z are scratch variables used by standard words. defvar "_X",2,,TX defvar "_Y",2,,TY defvar "_Z",2,,TZ defcode ">R",2,,TOR pop %eax // pop parameter stack into %eax PUSHRSP %eax // push it on to the return stack NEXT defcode "R>",2,,FROMR POPRSP %eax // pop return stack on to %eax push %eax // and push on to parameter stack NEXT defcode "RSP@",4,,RSPFETCH push %ebp NEXT defcode "RSP!",4,,RSPSTORE pop %ebp NEXT defcode "RDROP",5,,RDROP lea 4(%ebp),%ebp // pop return stack and throw away NEXT defcode "KEY",3,,KEY call _KEY push %eax // push return value on stack NEXT _KEY: mov (currkey),%ebx cmp (bufftop),%ebx jge 1f xor %eax,%eax mov (%ebx),%al inc %ebx mov %ebx,(currkey) ret 1: // out of input; use read(2) to fetch more input from stdin xor %ebx,%ebx // 1st param: stdin mov $buffer,%ecx // 2nd param: buffer mov %ecx,currkey mov $buffend-buffer,%edx // 3rd param: max length mov $__NR_read,%eax // syscall: read int $0x80 test %eax,%eax // If %eax <= 0, then exit. jbe 2f addl %eax,%ecx // buffer+%eax = bufftop mov %ecx,bufftop jmp _KEY 2: // error or out of input: exit xor %ebx,%ebx mov $__NR_exit,%eax // syscall: exit int $0x80 defcode "EMIT",4,,EMIT pop %eax call _EMIT NEXT _EMIT: mov $1,%ebx // 1st param: stdout // write needs the address of the byte to write mov %al,(2f) mov $2f,%ecx // 2nd param: address mov $1,%edx // 3rd param: nbytes = 1 mov $__NR_write,%eax // write syscall int $0x80 ret .bss 2: .space 1 // scratch used by EMIT defcode "WORD",4,,WORD call _WORD push %ecx // push length push %edi // push base address NEXT _WORD: /* Search for first non-blank character. Also skip \ comments. */ 1: call _KEY // get next key, returned in %eax cmpb $'\\',%al // start of a comment? je 3f // if so, skip the comment cmpb $' ',%al jbe 1b // if so, keep looking /* Search for the end of the word, storing chars as we go. */ mov $5f,%edi // pointer to return buffer 2: stosb // add character to return buffer call _KEY // get next key, returned in %al cmpb $' ',%al // is blank? ja 2b // if not, keep looping /* Return the word (well, the static buffer) and length. */ sub $5f,%edi mov %edi,%ecx // return length of the word mov $5f,%edi // return address of the word ret /* Code to skip \ comments to end of the current line. */ 3: call _KEY cmpb $'\n',%al // end of line yet? jne 3b jmp 1b .bss // A static buffer where WORD returns. Subsequent calls // overwrite this buffer. Maximum word length is 32 chars. 5: .space 32 defcode "EMITWORD",8,,EMITWORD mov $1,%ebx // 1st param: stdout pop %ecx // 2nd param: address of string pop %edx // 3rd param: length of string mov $__NR_write,%eax // write syscall int $0x80 NEXT defcode ".",1,,DOT pop %eax // Get the number to print into %eax call _DOT // Easier to do this recursively ... NEXT _DOT: mov $10,%ecx // Base 10 1: cmp %ecx,%eax jb 2f xor %edx,%edx // %edx:%eax / %ecx -> quotient %eax, remainder %edx idivl %ecx pushl %edx call _DOT popl %eax jmp 1b 2: xor %ah,%ah aam $10 cwde addl $'0',%eax call _EMIT ret // Parse a number from a string on the stack -- almost the opposite of . (DOT) // Note that there is absolutely no error checking. In particular the length of the // string must be >= 1 bytes. defcode "SNUMBER",7,,SNUMBER pop %edi pop %ecx call _SNUMBER push %eax NEXT _SNUMBER: xor %eax,%eax xor %ebx,%ebx 1: imull $10,%eax // %eax *= 10 movb (%edi),%bl inc %edi subb $'0',%bl // ASCII -> digit add %ebx,%eax dec %ecx jnz 1b ret defcode "FIND",4,,FIND pop %edi // %edi = address pop %ecx // %ecx = length call _FIND push %eax NEXT _FIND: push %esi // Save %esi so we can use it in string comparison. // Now we start searching backwards through the dictionary for this word. mov var_LATEST,%edx // LATEST points to name header of the latest word in the dictionary 1: test %edx,%edx // NULL pointer? (end of the linked list) je 4f // Compare the length expected and the length of the word. // Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery // this won't pick the word (the length will appear to be wrong). xor %eax,%eax movb 4(%edx),%al // %al = flags+length field andb $(F_HIDDEN|0x1f),%al // %al = name length cmpb %cl,%al // Length is the same? jne 2f // Compare the strings in detail. push %ecx // Save the length push %edi // Save the address (repe cmpsb will move this pointer) lea 5(%edx),%esi // Dictionary string we are checking against. repe cmpsb // Compare the strings. pop %edi pop %ecx jne 2f // Not the same. // The strings are the same - return the header pointer in %eax pop %esi mov %edx,%eax ret 2: mov (%edx),%edx // Move back through the link field to the previous word jmp 1b // .. and loop. 4: // Not found. pop %esi xor %eax,%eax // Return zero to indicate not found. ret defcode ">CFA",4,,TCFA // DEA -> Codeword address pop %edi call _TCFA push %edi NEXT _TCFA: xor %eax,%eax add $4,%edi // Skip link pointer. movb (%edi),%al // Load flags+len into %al. inc %edi // Skip flags+len byte. andb $0x1f,%al // Just the length, not the flags. add %eax,%edi // Skip the name. addl $3,%edi // The codeword is 4-byte aligned. andl $~3,%edi ret #if 0 defword "'",1,,TICK .int WORD // Get the following word. .int FIND // Look it up in the dictionary. .int DUP // If not found, skip >CFA (TCFA) instruction. .int ZSKIP .int TCFA // Convert to a codeword pointer. .int EXIT // Return. #endif defcode ":",1,,COLON // Get the word and create a dictionary entry header for it. call _WORD // Returns %ecx = length, %edi = pointer to word. mov %edi,%ebx // %ebx = address of the word movl var_HERE,%edi // %edi is the address of the header movl var_LATEST,%eax // Get link pointer stosl // and store it in the header. mov %cl,%al // Get the length. orb $F_HIDDEN,%al // Set the HIDDEN flag on this entry. stosb // Store the length/flags byte. push %esi mov %ebx,%esi // %esi = word rep movsb // Copy the word pop %esi addl $3,%edi // Align to next 4 byte boundary. andl $~3,%edi movl $DOCOL,%eax // The codeword for user-created words is always DOCOL (the interpreter) stosl // Header built, so now update LATEST and HERE. // We'll be compiling words and putting them HERE. movl var_HERE,%eax movl %eax,var_LATEST movl %edi,var_HERE // And go into compile mode by setting STATE to 1. xor %eax,%eax inc %eax mov %eax,var_STATE NEXT defcode ",",1,,COMMA pop %eax // Code pointer to store. call _COMMA NEXT _COMMA: movl var_HERE,%edi // HERE stosl // Store it. movl %edi,var_HERE // Update HERE (incremented) ret defcode "HIDDEN",6,,HIDDEN call _HIDDEN NEXT _HIDDEN: movl var_LATEST,%edi // LATEST word. addl $4,%edi // Point to name/flags byte. xorb $F_HIDDEN,(%edi) // Toggle the HIDDEN bit. ret defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE call _IMMEDIATE NEXT _IMMEDIATE: movl var_LATEST,%edi // LATEST word. addl $4,%edi // Point to name/flags byte. xorb $F_IMMED,(%edi) // Toggle the IMMED bit. ret defcode ";",1,F_IMMED,SEMICOLON movl $EXIT,%eax // EXIT is the final codeword in compiled words. call _COMMA // Store it. call _HIDDEN // Toggle the HIDDEN flag (unhides the new word). xor %eax,%eax // Set STATE to 0 (back to execute mode). movl %eax,var_STATE NEXT /* This interpreter is pretty simple, but remember that in FORTH you can always override * it later with a more powerful one! */ defword "INTERPRETER",11,,INTERPRETER .int INTERPRET,RDROP,INTERPRETER defcode "INTERPRET",9,,INTERPRET call _WORD // Returns %ecx = length, %edi = pointer to word. // Is it in the dictionary? call _FIND // Returns %eax = pointer to header or 0 if not found. test %eax,%eax // Found? jz 1f // In the dictionary. Is it an IMMEDIATE codeword? mov %eax,%edi // %edi = dictionary entry movb 4(%edi),%al // Get name+flags. push %ax // Just save it for now. call _TCFA // Convert dictionary entry (in %edi) to codeword pointer. pop %ax andb $F_IMMED,%al // Is IMMED flag set? mov %edi,%eax jnz 3f // If IMMED, jump straight to executing. jmp 2f 1: // Not in the dictionary (not a word) so assume it's a number. call _SNUMBER // Returns the parsed number in %eax mov %eax,%ebx mov $LIT,%eax // The word is LIT 2: // Are we compiling or executing? movl var_STATE,%edx test %edx,%edx jz 3f // Jump if executing. // Compiling - just append the word to the current dictionary definition. call _COMMA cmp $LIT,%eax // Was it LIT? jne 4f mov %ebx,%eax // Yes, so LIT is followed by a number. call _COMMA NEXT 3: // Executing - run it! cmp $LIT,%eax // Literal? je 4f // Not a literal, execute it now. This never returns, but the codeword will // eventually call NEXT which will reenter the loop in INTERPRETER. jmp *(%eax) 4: // Executing a literal, which means push it on the stack. push %ebx NEXT // NB: SYSEXIT must be the last entry in the built-in dictionary. defcode SYSEXIT,7,,SYSEXIT pop %ebx mov $__NR_exit,%eax int $0x80 /*---------------------------------------------------------------------- * Input buffer & initial input. */ .data .align 4096 buffer: // XXX gives 'Warning: unterminated string; newline inserted' messages which you can ignore .ascii "\ \\ Define some character constants : '\\n' 10 ; : 'SPACE' 32 ; : '\"' 34 ; \\ CR prints a carriage return : CR '\\n' EMIT ; \\ SPACE prints a space : SPACE 'SPACE' EMIT ; \\ Primitive . (DOT) function doesn't follow with a blank, so redefine it to behave like FORTH. \\ Notice how we can trivially redefine existing functions. : . . SPACE ; \\ XXX SPACES \\ DUP, DROP are defined in assembly for speed, but this is how you might define them \\ in FORTH. Notice use of the scratch variables _X and _Y. \\ : DUP _X ! _X @ _X @ ; \\ : DROP _X ! ; \\ Finally print the welcome prompt. 79 EMIT 75 EMIT 'SPACE' EMIT " _initbufftop: .align 4096 buffend: currkey: .int buffer bufftop: .int _initbufftop