+++ /dev/null
-/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
- * By Richard W.M. Jones <rich@annexia.org>
- *
- * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
- */
-
-#include <asm-i386/unistd.h>
-
-/* NOTES-------------------------------------------------------------------------------------------------------------------
-
-Need to say something about $ before constants.
-
-And about je/jne/ja/jb/jbe/etc
-
-
-
-
-*/
-
-/* NEXT macro. */
- .macro NEXT
- lodsl
- jmp *(%eax)
- .endm
-
-/* Macros to deal with the return stack. */
- .macro PUSHRSP reg
- lea -4(%ebp),%ebp // push reg on to return stack
- movl \reg,(%ebp)
- .endm
-
- .macro POPRSP reg
- mov (%ebp),\reg // pop top of return stack to reg
- lea 4(%ebp),%ebp
- .endm
-
-/* ELF entry point. */
- .text
- .globl _start
-_start:
- cld
- mov $return_stack,%ebp // Initialise the return stack.
-
- mov $cold_start,%esi // Initialise interpreter.
- NEXT // Run interpreter!
-
- .section .rodata
-cold_start: // High-level code without a codeword.
- .int COLD
-
-/* DOCOL - the interpreter! */
- .text
- .align 4
-DOCOL:
- PUSHRSP %esi // push %esi on to the return stack
- addl $4,%eax // %eax points to codeword, so make
- movl %eax,%esi // %esi point to first data word
- NEXT
-
-/*----------------------------------------------------------------------
- * Fixed sized buffers for everything.
- */
- .bss
-
-/* FORTH return stack. */
-#define RETURN_STACK_SIZE 8192
- .align 4096
- .space RETURN_STACK_SIZE
-return_stack:
-
-/* Space for user-defined words. */
-#define USER_DEFS_SIZE 16384
- .align 4096
-user_defs_start:
- .space USER_DEFS_SIZE
-
-
-
-
-
-
-/*----------------------------------------------------------------------
- * Built-in words defined the long way.
- */
-#define F_IMMED 0x80
-#define F_HIDDEN 0x20
-
- // Store the chain of links.
- .set link,0
-
- .macro defcode name, namelen, flags=0, label
- .section .rodata
- .align 4
- .globl name_\label
-name_\label :
- .int link // link
- .set link,name_\label
- .byte \flags+\namelen // flags + length byte
- .ascii "\name" // the name
- .align 4
- .globl \label
-\label :
- .int code_\label // codeword
- .text
- .align 4
- .globl code_\label
-code_\label : // assembler code follows
- .endm
-
- .macro defword name, namelen, flags=0, label
- .section .rodata
- .align 4
- .globl name_\label
-name_\label :
- .int link // link
- .set link,name_\label
- .byte \flags+\namelen // flags + length byte
- .ascii "\name" // the name
- .align 4
- .globl \label
-\label :
- .int DOCOL // codeword - the interpreter
- // list of word pointers follow
- .endm
-
- .macro defvar name, namelen, flags=0, label, initial=0
- defcode \name,\namelen,\flags,\label
- push $var_\name
- NEXT
- .data
- .align 4
-var_\name :
- .int \initial
- .endm
-
- /* Some easy ones .... */
- defcode "DUP",3,,DUP
- pop %eax // duplicate top of stack
- push %eax
- push %eax
- NEXT
-
- defcode "DROP",4,,DROP
- pop %eax // drop top of stack
- NEXT
-
- defcode "SWAP",4,,SWAP
- pop %eax // swap top of stack
- pop %ebx
- push %eax
- push %ebx
- NEXT
-
- defcode "OVER",4,,OVER
- mov 4(%esp),%eax // get the second element of stack
- push %eax // and push it on top
- NEXT
-
- // COLD must not return (ie. must not call EXIT).
- defword "COLD",4,,COLD
- // XXX reinitialisation of the interpreter
- .int INTERPRETER // call the interpreter loop (never returns)
- .int LIT,1,SYSEXIT // hmmm, but in case it does, exit(1).
-
- defcode "EXIT",4,,EXIT
- POPRSP %esi // pop return stack into %esi
- NEXT
-
- defcode "LIT",3,,LIT
- // %esi points to the next command, but in this case it points to the next
- // literal 32 bit integer. Get that literal into %eax and increment %esi.
- // On x86, it's a convenient single byte instruction! (cf. NEXT macro)
- lodsl
- push %eax // push %eax on to stack
- NEXT
-
- defcode "0SKIP",5,,ZSKIP
- // If the top of stack is zero, skip the next instruction.
- pop %eax
- test %eax,%eax
- jnz 1f
- lodsl // this does the skip
-1: NEXT
-
- defcode "!",1,,STORE
- pop %ebx // address to store at
- pop %eax // data to store there
- mov %eax,(%ebx) // store it
- NEXT
-
- defcode "@",1,,FETCH
- pop %ebx // address to fetch
- mov (%ebx),%eax // fetch it
- push %eax // push value onto stack
- NEXT
-
- // The STATE variable is 0 for execute mode, != 0 for compile mode
- defvar "STATE",5,,STATE
-
- // This points to where compiled words go.
- defvar "HERE",4,,HERE,user_defs_start
-
- // This is the last definition in the dictionary.
- defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary
-
- defcode ">R",2,,TOR
- pop %eax // pop parameter stack into %eax
- PUSHRSP %eax // push it on to the return stack
- NEXT
-
- defcode "R>",2,,FROMR
- POPRSP %eax // pop return stack on to %eax
- push %eax // and push on to parameter stack
- NEXT
-
-#if 0 /* This definition is wrong. */
- defcode "R",1,,R
- mov %(ebp),%eax // copy (don't pop) top of return stack to %eax
- push %eax // and push on to parameter stack
- NEXT
-#endif
-
- defcode "RSP@",4,,RSPFETCH
- push %ebp
- NEXT
-
- defcode "RSP!",4,,RSPSTORE
- pop %ebp
- NEXT
-
- defcode "RDROP",5,,RDROP
- lea 4(%ebp),%ebp // pop return stack and throw away
- NEXT
-
- defcode "KEY",3,,KEY
- call _KEY
- push %eax // push return value on stack
- NEXT
-_KEY:
- mov (currkey),%ebx
- cmp (bufftop),%ebx
- jge 1f
- xor %eax,%eax
- mov (%ebx),%al
- inc %ebx
- mov %ebx,(currkey)
- ret
-1:
- mov $0,%ebx // out of input, exit (0)
- mov $__NR_exit,%eax
- int $0x80
-
- defcode "EMIT",4,,EMIT
- pop %eax
- call _EMIT
- NEXT
-_EMIT:
- mov $1,%ebx // 1st param: stdout
-
- // write needs the address of the byte to write
- mov %al,(2f)
- mov $2f,%ecx // 2nd param: address
-
- mov $1,%edx // 3rd param: nbytes = 1
-
- mov $__NR_write,%eax // write syscall
- int $0x80
- ret
-
- .bss
-2: .space 1 // scratch used by EMIT
-
- defcode "WORD",4,,WORD
- call _WORD
- push %ecx // push length
- push %edi // push base address
- NEXT
-
-_WORD:
- /* Search for first non-blank character. Also skip \ comments. */
-1:
- call _KEY // get next key, returned in %eax
- cmpb $'\\',%al // start of a comment?
- je 3f // if so, skip the comment
- cmpb $' ',%al
- jbe 1b // if so, keep looking
-
- /* Search for the end of the word, storing chars as we go. */
- mov $5f,%edi // pointer to return buffer
-2:
- stosb // add character to return buffer
- call _KEY // get next key, returned in %al
- cmpb $' ',%al // is blank?
- ja 2b // if not, keep looping
-
- /* Return the word (well, the static buffer) and length. */
- sub $5f,%edi
- mov %edi,%ecx // return length of the word
- mov $5f,%edi // return address of the word
- ret
-
- /* Code to skip \ comments to end of the current line. */
-3:
- call _KEY
- cmpb $'\n',%al // end of line yet?
- jne 3b
- jmp 1b
-
- .bss
- // A static buffer where WORD returns. Subsequent calls
- // overwrite this buffer. Maximum word length is 32 chars.
-5: .space 32
-
- defcode "EMITWORD",8,,EMITWORD
- mov $1,%ebx // 1st param: stdout
- pop %ecx // 2nd param: address of string
- pop %edx // 3rd param: length of string
-
- mov $__NR_write,%eax // write syscall
- int $0x80
-
- NEXT
-
- defcode ".",1,,DOT
- pop %eax // Get the number to print into %eax
- call _DOT // Easier to do this recursively ...
- NEXT
-_DOT:
- mov $10,%ecx // Base 10
- cmp %ecx,%eax
- jb 1f
- pushl %eax
- xor %edx,%edx
- idivl %ecx
- call _DOT
- popl %eax
-1:
- aam $10
- cbw
- cwde
- addl $'0',%eax
- call _EMIT
- ret
-
- // Parse a number from a string on the stack -- almost the opposite of . (DOT)
- // Note that there is absolutely no error checking. In particular the length of the
- // string must be >= 1 bytes.
- defcode "SNUMBER",7,,SNUMBER
- pop %edi
- pop %ecx
- call _SNUMBER
- push %eax
- NEXT
-_SNUMBER:
- xor %eax,%eax
- xor %ebx,%ebx
-1:
- imull $10,%eax // %eax *= 10
- movb (%edi),%bl
- inc %edi
- subb $'0',%bl // ASCII -> digit
- add %ebx,%eax
- dec %ecx
- jnz 1b
- ret
-
- defcode "FIND",4,,FIND
- pop %edi // %edi = address
- pop %ecx // %ecx = length
- call _FIND
- push %eax
- NEXT
-
-_FIND:
- push %esi // Save %esi so we can use it in string comparison.
-
- // Now we start searching backwards through the dictionary for this word.
- mov var_LATEST,%edx // LATEST points to name header of the latest word in the dictionary
-1:
- test %edx,%edx // NULL pointer? (end of the linked list)
- je 4f
-
- // Compare the length expected and the length of the word.
- // Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
- // this won't pick the word (the length will appear to be wrong).
- xor %eax,%eax
- movb 4(%edx),%al // %al = flags+length field
- andb $(F_HIDDEN|0x1f),%al // %al = name length
- cmpb %cl,%al // Length is the same?
- jne 2f
-
- // Compare the strings in detail.
- push %ecx // Save the length
- push %edi // Save the address (repe cmpsb will move this pointer)
- lea 5(%edx),%esi // Dictionary string we are checking against.
- repe cmpsb // Compare the strings.
- pop %edi
- pop %ecx
- jne 2f // Not the same.
-
- // The strings are the same - return the header pointer in %eax
- pop %esi
- mov %edx,%eax
- ret
-
-2:
- mov (%edx),%edx // Move back through the link field to the previous word
- jmp 1b // .. and loop.
-
-4: // Not found.
- pop %esi
- xor %eax,%eax // Return zero to indicate not found.
- ret
-
- defcode ">CFA",4,,TCFA // DEA -> Codeword address
- pop %edi
- call _TCFA
- push %edi
- NEXT
-_TCFA:
- xor %eax,%eax
- add $4,%edi // Skip link pointer.
- movb (%edi),%al // Load flags+len into %al.
- inc %edi // Skip flags+len byte.
- andb $0x1f,%al // Just the length, not the flags.
- add %eax,%edi // Skip the name.
- orl $3,%edi // The codeword is 4-byte aligned.
- inc %edi
- ret
-
- defword "'",1,,TICK
- .int WORD // Get the following word.
- .int FIND // Look it up in the dictionary.
- .int DUP // If not found, skip >CFA (TCFA) instruction.
- .int ZSKIP
- .int TCFA // Convert to a codeword pointer.
- .int EXIT // Return.
-
- defcode ":",1,,COLON
-
- // Get the word and create a dictionary entry header for it.
- call _WORD // Returns %ecx = length, %edi = pointer to word.
- mov %edi,%ebx // %ebx = address of the word
-
- movl var_HERE,%edi // %edi is the address of the header
- movl var_LATEST,%eax // Get link pointer
- stosl // and store it in the header.
-
- mov %cl,%al // Get the length.
- orb $F_HIDDEN,%al // Set the HIDDEN flag on this entry.
- stosb // Store the length/flags byte.
- push %esi
- mov %ebx,%esi // %esi = word
- rep movsb // Copy the word
- pop %esi
- orl $3,%edi // Align to next 4 byte boundary.
- inc %edi
-
- movl $DOCOL,%eax // The codeword for user-created words is always DOCOL (the interpreter)
- stosl
-
- // Header built, so now update LATEST and HERE.
- // We'll be compiling words and putting them HERE.
- movl var_HERE,%eax
- movl %eax,var_LATEST
- movl %edi,var_HERE
-
- // And go into compile mode by setting STATE to 1.
- xor %eax,%eax
- inc %eax
- mov %eax,var_STATE
- NEXT
-
- defcode ",",1,,COMMA
- pop %eax // Code pointer to store.
- call _COMMA
- NEXT
-_COMMA:
- movl var_HERE,%edi // HERE
- stosl // Store it.
- movl %edi,var_HERE // Update HERE (incremented)
- ret
-
- defcode "HIDDEN",6,,HIDDEN
- call _HIDDEN
- NEXT
-_HIDDEN:
- movl var_LATEST,%edi // LATEST word.
- addl $4,%edi // Point to name/flags byte.
- xorb $F_HIDDEN,(%edi) // Toggle the HIDDEN bit.
- ret
-
- defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE
- call _IMMEDIATE
- NEXT
-_IMMEDIATE:
- movl var_LATEST,%edi // LATEST word.
- addl $4,%edi // Point to name/flags byte.
- xorb $F_IMMED,(%edi) // Toggle the IMMED bit.
- ret
-
- defcode ";",1,F_IMMED,SEMICOLON
- movl $EXIT,%eax // EXIT is the final codeword in compiled words.
- call _COMMA // Store it.
- call _HIDDEN // Toggle the HIDDEN flag (unhides the new word).
- xor %eax,%eax // Set STATE to 0 (back to execute mode).
- movl %eax,var_STATE
- NEXT
-
-/* This interpreter is pretty simple, but remember that in FORTH you can always override
- * it later with a more powerful one!
- */
- defword "INTERPRETER",11,,INTERPRETER
- .int INTERPRET,RDROP,INTERPRETER
-
- defcode "INTERPRET",9,,INTERPRET
- call _WORD // Returns %ecx = length, %edi = pointer to word.
-
- // Is it in the dictionary?
- call _FIND // Returns %eax = pointer to header or 0 if not found.
- test %eax,%eax // Found?
- jz 1f
-
- // In the dictionary. Is it an IMMEDIATE codeword?
- mov %eax,%edi // %edi = dictionary entry
- movb 4(%edi),%al // Get name+flags.
- push %ax // Just save it for now.
- call _TCFA // Convert dictionary entry (in %edi) to codeword pointer.
- pop %ax
- andb $F_IMMED,%al // Is IMMED flag set?
- mov %edi,%eax
- jnz 3f // If IMMED, jump straight to executing.
-
- jmp 2f
-
-1: // Not in the dictionary (not a word) so assume it's a number.
- call _SNUMBER // Returns the parsed number in %eax
- mov %eax,%ebx
- mov $LIT,%eax // The word is LIT
-
-2: // Are we compiling or executing?
- movl var_STATE,%edx
- test %edx,%edx
- jz 3f // Jump if executing.
-
- // Compiling - just append the word to the current dictionary definition.
- call _COMMA
- cmp $LIT,%eax // Was it LIT?
- jne 4f
- mov %ebx,%eax // Yes, so LIT is followed by a number.
- call _COMMA
- NEXT
-
-3: // Executing - run it!
- cmp $LIT,%eax // Literal?
- je 4f
- // Not a literal, execute it now. This never returns, but the codeword will
- // eventually call NEXT which will reenter the loop in INTERPRETER.
- jmp *(%eax)
-
-4: // Executing a literal, which means push it on the stack.
- push %ebx
- NEXT
-
- // NB: SYSEXIT must be the last entry in the built-in dictionary.
- defcode SYSEXIT,7,,SYSEXIT
- pop %ebx
- mov $__NR_exit,%eax
- int $0x80
-
-/*----------------------------------------------------------------------
- * Input buffer & initial input.
- */
- .data
- .align 4096
-buffer:
- .ascii "\n\
-: '\\n' 10 ; \n\
-: ')' 41 ; \n\
-: 'space' 32 ; \n\
-: '\"' 34 ; \n\
-: '-' 45 ; \n\
-: '0' 48 ; \n\
- \n\
-\\ CR command \n\
-: CR '\\n' EMIT ; \n\
- \n\
-CR CR '0' EMIT CR CR \n\
-"
-
-_initbufftop:
- .align 4096
-buffend:
-
-currkey:
- .int buffer
-bufftop:
- .int _initbufftop