X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=jonesforth.S.8;fp=jonesforth.S.8;h=0000000000000000000000000000000000000000;hb=279fbb489713c126aa4a8c55d56c48b65a08c721;hp=c894342123926343e06257a82f13a0b9f931197a;hpb=047078f48e027cf0966428b963b3a4e8260172c1;p=jonesforth.git diff --git a/jonesforth.S.8 b/jonesforth.S.8 deleted file mode 100644 index c894342..0000000 --- a/jonesforth.S.8 +++ /dev/null @@ -1,598 +0,0 @@ -/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*- - * By Richard W.M. Jones - * - * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S - */ - -#include - -/* NOTES------------------------------------------------------------------------------------------------------------------- - -Need to say something about $ before constants. - -And about je/jne/ja/jb/jbe/etc - - - - -*/ - -/* NEXT macro. */ - .macro NEXT - lodsl - jmp *(%eax) - .endm - -/* Macros to deal with the return stack. */ - .macro PUSHRSP reg - lea -4(%ebp),%ebp // push reg on to return stack - movl \reg,(%ebp) - .endm - - .macro POPRSP reg - mov (%ebp),\reg // pop top of return stack to reg - lea 4(%ebp),%ebp - .endm - -/* ELF entry point. */ - .text - .globl _start -_start: - cld - mov $return_stack,%ebp // Initialise the return stack. - - mov $cold_start,%esi // Initialise interpreter. - NEXT // Run interpreter! - - .section .rodata -cold_start: // High-level code without a codeword. - .int COLD - -/* DOCOL - the interpreter! */ - .text - .align 4 -DOCOL: - PUSHRSP %esi // push %esi on to the return stack - addl $4,%eax // %eax points to codeword, so make - movl %eax,%esi // %esi point to first data word - NEXT - -/*---------------------------------------------------------------------- - * Fixed sized buffers for everything. - */ - .bss - -/* FORTH return stack. */ -#define RETURN_STACK_SIZE 8192 - .align 4096 - .space RETURN_STACK_SIZE -return_stack: - -/* Space for user-defined words. */ -#define USER_DEFS_SIZE 16384 - .align 4096 -user_defs_start: - .space USER_DEFS_SIZE - - - - - - -/*---------------------------------------------------------------------- - * Built-in words defined the long way. - */ -#define F_IMMED 0x80 -#define F_HIDDEN 0x20 - - // Store the chain of links. - .set link,0 - - .macro defcode name, namelen, flags=0, label - .section .rodata - .align 4 - .globl name_\label -name_\label : - .int link // link - .set link,name_\label - .byte \flags+\namelen // flags + length byte - .ascii "\name" // the name - .align 4 - .globl \label -\label : - .int code_\label // codeword - .text - .align 4 - .globl code_\label -code_\label : // assembler code follows - .endm - - .macro defword name, namelen, flags=0, label - .section .rodata - .align 4 - .globl name_\label -name_\label : - .int link // link - .set link,name_\label - .byte \flags+\namelen // flags + length byte - .ascii "\name" // the name - .align 4 - .globl \label -\label : - .int DOCOL // codeword - the interpreter - // list of word pointers follow - .endm - - .macro defvar name, namelen, flags=0, label, initial=0 - defcode \name,\namelen,\flags,\label - push $var_\name - NEXT - .data - .align 4 -var_\name : - .int \initial - .endm - - /* Some easy ones .... */ - defcode "DUP",3,,DUP - pop %eax // duplicate top of stack - push %eax - push %eax - NEXT - - defcode "DROP",4,,DROP - pop %eax // drop top of stack - NEXT - - defcode "SWAP",4,,SWAP - pop %eax // swap top of stack - pop %ebx - push %eax - push %ebx - NEXT - - defcode "OVER",4,,OVER - mov 4(%esp),%eax // get the second element of stack - push %eax // and push it on top - NEXT - - // COLD must not return (ie. must not call EXIT). - defword "COLD",4,,COLD - // XXX reinitialisation of the interpreter - .int INTERPRETER // call the interpreter loop (never returns) - .int LIT,1,SYSEXIT // hmmm, but in case it does, exit(1). - - defcode "EXIT",4,,EXIT - POPRSP %esi // pop return stack into %esi - NEXT - - defcode "LIT",3,,LIT - // %esi points to the next command, but in this case it points to the next - // literal 32 bit integer. Get that literal into %eax and increment %esi. - // On x86, it's a convenient single byte instruction! (cf. NEXT macro) - lodsl - push %eax // push %eax on to stack - NEXT - - defcode "0SKIP",5,,ZSKIP - // If the top of stack is zero, skip the next instruction. - pop %eax - test %eax,%eax - jnz 1f - lodsl // this does the skip -1: NEXT - - defcode "!",1,,STORE - pop %ebx // address to store at - pop %eax // data to store there - mov %eax,(%ebx) // store it - NEXT - - defcode "@",1,,FETCH - pop %ebx // address to fetch - mov (%ebx),%eax // fetch it - push %eax // push value onto stack - NEXT - - // The STATE variable is 0 for execute mode, != 0 for compile mode - defvar "STATE",5,,STATE - - // This points to where compiled words go. - defvar "HERE",4,,HERE,user_defs_start - - // This is the last definition in the dictionary. - defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary - - defcode ">R",2,,TOR - pop %eax // pop parameter stack into %eax - PUSHRSP %eax // push it on to the return stack - NEXT - - defcode "R>",2,,FROMR - POPRSP %eax // pop return stack on to %eax - push %eax // and push on to parameter stack - NEXT - -#if 0 /* This definition is wrong. */ - defcode "R",1,,R - mov %(ebp),%eax // copy (don't pop) top of return stack to %eax - push %eax // and push on to parameter stack - NEXT -#endif - - defcode "RSP@",4,,RSPFETCH - push %ebp - NEXT - - defcode "RSP!",4,,RSPSTORE - pop %ebp - NEXT - - defcode "RDROP",5,,RDROP - lea 4(%ebp),%ebp // pop return stack and throw away - NEXT - - defcode "KEY",3,,KEY - call _KEY - push %eax // push return value on stack - NEXT -_KEY: - mov (currkey),%ebx - cmp (bufftop),%ebx - jge 1f - xor %eax,%eax - mov (%ebx),%al - inc %ebx - mov %ebx,(currkey) - ret -1: - mov $0,%ebx // out of input, exit (0) - mov $__NR_exit,%eax - int $0x80 - - defcode "EMIT",4,,EMIT - pop %eax - call _EMIT - NEXT -_EMIT: - mov $1,%ebx // 1st param: stdout - - // write needs the address of the byte to write - mov %al,(2f) - mov $2f,%ecx // 2nd param: address - - mov $1,%edx // 3rd param: nbytes = 1 - - mov $__NR_write,%eax // write syscall - int $0x80 - ret - - .bss -2: .space 1 // scratch used by EMIT - - defcode "WORD",4,,WORD - call _WORD - push %ecx // push length - push %edi // push base address - NEXT - -_WORD: - /* Search for first non-blank character. Also skip \ comments. */ -1: - call _KEY // get next key, returned in %eax - cmpb $'\\',%al // start of a comment? - je 3f // if so, skip the comment - cmpb $' ',%al - jbe 1b // if so, keep looking - - /* Search for the end of the word, storing chars as we go. */ - mov $5f,%edi // pointer to return buffer -2: - stosb // add character to return buffer - call _KEY // get next key, returned in %al - cmpb $' ',%al // is blank? - ja 2b // if not, keep looping - - /* Return the word (well, the static buffer) and length. */ - sub $5f,%edi - mov %edi,%ecx // return length of the word - mov $5f,%edi // return address of the word - ret - - /* Code to skip \ comments to end of the current line. */ -3: - call _KEY - cmpb $'\n',%al // end of line yet? - jne 3b - jmp 1b - - .bss - // A static buffer where WORD returns. Subsequent calls - // overwrite this buffer. Maximum word length is 32 chars. -5: .space 32 - - defcode "EMITWORD",8,,EMITWORD - mov $1,%ebx // 1st param: stdout - pop %ecx // 2nd param: address of string - pop %edx // 3rd param: length of string - - mov $__NR_write,%eax // write syscall - int $0x80 - - NEXT - - defcode ".",1,,DOT - pop %eax // Get the number to print into %eax - call _DOT // Easier to do this recursively ... - NEXT -_DOT: - mov $10,%ecx // Base 10 - cmp %ecx,%eax - jb 1f - pushl %eax - xor %edx,%edx - idivl %ecx - call _DOT - popl %eax -1: - aam $10 - cbw - cwde - addl $'0',%eax - call _EMIT - ret - - // Parse a number from a string on the stack -- almost the opposite of . (DOT) - // Note that there is absolutely no error checking. In particular the length of the - // string must be >= 1 bytes. - defcode "SNUMBER",7,,SNUMBER - pop %edi - pop %ecx - call _SNUMBER - push %eax - NEXT -_SNUMBER: - xor %eax,%eax - xor %ebx,%ebx -1: - imull $10,%eax // %eax *= 10 - movb (%edi),%bl - inc %edi - subb $'0',%bl // ASCII -> digit - add %ebx,%eax - dec %ecx - jnz 1b - ret - - defcode "FIND",4,,FIND - pop %edi // %edi = address - pop %ecx // %ecx = length - call _FIND - push %eax - NEXT - -_FIND: - push %esi // Save %esi so we can use it in string comparison. - - // Now we start searching backwards through the dictionary for this word. - mov var_LATEST,%edx // LATEST points to name header of the latest word in the dictionary -1: - test %edx,%edx // NULL pointer? (end of the linked list) - je 4f - - // Compare the length expected and the length of the word. - // Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery - // this won't pick the word (the length will appear to be wrong). - xor %eax,%eax - movb 4(%edx),%al // %al = flags+length field - andb $(F_HIDDEN|0x1f),%al // %al = name length - cmpb %cl,%al // Length is the same? - jne 2f - - // Compare the strings in detail. - push %ecx // Save the length - push %edi // Save the address (repe cmpsb will move this pointer) - lea 5(%edx),%esi // Dictionary string we are checking against. - repe cmpsb // Compare the strings. - pop %edi - pop %ecx - jne 2f // Not the same. - - // The strings are the same - return the header pointer in %eax - pop %esi - mov %edx,%eax - ret - -2: - mov (%edx),%edx // Move back through the link field to the previous word - jmp 1b // .. and loop. - -4: // Not found. - pop %esi - xor %eax,%eax // Return zero to indicate not found. - ret - - defcode ">CFA",4,,TCFA // DEA -> Codeword address - pop %edi - call _TCFA - push %edi - NEXT -_TCFA: - xor %eax,%eax - add $4,%edi // Skip link pointer. - movb (%edi),%al // Load flags+len into %al. - inc %edi // Skip flags+len byte. - andb $0x1f,%al // Just the length, not the flags. - add %eax,%edi // Skip the name. - orl $3,%edi // The codeword is 4-byte aligned. - inc %edi - ret - - defword "'",1,,TICK - .int WORD // Get the following word. - .int FIND // Look it up in the dictionary. - .int DUP // If not found, skip >CFA (TCFA) instruction. - .int ZSKIP - .int TCFA // Convert to a codeword pointer. - .int EXIT // Return. - - defcode ":",1,,COLON - - // Get the word and create a dictionary entry header for it. - call _WORD // Returns %ecx = length, %edi = pointer to word. - mov %edi,%ebx // %ebx = address of the word - - movl var_HERE,%edi // %edi is the address of the header - movl var_LATEST,%eax // Get link pointer - stosl // and store it in the header. - - mov %cl,%al // Get the length. - orb $F_HIDDEN,%al // Set the HIDDEN flag on this entry. - stosb // Store the length/flags byte. - push %esi - mov %ebx,%esi // %esi = word - rep movsb // Copy the word - pop %esi - orl $3,%edi // Align to next 4 byte boundary. - inc %edi - - movl $DOCOL,%eax // The codeword for user-created words is always DOCOL (the interpreter) - stosl - - // Header built, so now update LATEST and HERE. - // We'll be compiling words and putting them HERE. - movl var_HERE,%eax - movl %eax,var_LATEST - movl %edi,var_HERE - - // And go into compile mode by setting STATE to 1. - xor %eax,%eax - inc %eax - mov %eax,var_STATE - NEXT - - defcode ",",1,,COMMA - pop %eax // Code pointer to store. - call _COMMA - NEXT -_COMMA: - movl var_HERE,%edi // HERE - stosl // Store it. - movl %edi,var_HERE // Update HERE (incremented) - ret - - defcode "HIDDEN",6,,HIDDEN - call _HIDDEN - NEXT -_HIDDEN: - movl var_LATEST,%edi // LATEST word. - addl $4,%edi // Point to name/flags byte. - xorb $F_HIDDEN,(%edi) // Toggle the HIDDEN bit. - ret - - defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE - call _IMMEDIATE - NEXT -_IMMEDIATE: - movl var_LATEST,%edi // LATEST word. - addl $4,%edi // Point to name/flags byte. - xorb $F_IMMED,(%edi) // Toggle the IMMED bit. - ret - - defcode ";",1,F_IMMED,SEMICOLON - movl $EXIT,%eax // EXIT is the final codeword in compiled words. - call _COMMA // Store it. - call _HIDDEN // Toggle the HIDDEN flag (unhides the new word). - xor %eax,%eax // Set STATE to 0 (back to execute mode). - movl %eax,var_STATE - NEXT - -/* This interpreter is pretty simple, but remember that in FORTH you can always override - * it later with a more powerful one! - */ - defword "INTERPRETER",11,,INTERPRETER - .int INTERPRET,RDROP,INTERPRETER - - defcode "INTERPRET",9,,INTERPRET - call _WORD // Returns %ecx = length, %edi = pointer to word. - - // Is it in the dictionary? - call _FIND // Returns %eax = pointer to header or 0 if not found. - test %eax,%eax // Found? - jz 1f - - // In the dictionary. Is it an IMMEDIATE codeword? - mov %eax,%edi // %edi = dictionary entry - movb 4(%edi),%al // Get name+flags. - push %ax // Just save it for now. - call _TCFA // Convert dictionary entry (in %edi) to codeword pointer. - pop %ax - andb $F_IMMED,%al // Is IMMED flag set? - mov %edi,%eax - jnz 3f // If IMMED, jump straight to executing. - - jmp 2f - -1: // Not in the dictionary (not a word) so assume it's a number. - call _SNUMBER // Returns the parsed number in %eax - mov %eax,%ebx - mov $LIT,%eax // The word is LIT - -2: // Are we compiling or executing? - movl var_STATE,%edx - test %edx,%edx - jz 3f // Jump if executing. - - // Compiling - just append the word to the current dictionary definition. - call _COMMA - cmp $LIT,%eax // Was it LIT? - jne 4f - mov %ebx,%eax // Yes, so LIT is followed by a number. - call _COMMA - NEXT - -3: // Executing - run it! - cmp $LIT,%eax // Literal? - je 4f - // Not a literal, execute it now. This never returns, but the codeword will - // eventually call NEXT which will reenter the loop in INTERPRETER. - jmp *(%eax) - -4: // Executing a literal, which means push it on the stack. - push %ebx - NEXT - - // NB: SYSEXIT must be the last entry in the built-in dictionary. - defcode SYSEXIT,7,,SYSEXIT - pop %ebx - mov $__NR_exit,%eax - int $0x80 - -/*---------------------------------------------------------------------- - * Input buffer & initial input. - */ - .data - .align 4096 -buffer: - .ascii "\n\ -: '\\n' 10 ; \n\ -: ')' 41 ; \n\ -: 'space' 32 ; \n\ -: '\"' 34 ; \n\ -: '-' 45 ; \n\ -: '0' 48 ; \n\ - \n\ -\\ CR command \n\ -: CR '\\n' EMIT ; \n\ - \n\ -CR CR '0' EMIT CR CR \n\ -" - -_initbufftop: - .align 4096 -buffend: - -currkey: - .int buffer -bufftop: - .int _initbufftop