From 31a2023bf2670b9a15629f873cbb0ef2ae28bcd3 Mon Sep 17 00:00:00 2001 From: rich Date: Mon, 24 Sep 2007 00:18:19 +0000 Subject: [PATCH] Split into two files. --- Makefile | 5 +- jonesforth.S | 747 ++--------------------------------------------------------- jonesforth.f | 727 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 751 insertions(+), 728 deletions(-) create mode 100644 jonesforth.f diff --git a/Makefile b/Makefile index ea0b970..9b31c69 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,8 @@ -# $Id: Makefile,v 1.2 2007-09-15 11:21:09 rich Exp $ +# $Id: Makefile,v 1.3 2007-09-24 00:18:19 rich Exp $ all: gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S run: - ./jonesforth + cat jonesforth.f - | ./jonesforth + diff --git a/jonesforth.S b/jonesforth.S index 908acf3..de5bbe3 100644 --- a/jonesforth.S +++ b/jonesforth.S @@ -1,11 +1,11 @@ /* A sometimes minimal FORTH compiler and tutorial for Linux / i386 systems. -*- asm -*- By Richard W.M. Jones http://annexia.org/forth This is PUBLIC DOMAIN (see public domain release statement below). - $Id: jonesforth.S,v 1.27 2007-09-24 00:01:38 rich Exp $ + $Id: jonesforth.S,v 1.28 2007-09-24 00:18:19 rich Exp $ gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S */ - .set JONES_VERSION,27 + .set JONES_VERSION,28 /* INTRODUCTION ---------------------------------------------------------------------- @@ -115,20 +115,15 @@ assemble and run the code (save this file as 'jonesforth.S') are: gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S - ./jonesforth - - You will see lots of 'Warning: unterminated string; newline inserted' messages from the - assembler. That's just because the GNU assembler doesn't have a good syntax for multi-line - strings (or rather it used to, but the developers removed it!) so I've abused the syntax - slightly to make things readable. Ignore these warnings. 
+ cat jonesforth.f - | ./jonesforth If you want to run your own FORTH programs you can do: - ./jonesforth < myprog.f + cat jonesforth.f myprog.f | ./jonesforth If you want to load your own FORTH code and then continue reading user commands, you can do: - cat myfunctions.f - | ./jonesforth + cat jonesforth.f myfunctions.f - | ./jonesforth ASSEMBLER ---------------------------------------------------------------------- @@ -590,6 +585,18 @@ return_stack: // Initial top of return stack. user_defs_start: .space USER_DEFS_SIZE +/* This is used as a temporary input buffer when reading from files or the terminal. */ + .set BUFFER_SIZE,4096 + .align 4096 +buffer: +_initbufftop: + .space BUFFER_SIZE +buffend: +currkey: + .int buffer +bufftop: + .int _initbufftop + /* BUILT-IN WORDS ---------------------------------------------------------------------- @@ -2072,723 +2079,11 @@ interpret_is_lit: words can be written as FORTH itself, including words like IF, THEN, .", etc which in most languages would be considered rather fundamental. - As a kind of trick, I prefill the input buffer with the initial FORTH code. Once this code - has run (when we get to the "OK" prompt), this input buffer is reused for reading any further - user input. - - Some notes about the code: - - \ (backslash) is the FORTH way to start a comment which goes up to the next newline. However - because this is a C-style string, I have to escape the backslash, which is why they appear as - \\ comment. - - Similarly, any backslashes in the code are doubled, and " becomes \" (eg. the definition of ." - is written as : .\" ... ;) - - I use indenting to show structure. The amount of whitespace has no meaning to FORTH however - except that you must use at least one whitespace character between words, and words themselves - cannot contain whitespace. - - FORTH is case-sensitive. Use capslock! 
+ I used to append this here in the assembly file, but I got sick of fighting against gas's + stupid (lack of) multiline string syntax. So now that is in a separate file called jonesforth.f - Enjoy! + If you don't already have that file, download it from http://annexia.org/forth in order + to continue the tutorial. */ - .data - .align 4096 -buffer: - // Multi-line constant gives 'Warning: unterminated string; newline inserted' messages which you can ignore. - .ascii "\ -\\ Define some character constants -: '\\n' 10 ; -: 'SPACE' 32 ; - -\\ CR prints a carriage return -: CR '\\n' EMIT ; - -\\ SPACE prints a space -: SPACE 'SPACE' EMIT ; - -\\ DUP, DROP are defined in assembly for speed, but this is how you might define them -\\ in FORTH. Notice use of the scratch variables _X and _Y. -\\ : DUP _X ! _X @ _X @ ; -\\ : DROP _X ! ; - -\\ The built-in . (DOT) function doesn't print a space after the number (unlike the real FORTH word). -\\ However this is very easily fixed by redefining . (DOT). Any built-in word can be redefined. -: . - . \\ this refers back to the previous definition (but see also RECURSE below) - SPACE -; - -\\ The 2... versions of the standard operators work on pairs of stack entries. They're not used -\\ very commonly so not really worth writing in assembler. Here is how they are defined in FORTH. -: 2DUP OVER OVER ; -: 2DROP DROP DROP ; - -\\ More standard FORTH words. -: 2* 2 * ; -: 2/ 2 / ; - -\\ Standard words for manipulating BASE. -: DECIMAL 10 BASE ! ; -: HEX 16 BASE ! ; - -\\ Standard words for booleans. -: TRUE 1 ; -: FALSE 0 ; -: NOT 0= ; - -\\ LITERAL takes whatever is on the stack and compiles LIT -: LITERAL IMMEDIATE - ' LIT , \\ compile LIT - , \\ compile the literal itself (from the stack) - ; - -\\ Now we can use [ and ] to insert literals which are calculated at compile time. -\\ Within definitions, use [ ... ] LITERAL anywhere that '...' 
is a constant expression which you -\\ would rather only compute once (at compile time, rather than calculating it each time your word runs). -: ':' - [ \\ go into immediate mode temporarily - CHAR : \\ push the number 58 (ASCII code of colon) on the stack - ] \\ go back to compile mode - LITERAL \\ compile LIT 58 as the definition of ':' word -; - -\\ A few more character constants defined the same way as above. -: '(' [ CHAR ( ] LITERAL ; -: ')' [ CHAR ) ] LITERAL ; -: '\"' [ CHAR \" ] LITERAL ; - -\\ So far we have defined only very simple definitions. Before we can go further, we really need to -\\ make some control structures, like IF ... THEN and loops. Luckily we can define arbitrary control -\\ structures directly in FORTH. -\\ -\\ Please note that the control structures as I have defined them here will only work inside compiled -\\ words. If you try to type in expressions using IF, etc. in immediate mode, then they won't work. -\\ Making these work in immediate mode is left as an exercise for the reader. - -\\ condition IF true-part THEN rest -\\ -- compiles to: --> condition 0BRANCH OFFSET true-part rest -\\ where OFFSET is the offset of 'rest' -\\ condition IF true-part ELSE false-part THEN -\\ -- compiles to: --> condition 0BRANCH OFFSET true-part BRANCH OFFSET2 false-part rest -\\ where OFFSET if the offset of false-part and OFFSET2 is the offset of rest - -\\ IF is an IMMEDIATE word which compiles 0BRANCH followed by a dummy offset, and places -\\ the address of the 0BRANCH on the stack. Later when we see THEN, we pop that address -\\ off the stack, calculate the offset, and back-fill the offset. -: IF IMMEDIATE - ' 0BRANCH , \\ compile 0BRANCH - HERE @ \\ save location of the offset on the stack - 0 , \\ compile a dummy offset -; - -: THEN IMMEDIATE - DUP - HERE @ SWAP - \\ calculate the offset from the address saved on the stack - SWAP ! 
\\ store the offset in the back-filled location -; - -: ELSE IMMEDIATE - ' BRANCH , \\ definite branch to just over the false-part - HERE @ \\ save location of the offset on the stack - 0 , \\ compile a dummy offset - SWAP \\ now back-fill the original (IF) offset - DUP \\ same as for THEN word above - HERE @ SWAP - - SWAP ! -; - -\\ BEGIN loop-part condition UNTIL -\\ -- compiles to: --> loop-part condition 0BRANCH OFFSET -\\ where OFFSET points back to the loop-part -\\ This is like do { loop-part } while (condition) in the C language -: BEGIN IMMEDIATE - HERE @ \\ save location on the stack -; - -: UNTIL IMMEDIATE - ' 0BRANCH , \\ compile 0BRANCH - HERE @ - \\ calculate the offset from the address saved on the stack - , \\ compile the offset here -; - -\\ BEGIN loop-part AGAIN -\\ -- compiles to: --> loop-part BRANCH OFFSET -\\ where OFFSET points back to the loop-part -\\ In other words, an infinite loop which can only be returned from with EXIT -: AGAIN IMMEDIATE - ' BRANCH , \\ compile BRANCH - HERE @ - \\ calculate the offset back - , \\ compile the offset here -; - -\\ BEGIN condition WHILE loop-part REPEAT -\\ -- compiles to: --> condition 0BRANCH OFFSET2 loop-part BRANCH OFFSET -\\ where OFFSET points back to condition (the beginning) and OFFSET2 points to after the whole piece of code -\\ So this is like a while (condition) { loop-part } loop in the C language -: WHILE IMMEDIATE - ' 0BRANCH , \\ compile 0BRANCH - HERE @ \\ save location of the offset2 on the stack - 0 , \\ compile a dummy offset2 -; - -: REPEAT IMMEDIATE - ' BRANCH , \\ compile BRANCH - SWAP \\ get the original offset (from BEGIN) - HERE @ - , \\ and compile it after BRANCH - DUP - HERE @ SWAP - \\ calculate the offset2 - SWAP ! \\ and back-fill it in the original location -; - -\\ FORTH allows ( ... ) as comments within function definitions. This works by having an IMMEDIATE -\\ word called ( which just drops input characters until it hits the corresponding ). 
-: ( IMMEDIATE - 1 \\ allowed nested parens by keeping track of depth - BEGIN - KEY \\ read next character - DUP '(' = IF \\ open paren? - DROP \\ drop the open paren - 1+ \\ depth increases - ELSE - ')' = IF \\ close paren? - 1- \\ depth decreases - THEN - THEN - DUP 0= UNTIL \\ continue until we reach matching close paren, depth 0 - DROP \\ drop the depth counter -; - -( - From now on we can use ( ... ) for comments. - - In FORTH style we can also use ( ... -- ... ) to show the effects that a word has on the - parameter stack. For example: - - ( n -- ) means that the word consumes an integer (n) from the parameter stack. - ( b a -- c ) means that the word uses two integers (a and b, where a is at the top of stack) - and returns a single integer (c). - ( -- ) means the word has no effect on the stack -) - -( With the looping constructs, we can now write SPACES, which writes n spaces to stdout. ) -: SPACES ( n -- ) - BEGIN - DUP 0> ( while n > 0 ) - WHILE - SPACE ( print a space ) - 1- ( until we count down to 0 ) - REPEAT - DROP -; - -( c a b WITHIN returns true if a <= c and c < b ) -: WITHIN - ROT ( b c a ) - OVER ( b c a c ) - <= IF - > IF ( b c -- ) - TRUE - ELSE - FALSE - THEN - ELSE - 2DROP ( b c -- ) - FALSE - THEN -; - -( .S prints the contents of the stack. Very useful for debugging. ) -: .S ( -- ) - DSP@ ( get current stack pointer ) - BEGIN - DUP S0 @ < - WHILE - DUP @ . ( print the stack element ) - 4+ ( move up ) - REPEAT - DROP -; - -( DEPTH returns the depth of the stack. ) -: DEPTH ( -- n ) - S0 @ DSP@ - - 4- ( adjust because S0 was on the stack when we pushed DSP ) -; - -( - [NB. The following may be a bit confusing because of the need to use backslash before - each double quote character. The backslashes are there to keep the assembler happy. - They are NOT part of the final output. So here we are defining a function called - 'S double-quote' (not 'S backslash double-quote').] - - S\" string\" is used in FORTH to define strings. 
It leaves the address of the string and - its length on the stac,k with the address at the top. The space following S\" is the normal - space between FORTH words and is not a part of the string. - - In compile mode we append - LITSTRING - to the current word. The primitive LITSTRING does the right thing when the current - word is executed. - - In immediate mode there isn't a particularly good place to put the string, but in this - case we put the string at HERE (but we _don't_ change HERE). This is meant as a temporary - location, likely to be overwritten soon after. -) -: S\" IMMEDIATE ( -- len addr ) - STATE @ IF ( compiling? ) - ' LITSTRING , ( compile LITSTRING ) - HERE @ ( save the address of the length word on the stack ) - 0 , ( dummy length - we don't know what it is yet ) - BEGIN - KEY ( get next character of the string ) - DUP '\"' <> - WHILE - HERE @ !b ( store the character in the compiled image ) - 1 HERE +! ( increment HERE pointer by 1 byte ) - REPEAT - DROP ( drop the double quote character at the end ) - DUP ( get the saved address of the length word ) - HERE @ SWAP - ( calculate the length ) - 4- ( subtract 4 (because we measured from the start of the length word) ) - SWAP ! ( and back-fill the length location ) - HERE @ ( round up to next multiple of 4 bytes for the remaining code ) - 3 + - 3 INVERT AND - HERE ! - ELSE ( immediate mode ) - HERE @ ( get the start address of the temporary space ) - BEGIN - KEY - DUP '\"' <> - WHILE - OVER !b ( save next character ) - 1+ ( increment address ) - REPEAT - HERE @ - ( calculate the length ) - HERE @ ( push the start address ) - THEN -; - -( - .\" is the print string operator in FORTH. Example: .\" Something to print\" - The space after the operator is the ordinary space required between words. - - This is tricky to define because it has to do different things depending on whether - we are compiling or in immediate mode. (Thus the word is marked IMMEDIATE so it can - detect this and do different things). 
- - In immediate mode we just keep reading characters and printing them until we get to - the next double quote. - - In compile mode we have the problem of where we're going to store the string (remember - that the input buffer where the string comes from may be overwritten by the time we - come round to running the function). We store the string in the compiled function - like this: - ..., LITSTRING, string length, string rounded up to 4 bytes, EMITSTRING, ... -) -: .\" IMMEDIATE ( -- ) - STATE @ IF ( compiling? ) - ' LITSTRING , ( compile LITSTRING ) - HERE @ ( save the address of the length word on the stack ) - 0 , ( dummy length - we don't know what it is yet ) - BEGIN - KEY ( get next character of the string ) - DUP '\"' <> - WHILE - HERE @ !b ( store the character in the compiled image ) - 1 HERE +! ( increment HERE pointer by 1 byte ) - REPEAT - DROP ( drop the double quote character at the end ) - DUP ( get the saved address of the length word ) - HERE @ SWAP - ( calculate the length ) - 4- ( subtract 4 (because we measured from the start of the length word) ) - SWAP ! ( and back-fill the length location ) - HERE @ ( round up to next multiple of 4 bytes for the remaining code ) - 3 + - 3 INVERT AND - HERE ! - ' EMITSTRING , ( compile the final EMITSTRING ) - ELSE - ( In immediate mode, just read characters and print them until we get - to the ending double quote. Much simpler than the above code! ) - BEGIN - KEY - DUP '\"' = IF - DROP ( drop the double quote character ) - EXIT ( return from this function ) - THEN - EMIT - AGAIN - THEN -; - -( - In FORTH, global constants and variables are defined like this: - - 10 CONSTANT TEN when TEN is executed, it leaves the integer 10 on the stack - VARIABLE VAR when VAR is executed, it leaves the address of VAR on the stack - - Constants can be read by not written, eg: - - TEN . CR prints 10 - - You can read a variable (in this example called VAR) by doing: - - VAR @ leaves the value of VAR on the stack - VAR @ . 
CR prints the value of VAR - - and update the variable by doing: - - 20 VAR ! sets VAR to 20 - - Note that variables are uninitialised (but see VALUE later on which provides initialised - variables with a slightly simpler syntax). - - How can we define the words CONSTANT and VARIABLE? - - The trick is to define a new word for the variable itself (eg. if the variable was called - 'VAR' then we would define a new word called VAR). This is easy to do because we exposed - dictionary entry creation through the CREATE word (part of the definition of : above). - A call to CREATE TEN leaves the dictionary entry: - - +--- HERE - | - V - +---------+---+---+---+---+ - | LINK | 3 | T | E | N | - +---------+---+---+---+---+ - len - - For CONSTANT we can continue by appending DOCOL (the codeword), then LIT followed by - the constant itself and then EXIT, forming a little word definition that returns the - constant: - - +---------+---+---+---+---+------------+------------+------------+------------+ - | LINK | 3 | T | E | N | DOCOL | LIT | 10 | EXIT | - +---------+---+---+---+---+------------+------------+------------+------------+ - len codeword - - Notice that this word definition is exactly the same as you would have got if you had - written : TEN 10 ; -) -: CONSTANT - CREATE ( make the dictionary entry (the name follows CONSTANT) ) - DOCOL , ( append DOCOL (the codeword field of this word) ) - ' LIT , ( append the codeword LIT ) - , ( append the value on the top of the stack ) - ' EXIT , ( append the codeword EXIT ) -; - -( - VARIABLE is a little bit harder because we need somewhere to put the variable. There is - nothing particularly special about the 'user definitions area' (the area of memory pointed - to by HERE where we have previously just stored new word definitions). 
We can slice off - bits of this memory area to store anything we want, so one possible definition of - VARIABLE might create this: - - +--------------------------------------------------------------+ - | | - V | - +---------+---------+---+---+---+---+------------+------------+---|--------+------------+ - | | LINK | 3 | V | A | R | DOCOL | LIT | | EXIT | - +---------+---------+---+---+---+---+------------+------------+------------+------------+ - len codeword - - where is the place to store the variable, and points back to it. - - To make this more general let's define a couple of words which we can use to allocate - arbitrary memory from the user definitions area. - - First ALLOT, where n ALLOT allocates n bytes of memory. (Note when calling this that - it's a very good idea to make sure that n is a multiple of 4, or at least that next time - a word is compiled that n has been left as a multiple of 4). -) -: ALLOT ( n -- addr ) - HERE @ SWAP ( here n -- ) - HERE +! ( adds n to HERE, after this the old value of HERE is still on the stack ) -; - -( - Second, CELLS. In FORTH the phrase 'n CELLS ALLOT' means allocate n integers of whatever size - is the natural size for integers on this machine architecture. On this 32 bit machine therefore - CELLS just multiplies the top of stack by 4. -) -: CELLS ( n -- n ) 4 * ; - -( - So now we can define VARIABLE easily in much the same way as CONSTANT above. Refer to the - diagram above to see what the word that this creates will look like. -) -: VARIABLE - 1 CELLS ALLOT ( allocate 1 cell of memory, push the pointer to this memory ) - CREATE ( make the dictionary entry (the name follows VARIABLE) ) - DOCOL , ( append DOCOL (the codeword field of this word) ) - ' LIT , ( append the codeword LIT ) - , ( append the pointer to the new memory ) - ' EXIT , ( append the codeword EXIT ) -; - -( - VALUEs are like VARIABLEs but with a simpler syntax. 
You would generally use them when you - want a variable which is read often, and written infrequently. - - 20 VALUE VAL creates VAL with initial value 20 - VAL pushes the value directly on the stack - 30 TO VAL updates VAL, setting it to 30 - - Notice that 'VAL' on its own doesn't return the address of the value, but the value itself, - making values simpler and more obvious to use than variables (no indirection through '@'). - The price is a more complicated implementation, although despite the complexity there is no - particular performance penalty at runtime. - - A naive implementation of 'TO' would be quite slow, involving a dictionary search each time. - But because this is FORTH we have complete control of the compiler so we can compile TO more - efficiently, turning: - TO VAL - into: - LIT ! - and calculating (the address of the value) at compile time. - - Now this is the clever bit. We'll compile our value like this: - - +---------+---+---+---+---+------------+------------+------------+------------+ - | LINK | 3 | V | A | L | DOCOL | LIT | | EXIT | - +---------+---+---+---+---+------------+------------+------------+------------+ - len codeword - - where is the actual value itself. Note that when VAL executes, it will push the - value on the stack, which is what we want. - - But what will TO use for the address ? Why of course a pointer to that : - - code compiled - - - - --+------------+------------+------------+-- - - - - - by TO VAL | LIT | | ! | - - - - - --+------------+-----|------+------------+-- - - - - - | - V - +---------+---+---+---+---+------------+------------+------------+------------+ - | LINK | 3 | V | A | L | DOCOL | LIT | | EXIT | - +---------+---+---+---+---+------------+------------+------------+------------+ - len codeword - - In other words, this is a kind of self-modifying code. - - (Note to the people who want to modify this FORTH to add inlining: values defined this - way cannot be inlined). 
-) -: VALUE ( n -- ) - CREATE ( make the dictionary entry (the name follows VALUE) ) - DOCOL , ( append DOCOL ) - ' LIT , ( append the codeword LIT ) - , ( append the initial value ) - ' EXIT , ( append the codeword EXIT ) -; - -: TO IMMEDIATE ( n -- ) - WORD ( get the name of the value ) - FIND ( look it up in the dictionary ) - >DFA ( get a pointer to the first data field (the 'LIT') ) - 4+ ( increment to point at the value ) - STATE @ IF ( compiling? ) - ' LIT , ( compile LIT ) - , ( compile the address of the value ) - ' ! , ( compile ! ) - ELSE ( immediate mode ) - ! ( update it straightaway ) - THEN -; - -( x +TO VAL adds x to VAL ) -: +TO IMMEDIATE - WORD ( get the name of the value ) - FIND ( look it up in the dictionary ) - >DFA ( get a pointer to the first data field (the 'LIT') ) - 4+ ( increment to point at the value ) - STATE @ IF ( compiling? ) - ' LIT , ( compile LIT ) - , ( compile the address of the value ) - ' +! , ( compile +! ) - ELSE ( immediate mode ) - +! ( update it straightaway ) - THEN -; - -( - ID. takes an address of a dictionary entry and prints the word's name. - - For example: LATEST @ ID. would print the name of the last word that was defined. -) -: ID. - 4+ ( skip over the link pointer ) - DUP @b ( get the flags/length byte ) - F_LENMASK AND ( mask out the flags - just want the length ) - - BEGIN - DUP 0> ( length > 0? ) - WHILE - SWAP 1+ ( addr len -- len addr+1 ) - DUP @b ( len addr -- len addr char | get the next character) - EMIT ( len addr char -- len addr | and print it) - SWAP 1- ( len addr -- addr len-1 | subtract one from length ) - REPEAT - 2DROP ( len addr -- ) -; - -( - 'WORD word FIND ?HIDDEN' returns true if 'word' is flagged as hidden. - - 'WORD word FIND ?IMMEDIATE' returns true if 'word' is flagged as immediate. 
-) -: ?HIDDEN - 4+ ( skip over the link pointer ) - @b ( get the flags/length byte ) - F_HIDDEN AND ( mask the F_HIDDEN flag and return it (as a truth value) ) -; -: ?IMMEDIATE - 4+ ( skip over the link pointer ) - @b ( get the flags/length byte ) - F_IMMED AND ( mask the F_IMMED flag and return it (as a truth value) ) -; - -( - WORDS prints all the words defined in the dictionary, starting with the word defined most recently. - However it doesn't print hidden words. - - The implementation simply iterates backwards from LATEST using the link pointers. -) -: WORDS - LATEST @ ( start at LATEST dictionary entry ) - BEGIN - DUP 0<> ( while link pointer is not null ) - WHILE - DUP ?HIDDEN NOT IF - DUP ID. ( print the word ) - THEN - SPACE - @ ( dereference the link pointer - go to previous word ) - REPEAT - DROP - CR -; - -( - So far we have only allocated words and memory. FORTH provides a rather primitive method - to deallocate. - - 'FORGET word' deletes the definition of 'word' from the dictionary and everything defined - after it, including any variables and other memory allocated after. - - The implementation is very simple - we look up the word (which returns the dictionary entry - address). Then we set HERE to point to that address, so in effect all future allocations - and definitions will overwrite memory starting at the word. We also need to set LATEST to - point to the previous word. - - Note that you cannot FORGET built-in words (well, you can try but it will probably cause - a segfault). - - XXX: Because we wrote VARIABLE to store the variable in memory allocated before the word, - in the current implementation VARIABLE FOO FORGET FOO will leak 1 cell of memory. -) -: FORGET - WORD FIND ( find the word, gets the dictionary entry address ) - DUP @ LATEST ! ( set LATEST to point to the previous word ) - HERE ! ( and store HERE with the dictionary address ) -; - -( - While compiling, '[COMPILE] word' compiles 'word' if it would otherwise be IMMEDIATE. 
-) -: [COMPILE] IMMEDIATE - WORD ( get the next word ) - FIND ( find it in the dictionary ) - >CFA ( get its codeword ) - , ( and compile that ) -; - -( - RECURSE makes a recursive call to the current word that is being compiled. - - Normally while a word is being compiled, it is marked HIDDEN so that references to the - same word within are calls to the previous definition of the word. However we still have - access to the word which we are currently compiling through the LATEST pointer so we - can use that to compile a recursive call. -) -: RECURSE IMMEDIATE - LATEST @ >CFA ( LATEST points to the word being compiled at the moment ) - , ( compile it ) -; - -( - DUMP is used to dump out the contents of memory, in the 'traditional' hexdump format. -) -: DUMP ( addr len -- ) - BASE @ ROT ( save the current BASE at the bottom of the stack ) - HEX ( and switch the hexadecimal mode ) - - BEGIN - DUP 0> ( while len > 0 ) - WHILE - OVER . ( print the address ) - SPACE - - ( print up to 16 words on this line ) - 2DUP ( addr len addr len ) - 1- 15 AND 1+ ( addr len addr linelen ) - BEGIN - DUP 0> ( while linelen > 0 ) - WHILE - SWAP ( addr len linelen addr ) - DUP @b ( addr len linelen addr byte ) - . SPACE ( print the byte ) - 1+ SWAP 1- ( addr len linelen addr -- addr len addr+1 linelen-1 ) - REPEAT - 2DROP ( addr len ) - - ( print the ASCII equivalents ) - 2DUP 1- 15 AND 1+ ( addr len addr linelen ) - BEGIN - DUP 0> ( while linelen > 0) - WHILE - SWAP ( addr len linelen addr ) - DUP @b ( addr len linelen addr byte ) - DUP 32 128 WITHIN IF ( 32 <= c < 128? ) - EMIT - ELSE - DROP [ CHAR ? 
] LITERAL EMIT - THEN - 1+ SWAP 1- ( addr len linelen addr -- addr len addr+1 linelen-1 ) - REPEAT - 2DROP ( addr len ) - CR - - DUP 1- 15 AND 1+ ( addr len linelen ) - DUP ( addr len linelen linelen ) - ROT ( addr linelen len linelen ) - - ( addr linelen len-linelen ) - ROT ( len-linelen addr linelen ) - + ( len-linelen addr+linelen ) - SWAP ( addr-linelen len-linelen ) - REPEAT - - 2DROP ( restore stack ) - BASE ! ( restore saved BASE ) -; - -( Finally print the welcome prompt. ) -.\" JONESFORTH VERSION \" VERSION . CR -.\" OK \" -" - -_initbufftop: - .align 4096 -buffend: - -currkey: - .int buffer -bufftop: - .int _initbufftop - /* END OF jonesforth.S */ diff --git a/jonesforth.f b/jonesforth.f new file mode 100644 index 0000000..b8c5a61 --- /dev/null +++ b/jonesforth.f @@ -0,0 +1,727 @@ +\ -*- forth -*- +\ A sometimes minimal FORTH compiler and tutorial for Linux / i386 systems. -*- asm -*- +\ By Richard W.M. Jones http://annexia.org/forth +\ This is PUBLIC DOMAIN (see public domain release statement below). +\ $Id: jonesforth.f,v 1.1 2007-09-24 00:18:19 rich Exp $ +\ +\ The first part of this tutorial is in jonesforth.S. Get it from http://annexia.org/forth +\ +\ PUBLIC DOMAIN ---------------------------------------------------------------------- +\ +\ I, the copyright holder of this work, hereby release it into the public domain. This applies worldwide. +\ +\ In case this is not legally possible, I grant any entity the right to use this work for any purpose, +\ without any conditions, unless such conditions are required by law. +\ +\ SETTING UP ---------------------------------------------------------------------- +\ +\ Let's get a few housekeeping things out of the way. 
Firstly because I need to draw lots of +\ ASCII-art diagrams to explain concepts, the best way to look at this is using a window which +\ uses a fixed width font and is at least this wide: +\ +\<------------------------------------------------------------------------------------------------------------------------> +\ +\ Secondly make sure TABS are set to 8 characters. The following should be a vertical +\ line. If not, sort out your tabs. +\ +\ | +\ | +\ | +\ +\ Thirdly I assume that your screen is at least 50 characters high. +\ +\ START OF FORTH CODE ---------------------------------------------------------------------- +\ +\ We've now reached the stage where the FORTH system is running and self-hosting. All further +\ words can be written as FORTH itself, including words like IF, THEN, .", etc which in most +\ languages would be considered rather fundamental. +\ +\ Some notes about the code: +\ +\ I use indenting to show structure. The amount of whitespace has no meaning to FORTH however +\ except that you must use at least one whitespace character between words, and words themselves +\ cannot contain whitespace. +\ +\ FORTH is case-sensitive. Use capslock! + +\ Define some character constants +: '\n' 10 ; +: 'SPACE' 32 ; + +\ CR prints a carriage return +: CR '\n' EMIT ; + +\ SPACE prints a space +: SPACE 'SPACE' EMIT ; + +\ DUP, DROP are defined in assembly for speed, but this is how you might define them +\ in FORTH. Notice use of the scratch variables _X and _Y. +\ : DUP _X ! _X @ _X @ ; +\ : DROP _X ! ; + +\ The built-in . (DOT) function doesn't print a space after the number (unlike the real FORTH word). +\ However this is very easily fixed by redefining . (DOT). Any built-in word can be redefined. +: . + . \ this refers back to the previous definition (but see also RECURSE below) + SPACE +; + +\ The 2... versions of the standard operators work on pairs of stack entries. They're not used +\ very commonly so not really worth writing in assembler. 
Here is how they are defined in FORTH. +: 2DUP OVER OVER ; +: 2DROP DROP DROP ; + +\ More standard FORTH words. +: 2* 2 * ; +: 2/ 2 / ; + +\ Standard words for manipulating BASE. +: DECIMAL 10 BASE ! ; +: HEX 16 BASE ! ; + +\ Standard words for booleans. +: TRUE 1 ; +: FALSE 0 ; +: NOT 0= ; + +\ LITERAL takes whatever is on the stack and compiles LIT +: LITERAL IMMEDIATE + ' LIT , \ compile LIT + , \ compile the literal itself (from the stack) + ; + +\ Now we can use [ and ] to insert literals which are calculated at compile time. +\ Within definitions, use [ ... ] LITERAL anywhere that '...' is a constant expression which you +\ would rather only compute once (at compile time, rather than calculating it each time your word runs). +: ':' + [ \ go into immediate mode temporarily + CHAR : \ push the number 58 (ASCII code of colon) on the stack + ] \ go back to compile mode + LITERAL \ compile LIT 58 as the definition of ':' word +; + +\ A few more character constants defined the same way as above. +: '(' [ CHAR ( ] LITERAL ; +: ')' [ CHAR ) ] LITERAL ; +: '"' [ CHAR " ] LITERAL ; + +\ So far we have defined only very simple definitions. Before we can go further, we really need to +\ make some control structures, like IF ... THEN and loops. Luckily we can define arbitrary control +\ structures directly in FORTH. +\ +\ Please note that the control structures as I have defined them here will only work inside compiled +\ words. If you try to type in expressions using IF, etc. in immediate mode, then they won't work. +\ Making these work in immediate mode is left as an exercise for the reader. 
+ +\ condition IF true-part THEN rest +\ -- compiles to: --> condition 0BRANCH OFFSET true-part rest +\ where OFFSET is the offset of 'rest' +\ condition IF true-part ELSE false-part THEN +\ -- compiles to: --> condition 0BRANCH OFFSET true-part BRANCH OFFSET2 false-part rest +\ where OFFSET is the offset of false-part and OFFSET2 is the offset of rest + +\ IF is an IMMEDIATE word which compiles 0BRANCH followed by a dummy offset, and places +\ the address of that dummy offset on the stack. Later when we see THEN, we pop that address +\ off the stack, calculate the offset, and back-fill the offset. +: IF IMMEDIATE + ' 0BRANCH , \ compile 0BRANCH + HERE @ \ save location of the offset on the stack + 0 , \ compile a dummy offset +; + +: THEN IMMEDIATE + DUP + HERE @ SWAP - \ calculate the offset from the address saved on the stack + SWAP ! \ store the offset in the back-filled location +; + +: ELSE IMMEDIATE + ' BRANCH , \ definite branch to just over the false-part + HERE @ \ save location of the offset on the stack + 0 , \ compile a dummy offset + SWAP \ now back-fill the original (IF) offset + DUP \ same as for THEN word above + HERE @ SWAP - + SWAP ! 
+; + +\ BEGIN loop-part condition UNTIL +\ -- compiles to: --> loop-part condition 0BRANCH OFFSET +\ where OFFSET points back to the loop-part +\ This is like do { loop-part } while (condition) in the C language +: BEGIN IMMEDIATE + HERE @ \ save location on the stack +; + +: UNTIL IMMEDIATE + ' 0BRANCH , \ compile 0BRANCH + HERE @ - \ calculate the offset from the address saved on the stack + , \ compile the offset here +; + +\ BEGIN loop-part AGAIN +\ -- compiles to: --> loop-part BRANCH OFFSET +\ where OFFSET points back to the loop-part +\ In other words, an infinite loop which can only be returned from with EXIT +: AGAIN IMMEDIATE + ' BRANCH , \ compile BRANCH + HERE @ - \ calculate the offset back + , \ compile the offset here +; + +\ BEGIN condition WHILE loop-part REPEAT +\ -- compiles to: --> condition 0BRANCH OFFSET2 loop-part BRANCH OFFSET +\ where OFFSET points back to condition (the beginning) and OFFSET2 points to after the whole piece of code +\ So this is like a while (condition) { loop-part } loop in the C language +: WHILE IMMEDIATE + ' 0BRANCH , \ compile 0BRANCH + HERE @ \ save location of the offset2 on the stack + 0 , \ compile a dummy offset2 +; + +: REPEAT IMMEDIATE + ' BRANCH , \ compile BRANCH + SWAP \ get the original offset (from BEGIN) + HERE @ - , \ and compile it after BRANCH + DUP + HERE @ SWAP - \ calculate the offset2 + SWAP ! \ and back-fill it in the original location +; + +\ FORTH allows ( ... ) as comments within function definitions. This works by having an IMMEDIATE +\ word called ( which just drops input characters until it hits the corresponding ). +: ( IMMEDIATE + 1 \ allowed nested parens by keeping track of depth + BEGIN + KEY \ read next character + DUP '(' = IF \ open paren? + DROP \ drop the open paren + 1+ \ depth increases + ELSE + ')' = IF \ close paren? 
+ 1- \ depth decreases + THEN + THEN + DUP 0= UNTIL \ continue until we reach matching close paren, depth 0 + DROP \ drop the depth counter +; + +( + From now on we can use ( ... ) for comments. + + In FORTH style we can also use ( ... -- ... ) to show the effects that a word has on the + parameter stack. For example: + + ( n -- ) means that the word consumes an integer (n) from the parameter stack. + ( b a -- c ) means that the word uses two integers (a and b, where a is at the top of stack) + and returns a single integer (c). + ( -- ) means the word has no effect on the stack +) + +( With the looping constructs, we can now write SPACES, which writes n spaces to stdout. ) +: SPACES ( n -- ) + BEGIN + DUP 0> ( while n > 0 ) + WHILE + SPACE ( print a space ) + 1- ( until we count down to 0 ) + REPEAT + DROP +; + +( c a b WITHIN returns true if a <= c and c < b ) +: WITHIN + ROT ( b c a ) + OVER ( b c a c ) + <= IF + > IF ( b c -- ) + TRUE + ELSE + FALSE + THEN + ELSE + 2DROP ( b c -- ) + FALSE + THEN +; + +( .S prints the contents of the stack. Very useful for debugging. ) +: .S ( -- ) + DSP@ ( get current stack pointer ) + BEGIN + DUP S0 @ < + WHILE + DUP @ . ( print the stack element ) + 4+ ( move up ) + REPEAT + DROP +; + +( DEPTH returns the depth of the stack. ) +: DEPTH ( -- n ) + S0 @ DSP@ - + 4- ( adjust because S0 was on the stack when we pushed DSP ) +; + +( + [NB. The following may be a bit confusing because of the need to use backslash before + each double quote character. The backslashes are there to keep the assembler happy. + They are NOT part of the final output. So here we are defining a function called + 'S double-quote' (not 'S backslash double-quote').] + + S" string" is used in FORTH to define strings. It leaves the address of the string and + its length on the stac,k with the address at the top. The space following S" is the normal + space between FORTH words and is not a part of the string. 
+
+	In compile mode we append
+		LITSTRING <string length> <string rounded up 4 bytes>
+	to the current word.  The primitive LITSTRING does the right thing when the current
+	word is executed.
+
+	In immediate mode there isn't a particularly good place to put the string, but in this
+	case we put the string at HERE (but we _don't_ change HERE).  This is meant as a temporary
+	location, likely to be overwritten soon after.
+
+	In both modes the closing double quote read by KEY is consumed but is not part of the string.
+)
+: S" IMMEDIATE		( -- len addr )
+	STATE @ IF	( compiling? )
+		' LITSTRING ,	( compile LITSTRING )
+		HERE @		( save the address of the length word on the stack )
+		0 ,		( dummy length - we don't know what it is yet )
+		BEGIN
+			KEY 		( get next character of the string )
+			DUP '"' <>
+		WHILE
+			HERE @ !b	( store the character in the compiled image )
+			1 HERE +!	( increment HERE pointer by 1 byte )
+		REPEAT
+		DROP		( drop the double quote character at the end )
+		DUP		( get the saved address of the length word )
+		HERE @ SWAP -	( calculate the length )
+		4-		( subtract 4 (because we measured from the start of the length word) )
+		SWAP !		( and back-fill the length location )
+		HERE @		( round up to next multiple of 4 bytes for the remaining code )
+		3 +
+		3 INVERT AND
+		HERE !
+	ELSE		( immediate mode )
+		HERE @		( get the start address of the temporary space )
+		BEGIN
+			KEY
+			DUP '"' <>
+		WHILE
+			OVER !b		( save next character )
+			1+		( increment address )
+		REPEAT
+		DROP		( drop the closing double quote, leaving just the end address )
+		HERE @ -	( calculate the length: end address minus start address )
+		HERE @		( push the start address )
+	THEN
+;
+
+(
+	." is the print string operator in FORTH.  Example: ." Something to print"
+	The space after the operator is the ordinary space required between words.
+
+	This is tricky to define because it has to do different things depending on whether
+	we are compiling or in immediate mode.  (Thus the word is marked IMMEDIATE so it can
+	detect this and do different things).
+
+	In immediate mode we just keep reading characters and printing them until we get to
+	the next double quote.
+
+	In compile mode we have the problem of where we're going to store the string (remember
+	that the input buffer where the string comes from may be overwritten by the time we
+	come round to running the function).  We store the string in the compiled function
+	like this:
+	..., LITSTRING, string length, string rounded up to 4 bytes, EMITSTRING, ...
+)
+: ." IMMEDIATE		( -- )
+	STATE @ IF	( compiling? )
+		' LITSTRING ,	( compile LITSTRING )
+		HERE @		( save the address of the length word on the stack )
+		0 ,		( dummy length - we don't know what it is yet )
+		BEGIN
+			KEY 		( get next character of the string )
+			DUP '"' <>
+		WHILE
+			HERE @ !b	( store the character in the compiled image )
+			1 HERE +!	( increment HERE pointer by 1 byte )
+		REPEAT
+		DROP		( drop the double quote character at the end )
+		DUP		( get the saved address of the length word )
+		HERE @ SWAP -	( calculate the length )
+		4-		( subtract 4 (because we measured from the start of the length word) )
+		SWAP !		( and back-fill the length location )
+		HERE @		( round up to next multiple of 4 bytes for the remaining code )
+		3 +
+		3 INVERT AND
+		HERE !
+		' EMITSTRING ,	( compile the final EMITSTRING )
+	ELSE
+		( In immediate mode, just read characters and print them until we get
+		  to the ending double quote.  Much simpler than the above code! )
+		BEGIN
+			KEY
+			DUP '"' = IF
+				DROP	( drop the double quote character )
+				EXIT	( return from this function )
+			THEN
+			EMIT
+		AGAIN
+	THEN
+;
+
+(
+	In FORTH, global constants and variables are defined like this:
+
+	10 CONSTANT TEN		when TEN is executed, it leaves the integer 10 on the stack
+	VARIABLE VAR		when VAR is executed, it leaves the address of VAR on the stack
+
+	Constants can be read but not written, eg:
+
+	TEN . CR		prints 10
+
+	You can read a variable (in this example called VAR) by doing:
+
+	VAR @			leaves the value of VAR on the stack
+	VAR @ . CR		prints the value of VAR
+
+	and update the variable by doing:
+
+	20 VAR !
sets VAR to 20
+
+	Note that variables are uninitialised (but see VALUE later on which provides initialised
+	variables with a slightly simpler syntax).
+
+	How can we define the words CONSTANT and VARIABLE?
+
+	The trick is to define a new word for the variable itself (eg. if the variable was called
+	'VAR' then we would define a new word called VAR).  This is easy to do because we exposed
+	dictionary entry creation through the CREATE word (part of the definition of : above).
+	A call to CREATE TEN leaves the dictionary entry:
+
+	                           +--- HERE
+	                           |
+	                           V
+	+---------+---+---+---+---+
+	| LINK    | 3 | T | E | N |
+	+---------+---+---+---+---+
+	           len
+
+	For CONSTANT we can continue by appending DOCOL (the codeword), then LIT followed by
+	the constant itself and then EXIT, forming a little word definition that returns the
+	constant:
+
+	+---------+---+---+---+---+------------+------------+------------+------------+
+	| LINK    | 3 | T | E | N | DOCOL      | LIT        | 10         | EXIT       |
+	+---------+---+---+---+---+------------+------------+------------+------------+
+	           len              codeword
+
+	Notice that this word definition is exactly the same as you would have got if you had
+	written : TEN 10 ;
+)
+: CONSTANT	( n -- )
+	CREATE		( make the dictionary entry (the name follows CONSTANT) )
+	DOCOL ,		( append DOCOL (the codeword field of this word) )
+	' LIT ,		( append the codeword LIT )
+	,		( append the value on the top of the stack )
+	' EXIT ,	( append the codeword EXIT )
+;
+
+(
+	VARIABLE is a little bit harder because we need somewhere to put the variable.  There is
+	nothing particularly special about the 'user definitions area' (the area of memory pointed
+	to by HERE where we have previously just stored new word definitions).
We can slice off
+	bits of this memory area to store anything we want, so one possible definition of
+	VARIABLE might create this:
+
+	   +--------------------------------------------------------------+
+	   |                                                              |
+	   V                                                              |
+	+---------+---------+---+---+---+---+------------+------------+---|--------+------------+
+	| <var>   | LINK    | 3 | V | A | R | DOCOL      | LIT        | <addr var> | EXIT       |
+	+---------+---------+---+---+---+---+------------+------------+------------+------------+
+	                     len              codeword
+
+	where <var> is the place to store the variable, and <addr var> points back to it.
+
+	To make this more general let's define a couple of words which we can use to allocate
+	arbitrary memory from the user definitions area.
+
+	First ALLOT, where n ALLOT allocates n bytes of memory.  (Note when calling this that
+	it's a very good idea to make sure that n is a multiple of 4, or at least that next time
+	a word is compiled that n has been left as a multiple of 4).
+)
+: ALLOT		( n -- addr )
+	HERE @ SWAP	( here n -- )
+	HERE +!		( adds n to HERE, after this the old value of HERE is still on the stack )
+;
+
+(
+	Second, CELLS.  In FORTH the phrase 'n CELLS ALLOT' means allocate n integers of whatever size
+	is the natural size for integers on this machine architecture.  On this 32 bit machine therefore
+	CELLS just multiplies the top of stack by 4.
+)
+: CELLS ( n -- n ) 4 * ;
+
+(
+	So now we can define VARIABLE easily in much the same way as CONSTANT above.  Refer to the
+	diagram above to see what the word that this creates will look like.
+)
+: VARIABLE
+	1 CELLS ALLOT	( allocate 1 cell of memory, push the pointer to this memory )
+	CREATE		( make the dictionary entry (the name follows VARIABLE) )
+	DOCOL ,		( append DOCOL (the codeword field of this word) )
+	' LIT ,		( append the codeword LIT )
+	,		( append the pointer to the new memory )
+	' EXIT ,	( append the codeword EXIT )
+;
+
+(
+	VALUEs are like VARIABLEs but with a simpler syntax.
You would generally use them when you
+	want a variable which is read often, and written infrequently.
+
+	20 VALUE VAL 	creates VAL with initial value 20
+	VAL		pushes the value directly on the stack
+	30 TO VAL	updates VAL, setting it to 30
+
+	Notice that 'VAL' on its own doesn't return the address of the value, but the value itself,
+	making values simpler and more obvious to use than variables (no indirection through '@').
+	The price is a more complicated implementation, although despite the complexity there is no
+	particular performance penalty at runtime.
+
+	A naive implementation of 'TO' would be quite slow, involving a dictionary search each time.
+	But because this is FORTH we have complete control of the compiler so we can compile TO more
+	efficiently, turning:
+		TO VAL
+	into:
+		LIT <addr> !
+	and calculating <addr> (the address of the value) at compile time.
+
+	Now this is the clever bit.  We'll compile our value like this:
+
+	+---------+---+---+---+---+------------+------------+------------+------------+
+	| LINK    | 3 | V | A | L | DOCOL      | LIT        | <value>    | EXIT       |
+	+---------+---+---+---+---+------------+------------+------------+------------+
+	           len              codeword
+
+	where <value> is the actual value itself.  Note that when VAL executes, it will push the
+	value on the stack, which is what we want.
+
+	But what will TO use for the address <addr>?  Why of course a pointer to that <value>:
+
+	        code compiled   - - - - --+------------+------------+------------+-- - - - -
+	        by TO VAL                 | LIT        | <addr>     | !          |
+	                        - - - - --+------------+-----|------+------------+-- - - - -
+	                                                     |
+	                                                     V
+	+---------+---+---+---+---+------------+------------+------------+------------+
+	| LINK    | 3 | V | A | L | DOCOL      | LIT        | <value>    | EXIT       |
+	+---------+---+---+---+---+------------+------------+------------+------------+
+	           len              codeword
+
+	In other words, this is a kind of self-modifying code.
+
+	(Note to the people who want to modify this FORTH to add inlining: values defined this
+	way cannot be inlined).
+)
+: VALUE		( n -- )
+	CREATE		( make the dictionary entry (the name follows VALUE) )
+	DOCOL ,		( append DOCOL )
+	' LIT ,		( append the codeword LIT )
+	,		( append the initial value )
+	' EXIT ,	( append the codeword EXIT )
+;
+
+( NOTE for TO and +TO: the result of FIND is used without any check, so a name that is not in the
+  dictionary is presumably not handled gracefully - TODO confirm against the FIND primitive. )
+: TO IMMEDIATE	( n -- )
+	WORD		( get the name of the value )
+	FIND		( look it up in the dictionary )
+	>DFA		( get a pointer to the first data field (the 'LIT') )
+	4+		( increment to point at the value )
+	STATE @ IF	( compiling? )
+		' LIT ,		( compile LIT )
+		,		( compile the address of the value )
+		' ! ,		( compile ! )
+	ELSE		( immediate mode )
+		!		( update it straightaway )
+	THEN
+;
+
+( x +TO VAL adds x to VAL )
+: +TO IMMEDIATE
+	WORD		( get the name of the value )
+	FIND		( look it up in the dictionary )
+	>DFA		( get a pointer to the first data field (the 'LIT') )
+	4+		( increment to point at the value )
+	STATE @ IF	( compiling? )
+		' LIT ,		( compile LIT )
+		,		( compile the address of the value )
+		' +! ,		( compile +! )
+	ELSE		( immediate mode )
+		+!		( update it straightaway )
+	THEN
+;
+
+(
+	ID. takes an address of a dictionary entry and prints the word's name.
+
+	For example: LATEST @ ID. would print the name of the last word that was defined.
+)
+: ID.		( addr -- )
+	4+		( skip over the link pointer )
+	DUP @b		( get the flags/length byte )
+	F_LENMASK AND	( mask out the flags - just want the length )
+
+	BEGIN
+		DUP 0>		( length > 0? )
+	WHILE
+		SWAP 1+		( addr len -- len addr+1 )
+		DUP @b		( len addr -- len addr char | get the next character)
+		EMIT		( len addr char -- len addr | and print it)
+		SWAP 1-		( len addr -- addr len-1    | subtract one from length )
+	REPEAT
+	2DROP		( addr len -- )
+;
+
+(
+	'WORD word FIND ?HIDDEN' returns true if 'word' is flagged as hidden.
+
+	'WORD word FIND ?IMMEDIATE' returns true if 'word' is flagged as immediate.
+)
+: ?HIDDEN	( addr -- flag )
+	4+		( skip over the link pointer )
+	@b		( get the flags/length byte )
+	F_HIDDEN AND	( mask the F_HIDDEN flag and return it (as a truth value) )
+;
+: ?IMMEDIATE	( addr -- flag )
+	4+		( skip over the link pointer )
+	@b		( get the flags/length byte )
+	F_IMMED AND	( mask the F_IMMED flag and return it (as a truth value) )
+;
+
+(
+	WORDS prints all the words defined in the dictionary, starting with the word defined most recently.
+	However it doesn't print hidden words.
+
+	The implementation simply iterates backwards from LATEST using the link pointers.
+	The separating space is only printed after a word that was actually shown, so hidden
+	words leave no stray blanks in the output.
+)
+: WORDS		( -- )
+	LATEST @	( start at LATEST dictionary entry )
+	BEGIN
+		DUP 0<>		( while link pointer is not null )
+	WHILE
+		DUP ?HIDDEN NOT IF	( ignore hidden words )
+			DUP ID.		( print the word )
+			SPACE		( and a separator after it )
+		THEN
+		@		( dereference the link pointer - go to previous word )
+	REPEAT
+	DROP
+	CR
+;
+
+(
+	So far we have only allocated words and memory.  FORTH provides a rather primitive method
+	to deallocate.
+
+	'FORGET word' deletes the definition of 'word' from the dictionary and everything defined
+	after it, including any variables and other memory allocated after.
+
+	The implementation is very simple - we look up the word (which returns the dictionary entry
+	address).  Then we set HERE to point to that address, so in effect all future allocations
+	and definitions will overwrite memory starting at the word.  We also need to set LATEST to
+	point to the previous word.
+
+	Note that you cannot FORGET built-in words (well, you can try but it will probably cause
+	a segfault).
+
+	XXX: Because we wrote VARIABLE to store the variable in memory allocated before the word,
+	in the current implementation VARIABLE FOO FORGET FOO will leak 1 cell of memory.
+)
+: FORGET
+	WORD FIND	( find the word, gets the dictionary entry address )
+	DUP @ LATEST !	( set LATEST to point to the previous word )
+	HERE !		( and store HERE with the dictionary address )
+;
+
+(
+	While compiling, '[COMPILE] word' compiles 'word' if it would otherwise be IMMEDIATE.
+)
+: [COMPILE] IMMEDIATE
+	WORD		( get the next word )
+	FIND		( find it in the dictionary )
+	>CFA		( get its codeword )
+	,		( and compile that )
+;
+
+(
+	RECURSE makes a recursive call to the current word that is being compiled.
+
+	Normally while a word is being compiled, it is marked HIDDEN so that references to the
+	same word within are calls to the previous definition of the word.  However we still have
+	access to the word which we are currently compiling through the LATEST pointer so we
+	can use that to compile a recursive call.
+)
+: RECURSE IMMEDIATE
+	LATEST @ >CFA	( LATEST points to the word being compiled at the moment )
+	,		( compile it )
+;
+
+(
+	DUMP is used to dump out the contents of memory, in the 'traditional' hexdump format.
+
+	Each output line shows the address, then up to 16 bytes in hex, then the same bytes as
+	ASCII, with '?' substituted for any byte outside the range 32 to 127.
+)
+: DUMP		( addr len -- )
+	BASE @ ROT		( save the current BASE at the bottom of the stack )
+	HEX			( and switch to hexadecimal mode )
+
+	BEGIN
+		DUP 0>		( while len > 0 )
+	WHILE
+		OVER .		( print the address )
+		SPACE
+
+		( print up to 16 bytes on this line )
+		2DUP		( addr len addr len )
+		1- 15 AND 1+	( addr len addr linelen )
+		BEGIN
+			DUP 0>		( while linelen > 0 )
+		WHILE
+			SWAP		( addr len linelen addr )
+			DUP @b		( addr len linelen addr byte )
+			. SPACE		( print the byte )
+			1+ SWAP 1-	( addr len linelen addr -- addr len addr+1 linelen-1 )
+		REPEAT
+		2DROP		( addr len )
+
+		( print the ASCII equivalents )
+		2DUP 1- 15 AND 1+ ( addr len addr linelen )
+		BEGIN
+			DUP 0>		( while linelen > 0)
+		WHILE
+			SWAP		( addr len linelen addr )
+			DUP @b		( addr len linelen addr byte )
+			DUP 32 128 WITHIN IF	( 32 <= c < 128? )
+				EMIT
+			ELSE
+				DROP [ CHAR ? ] LITERAL EMIT
+			THEN
+			1+ SWAP 1-	( addr len linelen addr -- addr len addr+1 linelen-1 )
+		REPEAT
+		2DROP		( addr len )
+		CR
+
+		( advance addr and shrink len by the number of bytes just printed )
+		DUP 1- 15 AND 1+ ( addr len linelen )
+		DUP		( addr len linelen linelen )
+		ROT		( addr linelen len linelen )
+		-		( addr linelen len-linelen )
+		ROT		( len-linelen addr linelen )
+		+		( len-linelen addr+linelen )
+		SWAP		( addr+linelen len-linelen )
+	REPEAT
+
+	2DROP			( restore stack )
+	BASE !			( restore saved BASE )
+;
+
+( Finally print the welcome prompt. )
+." JONESFORTH VERSION " VERSION . CR
+." OK "
-- 
1.8.3.1