/* A sometimes minimal FORTH compiler and tutorial for Linux / i386 systems. -*- asm -*-
By Richard W.M. Jones <rich@annexia.org> http://annexia.org/forth
This is PUBLIC DOMAIN (see public domain release statement below).
- $Id: jonesforth.S,v 1.27 2007-09-24 00:01:38 rich Exp $
+ $Id: jonesforth.S,v 1.28 2007-09-24 00:18:19 rich Exp $
gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
*/
- .set JONES_VERSION,27
+/* Numeric version of this file; it tracks the CVS revision shown in the Id line of the header (1.28). */
+ .set JONES_VERSION,28
/*
INTRODUCTION ----------------------------------------------------------------------
assemble and run the code (save this file as 'jonesforth.S') are:
gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
- ./jonesforth
-
- You will see lots of 'Warning: unterminated string; newline inserted' messages from the
- assembler. That's just because the GNU assembler doesn't have a good syntax for multi-line
- strings (or rather it used to, but the developers removed it!) so I've abused the syntax
- slightly to make things readable. Ignore these warnings.
+ cat jonesforth.f - | ./jonesforth
If you want to run your own FORTH programs you can do:
- ./jonesforth < myprog.f
+ cat jonesforth.f myprog.f | ./jonesforth
If you want to load your own FORTH code and then continue reading user commands, you can do:
- cat myfunctions.f - | ./jonesforth
+ cat jonesforth.f myfunctions.f - | ./jonesforth
ASSEMBLER ----------------------------------------------------------------------
user_defs_start:
.space USER_DEFS_SIZE
+/* This is used as a temporary input buffer when reading from files or the terminal.
+
+ buffer .. buffend delimit BUFFER_SIZE bytes of storage aligned to a 4096-byte boundary.
+ Nothing is preloaded into the buffer any more, so _initbufftop is kept only as an alias of
+ buffer and bufftop starts out equal to currkey. (Presumably KEY treats currkey == bufftop
+ as "buffer exhausted" and refills it -- KEY is outside this excerpt, confirm there.)
+
+ currkey and bufftop carry non-zero initial values, so they are emitted into .data explicitly
+ rather than into whichever section happens to be active at this point (the previous layout
+ had its own .data directive in front of the buffer). .pushsection/.popsection leave the
+ active section unchanged for whatever follows.
+*/
+ .set BUFFER_SIZE,4096
+ .align 4096
+buffer:
+_initbufftop:
+ .space BUFFER_SIZE
+buffend:
+
+ .pushsection .data
+ .align 4
+currkey:
+ .int buffer
+bufftop:
+ .int _initbufftop
+ .popsection
+
/*
BUILT-IN WORDS ----------------------------------------------------------------------
words can be written as FORTH itself, including words like IF, THEN, .", etc which in most
languages would be considered rather fundamental.
- As a kind of trick, I prefill the input buffer with the initial FORTH code. Once this code
- has run (when we get to the "OK" prompt), this input buffer is reused for reading any further
- user input.
-
- Some notes about the code:
-
- \ (backslash) is the FORTH way to start a comment which goes up to the next newline. However
- because this is a C-style string, I have to escape the backslash, which is why they appear as
- \\ comment.
-
- Similarly, any backslashes in the code are doubled, and " becomes \" (eg. the definition of ."
- is written as : .\" ... ;)
-
- I use indenting to show structure. The amount of whitespace has no meaning to FORTH however
- except that you must use at least one whitespace character between words, and words themselves
- cannot contain whitespace.
-
- FORTH is case-sensitive. Use capslock!
+ I used to append that FORTH code here in the assembly file, but I got sick of fighting against gas's
+ stupid (lack of) multiline string syntax. So now it lives in a separate file called jonesforth.f.
- Enjoy!
+ If you don't already have that file, download it from http://annexia.org/forth in order
+ to continue the tutorial.
*/
- .data
- .align 4096
-buffer:
- // Multi-line constant gives 'Warning: unterminated string; newline inserted' messages which you can ignore.
- .ascii "\
-\\ Define some character constants
-: '\\n' 10 ;
-: 'SPACE' 32 ;
-
-\\ CR prints a carriage return
-: CR '\\n' EMIT ;
-
-\\ SPACE prints a space
-: SPACE 'SPACE' EMIT ;
-
-\\ DUP, DROP are defined in assembly for speed, but this is how you might define them
-\\ in FORTH. Notice use of the scratch variables _X and _Y.
-\\ : DUP _X ! _X @ _X @ ;
-\\ : DROP _X ! ;
-
-\\ The built-in . (DOT) function doesn't print a space after the number (unlike the real FORTH word).
-\\ However this is very easily fixed by redefining . (DOT). Any built-in word can be redefined.
-: .
- . \\ this refers back to the previous definition (but see also RECURSE below)
- SPACE
-;
-
-\\ The 2... versions of the standard operators work on pairs of stack entries. They're not used
-\\ very commonly so not really worth writing in assembler. Here is how they are defined in FORTH.
-: 2DUP OVER OVER ;
-: 2DROP DROP DROP ;
-
-\\ More standard FORTH words.
-: 2* 2 * ;
-: 2/ 2 / ;
-
-\\ Standard words for manipulating BASE.
-: DECIMAL 10 BASE ! ;
-: HEX 16 BASE ! ;
-
-\\ Standard words for booleans.
-: TRUE 1 ;
-: FALSE 0 ;
-: NOT 0= ;
-
-\\ LITERAL takes whatever is on the stack and compiles LIT <foo>
-: LITERAL IMMEDIATE
- ' LIT , \\ compile LIT
- , \\ compile the literal itself (from the stack)
- ;
-
-\\ Now we can use [ and ] to insert literals which are calculated at compile time.
-\\ Within definitions, use [ ... ] LITERAL anywhere that '...' is a constant expression which you
-\\ would rather only compute once (at compile time, rather than calculating it each time your word runs).
-: ':'
- [ \\ go into immediate mode temporarily
- CHAR : \\ push the number 58 (ASCII code of colon) on the stack
- ] \\ go back to compile mode
- LITERAL \\ compile LIT 58 as the definition of ':' word
-;
-
-\\ A few more character constants defined the same way as above.
-: '(' [ CHAR ( ] LITERAL ;
-: ')' [ CHAR ) ] LITERAL ;
-: '\"' [ CHAR \" ] LITERAL ;
-
-\\ So far we have defined only very simple definitions. Before we can go further, we really need to
-\\ make some control structures, like IF ... THEN and loops. Luckily we can define arbitrary control
-\\ structures directly in FORTH.
-\\
-\\ Please note that the control structures as I have defined them here will only work inside compiled
-\\ words. If you try to type in expressions using IF, etc. in immediate mode, then they won't work.
-\\ Making these work in immediate mode is left as an exercise for the reader.
-
-\\ condition IF true-part THEN rest
-\\ -- compiles to: --> condition 0BRANCH OFFSET true-part rest
-\\ where OFFSET is the offset of 'rest'
-\\ condition IF true-part ELSE false-part THEN
-\\ -- compiles to: --> condition 0BRANCH OFFSET true-part BRANCH OFFSET2 false-part rest
-\\ where OFFSET if the offset of false-part and OFFSET2 is the offset of rest
-
-\\ IF is an IMMEDIATE word which compiles 0BRANCH followed by a dummy offset, and places
-\\ the address of the 0BRANCH on the stack. Later when we see THEN, we pop that address
-\\ off the stack, calculate the offset, and back-fill the offset.
-: IF IMMEDIATE
- ' 0BRANCH , \\ compile 0BRANCH
- HERE @ \\ save location of the offset on the stack
- 0 , \\ compile a dummy offset
-;
-
-: THEN IMMEDIATE
- DUP
- HERE @ SWAP - \\ calculate the offset from the address saved on the stack
- SWAP ! \\ store the offset in the back-filled location
-;
-
-: ELSE IMMEDIATE
- ' BRANCH , \\ definite branch to just over the false-part
- HERE @ \\ save location of the offset on the stack
- 0 , \\ compile a dummy offset
- SWAP \\ now back-fill the original (IF) offset
- DUP \\ same as for THEN word above
- HERE @ SWAP -
- SWAP !
-;
-
-\\ BEGIN loop-part condition UNTIL
-\\ -- compiles to: --> loop-part condition 0BRANCH OFFSET
-\\ where OFFSET points back to the loop-part
-\\ This is like do { loop-part } while (condition) in the C language
-: BEGIN IMMEDIATE
- HERE @ \\ save location on the stack
-;
-
-: UNTIL IMMEDIATE
- ' 0BRANCH , \\ compile 0BRANCH
- HERE @ - \\ calculate the offset from the address saved on the stack
- , \\ compile the offset here
-;
-
-\\ BEGIN loop-part AGAIN
-\\ -- compiles to: --> loop-part BRANCH OFFSET
-\\ where OFFSET points back to the loop-part
-\\ In other words, an infinite loop which can only be returned from with EXIT
-: AGAIN IMMEDIATE
- ' BRANCH , \\ compile BRANCH
- HERE @ - \\ calculate the offset back
- , \\ compile the offset here
-;
-
-\\ BEGIN condition WHILE loop-part REPEAT
-\\ -- compiles to: --> condition 0BRANCH OFFSET2 loop-part BRANCH OFFSET
-\\ where OFFSET points back to condition (the beginning) and OFFSET2 points to after the whole piece of code
-\\ So this is like a while (condition) { loop-part } loop in the C language
-: WHILE IMMEDIATE
- ' 0BRANCH , \\ compile 0BRANCH
- HERE @ \\ save location of the offset2 on the stack
- 0 , \\ compile a dummy offset2
-;
-
-: REPEAT IMMEDIATE
- ' BRANCH , \\ compile BRANCH
- SWAP \\ get the original offset (from BEGIN)
- HERE @ - , \\ and compile it after BRANCH
- DUP
- HERE @ SWAP - \\ calculate the offset2
- SWAP ! \\ and back-fill it in the original location
-;
-
-\\ FORTH allows ( ... ) as comments within function definitions. This works by having an IMMEDIATE
-\\ word called ( which just drops input characters until it hits the corresponding ).
-: ( IMMEDIATE
- 1 \\ allowed nested parens by keeping track of depth
- BEGIN
- KEY \\ read next character
- DUP '(' = IF \\ open paren?
- DROP \\ drop the open paren
- 1+ \\ depth increases
- ELSE
- ')' = IF \\ close paren?
- 1- \\ depth decreases
- THEN
- THEN
- DUP 0= UNTIL \\ continue until we reach matching close paren, depth 0
- DROP \\ drop the depth counter
-;
-
-(
- From now on we can use ( ... ) for comments.
-
- In FORTH style we can also use ( ... -- ... ) to show the effects that a word has on the
- parameter stack. For example:
-
- ( n -- ) means that the word consumes an integer (n) from the parameter stack.
- ( b a -- c ) means that the word uses two integers (a and b, where a is at the top of stack)
- and returns a single integer (c).
- ( -- ) means the word has no effect on the stack
-)
-
-( With the looping constructs, we can now write SPACES, which writes n spaces to stdout. )
-: SPACES ( n -- )
- BEGIN
- DUP 0> ( while n > 0 )
- WHILE
- SPACE ( print a space )
- 1- ( until we count down to 0 )
- REPEAT
- DROP
-;
-
-( c a b WITHIN returns true if a <= c and c < b )
-: WITHIN
- ROT ( b c a )
- OVER ( b c a c )
- <= IF
- > IF ( b c -- )
- TRUE
- ELSE
- FALSE
- THEN
- ELSE
- 2DROP ( b c -- )
- FALSE
- THEN
-;
-
-( .S prints the contents of the stack. Very useful for debugging. )
-: .S ( -- )
- DSP@ ( get current stack pointer )
- BEGIN
- DUP S0 @ <
- WHILE
- DUP @ . ( print the stack element )
- 4+ ( move up )
- REPEAT
- DROP
-;
-
-( DEPTH returns the depth of the stack. )
-: DEPTH ( -- n )
- S0 @ DSP@ -
- 4- ( adjust because S0 was on the stack when we pushed DSP )
-;
-
-(
- [NB. The following may be a bit confusing because of the need to use backslash before
- each double quote character. The backslashes are there to keep the assembler happy.
- They are NOT part of the final output. So here we are defining a function called
- 'S double-quote' (not 'S backslash double-quote').]
-
- S\" string\" is used in FORTH to define strings. It leaves the address of the string and
- its length on the stac,k with the address at the top. The space following S\" is the normal
- space between FORTH words and is not a part of the string.
-
- In compile mode we append
- LITSTRING <string length> <string rounded up 4 bytes>
- to the current word. The primitive LITSTRING does the right thing when the current
- word is executed.
-
- In immediate mode there isn't a particularly good place to put the string, but in this
- case we put the string at HERE (but we _don't_ change HERE). This is meant as a temporary
- location, likely to be overwritten soon after.
-)
-: S\" IMMEDIATE ( -- len addr )
- STATE @ IF ( compiling? )
- ' LITSTRING , ( compile LITSTRING )
- HERE @ ( save the address of the length word on the stack )
- 0 , ( dummy length - we don't know what it is yet )
- BEGIN
- KEY ( get next character of the string )
- DUP '\"' <>
- WHILE
- HERE @ !b ( store the character in the compiled image )
- 1 HERE +! ( increment HERE pointer by 1 byte )
- REPEAT
- DROP ( drop the double quote character at the end )
- DUP ( get the saved address of the length word )
- HERE @ SWAP - ( calculate the length )
- 4- ( subtract 4 (because we measured from the start of the length word) )
- SWAP ! ( and back-fill the length location )
- HERE @ ( round up to next multiple of 4 bytes for the remaining code )
- 3 +
- 3 INVERT AND
- HERE !
- ELSE ( immediate mode )
- HERE @ ( get the start address of the temporary space )
- BEGIN
- KEY
- DUP '\"' <>
- WHILE
- OVER !b ( save next character )
- 1+ ( increment address )
- REPEAT
- HERE @ - ( calculate the length )
- HERE @ ( push the start address )
- THEN
-;
-
-(
- .\" is the print string operator in FORTH. Example: .\" Something to print\"
- The space after the operator is the ordinary space required between words.
-
- This is tricky to define because it has to do different things depending on whether
- we are compiling or in immediate mode. (Thus the word is marked IMMEDIATE so it can
- detect this and do different things).
-
- In immediate mode we just keep reading characters and printing them until we get to
- the next double quote.
-
- In compile mode we have the problem of where we're going to store the string (remember
- that the input buffer where the string comes from may be overwritten by the time we
- come round to running the function). We store the string in the compiled function
- like this:
- ..., LITSTRING, string length, string rounded up to 4 bytes, EMITSTRING, ...
-)
-: .\" IMMEDIATE ( -- )
- STATE @ IF ( compiling? )
- ' LITSTRING , ( compile LITSTRING )
- HERE @ ( save the address of the length word on the stack )
- 0 , ( dummy length - we don't know what it is yet )
- BEGIN
- KEY ( get next character of the string )
- DUP '\"' <>
- WHILE
- HERE @ !b ( store the character in the compiled image )
- 1 HERE +! ( increment HERE pointer by 1 byte )
- REPEAT
- DROP ( drop the double quote character at the end )
- DUP ( get the saved address of the length word )
- HERE @ SWAP - ( calculate the length )
- 4- ( subtract 4 (because we measured from the start of the length word) )
- SWAP ! ( and back-fill the length location )
- HERE @ ( round up to next multiple of 4 bytes for the remaining code )
- 3 +
- 3 INVERT AND
- HERE !
- ' EMITSTRING , ( compile the final EMITSTRING )
- ELSE
- ( In immediate mode, just read characters and print them until we get
- to the ending double quote. Much simpler than the above code! )
- BEGIN
- KEY
- DUP '\"' = IF
- DROP ( drop the double quote character )
- EXIT ( return from this function )
- THEN
- EMIT
- AGAIN
- THEN
-;
-
-(
- In FORTH, global constants and variables are defined like this:
-
- 10 CONSTANT TEN when TEN is executed, it leaves the integer 10 on the stack
- VARIABLE VAR when VAR is executed, it leaves the address of VAR on the stack
-
- Constants can be read by not written, eg:
-
- TEN . CR prints 10
-
- You can read a variable (in this example called VAR) by doing:
-
- VAR @ leaves the value of VAR on the stack
- VAR @ . CR prints the value of VAR
-
- and update the variable by doing:
-
- 20 VAR ! sets VAR to 20
-
- Note that variables are uninitialised (but see VALUE later on which provides initialised
- variables with a slightly simpler syntax).
-
- How can we define the words CONSTANT and VARIABLE?
-
- The trick is to define a new word for the variable itself (eg. if the variable was called
- 'VAR' then we would define a new word called VAR). This is easy to do because we exposed
- dictionary entry creation through the CREATE word (part of the definition of : above).
- A call to CREATE TEN leaves the dictionary entry:
-
- +--- HERE
- |
- V
- +---------+---+---+---+---+
- | LINK | 3 | T | E | N |
- +---------+---+---+---+---+
- len
-
- For CONSTANT we can continue by appending DOCOL (the codeword), then LIT followed by
- the constant itself and then EXIT, forming a little word definition that returns the
- constant:
-
- +---------+---+---+---+---+------------+------------+------------+------------+
- | LINK | 3 | T | E | N | DOCOL | LIT | 10 | EXIT |
- +---------+---+---+---+---+------------+------------+------------+------------+
- len codeword
-
- Notice that this word definition is exactly the same as you would have got if you had
- written : TEN 10 ;
-)
-: CONSTANT
- CREATE ( make the dictionary entry (the name follows CONSTANT) )
- DOCOL , ( append DOCOL (the codeword field of this word) )
- ' LIT , ( append the codeword LIT )
- , ( append the value on the top of the stack )
- ' EXIT , ( append the codeword EXIT )
-;
-
-(
- VARIABLE is a little bit harder because we need somewhere to put the variable. There is
- nothing particularly special about the 'user definitions area' (the area of memory pointed
- to by HERE where we have previously just stored new word definitions). We can slice off
- bits of this memory area to store anything we want, so one possible definition of
- VARIABLE might create this:
-
- +--------------------------------------------------------------+
- | |
- V |
- +---------+---------+---+---+---+---+------------+------------+---|--------+------------+
- | <var> | LINK | 3 | V | A | R | DOCOL | LIT | <addr var> | EXIT |
- +---------+---------+---+---+---+---+------------+------------+------------+------------+
- len codeword
-
- where <var> is the place to store the variable, and <addr var> points back to it.
-
- To make this more general let's define a couple of words which we can use to allocate
- arbitrary memory from the user definitions area.
-
- First ALLOT, where n ALLOT allocates n bytes of memory. (Note when calling this that
- it's a very good idea to make sure that n is a multiple of 4, or at least that next time
- a word is compiled that n has been left as a multiple of 4).
-)
-: ALLOT ( n -- addr )
- HERE @ SWAP ( here n -- )
- HERE +! ( adds n to HERE, after this the old value of HERE is still on the stack )
-;
-
-(
- Second, CELLS. In FORTH the phrase 'n CELLS ALLOT' means allocate n integers of whatever size
- is the natural size for integers on this machine architecture. On this 32 bit machine therefore
- CELLS just multiplies the top of stack by 4.
-)
-: CELLS ( n -- n ) 4 * ;
-
-(
- So now we can define VARIABLE easily in much the same way as CONSTANT above. Refer to the
- diagram above to see what the word that this creates will look like.
-)
-: VARIABLE
- 1 CELLS ALLOT ( allocate 1 cell of memory, push the pointer to this memory )
- CREATE ( make the dictionary entry (the name follows VARIABLE) )
- DOCOL , ( append DOCOL (the codeword field of this word) )
- ' LIT , ( append the codeword LIT )
- , ( append the pointer to the new memory )
- ' EXIT , ( append the codeword EXIT )
-;
-
-(
- VALUEs are like VARIABLEs but with a simpler syntax. You would generally use them when you
- want a variable which is read often, and written infrequently.
-
- 20 VALUE VAL creates VAL with initial value 20
- VAL pushes the value directly on the stack
- 30 TO VAL updates VAL, setting it to 30
-
- Notice that 'VAL' on its own doesn't return the address of the value, but the value itself,
- making values simpler and more obvious to use than variables (no indirection through '@').
- The price is a more complicated implementation, although despite the complexity there is no
- particular performance penalty at runtime.
-
- A naive implementation of 'TO' would be quite slow, involving a dictionary search each time.
- But because this is FORTH we have complete control of the compiler so we can compile TO more
- efficiently, turning:
- TO VAL
- into:
- LIT <addr> !
- and calculating <addr> (the address of the value) at compile time.
-
- Now this is the clever bit. We'll compile our value like this:
-
- +---------+---+---+---+---+------------+------------+------------+------------+
- | LINK | 3 | V | A | L | DOCOL | LIT | <value> | EXIT |
- +---------+---+---+---+---+------------+------------+------------+------------+
- len codeword
-
- where <value> is the actual value itself. Note that when VAL executes, it will push the
- value on the stack, which is what we want.
-
- But what will TO use for the address <addr>? Why of course a pointer to that <value>:
-
- code compiled - - - - --+------------+------------+------------+-- - - - -
- by TO VAL | LIT | <addr> | ! |
- - - - - --+------------+-----|------+------------+-- - - - -
- |
- V
- +---------+---+---+---+---+------------+------------+------------+------------+
- | LINK | 3 | V | A | L | DOCOL | LIT | <value> | EXIT |
- +---------+---+---+---+---+------------+------------+------------+------------+
- len codeword
-
- In other words, this is a kind of self-modifying code.
-
- (Note to the people who want to modify this FORTH to add inlining: values defined this
- way cannot be inlined).
-)
-: VALUE ( n -- )
- CREATE ( make the dictionary entry (the name follows VALUE) )
- DOCOL , ( append DOCOL )
- ' LIT , ( append the codeword LIT )
- , ( append the initial value )
- ' EXIT , ( append the codeword EXIT )
-;
-
-: TO IMMEDIATE ( n -- )
- WORD ( get the name of the value )
- FIND ( look it up in the dictionary )
- >DFA ( get a pointer to the first data field (the 'LIT') )
- 4+ ( increment to point at the value )
- STATE @ IF ( compiling? )
- ' LIT , ( compile LIT )
- , ( compile the address of the value )
- ' ! , ( compile ! )
- ELSE ( immediate mode )
- ! ( update it straightaway )
- THEN
-;
-
-( x +TO VAL adds x to VAL )
-: +TO IMMEDIATE
- WORD ( get the name of the value )
- FIND ( look it up in the dictionary )
- >DFA ( get a pointer to the first data field (the 'LIT') )
- 4+ ( increment to point at the value )
- STATE @ IF ( compiling? )
- ' LIT , ( compile LIT )
- , ( compile the address of the value )
- ' +! , ( compile +! )
- ELSE ( immediate mode )
- +! ( update it straightaway )
- THEN
-;
-
-(
- ID. takes an address of a dictionary entry and prints the word's name.
-
- For example: LATEST @ ID. would print the name of the last word that was defined.
-)
-: ID.
- 4+ ( skip over the link pointer )
- DUP @b ( get the flags/length byte )
- F_LENMASK AND ( mask out the flags - just want the length )
-
- BEGIN
- DUP 0> ( length > 0? )
- WHILE
- SWAP 1+ ( addr len -- len addr+1 )
- DUP @b ( len addr -- len addr char | get the next character)
- EMIT ( len addr char -- len addr | and print it)
- SWAP 1- ( len addr -- addr len-1 | subtract one from length )
- REPEAT
- 2DROP ( len addr -- )
-;
-
-(
- 'WORD word FIND ?HIDDEN' returns true if 'word' is flagged as hidden.
-
- 'WORD word FIND ?IMMEDIATE' returns true if 'word' is flagged as immediate.
-)
-: ?HIDDEN
- 4+ ( skip over the link pointer )
- @b ( get the flags/length byte )
- F_HIDDEN AND ( mask the F_HIDDEN flag and return it (as a truth value) )
-;
-: ?IMMEDIATE
- 4+ ( skip over the link pointer )
- @b ( get the flags/length byte )
- F_IMMED AND ( mask the F_IMMED flag and return it (as a truth value) )
-;
-
-(
- WORDS prints all the words defined in the dictionary, starting with the word defined most recently.
- However it doesn't print hidden words.
-
- The implementation simply iterates backwards from LATEST using the link pointers.
-)
-: WORDS
- LATEST @ ( start at LATEST dictionary entry )
- BEGIN
- DUP 0<> ( while link pointer is not null )
- WHILE
- DUP ?HIDDEN NOT IF
- DUP ID. ( print the word )
- THEN
- SPACE
- @ ( dereference the link pointer - go to previous word )
- REPEAT
- DROP
- CR
-;
-
-(
- So far we have only allocated words and memory. FORTH provides a rather primitive method
- to deallocate.
-
- 'FORGET word' deletes the definition of 'word' from the dictionary and everything defined
- after it, including any variables and other memory allocated after.
-
- The implementation is very simple - we look up the word (which returns the dictionary entry
- address). Then we set HERE to point to that address, so in effect all future allocations
- and definitions will overwrite memory starting at the word. We also need to set LATEST to
- point to the previous word.
-
- Note that you cannot FORGET built-in words (well, you can try but it will probably cause
- a segfault).
-
- XXX: Because we wrote VARIABLE to store the variable in memory allocated before the word,
- in the current implementation VARIABLE FOO FORGET FOO will leak 1 cell of memory.
-)
-: FORGET
- WORD FIND ( find the word, gets the dictionary entry address )
- DUP @ LATEST ! ( set LATEST to point to the previous word )
- HERE ! ( and store HERE with the dictionary address )
-;
-
-(
- While compiling, '[COMPILE] word' compiles 'word' if it would otherwise be IMMEDIATE.
-)
-: [COMPILE] IMMEDIATE
- WORD ( get the next word )
- FIND ( find it in the dictionary )
- >CFA ( get its codeword )
- , ( and compile that )
-;
-
-(
- RECURSE makes a recursive call to the current word that is being compiled.
-
- Normally while a word is being compiled, it is marked HIDDEN so that references to the
- same word within are calls to the previous definition of the word. However we still have
- access to the word which we are currently compiling through the LATEST pointer so we
- can use that to compile a recursive call.
-)
-: RECURSE IMMEDIATE
- LATEST @ >CFA ( LATEST points to the word being compiled at the moment )
- , ( compile it )
-;
-
-(
- DUMP is used to dump out the contents of memory, in the 'traditional' hexdump format.
-)
-: DUMP ( addr len -- )
- BASE @ ROT ( save the current BASE at the bottom of the stack )
- HEX ( and switch the hexadecimal mode )
-
- BEGIN
- DUP 0> ( while len > 0 )
- WHILE
- OVER . ( print the address )
- SPACE
-
- ( print up to 16 words on this line )
- 2DUP ( addr len addr len )
- 1- 15 AND 1+ ( addr len addr linelen )
- BEGIN
- DUP 0> ( while linelen > 0 )
- WHILE
- SWAP ( addr len linelen addr )
- DUP @b ( addr len linelen addr byte )
- . SPACE ( print the byte )
- 1+ SWAP 1- ( addr len linelen addr -- addr len addr+1 linelen-1 )
- REPEAT
- 2DROP ( addr len )
-
- ( print the ASCII equivalents )
- 2DUP 1- 15 AND 1+ ( addr len addr linelen )
- BEGIN
- DUP 0> ( while linelen > 0)
- WHILE
- SWAP ( addr len linelen addr )
- DUP @b ( addr len linelen addr byte )
- DUP 32 128 WITHIN IF ( 32 <= c < 128? )
- EMIT
- ELSE
- DROP [ CHAR ? ] LITERAL EMIT
- THEN
- 1+ SWAP 1- ( addr len linelen addr -- addr len addr+1 linelen-1 )
- REPEAT
- 2DROP ( addr len )
- CR
-
- DUP 1- 15 AND 1+ ( addr len linelen )
- DUP ( addr len linelen linelen )
- ROT ( addr linelen len linelen )
- - ( addr linelen len-linelen )
- ROT ( len-linelen addr linelen )
- + ( len-linelen addr+linelen )
- SWAP ( addr-linelen len-linelen )
- REPEAT
-
- 2DROP ( restore stack )
- BASE ! ( restore saved BASE )
-;
-
-( Finally print the welcome prompt. )
-.\" JONESFORTH VERSION \" VERSION . CR
-.\" OK \"
-"
-
-_initbufftop:
- .align 4096
-buffend:
-
-currkey:
- .int buffer
-bufftop:
- .int _initbufftop
-
/* END OF jonesforth.S */
--- /dev/null
+\ -*- forth -*-
+\ A sometimes minimal FORTH compiler and tutorial for Linux / i386 systems.
+\ By Richard W.M. Jones <rich@annexia.org> http://annexia.org/forth
+\ This is PUBLIC DOMAIN (see public domain release statement below).
+\ $Id: jonesforth.f,v 1.1 2007-09-24 00:18:19 rich Exp $
+\
+\ The first part of this tutorial is in jonesforth.S. Get it from http://annexia.org/forth
+\
+\ PUBLIC DOMAIN ----------------------------------------------------------------------
+\
+\ I, the copyright holder of this work, hereby release it into the public domain. This applies worldwide.
+\
+\ In case this is not legally possible, I grant any entity the right to use this work for any purpose,
+\ without any conditions, unless such conditions are required by law.
+\
+\ SETTING UP ----------------------------------------------------------------------
+\
+\ Let's get a few housekeeping things out of the way. Firstly because I need to draw lots of
+\ ASCII-art diagrams to explain concepts, the best way to look at this is using a window which
+\ uses a fixed width font and is at least this wide:
+\
+\<------------------------------------------------------------------------------------------------------------------------>
+\
+\ Secondly make sure TABS are set to 8 characters. The following should be a vertical
+\ line. If not, sort out your tabs.
+\
+\ |
+\ |
+\ |
+\
+\ Thirdly I assume that your screen is at least 50 characters high.
+\
+\ START OF FORTH CODE ----------------------------------------------------------------------
+\
+\ We've now reached the stage where the FORTH system is running and self-hosting. All further
+\ words can be written as FORTH itself, including words like IF, THEN, .", etc which in most
+\ languages would be considered rather fundamental.
+\
+\ Some notes about the code:
+\
+\ I use indenting to show structure. The amount of whitespace has no meaning to FORTH however
+\ except that you must use at least one whitespace character between words, and words themselves
+\ cannot contain whitespace.
+\
+\ FORTH is case-sensitive. Use capslock!
+
+\ Define some character constants. Each word just pushes the ASCII code of the character
+\ named between the quotes: '\n' pushes 10 (line feed) and 'SPACE' pushes 32 (space).
+: '\n' 10 ;
+: 'SPACE' 32 ;
+
+\ CR ends the current output line. Despite the traditional name, it emits the code pushed
+\ by '\n' (10, a line feed) rather than ASCII 13 (carriage return).
+: CR '\n' EMIT ;
+
+\ SPACE prints a single space: it emits the character code pushed by 'SPACE' (32).
+: SPACE 'SPACE' EMIT ;
+
+\ DUP, DROP are defined in assembly for speed, but this is how you might define them
+\ in FORTH. Notice the use of the scratch variable _X (its sibling _Y is not needed here).
+\ : DUP _X ! _X @ _X @ ;
+\ : DROP _X ! ;
+
+\ The built-in . (DOT) function doesn't print a space after the number (unlike the real FORTH word).
+\ However this is very easily fixed by redefining . (DOT). Any built-in word can be redefined.
+\ The new definition takes one number from the stack, prints it with the old . and then
+\ prints one space.
+: .
+ . \ this refers back to the previous definition (but see also RECURSE below)
+ SPACE
+;
+
+\ The 2... versions of the standard operators work on pairs of stack entries. They're not used
+\ very commonly so not really worth writing in assembler. Here is how they are defined in FORTH.
+\ 2DUP: a b -- a b a b (OVER copies the second entry to the top, so two OVERs copy the pair)
+\ 2DROP: a b --
+: 2DUP OVER OVER ;
+: 2DROP DROP DROP ;
+
+\ More standard FORTH words.
+\ 2* doubles the top of stack and 2/ halves it, using the ordinary * and / words (so 2/ rounds
+\ in whatever way / rounds).
+: 2* 2 * ;
+: 2/ 2 / ;
+
+\ Standard words for manipulating BASE, the variable which holds the current number base.
+\ DECIMAL stores 10 in it and HEX stores 16.
+: DECIMAL 10 BASE ! ;
+: HEX 16 BASE ! ;
+
+\ Standard words for booleans. TRUE pushes 1 and FALSE pushes 0.
+\ NOT is logical negation: it is simply another name for the built-in zero test 0=.
+: TRUE 1 ;
+: FALSE 0 ;
+: NOT 0= ;
+
+\ LITERAL takes whatever is on the stack and compiles LIT <foo>
+\ It is IMMEDIATE, so inside a definition it runs at compile time. It appends two cells to the
+\ word being compiled: LIT, followed by the value taken from the stack.
+: LITERAL IMMEDIATE
+ ' LIT , \ compile LIT
+ , \ compile the literal itself (from the stack)
+ ;
+
+\ Now we can use [ and ] to insert literals which are calculated at compile time.
+\ Within definitions, use [ ... ] LITERAL anywhere that '...' is a constant expression which you
+\ would rather only compute once (at compile time, rather than calculating it each time your word runs).
+\ The word ':' defined here is the first example: when run, it pushes 58, the ASCII code of a colon.
+: ':'
+ [ \ go into immediate mode temporarily
+ CHAR : \ push the number 58 (ASCII code of colon) on the stack
+ ] \ go back to compile mode
+ LITERAL \ compile LIT 58 as the definition of ':' word
+;
+
+\ A few more character constants defined the same way as above: CHAR reads the character at
+\ compile time (between [ and ]) and LITERAL compiles its code into the word, so each word
+\ pushes the code of the character named between the quotes.
+: '(' [ CHAR ( ] LITERAL ;
+: ')' [ CHAR ) ] LITERAL ;
+: '"' [ CHAR " ] LITERAL ;
+
+\ So far we have defined only very simple definitions. Before we can go further, we really need to
+\ make some control structures, like IF ... THEN and loops. Luckily we can define arbitrary control
+\ structures directly in FORTH.
+\
+\ Please note that the control structures as I have defined them here will only work inside compiled
+\ words. If you try to type in expressions using IF, etc. in immediate mode, then they won't work.
+\ Making these work in immediate mode is left as an exercise for the reader.
+
+\ condition IF true-part THEN rest
+\ -- compiles to: --> condition 0BRANCH OFFSET true-part rest
+\ where OFFSET is the offset of 'rest'
+\ condition IF true-part ELSE false-part THEN
+\ -- compiles to: --> condition 0BRANCH OFFSET true-part BRANCH OFFSET2 false-part rest
+\ where OFFSET is the offset of false-part and OFFSET2 is the offset of rest
+
+\ IF is an IMMEDIATE word which compiles 0BRANCH followed by a dummy offset, and leaves
+\ the address of that offset cell (the cell just after 0BRANCH) on the stack. Later when we
+\ see THEN (or ELSE), we pop that address off the stack, calculate the offset, and back-fill it.
+\ Compile-time stack effect: -- offset-addr
+: IF IMMEDIATE
+ ' 0BRANCH , \ compile 0BRANCH
+ HERE @ \ save location of the offset on the stack
+ 0 , \ compile a dummy offset
+;
+
+\ THEN resolves a forward branch whose offset was left as a dummy (by IF or ELSE). It takes the
+\ address of the dummy offset cell from the stack and overwrites the cell with HERE minus that
+\ address, i.e. the distance from the offset cell itself to the next cell to be compiled.
+\ Compile-time stack effect: offset-addr --
+: THEN IMMEDIATE
+ DUP
+ HERE @ SWAP - \ calculate the offset from the address saved on the stack
+ SWAP ! \ store the offset in the back-filled location
+;
+
+\ ELSE closes the true-part and opens the false-part. It compiles an unconditional BRANCH with
+\ a dummy offset (to be back-filled by THEN) so that the true-part skips over the false-part.
+\ It then back-fills the offset left open by IF, so a zero condition lands just after that BRANCH.
+\ Compile-time stack effect: if-offset-addr -- else-offset-addr
+: ELSE IMMEDIATE
+ ' BRANCH , \ definite branch to just over the false-part
+ HERE @ \ save location of the offset on the stack
+ 0 , \ compile a dummy offset
+ SWAP \ now back-fill the original (IF) offset
+ DUP \ same as for THEN word above
+ HERE @ SWAP -
+ SWAP !
+;
+
+\ BEGIN loop-part condition UNTIL
+\ -- compiles to: --> loop-part condition 0BRANCH OFFSET
+\ where OFFSET points back to the loop-part
+\ This is like do { loop-part } while (!condition) in the C language: the loop exits once condition is true
+: BEGIN IMMEDIATE
+    HERE @          \ leave the address of the loop start on the stack for the word that closes the loop
+;
+
+: UNTIL IMMEDIATE
+    ' 0BRANCH ,     \ emit the conditional branch
+    HERE @ - ,      \ offset = saved loop start minus HERE; compile it as the operand of the branch
+;
+
+\ BEGIN loop-part AGAIN
+\ -- compiles to: --> loop-part BRANCH OFFSET
+\ where OFFSET points back to the loop-part
+\ In other words, an infinite loop which can only be returned from with EXIT
+: AGAIN IMMEDIATE
+    ' BRANCH ,      \ emit the unconditional branch
+    HERE @ - ,      \ offset = saved loop start minus HERE; compile it as the operand of the branch
+;
+
+\ BEGIN condition WHILE loop-part REPEAT
+\ -- compiles to: --> condition 0BRANCH OFFSET2 loop-part BRANCH OFFSET
+\ where OFFSET points back to condition (the beginning) and OFFSET2 points to after the whole piece of code
+\ So this is like a while (condition) { loop-part } loop in the C language
+: WHILE IMMEDIATE
+    ' 0BRANCH ,     \ emit the conditional branch that leaves the loop
+    HERE @ 0 ,      \ leave the address of the offset2 cell on the stack, and reserve that cell with a placeholder 0
+;
+
+: REPEAT IMMEDIATE
+    ' BRANCH ,      \ emit the unconditional jump back to the top of the loop
+    SWAP            \ begin-addr while-addr -- while-addr begin-addr
+    HERE @ - ,      \ compile the backwards offset to the address saved by BEGIN
+    HERE @ OVER -   \ forward distance from the offset2 cell to just past the loop
+    SWAP !          \ back-fill the placeholder that WHILE compiled
+;
+
+\ FORTH allows ( ... ) as comments within function definitions. This works by having an IMMEDIATE
+\ word called ( which just drops input characters until it hits the corresponding ).
+: ( IMMEDIATE
+    1                       \ nesting depth: one paren is already open, which is how nested parens are allowed
+    BEGIN
+        KEY DUP '(' = IF    \ is the next input character an open paren?
+            DROP 1+         \ discard it and go one level deeper
+        ELSE
+            ')' = IF 1- THEN    \ a close paren brings us one level back up
+        THEN
+        DUP 0=              \ depth zero means the matching close paren has been read
+    UNTIL
+    DROP                    \ discard the depth counter
+;
+
+(
+ From now on we can use ( ... ) for comments.
+
+ In FORTH style we can also use ( ... -- ... ) to show the effects that a word has on the
+ parameter stack. For example:
+
+ ( n -- ) means that the word consumes an integer (n) from the parameter stack.
+ ( b a -- c ) means that the word uses two integers (a and b, where a is at the top of stack)
+ and returns a single integer (c).
+ ( -- ) means the word has no effect on the stack
+)
+
+( With the looping constructs, we can now write SPACES, which writes n spaces to stdout. )
+: SPACES ( n -- )
+    BEGIN DUP 0> WHILE      ( keep going for as long as the counter is positive )
+        SPACE 1-            ( emit one space, then count it off )
+    REPEAT
+    DROP                    ( discard the spent counter )
+;
+
+( c a b WITHIN returns true if a <= c and c < b )
+: WITHIN ( c a b -- flag )
+    ROT OVER        ( c a b -- b c a c )
+    <= IF           ( lower bound holds, a <= c; b c remain )
+        > IF TRUE ELSE FALSE THEN   ( b c -- flag, true only when b > c )
+    ELSE
+        2DROP FALSE ( lower bound failed: discard b c and answer false )
+    THEN
+;
+
+( .S prints the contents of the stack. Very useful for debugging. )
+: .S ( -- )
+    DSP@                ( start at the current top of the stack )
+    BEGIN
+        DUP S0 @ <      ( still below the stack base held in S0? )
+    WHILE
+        DUP @ . SPACE   ( print this element, then a space so that adjacent values do not run together )
+        4+              ( move up to the next element )
+    REPEAT
+    DROP                ( discard the walking pointer, leaving the stack as it was found )
+;
+
+( DEPTH returns the depth of the stack. )
+: DEPTH ( -- n )
+    S0 @ DSP@   ( base sp : sp was sampled while the value of S0 was already pushed )
+    4+ -        ( skip that extra cell, then subtract; n is an address difference, so it counts 4 per stack element )
+;
+
+(
+ [NB. In earlier versions this code was embedded in the assembler source, where every double
+ quote character had to be preceded by a backslash to keep the assembler happy. No such
+ escaping is needed in this file. The function defined here is called
+ 'S double-quote' (not 'S backslash double-quote').]
+
+ S" string" is used in FORTH to define strings. It leaves the address of the string and
+ its length on the stack, with the address at the top. The space following S" is the normal
+ space between FORTH words and is not a part of the string.
+
+ In compile mode we append
+ LITSTRING <string length> <string rounded up 4 bytes>
+ to the current word. The primitive LITSTRING does the right thing when the current
+ word is executed.
+
+ In immediate mode there isn't a particularly good place to put the string, but in this
+ case we put the string at HERE (but we _don't_ change HERE). This is meant as a temporary
+ location, likely to be overwritten soon after.
+)
+: S" IMMEDIATE ( -- len addr )
+    STATE @ IF ( compiling? )
+        ' LITSTRING , ( compile LITSTRING )
+        HERE @ ( save the address of the length word on the stack )
+        0 , ( dummy length - we don't know what it is yet )
+        BEGIN
+            KEY ( get next character of the string )
+            DUP '"' <>
+        WHILE
+            HERE @ !b ( store the character in the compiled image )
+            1 HERE +! ( increment HERE pointer by 1 byte )
+        REPEAT
+        DROP ( drop the double quote character at the end )
+        DUP ( get the saved address of the length word )
+        HERE @ SWAP - ( calculate the length )
+        4- ( subtract 4 because we measured from the start of the length word )
+        SWAP ! ( and back-fill the length location )
+        HERE @ ( round up to next multiple of 4 bytes for the remaining code )
+        3 +
+        3 INVERT AND
+        HERE !
+    ELSE ( immediate mode )
+        HERE @ ( write pointer, starting at the temporary space at HERE; HERE itself is not changed )
+        BEGIN
+            KEY
+            DUP '"' <>
+        WHILE
+            OVER !b ( save next character at the write pointer )
+            1+ ( advance the write pointer )
+        REPEAT
+        DROP ( drop the closing double quote, which the loop leaves on top of the write pointer )
+        HERE @ - ( end pointer minus start address gives the length )
+        HERE @ ( push the start address )
+    THEN
+;
+
+(
+ ." is the print string operator in FORTH. Example: ." Something to print"
+ The space after the operator is the ordinary space required between words.
+
+ This is tricky to define because it has to do different things depending on whether
+ we are compiling or in immediate mode. (Thus the word is marked IMMEDIATE so it can
+ detect this and do different things).
+
+ In immediate mode we just keep reading characters and printing them until we get to
+ the next double quote.
+
+ In compile mode we have the problem of where we're going to store the string (remember
+ that the input buffer where the string comes from may be overwritten by the time we
+ come round to running the function). We store the string in the compiled function
+ like this:
+ ..., LITSTRING, string length, string rounded up to 4 bytes, EMITSTRING, ...
+)
+: ." IMMEDIATE ( -- )
+    STATE @ IF                  ( compiling: embed the text in the word being defined )
+        ' LITSTRING ,
+        HERE @ 0 ,              ( keep the address of the length cell and fill the cell with 0 for the moment )
+        BEGIN
+            KEY DUP '"' <>      ( fetch characters up to the closing double quote )
+        WHILE
+            HERE @ !b           ( append the character to the compiled image ... )
+            1 HERE +!           ( ... and advance HERE by one byte )
+        REPEAT
+        DROP                    ( the closing double quote is not part of the string )
+        HERE @ OVER - 4-        ( length = HERE minus the length cell address, less 4 for the length cell itself )
+        SWAP !                  ( back-fill the length cell )
+        HERE @ 3 + 3 INVERT AND HERE !  ( round HERE up to a multiple of 4 for the code that follows )
+        ' EMITSTRING ,          ( finally compile EMITSTRING after the string )
+    ELSE                        ( immediate mode: echo the characters straight away )
+        BEGIN
+            KEY DUP '"' = IF    ( closing double quote reached? )
+                DROP EXIT       ( discard it and return from this word )
+            THEN
+            EMIT
+        AGAIN
+    THEN
+;
+
+(
+ In FORTH, global constants and variables are defined like this:
+
+ 10 CONSTANT TEN when TEN is executed, it leaves the integer 10 on the stack
+ VARIABLE VAR when VAR is executed, it leaves the address of VAR on the stack
+
+ Constants can be read but not written, eg:
+
+ TEN . CR prints 10
+
+ You can read a variable (in this example called VAR) by doing:
+
+ VAR @ leaves the value of VAR on the stack
+ VAR @ . CR prints the value of VAR
+
+ and update the variable by doing:
+
+ 20 VAR ! sets VAR to 20
+
+ Note that variables are uninitialised (but see VALUE later on which provides initialised
+ variables with a slightly simpler syntax).
+
+ How can we define the words CONSTANT and VARIABLE?
+
+ The trick is to define a new word for the variable itself (eg. if the variable was called
+ 'VAR' then we would define a new word called VAR). This is easy to do because we exposed
+ dictionary entry creation through the CREATE word (part of the definition of : above).
+ A call to CREATE TEN leaves the dictionary entry:
+
+ +--- HERE
+ |
+ V
+ +---------+---+---+---+---+
+ | LINK | 3 | T | E | N |
+ +---------+---+---+---+---+
+ len
+
+ For CONSTANT we can continue by appending DOCOL (the codeword), then LIT followed by
+ the constant itself and then EXIT, forming a little word definition that returns the
+ constant:
+
+ +---------+---+---+---+---+------------+------------+------------+------------+
+ | LINK | 3 | T | E | N | DOCOL | LIT | 10 | EXIT |
+ +---------+---+---+---+---+------------+------------+------------+------------+
+ len codeword
+
+ Notice that this word definition is exactly the same as you would have got if you had
+ written : TEN 10 ;
+)
+: CONSTANT ( n -- )
+    CREATE          ( build the dictionary header; the name follows CONSTANT in the input )
+    DOCOL ,         ( codeword field of the new word )
+    ' LIT , ,       ( body: LIT followed by the value taken from the top of the stack )
+    ' EXIT ,        ( and finish with EXIT )
+;
+
+(
+ VARIABLE is a little bit harder because we need somewhere to put the variable. There is
+ nothing particularly special about the 'user definitions area' (the area of memory pointed
+ to by HERE where we have previously just stored new word definitions). We can slice off
+ bits of this memory area to store anything we want, so one possible definition of
+ VARIABLE might create this:
+
+ +--------------------------------------------------------------+
+ | |
+ V |
+ +---------+---------+---+---+---+---+------------+------------+---|--------+------------+
+ | <var> | LINK | 3 | V | A | R | DOCOL | LIT | <addr var> | EXIT |
+ +---------+---------+---+---+---+---+------------+------------+------------+------------+
+ len codeword
+
+ where <var> is the place to store the variable, and <addr var> points back to it.
+
+ To make this more general let's define a couple of words which we can use to allocate
+ arbitrary memory from the user definitions area.
+
+ First ALLOT, where n ALLOT allocates n bytes of memory. (Note when calling this that
+ it's a very good idea to make sure that n is a multiple of 4, or at least that next time
+ a word is compiled that n has been left as a multiple of 4).
+)
+: ALLOT ( n -- addr )
+    HERE @          ( remember where the new region starts )
+    SWAP HERE +!    ( advance HERE by n; the old value of HERE stays behind as the result )
+;
+
+(
+ Second, CELLS. In FORTH the phrase 'n CELLS ALLOT' means allocate n integers of whatever size
+ is the natural size for integers on this machine architecture. On this 32 bit machine therefore
+ CELLS just multiplies the top of stack by 4.
+)
+: CELLS ( n -- n )
+    4 *     ( one cell is 4 bytes on this 32 bit machine )
+;
+
+(
+ So now we can define VARIABLE easily in much the same way as CONSTANT above. Refer to the
+ diagram above to see what the word that this creates will look like.
+)
+: VARIABLE
+    1 CELLS ALLOT   ( carve out one cell for the variable; its address waits on the stack )
+    CREATE          ( build the dictionary header; the name follows VARIABLE in the input )
+    DOCOL ,         ( codeword field of the new word )
+    ' LIT , ,       ( body: LIT followed by the address of the cell allocated above )
+    ' EXIT ,        ( and finish with EXIT )
+;
+
+(
+ VALUEs are like VARIABLEs but with a simpler syntax. You would generally use them when you
+ want a variable which is read often, and written infrequently.
+
+ 20 VALUE VAL creates VAL with initial value 20
+ VAL pushes the value directly on the stack
+ 30 TO VAL updates VAL, setting it to 30
+
+ Notice that 'VAL' on its own doesn't return the address of the value, but the value itself,
+ making values simpler and more obvious to use than variables (no indirection through '@').
+ The price is a more complicated implementation, although despite the complexity there is no
+ particular performance penalty at runtime.
+
+ A naive implementation of 'TO' would be quite slow, involving a dictionary search each time.
+ But because this is FORTH we have complete control of the compiler so we can compile TO more
+ efficiently, turning:
+ TO VAL
+ into:
+ LIT <addr> !
+ and calculating <addr> (the address of the value) at compile time.
+
+ Now this is the clever bit. We'll compile our value like this:
+
+ +---------+---+---+---+---+------------+------------+------------+------------+
+ | LINK | 3 | V | A | L | DOCOL | LIT | <value> | EXIT |
+ +---------+---+---+---+---+------------+------------+------------+------------+
+ len codeword
+
+ where <value> is the actual value itself. Note that when VAL executes, it will push the
+ value on the stack, which is what we want.
+
+ But what will TO use for the address <addr>? Why of course a pointer to that <value>:
+
+ code compiled - - - - --+------------+------------+------------+-- - - - -
+ by TO VAL | LIT | <addr> | ! |
+ - - - - --+------------+-----|------+------------+-- - - - -
+ |
+ V
+ +---------+---+---+---+---+------------+------------+------------+------------+
+ | LINK | 3 | V | A | L | DOCOL | LIT | <value> | EXIT |
+ +---------+---+---+---+---+------------+------------+------------+------------+
+ len codeword
+
+ In other words, this is a kind of self-modifying code.
+
+ (Note to the people who want to modify this FORTH to add inlining: values defined this
+ way cannot be inlined).
+)
+: VALUE ( n -- )
+    CREATE          ( build the dictionary header; the name follows VALUE in the input )
+    DOCOL ,         ( codeword field of the new word )
+    ' LIT , ,       ( body: LIT followed by the initial value n; TO and +TO later overwrite this cell )
+    ' EXIT ,        ( and finish with EXIT )
+;
+
+: TO IMMEDIATE ( n -- )
+    WORD FIND       ( look up the name that follows TO in the dictionary )
+    >DFA 4+         ( the first data field holds LIT; the cell after it holds the value )
+    STATE @ IF      ( compiling? )
+        ' LIT , ,   ( compile LIT followed by the address of the value cell )
+        ' ! ,       ( compile ! so that the store happens when the enclosing word runs )
+    ELSE            ( immediate mode )
+        !           ( store the new value straightaway )
+    THEN
+;
+
+( x +TO VAL adds x to VAL )
+: +TO IMMEDIATE ( x -- )
+    WORD FIND       ( look up the name that follows +TO in the dictionary )
+    >DFA 4+         ( the first data field holds LIT; the cell after it holds the value )
+    STATE @ IF      ( compiling? )
+        ' LIT , ,   ( compile LIT followed by the address of the value cell )
+        ' +! ,      ( compile +! so that the addition happens when the enclosing word runs )
+    ELSE            ( immediate mode )
+        +!          ( add to the value straightaway )
+    THEN
+;
+
+(
+ ID. takes an address of a dictionary entry and prints the word's name.
+
+ For example: LATEST @ ID. would print the name of the last word that was defined.
+)
+: ID. ( addr -- )
+    4+                      ( step over the link pointer to the flags/length byte )
+    DUP @b F_LENMASK AND    ( addr len : mask out the flag bits, leaving only the name length )
+    BEGIN DUP 0> WHILE      ( characters left to print? )
+        SWAP 1+             ( addr len -- len addr+1 )
+        DUP @b EMIT         ( print the character now pointed at )
+        SWAP 1-             ( len addr -- addr len-1 )
+    REPEAT
+    2DROP                   ( addr len -- )
+;
+
+(
+ 'WORD word FIND ?HIDDEN' returns true if 'word' is flagged as hidden.
+
+ 'WORD word FIND ?IMMEDIATE' returns true if 'word' is flagged as immediate.
+)
+: ?HIDDEN ( addr -- flag )
+    4+ @b           ( fetch the flags/length byte that follows the link pointer )
+    F_HIDDEN AND    ( non-zero exactly when the F_HIDDEN bit is set )
+;
+: ?IMMEDIATE ( addr -- flag )
+    4+ @b           ( fetch the flags/length byte that follows the link pointer )
+    F_IMMED AND     ( non-zero exactly when the F_IMMED bit is set )
+;
+
+(
+ WORDS prints all the words defined in the dictionary, starting with the word defined most recently.
+ However it doesn't print hidden words.
+
+ The implementation simply iterates backwards from LATEST using the link pointers.
+)
+: WORDS ( -- )
+    LATEST @ ( start at the most recently defined dictionary entry )
+    BEGIN
+        DUP 0<> ( while link pointer is not null )
+    WHILE
+        DUP ?HIDDEN NOT IF ( hidden words are skipped entirely )
+            DUP ID. ( print the name of the word ... )
+            SPACE ( ... and a separating space, only for words that were actually printed )
+        THEN
+        @ ( dereference the link pointer - go to previous word )
+    REPEAT
+    DROP ( discard the null link pointer that ended the loop )
+    CR
+;
+
+(
+ So far we have only allocated words and memory. FORTH provides a rather primitive method
+ to deallocate.
+
+ 'FORGET word' deletes the definition of 'word' from the dictionary and everything defined
+ after it, including any variables and other memory allocated after.
+
+ The implementation is very simple - we look up the word (which returns the dictionary entry
+ address). Then we set HERE to point to that address, so in effect all future allocations
+ and definitions will overwrite memory starting at the word. We also need to set LATEST to
+ point to the previous word.
+
+ Note that you cannot FORGET built-in words (well, you can try but it will probably cause
+ a segfault).
+
+ XXX: Because we wrote VARIABLE to store the variable in memory allocated before the word,
+ in the current implementation VARIABLE FOO FORGET FOO will leak 1 cell of memory.
+)
+: FORGET
+    WORD FIND       ( dictionary entry address of the word named after FORGET )
+    DUP @ LATEST !  ( its link field, the previous word, becomes the new LATEST )
+    HERE !          ( later allocations and definitions start again at that entry )
+;
+
+(
+ While compiling, '[COMPILE] word' compiles 'word' if it would otherwise be IMMEDIATE.
+)
+: [COMPILE] IMMEDIATE
+    WORD FIND   ( locate the dictionary entry of the word that follows )
+    >CFA ,      ( compile its codeword into the current definition )
+;
+
+(
+ RECURSE makes a recursive call to the current word that is being compiled.
+
+ Normally while a word is being compiled, it is marked HIDDEN so that references to the
+ same word within are calls to the previous definition of the word. However we still have
+ access to the word which we are currently compiling through the LATEST pointer so we
+ can use that to compile a recursive call.
+)
+: RECURSE IMMEDIATE
+    LATEST @    ( dictionary entry of the word being compiled at the moment )
+    >CFA ,      ( compile its codeword, giving a call to the word itself )
+;
+
+(
+ DUMP is used to dump out the contents of memory, in the 'traditional' hexdump format.
+)
+: DUMP ( addr len -- )
+ BASE @ ROT ( save the current BASE at the bottom of the stack, giving base addr len )
+ HEX ( and switch to hexadecimal mode )
+
+ BEGIN
+ DUP 0> ( while len > 0 )
+ WHILE
+ OVER . ( print the address )
+ SPACE
+
+ ( print up to 16 bytes on this line )
+ 2DUP ( addr len addr len )
+ 1- 15 AND 1+ ( addr len addr linelen, where linelen is in 1..16: any odd remainder goes on the first line )
+ BEGIN
+ DUP 0> ( while linelen > 0 )
+ WHILE
+ SWAP ( addr len linelen addr )
+ DUP @b ( addr len linelen addr byte )
+ . SPACE ( print the byte )
+ 1+ SWAP 1- ( addr len linelen addr -- addr len addr+1 linelen-1 )
+ REPEAT
+ 2DROP ( addr len )
+
+ ( print the ASCII equivalents of the same bytes )
+ 2DUP 1- 15 AND 1+ ( addr len addr linelen )
+ BEGIN
+ DUP 0> ( while linelen > 0)
+ WHILE
+ SWAP ( addr len linelen addr )
+ DUP @b ( addr len linelen addr byte )
+ DUP 32 128 WITHIN IF ( 32 <= c < 128? )
+ EMIT
+ ELSE
+ DROP [ CHAR ? ] LITERAL EMIT
+ THEN
+ 1+ SWAP 1- ( addr len linelen addr -- addr len addr+1 linelen-1 )
+ REPEAT
+ 2DROP ( addr len )
+ CR
+
+ ( advance addr and reduce len by the number of bytes just printed )
+ DUP 1- 15 AND 1+ ( addr len linelen )
+ DUP ( addr len linelen linelen )
+ ROT ( addr linelen len linelen )
+ - ( addr linelen len-linelen )
+ ROT ( len-linelen addr linelen )
+ + ( len-linelen addr+linelen )
+ SWAP ( addr+linelen len-linelen )
+ REPEAT
+
+ 2DROP ( drop the final addr and len, leaving only the saved BASE )
+ BASE ! ( restore saved BASE )
+;
+
+( Finally print the welcome prompt. )
+." JONESFORTH VERSION "     ( banner text; printed at once because we are in immediate mode here )
+VERSION . CR                ( then the version number and a newline )
+." OK "                     ( and the first prompt )