From: rich <rich>
Date: Thu, 6 Sep 2007 21:02:31 +0000 (+0000)
Subject: Added FORTH interpreter and older versions.
X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=5d5f02cd86cf885be486427a450b999086c6b1ec;p=jonesforth.git

Added FORTH interpreter and older versions.
---

5d5f02cd86cf885be486427a450b999086c6b1ec
diff --git a/jonesforth.S b/jonesforth.S
new file mode 100644
index 0000000..11b0cfd
--- /dev/null
+++ b/jonesforth.S
@@ -0,0 +1,984 @@
+/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
+ * By Richard W.M. Jones <rich@annexia.org>
+ *
+ * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
+ */
+
+#include <asm-i386/unistd.h>
+
+/* NOTES-------------------------------------------------------------------------------------------------------------------
+
+Need to say something about $ before constants.
+
+And about je/jne/ja/jb/jbe/etc
+
+
+
+
+*/
+
+/* NEXT macro. */
+	.macro NEXT
+	lodsl
+	jmp *(%eax)
+	.endm
+
+/* Macros to deal with the return stack. */
+	.macro PUSHRSP reg
+	lea -4(%ebp),%ebp	// push reg on to return stack
+	movl \reg,(%ebp)
+	.endm
+
+	.macro POPRSP reg
+	mov (%ebp),\reg		// pop top of return stack to reg
+	lea 4(%ebp),%ebp
+	.endm
+
+/* ELF entry point. */
+	.text
+	.globl _start
+_start:
+	cld
+	mov %esp,var_S0		// Store the initial data stack pointer.
+	mov $return_stack,%ebp	// Initialise the return stack.
+
+	mov $cold_start,%esi	// Initialise interpreter.
+	NEXT			// Run interpreter!
+
+	.section .rodata
+cold_start:			// High-level code without a codeword.
+	.int COLD
+
+/* DOCOL - the interpreter! */
+	.text
+	.align 4
+DOCOL:
+	PUSHRSP %esi		// push %esi on to the return stack
+	addl $4,%eax		// %eax points to codeword, so make
+	movl %eax,%esi		// %esi point to first data word
+	NEXT
+
+/*----------------------------------------------------------------------
+ * Fixed sized buffers for everything.
+ */
+	.bss
+
+/* FORTH return stack. */
+#define RETURN_STACK_SIZE 8192
+	.align 4096
+	.space RETURN_STACK_SIZE
+return_stack:
+
+/* Space for user-defined words. */
+#define USER_DEFS_SIZE 16384
+	.align 4096
+user_defs_start:
+	.space USER_DEFS_SIZE
+
+
+
+
+
+
+/*----------------------------------------------------------------------
+ * Built-in words defined the long way.
+ */
+#define F_IMMED 0x80
+#define F_HIDDEN 0x20
+
+	// Store the chain of links.
+	.set link,0
+
+	.macro defcode name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int code_\label	// codeword
+	.text
+	.align 4
+	.globl code_\label
+code_\label :			// assembler code follows
+	.endm
+
+	.macro defword name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int DOCOL		// codeword - the interpreter
+	// list of word pointers follow
+	.endm
+
+	.macro defvar name, namelen, flags=0, label, initial=0
+	defcode \name,\namelen,\flags,\label
+	push $var_\name
+	NEXT
+	.data
+	.align 4
+var_\name :
+	.int \initial
+	.endm
+
+	// Some easy ones, written in assembly for speed
+	defcode "DROP",4,,DROP
+	pop %eax		// drop top of stack
+	NEXT
+
+	defcode "DUP",3,,DUP
+	pop %eax		// duplicate top of stack
+	push %eax
+	push %eax
+	NEXT
+
+	defcode "SWAP",4,,SWAP
+	pop %eax		// swap top of stack
+	pop %ebx
+	push %eax
+	push %ebx
+	NEXT
+
+	defcode "OVER",4,,OVER
+	mov 4(%esp),%eax	// get the second element of stack
+	push %eax		// and push it on top
+	NEXT
+
+	defcode "ROT",3,,ROT
+	pop %eax
+	pop %ebx
+	pop %ecx
+	push %eax
+	push %ecx
+	push %ebx
+	NEXT
+
+	defcode "-ROT",4,,NROT
+	pop %eax
+	pop %ebx
+	pop %ecx
+	push %ebx
+	push %eax
+	push %ecx
+	NEXT
+
+	defcode "1+",2,,INCR
+	incl (%esp)		// increment top of stack
+	NEXT
+
+	defcode "1-",2,,DECR
+	decl (%esp)		// decrement top of stack
+	NEXT
+
+	defcode "4+",2,,INCR4
+	addl $4,(%esp)		// increment top of stack
+	NEXT
+
+	defcode "4-",2,,DECR4
+	subl $4,(%esp)		// decrement top of stack
+	NEXT
+
+	defcode "+",1,,ADD
+	pop %eax
+	addl %eax,(%esp)
+	NEXT
+
+	defcode "-",1,,SUB
+	pop %eax
+	subl %eax,(%esp)
+	NEXT
+
+	defcode "*",1,,MUL
+	pop %eax
+	pop %ebx
+	imull %ebx,%eax
+	push %eax		// ignore overflow
+	NEXT
+
+	defcode "/",1,,DIV
+	xor %edx,%edx
+	pop %ebx
+	pop %eax
+	idivl %ebx
+	push %eax		// push quotient
+	NEXT
+
+	defcode "MOD",3,,MOD
+	xor %edx,%edx
+	pop %ebx
+	pop %eax
+	idivl %ebx
+	push %edx		// push remainder
+	NEXT
+
+	defcode "=",1,,EQU	// top two words are equal?
+	pop %eax
+	pop %ebx
+	cmp %ebx,%eax
+	je 1f
+	pushl $0
+	NEXT
+1:	pushl $1
+	NEXT
+
+	defcode "<>",2,,NEQU	// top two words are not equal?
+	pop %eax
+	pop %ebx
+	cmp %ebx,%eax
+	je 1f
+	pushl $1
+	NEXT
+1:	pushl $0
+	NEXT
+
+	defcode "0=",2,,ZEQU	// top of stack equals 0?
+	pop %eax
+	test %eax,%eax
+	jz 1f
+	pushl $0
+	NEXT
+1:	pushl $1
+	NEXT
+
+	defcode "AND",3,,AND
+	pop %eax
+	andl %eax,(%esp)
+	NEXT
+
+	defcode "OR",2,,OR
+	pop %eax
+	orl %eax,(%esp)
+	NEXT
+
+	defcode "INVERT",6,,INVERT
+	notl (%esp)
+	NEXT
+
+	// COLD must not return (ie. must not call EXIT).
+	defword "COLD",4,,COLD
+	// XXX reinitialisation of the interpreter
+	.int INTERPRETER	// call the interpreter loop (never returns)
+	.int LIT,1,SYSEXIT	// hmmm, but in case it does, exit(1).
+
+	defcode "EXIT",4,,EXIT
+	POPRSP %esi		// pop return stack into %esi
+	NEXT
+
+	defcode "LIT",3,,LIT
+	// %esi points to the next command, but in this case it points to the next
+	// literal 32 bit integer.  Get that literal into %eax and increment %esi.
+	// On x86, it's a convenient single byte instruction!  (cf. NEXT macro)
+	lodsl
+	push %eax		// push the literal number on to stack
+	NEXT
+
+	defcode "LITSTRING",9,,LITSTRING
+	lodsl			// get the length of the string
+	push %eax		// push it on the stack
+	push %esi		// push the address of the start of the string
+	addl %eax,%esi		// skip past the string
+	addl $3,%esi		// but round up to next 4 byte boundary
+	andl $~3,%esi
+	NEXT
+
+	defcode "BRANCH",6,,BRANCH
+	add (%esi),%esi		// add the offset to the instruction pointer
+	NEXT
+
+	defcode "0BRANCH",7,,ZBRANCH
+	pop %eax
+	test %eax,%eax		// top of stack is zero?
+	jz code_BRANCH		// if so, jump back to the branch function above
+	lodsl			// otherwise we need to skip the offset
+	NEXT
+
+	defcode "!",1,,STORE
+	pop %ebx		// address to store at
+	pop %eax		// data to store there
+	mov %eax,(%ebx)		// store it
+	NEXT
+
+	defcode "@",1,,FETCH
+	pop %ebx		// address to fetch
+	mov (%ebx),%eax		// fetch it
+	push %eax		// push value onto stack
+	NEXT
+
+/* ! and @ (STORE and FETCH) store 32-bit words.  It's also useful to be able to read and write bytes.
+ * I don't know whether FORTH has these words, so I invented my own, called !b and @b.
+ * Byte-oriented operations only work on architectures which permit them (i386 is one of those).
+ */
+	defcode "!b",2,,STOREBYTE
+	pop %ebx		// address to store at
+	pop %eax		// data to store there
+	movb %al,(%ebx)		// store it
+	NEXT
+
+	defcode "@b",2,,FETCHBYTE
+	pop %ebx		// address to fetch
+	xor %eax,%eax
+	movb (%ebx),%al		// fetch it
+	push %eax		// push value onto stack
+	NEXT
+
+	// The STATE variable is 0 for execute mode, != 0 for compile mode
+	defvar "STATE",5,,STATE
+
+	// This points to where compiled words go.
+	defvar "HERE",4,,HERE,user_defs_start
+
+	// This is the last definition in the dictionary.
+	defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary
+
+	// _X, _Y and _Z are scratch variables used by standard words.
+	defvar "_X",2,,TX
+	defvar "_Y",2,,TY
+	defvar "_Z",2,,TZ
+
+	// This stores the top of the data stack.
+	defvar "S0",2,,SZ
+
+	// This stores the top of the return stack.
+	defvar "R0",2,,RZ,return_stack
+
+	defcode "DSP@",4,,DSPFETCH
+	mov %esp,%eax
+	push %eax
+	NEXT
+
+	defcode "DSP!",4,,DSPSTORE
+	pop %esp
+	NEXT
+
+	defcode ">R",2,,TOR
+	pop %eax		// pop parameter stack into %eax
+	PUSHRSP %eax		// push it on to the return stack
+	NEXT
+
+	defcode "R>",2,,FROMR
+	POPRSP %eax		// pop return stack on to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+
+	defcode "RSP@",4,,RSPFETCH
+	push %ebp
+	NEXT
+
+	defcode "RSP!",4,,RSPSTORE
+	pop %ebp
+	NEXT
+
+	defcode "RDROP",5,,RDROP
+	lea 4(%ebp),%ebp	// pop return stack and throw away
+	NEXT
+
+	defcode "KEY",3,,KEY
+	call _KEY
+	push %eax		// push return value on stack
+	NEXT
+_KEY:
+	mov (currkey),%ebx
+	cmp (bufftop),%ebx
+	jge 1f
+	xor %eax,%eax
+	mov (%ebx),%al
+	inc %ebx
+	mov %ebx,(currkey)
+	ret
+
+1:	// out of input; use read(2) to fetch more input from stdin
+	xor %ebx,%ebx		// 1st param: stdin
+	mov $buffer,%ecx	// 2nd param: buffer
+	mov %ecx,currkey
+	mov $buffend-buffer,%edx // 3rd param: max length
+	mov $__NR_read,%eax	// syscall: read
+	int $0x80
+	test %eax,%eax		// If %eax <= 0, then exit.
+	jbe 2f
+	addl %eax,%ecx		// buffer+%eax = bufftop
+	mov %ecx,bufftop
+	jmp _KEY
+
+2:	// error or out of input: exit
+	xor %ebx,%ebx
+	mov $__NR_exit,%eax	// syscall: exit
+	int $0x80
+
+	defcode "EMIT",4,,EMIT
+	pop %eax
+	call _EMIT
+	NEXT
+_EMIT:
+	mov $1,%ebx		// 1st param: stdout
+
+	// write needs the address of the byte to write
+	mov %al,(2f)
+	mov $2f,%ecx		// 2nd param: address
+
+	mov $1,%edx		// 3rd param: nbytes = 1
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+	ret
+
+	.bss
+2:	.space 1		// scratch used by EMIT
+
+	defcode "WORD",4,,WORD
+	call _WORD
+	push %ecx		// push length
+	push %edi		// push base address
+	NEXT
+
+_WORD:
+	/* Search for first non-blank character.  Also skip \ comments. */
+1:
+	call _KEY		// get next key, returned in %eax
+	cmpb $'\\',%al		// start of a comment?
+	je 3f			// if so, skip the comment
+	cmpb $' ',%al
+	jbe 1b			// if so, keep looking
+
+	/* Search for the end of the word, storing chars as we go. */
+	mov $5f,%edi		// pointer to return buffer
+2:
+	stosb			// add character to return buffer
+	call _KEY		// get next key, returned in %al
+	cmpb $' ',%al		// is blank?
+	ja 2b			// if not, keep looping
+
+	/* Return the word (well, the static buffer) and length. */
+	sub $5f,%edi
+	mov %edi,%ecx		// return length of the word
+	mov $5f,%edi		// return address of the word
+	ret
+
+	/* Code to skip \ comments to end of the current line. */
+3:
+	call _KEY
+	cmpb $'\n',%al		// end of line yet?
+	jne 3b
+	jmp 1b
+
+	.bss
+	// A static buffer where WORD returns.  Subsequent calls
+	// overwrite this buffer.  Maximum word length is 32 chars.
+5:	.space 32
+
+	defcode "EMITSTRING",10,,EMITSTRING
+	mov $1,%ebx		// 1st param: stdout
+	pop %ecx		// 2nd param: address of string
+	pop %edx		// 3rd param: length of string
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	defcode ".",1,,DOT
+	pop %eax		// Get the number to print into %eax
+	call _DOT		// Easier to do this recursively ...
+	NEXT
+_DOT:
+	mov $10,%ecx		// Base 10
+1:
+	cmp %ecx,%eax
+	jb 2f
+	xor %edx,%edx		// %edx:%eax / %ecx -> quotient %eax, remainder %edx
+	idivl %ecx
+	pushl %edx
+	call _DOT
+	popl %eax
+	jmp 1b
+2:
+	xor %ah,%ah
+	aam $10
+	cwde
+	addl $'0',%eax
+	call _EMIT
+	ret
+
+	// Parse a number from a string on the stack -- almost the opposite of . (DOT)
+	// Note that there is absolutely no error checking.  In particular the length of the
+	// string must be >= 1 bytes.
+	defcode "SNUMBER",7,,SNUMBER
+	pop %edi
+	pop %ecx
+	call _SNUMBER
+	push %eax
+	NEXT
+_SNUMBER:
+	xor %eax,%eax
+	xor %ebx,%ebx
+1:
+	imull $10,%eax		// %eax *= 10
+	movb (%edi),%bl
+	inc %edi
+	subb $'0',%bl		// ASCII -> digit
+	add %ebx,%eax
+	dec %ecx
+	jnz 1b
+	ret
+
+	defcode "FIND",4,,FIND
+	pop %edi		// %edi = address
+	pop %ecx		// %ecx = length
+	call _FIND
+	push %eax
+	NEXT
+
+_FIND:
+	push %esi		// Save %esi so we can use it in string comparison.
+
+	// Now we start searching backwards through the dictionary for this word.
+	mov var_LATEST,%edx	// LATEST points to name header of the latest word in the dictionary
+1:
+	test %edx,%edx		// NULL pointer?  (end of the linked list)
+	je 4f
+
+	// Compare the length expected and the length of the word.
+	// Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
+	// this won't pick the word (the length will appear to be wrong).
+	xor %eax,%eax
+	movb 4(%edx),%al	// %al = flags+length field
+	andb $(F_HIDDEN|0x1f),%al // %al = name length
+	cmpb %cl,%al		// Length is the same?
+	jne 2f
+
+	// Compare the strings in detail.
+	push %ecx		// Save the length
+	push %edi		// Save the address (repe cmpsb will move this pointer)
+	lea 5(%edx),%esi	// Dictionary string we are checking against.
+	repe cmpsb		// Compare the strings.
+	pop %edi
+	pop %ecx
+	jne 2f			// Not the same.
+
+	// The strings are the same - return the header pointer in %eax
+	pop %esi
+	mov %edx,%eax
+	ret
+
+2:
+	mov (%edx),%edx		// Move back through the link field to the previous word
+	jmp 1b			// .. and loop.
+
+4:	// Not found.
+	pop %esi
+	xor %eax,%eax		// Return zero to indicate not found.
+	ret
+
+	defcode ">CFA",4,,TCFA	// DEA -> Codeword address
+	pop %edi
+	call _TCFA
+	push %edi
+	NEXT
+_TCFA:
+	xor %eax,%eax
+	add $4,%edi		// Skip link pointer.
+	movb (%edi),%al		// Load flags+len into %al.
+	inc %edi		// Skip flags+len byte.
+	andb $0x1f,%al		// Just the length, not the flags.
+	add %eax,%edi		// Skip the name.
+	addl $3,%edi		// The codeword is 4-byte aligned.
+	andl $~3,%edi
+	ret
+
+	defcode "CHAR",4,,CHAR
+	call _WORD		// Returns %ecx = length, %edi = pointer to word.
+	xor %eax,%eax
+	movb (%edi),%al		// Get the first character of the word.
+	push %eax		// Push it onto the stack.
+	NEXT
+
+	defcode ":",1,,COLON
+
+	// Get the word and create a dictionary entry header for it.
+	call _WORD		// Returns %ecx = length, %edi = pointer to word.
+	mov %edi,%ebx		// %ebx = address of the word
+
+	movl var_HERE,%edi	// %edi is the address of the header
+	movl var_LATEST,%eax	// Get link pointer
+	stosl			// and store it in the header.
+
+	mov %cl,%al		// Get the length.
+	orb $F_HIDDEN,%al	// Set the HIDDEN flag on this entry.
+	stosb			// Store the length/flags byte.
+	push %esi
+	mov %ebx,%esi		// %esi = word
+	rep movsb		// Copy the word
+	pop %esi
+	addl $3,%edi		// Align to next 4 byte boundary.
+	andl $~3,%edi
+
+	movl $DOCOL,%eax	// The codeword for user-created words is always DOCOL (the interpreter)
+	stosl
+
+	// Header built, so now update LATEST and HERE.
+	// We'll be compiling words and putting them HERE.
+	movl var_HERE,%eax
+	movl %eax,var_LATEST
+	movl %edi,var_HERE
+
+	// And go into compile mode by setting STATE to 1.
+	movl $1,var_STATE
+	NEXT
+
+	defcode ",",1,,COMMA
+	pop %eax		// Code pointer to store.
+	call _COMMA
+	NEXT
+_COMMA:
+	movl var_HERE,%edi	// HERE
+	stosl			// Store it.
+	movl %edi,var_HERE	// Update HERE (incremented)
+	ret
+
+	defcode "HIDDEN",6,,HIDDEN
+	call _HIDDEN
+	NEXT
+_HIDDEN:
+	movl var_LATEST,%edi	// LATEST word.
+	addl $4,%edi		// Point to name/flags byte.
+	xorb $F_HIDDEN,(%edi)	// Toggle the HIDDEN bit.
+	ret
+
+	defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE
+	call _IMMEDIATE
+	NEXT
+_IMMEDIATE:
+	movl var_LATEST,%edi	// LATEST word.
+	addl $4,%edi		// Point to name/flags byte.
+	xorb $F_IMMED,(%edi)	// Toggle the IMMED bit.
+	ret
+
+	defcode ";",1,F_IMMED,SEMICOLON
+	movl $EXIT,%eax		// EXIT is the final codeword in compiled words.
+	call _COMMA		// Store it.
+	call _HIDDEN		// Toggle the HIDDEN flag (unhides the new word).
+	xor %eax,%eax		// Set STATE to 0 (back to execute mode).
+	movl %eax,var_STATE
+	NEXT
+
+/* This definiton of ' (TICK) is strictly cheating - it also only works in compiled code. */
+	defcode "'",1,,TICK
+	lodsl			// Get the address of the next word and skip it.
+	pushl %eax		// Push it on the stack.
+	NEXT
+
+/* This interpreter is pretty simple, but remember that in FORTH you can always override
+ * it later with a more powerful one!
+ */
+	defword "INTERPRETER",11,,INTERPRETER
+	.int INTERPRET,RDROP,INTERPRETER
+
+	defcode "INTERPRET",9,,INTERPRET
+	call _WORD		// Returns %ecx = length, %edi = pointer to word.
+
+	// Is it in the dictionary?
+	xor %eax,%eax
+	movl %eax,interpret_is_lit // Not a literal number (not yet anyway ...)
+	call _FIND		// Returns %eax = pointer to header or 0 if not found.
+	test %eax,%eax		// Found?
+	jz 1f
+
+	// In the dictionary.  Is it an IMMEDIATE codeword?
+	mov %eax,%edi		// %edi = dictionary entry
+	movb 4(%edi),%al	// Get name+flags.
+	push %ax		// Just save it for now.
+	call _TCFA		// Convert dictionary entry (in %edi) to codeword pointer.
+	pop %ax
+	andb $F_IMMED,%al	// Is IMMED flag set?
+	mov %edi,%eax
+	jnz 4f			// If IMMED, jump straight to executing.
+
+	jmp 2f
+
+1:	// Not in the dictionary (not a word) so assume it's a literal number.
+	incl interpret_is_lit
+	call _SNUMBER		// Returns the parsed number in %eax
+	mov %eax,%ebx
+	mov $LIT,%eax		// The word is LIT
+
+2:	// Are we compiling or executing?
+	movl var_STATE,%edx
+	test %edx,%edx
+	jz 4f			// Jump if executing.
+
+	// Compiling - just append the word to the current dictionary definition.
+	call _COMMA
+	mov interpret_is_lit,%ecx // Was it a literal?
+	test %ecx,%ecx
+	jz 3f
+	mov %ebx,%eax		// Yes, so LIT is followed by a number.
+	call _COMMA
+3:	NEXT
+
+4:	// Executing - run it!
+	mov interpret_is_lit,%ecx // Literal?
+	test %ecx,%ecx		// Literal?
+	jnz 5f
+
+	// Not a literal, execute it now.  This never returns, but the codeword will
+	// eventually call NEXT which will reenter the loop in INTERPRETER.
+	jmp *(%eax)
+
+5:	// Executing a literal, which means push it on the stack.
+	push %ebx
+	NEXT
+
+	.data
+	.align 4
+interpret_is_lit:
+	.int 0			// Flag used to record if reading a literal
+
+	// NB: SYSEXIT must be the last entry in the built-in dictionary.
+	defcode SYSEXIT,7,,SYSEXIT
+	pop %ebx
+	mov $__NR_exit,%eax
+	int $0x80
+
+/*----------------------------------------------------------------------
+ * Input buffer & initial input.
+ */
+	.data
+	.align 4096
+buffer:
+	// XXX gives 'Warning: unterminated string; newline inserted' messages which you can ignore
+	.ascii "\
+\\ Define some character constants
+: '\\n'   10 ;
+: 'SPACE' 32 ;
+: '\"'    34 ;
+: ':'     58 ;
+
+\\ CR prints a carriage return
+: CR '\\n' EMIT ;
+
+\\ SPACE prints a space
+: SPACE 'SPACE' EMIT ;
+
+\\ Primitive . (DOT) function doesn't follow with a blank, so redefine it to behave like FORTH.
+\\ Notice how we can trivially redefine existing functions.
+: . . SPACE ;
+
+\\ DUP, DROP are defined in assembly for speed, but this is how you might define them
+\\ in FORTH.  Notice use of the scratch variables _X and _Y.
+\\ : DUP _X ! _X @ _X @ ;
+\\ : DROP _X ! ;
+
+\\ The 2... versions of the standard operators work on pairs of stack entries.  They're not used
+\\ very commonly so not really worth writing in assembler.  Here is how they are defined in FORTH.
+: 2DUP OVER OVER ;
+: 2DROP DROP DROP ;
+
+\\ More standard FORTH words.
+: 2* 2 * ;
+: 2/ 2 / ;
+
+\\ [ and ] allow you to break into immediate mode while compiling a word.
+: [ IMMEDIATE		\\ define [ as an immediate word
+	0 STATE !	\\ go into immediate mode
+	;
+
+: ]
+	1 STATE !	\\ go back to compile mode
+	;
+
+\\ LITERAL takes whatever is on the stack and compiles LIT <foo>
+: LITERAL IMMEDIATE
+	' LIT ,		\\ compile LIT
+	,		\\ compile the literal itself (from the stack)
+	;
+
+\\ condition IF true-part THEN rest
+\\   compiles to:
+\\ condition 0BRANCH OFFSET true-part rest
+\\   where OFFSET is the offset of 'rest'
+\\ condition IF true-part ELSE false-part THEN
+\\   compiles to:
+\\ condition 0BRANCH OFFSET true-part BRANCH OFFSET2 false-part rest
+\\   where OFFSET if the offset of false-part and OFFSET2 is the offset of rest
+
+\\ IF is an IMMEDIATE word which compiles 0BRANCH followed by a dummy offset, and places
+\\ the address of the 0BRANCH on the stack.  Later when we see THEN, we pop that address
+\\ off the stack, calculate the offset, and back-fill the offset.
+: IF IMMEDIATE
+	' 0BRANCH ,	\\ compile 0BRANCH
+	HERE @		\\ save location of the offset on the stack
+	0 ,		\\ compile a dummy offset
+;
+
+: THEN IMMEDIATE
+	DUP
+	HERE @ SWAP -	\\ calculate the offset from the address saved on the stack
+	SWAP !		\\ store the offset in the back-filled location
+;
+
+: ELSE IMMEDIATE
+	' BRANCH ,	\\ definite branch to just over the false-part
+	HERE @		\\ save location of the offset on the stack
+	0 ,		\\ compile a dummy offset
+	SWAP		\\ now back-fill the original (IF) offset
+	DUP		\\ same as for THEN word above
+	HERE @ SWAP -
+	SWAP !
+;
+
+\\ BEGIN loop-part condition UNTIL
+\\   compiles to:
+\\ loop-part condition 0BRANCH OFFSET
+\\   where OFFSET points back to the loop-part
+\\ This is like do { loop-part } while (condition) in the C language
+: BEGIN IMMEDIATE
+	HERE @		\\ save location on the stack
+;
+
+: UNTIL IMMEDIATE
+	' 0BRANCH ,	\\ compile 0BRANCH
+	HERE @ -	\\ calculate the offset from the address saved on the stack
+	,		\\ compile the offset here
+;
+
+\\ BEGIN loop-part AGAIN
+\\   compiles to:
+\\ loop-part BRANCH OFFSET
+\\   where OFFSET points back to the loop-part
+\\ In other words, an infinite loop which can only be returned from with EXIT
+: AGAIN IMMEDIATE
+	' BRANCH ,	\\ compile BRANCH
+	HERE @ -	\\ calculate the offset back
+	,		\\ compile the offset here
+;
+
+\\ BEGIN condition WHILE loop-part REPEAT
+\\   compiles to:
+\\ condition 0BRANCH OFFSET2 loop-part BRANCH OFFSET
+\\   where OFFSET points back to condition (the beginning) and OFFSET2 points to after the whole piece of code
+\\ So this is like a while (condition) { loop-part } loop in the C language
+: WHILE IMMEDIATE
+	' 0BRANCH ,	\\ compile 0BRANCH
+	HERE @		\\ save location of the offset2 on the stack
+	0 ,		\\ compile a dummy offset2
+;
+
+: REPEAT IMMEDIATE
+	' BRANCH ,	\\ compile BRANCH
+	SWAP		\\ get the original offset (from BEGIN)
+	HERE @ - ,	\\ and compile it after BRANCH
+	DUP
+	HERE @ SWAP -	\\ calculate the offset2
+	SWAP !		\\ and back-fill it in the original location
+;
+
+\\ With the looping constructs, we can now write SPACES, which writes n spaces to stdout.
+: SPACES
+	BEGIN
+		SPACE	\\ print a space
+		1-	\\ until we count down to 0
+		DUP 0=
+	UNTIL
+;
+
+\\ .S prints the contents of the stack.  Very useful for debugging.
+: .S
+	DSP@		\\ get current stack pointer
+	BEGIN
+		DUP @ .		\\ print the stack element
+		4+		\\ move up
+		DUP S0 @ 4- =	\\ stop when we get to the top
+	UNTIL
+	DROP
+;
+
+\\ DEPTH returns the depth of the stack.
+: DEPTH S0 @ DSP@ - ;
+
+\\ .\" is the print string operator in FORTH.  Example: .\" Something to print\"
+\\ The space after the operator is the ordinary space required between words.
+\\ This is tricky to define because it has to do different things depending on whether
+\\ we are compiling or in immediate mode.  (Thus the word is marked IMMEDIATE so it can
+\\ detect this and do different things).
+\\ In immediate mode we just keep reading characters and printing them until we get to
+\\ the next double quote.
+\\ In compile mode we have the problem of where we're going to store the string (remember
+\\ that the input buffer where the string comes from may be overwritten by the time we
+\\ come round to running the function).  We store the string in the compiled function
+\\ like this:
+\\   LITSTRING, string length, string rounded up to 4 bytes, EMITSTRING, ...
+: .\" IMMEDIATE
+	STATE @		\\ compiling?
+	IF
+		' LITSTRING ,	\\ compile LITSTRING
+		HERE @		\\ save the address of the length word on the stack
+		0 ,		\\ dummy length - we don't know what it is yet
+		BEGIN
+			KEY 		\\ get next character of the string
+			DUP '\"' <>
+		WHILE
+			HERE @ !b	\\ store the character in the compiled image
+			HERE @ 1+ HERE ! \\ increment HERE pointer by 1 byte
+		REPEAT
+		DROP		\\ drop the double quote character at the end
+		DUP		\\ get the saved address of the length word
+		HERE @ SWAP -	\\ calculate the length
+		4-		\\ subtract 4 (because we measured from the start of the length word)
+		SWAP !		\\ and back-fill the length location
+		HERE @		\\ round up to next multiple of 4 bytes for the remaining code
+		3 +
+		3 INVERT AND
+		HERE !
+		' EMITSTRING ,	\\ compile the final EMITSTRING
+	ELSE
+		\\ In immediate mode, just read characters and print them until we get
+		\\ to the ending double quote.  Much simpler than the above code!
+		BEGIN
+			KEY
+			DUP '\"' = IF EXIT THEN
+			EMIT
+		AGAIN
+	THEN
+;
+
+\\ While compiling, [COMPILE] WORD compiles WORD if it would otherwise be IMMEDIATE.
+: [COMPILE] IMMEDIATE
+	WORD		\\ get the next word
+	FIND		\\ find it in the dictionary
+	>CFA		\\ get its codeword
+	,		\\ and compile that
+;
+
+\\ RECURSE makes a recursive call to the current word that is being compiled.
+\\ Normally while a word is being compiled, it is marked HIDDEN so that references to the
+\\ same word within are calls to the previous definition of the word.
+: RECURSE IMMEDIATE
+	LATEST @ >CFA	\\ LATEST points to the word being compiled at the moment
+	,		\\ compile it
+;
+
+
+\\ Finally print the welcome prompt.
+.\" OK \"
+"
+
+_initbufftop:
+	.align 4096
+buffend:
+
+currkey:
+	.int buffer
+bufftop:
+	.int _initbufftop
diff --git a/jonesforth.S.1 b/jonesforth.S.1
new file mode 100644
index 0000000..76fd353
--- /dev/null
+++ b/jonesforth.S.1
@@ -0,0 +1,217 @@
+/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
+ * By Richard W.M. Jones <rich@annexia.org>
+ *
+ * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
+ */
+
+#include <asm-i386/unistd.h>
+
+/* NEXT macro. */
+	.macro NEXT
+	lodsl
+	jmp *(%eax)
+	.endm
+
+/* ELF entry point. */
+	.text
+	.globl _start
+_start:
+	cld
+	mov $return_stack,%ebp	// Initialise the return stack.
+
+	mov $cold_start,%esi	// Initialise interpreter.
+	NEXT			// Run interpreter!
+
+	.section .rodata
+cold_start:			// High-level code without a codeword.
+	.int COLD
+
+/* DOCOL - the interpreter! */
+	.text
+DOCOL:
+	lea -4(%ebp),%ebp	// push %esi on to the return stack
+	movl %esi,(%ebp)
+
+	addl $4,%eax		// %eax points to codeword, so make
+	movl %eax,%esi		// %esi point to first data word
+	NEXT
+
+
+
+/*----------------------------------------------------------------------
+ * Fixed sized buffers for everything.
+ */
+	.bss
+
+/* FORTH return stack. */
+#define RETURN_STACK_SIZE 8192
+	.align 4096
+	.space RETURN_STACK_SIZE
+return_stack:
+
+/* Space for user-defined words. */
+#define USER_DEFS_SIZE 16384
+	.align 4096
+user_defs_start:
+	.space USER_DEFS_SIZE
+
+
+
+
+
+
+/*----------------------------------------------------------------------
+ * Built-in words defined the long way.
+ */
+#define F_IMMED 0x80
+#define F_HIDDEN 0x40
+
+	.macro defcode name, namelen, flags=0, label, link=0
+	.section .rodata
+N_\label :
+	.align 4
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.int \link		// link
+\label :
+	.int 1f			// codeword
+	.text
+1:				// assembler code follows
+	.endm
+
+	.macro defword name, namelen, flags=0, label, link=0
+	.section .rodata
+N_\label :
+	.align 4
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.int \link		// link
+\label :
+	.int DOCOL		// codeword - the interpreter
+	// list of word pointers follow
+	.endm
+
+	defword "COLD",4,,COLD,
+	.int KEY,ECHO,RDROP,COLD
+
+	defcode "KEY",3,,KEY,N_COLD
+	mov (currkey),%ebx
+	cmp (bufftop),%ebx
+	jge 1f
+	xor %eax,%eax
+	mov (%ebx),%al
+	push %ax
+	inc %ebx
+	mov %ebx,(currkey)
+	NEXT
+1:
+	mov $0,%ebx		// out of input, exit (0)
+	mov $__NR_exit,%eax
+	int $0x80
+
+	defcode "ECHO",4,,ECHO,N_KEY
+	mov $1,%ebx		// 1st param: stdout
+
+	// write needs the address of the byte to write
+	pop %eax
+	mov %al,(_echo_tmp)
+	mov $_echo_tmp,%ecx	// 2nd param: address
+
+	mov $1,%edx		// 3rd param: nbytes = 1
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	.bss
+_echo_tmp: .space 1
+
+	defcode "RDROP",5,,RDROP,N_ECHO
+	lea 4(%ebp),%ebp	// pop the return stack
+	NEXT
+
+#if 0
+	defcode "R>",2,,FROMR,N_TAIL
+
+	defcode ">R",2,,TOR,N_FROMR
+
+	defcode ":",1,,COLON,
+	call nextword		// get next word, the procedure name
+	// The next word is returned in %ebx and has length %ecx bytes.
+
+	// Save the current value of VOCAB.
+	mov v_vocab,%eax
+	push %eax
+
+	// Change VOCAB to point to our new word's header (at LATEST).
+	mov v_latest,%edi
+	mov %edi,v_vocab
+
+	// We'll start by writing the word's header at LATEST; the header
+	// is just length byte, the word itself, link pointer.
+	mov %ecx,(%edi)		// Length byte
+	inc %edi
+	mov %ebx,%esi		// Copy the string.
+	rep movsb
+	// Round up to the next multiple of 4 so that the link pointer
+	// is aligned.
+	or $3,%edi
+	inc %edi
+	pop %eax		// Link pointer, points to old VOCAB.
+	mov %eax,(%edi)
+	add $4,%edi
+	// Write the codeword, which for user-defined words is always a
+	// pointer to the FORTH indirect threaded interpreter.
+	movl $DOCOL,(%edi)
+	add $4,%edi
+
+	// Finally, update LATEST.  As we go along compiling, we'll be
+	// writing compiled codewords to the LATEST pointer (and moving
+	// it along each time).
+	mov %edi,v_latest
+
+	movl $1,v_state		// go into compiling mode
+	ret
+
+	defcode ";",1,F_IMMED,SEMICOLON,N_COLON
+	// XXX
+
+#endif
+
+	defcode SYSEXIT,7,,SYSEXIT, //N_COLON
+	pop %ebx
+	mov $__NR_exit,%eax
+	int $0x80
+
+/*----------------------------------------------------------------------
+ * Variables containing the interpreter's state.
+ */
+	.data
+
+	.align 4
+v_state:
+	.int 0		// 0 = immediate, 1 = compiling
+v_vocab:
+	.int N_SYSEXIT	// last word in the dictionary
+v_latest:
+	.int user_defs_start	// pointer to next space for user definition or current compiled def
+
+/*----------------------------------------------------------------------
+ * Input buffer & initial input.
+ */
+	.data
+	.align 4096
+buffer:
+	.ascii "TEST OF READING WORDS   1 2 3"
+
+_initbufftop:
+	.align 4096
+buffend:
+
+currkey:
+	.int buffer
+bufftop:
+	.int _initbufftop
diff --git a/jonesforth.S.10 b/jonesforth.S.10
new file mode 100644
index 0000000..0a40b05
--- /dev/null
+++ b/jonesforth.S.10
@@ -0,0 +1,673 @@
+/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
+ * By Richard W.M. Jones <rich@annexia.org>
+ *
+ * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
+ */
+
+#include <asm-i386/unistd.h>
+
+/* NOTES-------------------------------------------------------------------------------------------------------------------
+
+Need to say something about $ before constants.
+
+And about je/jne/ja/jb/jbe/etc
+
+
+
+
+*/
+
+/* NEXT macro. */
+	.macro NEXT
+	lodsl
+	jmp *(%eax)
+	.endm
+
+/* Macros to deal with the return stack. */
+	.macro PUSHRSP reg
+	lea -4(%ebp),%ebp	// push reg on to return stack
+	movl \reg,(%ebp)
+	.endm
+
+	.macro POPRSP reg
+	mov (%ebp),\reg		// pop top of return stack to reg
+	lea 4(%ebp),%ebp
+	.endm
+
+/* ELF entry point. */
+	.text
+	.globl _start
+_start:
+	cld
+	mov $return_stack,%ebp	// Initialise the return stack.
+
+	mov $cold_start,%esi	// Initialise interpreter.
+	NEXT			// Run interpreter!
+
+	.section .rodata
+cold_start:			// High-level code without a codeword.
+	.int COLD
+
+/* DOCOL - the interpreter! */
+	.text
+	.align 4
+DOCOL:
+	PUSHRSP %esi		// push %esi on to the return stack
+	addl $4,%eax		// %eax points to codeword, so make
+	movl %eax,%esi		// %esi point to first data word
+	NEXT
+
+/*----------------------------------------------------------------------
+ * Fixed sized buffers for everything.
+ */
+	.bss
+
+/* FORTH return stack. */
+#define RETURN_STACK_SIZE 8192
+	.align 4096
+	.space RETURN_STACK_SIZE
+return_stack:
+
+/* Space for user-defined words. */
+#define USER_DEFS_SIZE 16384
+	.align 4096
+user_defs_start:
+	.space USER_DEFS_SIZE
+
+
+
+
+
+
+/*----------------------------------------------------------------------
+ * Built-in words defined the long way.
+ */
+#define F_IMMED 0x80
+#define F_HIDDEN 0x20
+
+	// Store the chain of links.
+	.set link,0
+
+	.macro defcode name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int code_\label	// codeword
+	.text
+	.align 4
+	.globl code_\label
+code_\label :			// assembler code follows
+	.endm
+
+	.macro defword name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int DOCOL		// codeword - the interpreter
+	// list of word pointers follow
+	.endm
+
+	.macro defvar name, namelen, flags=0, label, initial=0
+	defcode \name,\namelen,\flags,\label
+	push $var_\name
+	NEXT
+	.data
+	.align 4
+var_\name :
+	.int \initial
+	.endm
+
+	// Some easy ones, written in assembly for speed
+	defcode "DROP",4,,DROP
+	pop %eax		// drop top of stack
+	NEXT
+
+	defcode "DUP",3,,DUP
+	pop %eax		// duplicate top of stack
+	push %eax
+	push %eax
+	NEXT
+
+	defcode "SWAP",4,,SWAP
+	pop %eax		// swap top of stack
+	pop %ebx
+	push %eax
+	push %ebx
+	NEXT
+
+	defcode "OVER",4,,OVER
+	mov 4(%esp),%eax	// get the second element of stack
+	push %eax		// and push it on top
+	NEXT
+
+	defcode "1+",2,,INCR
+	incl (%esp)		// increment top of stack
+	NEXT
+
+	defcode "1-",2,,DECR
+	decl (%esp)		// decrement top of stack
+	NEXT
+
+	defcode "+",1,,ADD
+	pop %eax
+	addl %eax,(%esp)
+	NEXT
+
+	defcode "-",1,,SUB
+	pop %eax
+	subl %eax,(%esp)
+	NEXT
+
+	defcode "*",1,,MUL
+	pop %eax
+	pop %ebx
+	imull %ebx,%eax
+	push %eax		// ignore overflow
+	NEXT
+
+	defcode "/",1,,DIV
+	xor %edx,%edx
+	pop %ebx
+	pop %eax
+	idivl %ebx
+	push %eax		// push quotient
+	NEXT
+
+	defcode "MOD",3,,MOD
+	xor %edx,%edx
+	pop %ebx
+	pop %eax
+	idivl %ebx
+	push %edx		// push remainder
+	NEXT
+
+	// COLD must not return (ie. must not call EXIT).
+	defword "COLD",4,,COLD
+	// XXX reinitialisation of the interpreter
+	.int INTERPRETER	// call the interpreter loop (never returns)
+	.int LIT,1,SYSEXIT	// hmmm, but in case it does, exit(1).
+
+	defcode "EXIT",4,,EXIT
+	POPRSP %esi		// pop return stack into %esi
+	NEXT
+
+	defcode "LIT",3,,LIT
+	// %esi points to the next command, but in this case it points to the next
+	// literal 32 bit integer.  Get that literal into %eax and increment %esi.
+	// On x86, it's a convenient single byte instruction!  (cf. NEXT macro)
+	lodsl
+	push %eax		// push the literal number on to stack
+	NEXT
+
+#if 0
+	defcode "0SKIP",5,,ZSKIP
+	// If the top of stack is zero, skip the next instruction.
+	pop %eax
+	test %eax,%eax
+	jnz 1f
+	lodsl			// this does the skip
+1:	NEXT
+#endif
+
+	defcode "!",1,,STORE
+	pop %ebx		// address to store at
+	pop %eax		// data to store there
+	mov %eax,(%ebx)		// store it
+	NEXT
+
+	defcode "@",1,,FETCH
+	pop %ebx		// address to fetch
+	mov (%ebx),%eax		// fetch it
+	push %eax		// push value onto stack
+	NEXT
+
+	// The STATE variable is 0 for execute mode, != 0 for compile mode
+	defvar "STATE",5,,STATE
+
+	// This points to where compiled words go.
+	defvar "HERE",4,,HERE,user_defs_start
+
+	// This is the last definition in the dictionary.
+	defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary
+
+	// _X, _Y and _Z are scratch variables used by standard words.
+	defvar "_X",2,,TX
+	defvar "_Y",2,,TY
+	defvar "_Z",2,,TZ
+
+	defcode ">R",2,,TOR
+	pop %eax		// pop parameter stack into %eax
+	PUSHRSP %eax		// push it on to the return stack
+	NEXT
+
+	defcode "R>",2,,FROMR
+	POPRSP %eax		// pop return stack on to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+
+	defcode "RSP@",4,,RSPFETCH
+	push %ebp
+	NEXT
+
+	defcode "RSP!",4,,RSPSTORE
+	pop %ebp
+	NEXT
+
+	defcode "RDROP",5,,RDROP
+	lea 4(%ebp),%ebp	// pop return stack and throw away
+	NEXT
+
+	defcode "KEY",3,,KEY
+	call _KEY
+	push %eax		// push return value on stack
+	NEXT
+_KEY:
+	mov (currkey),%ebx
+	cmp (bufftop),%ebx
+	jge 1f
+	xor %eax,%eax
+	mov (%ebx),%al
+	inc %ebx
+	mov %ebx,(currkey)
+	ret
+
+1:	// out of input; use read(2) to fetch more input from stdin
+	xor %ebx,%ebx		// 1st param: stdin
+	mov $buffer,%ecx	// 2nd param: buffer
+	mov %ecx,currkey
+	mov $buffend-buffer,%edx // 3rd param: max length
+	mov $__NR_read,%eax	// syscall: read
+	int $0x80
+	test %eax,%eax		// If %eax <= 0, then exit.
+	jbe 2f
+	addl %eax,%ecx		// buffer+%eax = bufftop
+	mov %ecx,bufftop
+	jmp _KEY
+
+2:	// error or out of input: exit
+	xor %ebx,%ebx
+	mov $__NR_exit,%eax	// syscall: exit
+	int $0x80
+
+	defcode "EMIT",4,,EMIT
+	pop %eax
+	call _EMIT
+	NEXT
+_EMIT:
+	mov $1,%ebx		// 1st param: stdout
+
+	// write needs the address of the byte to write
+	mov %al,(2f)
+	mov $2f,%ecx		// 2nd param: address
+
+	mov $1,%edx		// 3rd param: nbytes = 1
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+	ret
+
+	.bss
+2:	.space 1		// scratch used by EMIT
+
+	defcode "WORD",4,,WORD
+	call _WORD
+	push %ecx		// push length
+	push %edi		// push base address
+	NEXT
+
+_WORD:
+	/* Search for first non-blank character.  Also skip \ comments. */
+1:
+	call _KEY		// get next key, returned in %eax
+	cmpb $'\\',%al		// start of a comment?
+	je 3f			// if so, skip the comment
+	cmpb $' ',%al
+	jbe 1b			// if so, keep looking
+
+	/* Search for the end of the word, storing chars as we go. */
+	mov $5f,%edi		// pointer to return buffer
+2:
+	stosb			// add character to return buffer
+	call _KEY		// get next key, returned in %al
+	cmpb $' ',%al		// is blank?
+	ja 2b			// if not, keep looping
+
+	/* Return the word (well, the static buffer) and length. */
+	sub $5f,%edi
+	mov %edi,%ecx		// return length of the word
+	mov $5f,%edi		// return address of the word
+	ret
+
+	/* Code to skip \ comments to end of the current line. */
+3:
+	call _KEY
+	cmpb $'\n',%al		// end of line yet?
+	jne 3b
+	jmp 1b
+
+	.bss
+	// A static buffer where WORD returns.  Subsequent calls
+	// overwrite this buffer.  Maximum word length is 32 chars.
+5:	.space 32
+
+	defcode "EMITWORD",8,,EMITWORD
+	mov $1,%ebx		// 1st param: stdout
+	pop %ecx		// 2nd param: address of string
+	pop %edx		// 3rd param: length of string
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	defcode ".",1,,DOT
+	pop %eax		// Get the number to print into %eax
+	call _DOT		// Easier to do this recursively ...
+	NEXT
+_DOT:
+	mov $10,%ecx		// Base 10
+1:
+	cmp %ecx,%eax
+	jb 2f
+	xor %edx,%edx		// %edx:%eax / %ecx -> quotient %eax, remainder %edx
+	idivl %ecx
+	pushl %edx
+	call _DOT
+	popl %eax
+	jmp 1b
+2:
+	xor %ah,%ah
+	aam $10
+	cwde
+	addl $'0',%eax
+	call _EMIT
+	ret
+
+	// Parse a number from a string on the stack -- almost the opposite of . (DOT)
+	// Note that there is absolutely no error checking.  In particular the length of the
+	// string must be >= 1 bytes.
+	defcode "SNUMBER",7,,SNUMBER
+	pop %edi
+	pop %ecx
+	call _SNUMBER
+	push %eax
+	NEXT
+_SNUMBER:
+	xor %eax,%eax
+	xor %ebx,%ebx
+1:
+	imull $10,%eax		// %eax *= 10
+	movb (%edi),%bl
+	inc %edi
+	subb $'0',%bl		// ASCII -> digit
+	add %ebx,%eax
+	dec %ecx
+	jnz 1b
+	ret
+
+	defcode "FIND",4,,FIND
+	pop %edi		// %edi = address
+	pop %ecx		// %ecx = length
+	call _FIND
+	push %eax
+	NEXT
+
+_FIND:
+	push %esi		// Save %esi so we can use it in string comparison.
+
+	// Now we start searching backwards through the dictionary for this word.
+	mov var_LATEST,%edx	// LATEST points to name header of the latest word in the dictionary
+1:
+	test %edx,%edx		// NULL pointer?  (end of the linked list)
+	je 4f
+
+	// Compare the length expected and the length of the word.
+	// Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
+	// this won't pick the word (the length will appear to be wrong).
+	xor %eax,%eax
+	movb 4(%edx),%al	// %al = flags+length field
+	andb $(F_HIDDEN|0x1f),%al // %al = name length
+	cmpb %cl,%al		// Length is the same?
+	jne 2f
+
+	// Compare the strings in detail.
+	push %ecx		// Save the length
+	push %edi		// Save the address (repe cmpsb will move this pointer)
+	lea 5(%edx),%esi	// Dictionary string we are checking against.
+	repe cmpsb		// Compare the strings.
+	pop %edi
+	pop %ecx
+	jne 2f			// Not the same.
+
+	// The strings are the same - return the header pointer in %eax
+	pop %esi
+	mov %edx,%eax
+	ret
+
+2:
+	mov (%edx),%edx		// Move back through the link field to the previous word
+	jmp 1b			// .. and loop.
+
+4:	// Not found.
+	pop %esi
+	xor %eax,%eax		// Return zero to indicate not found.
+	ret
+
+	defcode ">CFA",4,,TCFA	// DEA -> Codeword address
+	pop %edi
+	call _TCFA
+	push %edi
+	NEXT
+_TCFA:
+	xor %eax,%eax
+	add $4,%edi		// Skip link pointer.
+	movb (%edi),%al		// Load flags+len into %al.
+	inc %edi		// Skip flags+len byte.
+	andb $0x1f,%al		// Just the length, not the flags.
+	add %eax,%edi		// Skip the name.
+	addl $3,%edi		// The codeword is 4-byte aligned.
+	andl $~3,%edi
+	ret
+
+#if 0
+	defword "'",1,,TICK
+	.int WORD		// Get the following word.
+	.int FIND		// Look it up in the dictionary.
+	.int DUP		// If not found, skip >CFA (TCFA) instruction.
+	.int ZSKIP
+	.int TCFA		// Convert to a codeword pointer.
+	.int EXIT		// Return.
+#endif
+
+	defcode ":",1,,COLON
+
+	// Get the word and create a dictionary entry header for it.
+	call _WORD		// Returns %ecx = length, %edi = pointer to word.
+	mov %edi,%ebx		// %ebx = address of the word
+
+	movl var_HERE,%edi	// %edi is the address of the header
+	movl var_LATEST,%eax	// Get link pointer
+	stosl			// and store it in the header.
+
+	mov %cl,%al		// Get the length.
+	orb $F_HIDDEN,%al	// Set the HIDDEN flag on this entry.
+	stosb			// Store the length/flags byte.
+	push %esi
+	mov %ebx,%esi		// %esi = word
+	rep movsb		// Copy the word
+	pop %esi
+	addl $3,%edi		// Align to next 4 byte boundary.
+	andl $~3,%edi
+
+	movl $DOCOL,%eax	// The codeword for user-created words is always DOCOL (the interpreter)
+	stosl
+
+	// Header built, so now update LATEST and HERE.
+	// We'll be compiling words and putting them HERE.
+	movl var_HERE,%eax
+	movl %eax,var_LATEST
+	movl %edi,var_HERE
+
+	// And go into compile mode by setting STATE to 1.
+	xor %eax,%eax
+	inc %eax
+	mov %eax,var_STATE
+	NEXT
+
+	defcode ",",1,,COMMA
+	pop %eax		// Code pointer to store.
+	call _COMMA
+	NEXT
+_COMMA:
+	movl var_HERE,%edi	// HERE
+	stosl			// Store it.
+	movl %edi,var_HERE	// Update HERE (incremented)
+	ret
+
+	defcode "HIDDEN",6,,HIDDEN
+	call _HIDDEN
+	NEXT
+_HIDDEN:
+	movl var_LATEST,%edi	// LATEST word.
+	addl $4,%edi		// Point to name/flags byte.
+	xorb $F_HIDDEN,(%edi)	// Toggle the HIDDEN bit.
+	ret
+
+	defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE
+	call _IMMEDIATE
+	NEXT
+_IMMEDIATE:
+	movl var_LATEST,%edi	// LATEST word.
+	addl $4,%edi		// Point to name/flags byte.
+	xorb $F_IMMED,(%edi)	// Toggle the IMMED bit.
+	ret
+
+	defcode ";",1,F_IMMED,SEMICOLON
+	movl $EXIT,%eax		// EXIT is the final codeword in compiled words.
+	call _COMMA		// Store it.
+	call _HIDDEN		// Toggle the HIDDEN flag (unhides the new word).
+	xor %eax,%eax		// Set STATE to 0 (back to execute mode).
+	movl %eax,var_STATE
+	NEXT
+
+/* This interpreter is pretty simple, but remember that in FORTH you can always override
+ * it later with a more powerful one!
+ */
+	defword "INTERPRETER",11,,INTERPRETER
+	.int INTERPRET,RDROP,INTERPRETER
+
+	defcode "INTERPRET",9,,INTERPRET
+	call _WORD		// Returns %ecx = length, %edi = pointer to word.
+
+	// Is it in the dictionary?
+	call _FIND		// Returns %eax = pointer to header or 0 if not found.
+	test %eax,%eax		// Found?
+	jz 1f
+
+	// In the dictionary.  Is it an IMMEDIATE codeword?
+	mov %eax,%edi		// %edi = dictionary entry
+	movb 4(%edi),%al	// Get name+flags.
+	push %ax		// Just save it for now.
+	call _TCFA		// Convert dictionary entry (in %edi) to codeword pointer.
+	pop %ax
+	andb $F_IMMED,%al	// Is IMMED flag set?
+	mov %edi,%eax
+	jnz 3f			// If IMMED, jump straight to executing.
+
+	jmp 2f
+
+1:	// Not in the dictionary (not a word) so assume it's a number.
+	call _SNUMBER		// Returns the parsed number in %eax
+	mov %eax,%ebx
+	mov $LIT,%eax		// The word is LIT
+
+2:	// Are we compiling or executing?
+	movl var_STATE,%edx
+	test %edx,%edx
+	jz 3f			// Jump if executing.
+
+	// Compiling - just append the word to the current dictionary definition.
+	call _COMMA
+	cmp $LIT,%eax		// Was it LIT?
+	jne 4f
+	mov %ebx,%eax		// Yes, so LIT is followed by a number.
+	call _COMMA
+	NEXT
+
+3:	// Executing - run it!
+	cmp $LIT,%eax		// Literal?
+	je 4f
+	// Not a literal, execute it now.  This never returns, but the codeword will
+	// eventually call NEXT which will reenter the loop in INTERPRETER.
+	jmp *(%eax)
+
+4:	// Executing a literal, which means push it on the stack.
+	push %ebx
+	NEXT
+
+	// NB: SYSEXIT must be the last entry in the built-in dictionary.
+	defcode SYSEXIT,7,,SYSEXIT
+	pop %ebx
+	mov $__NR_exit,%eax
+	int $0x80
+
+/*----------------------------------------------------------------------
+ * Input buffer & initial input.
+ */
+	.data
+	.align 4096
+buffer:
+	// XXX gives 'Warning: unterminated string; newline inserted' messages which you can ignore
+	.ascii "\
+\\ Define some character constants
+: '\\n'   10 ;
+: 'SPACE' 32 ;
+: '\"'    34 ;
+
+\\ CR prints a carriage return
+: CR '\\n' EMIT ;
+
+\\ SPACE prints a space
+: SPACE 'SPACE' EMIT ;
+
+\\ Primitive . (DOT) function doesn't follow with a blank, so redefine it to behave like FORTH.
+\\ Notice how we can trivially redefine existing functions.
+: . . SPACE ;
+
+\\ XXX SPACES
+
+\\ DUP, DROP are defined in assembly for speed, but this is how you might define them
+\\ in FORTH.  Notice use of the scratch variables _X and _Y.
+\\ : DUP _X ! _X @ _X @ ;
+\\ : DROP _X ! ;
+
+
+
+\\ Finally print the welcome prompt.
+79 EMIT 75 EMIT 'SPACE' EMIT
+"
+
+_initbufftop:
+	.align 4096
+buffend:
+
+currkey:
+	.int buffer
+bufftop:
+	.int _initbufftop
diff --git a/jonesforth.S.11 b/jonesforth.S.11
new file mode 100644
index 0000000..3f87d36
--- /dev/null
+++ b/jonesforth.S.11
@@ -0,0 +1,914 @@
+/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
+ * By Richard W.M. Jones <rich@annexia.org>
+ *
+ * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
+ */
+
+#include <asm-i386/unistd.h>
+
+/* NOTES-------------------------------------------------------------------------------------------------------------------
+
+Need to say something about $ before constants.
+
+And about je/jne/ja/jb/jbe/etc
+
+
+
+
+*/
+
+/* NEXT macro. */
+	.macro NEXT
+	lodsl
+	jmp *(%eax)
+	.endm
+
+/* Macros to deal with the return stack. */
+	.macro PUSHRSP reg
+	lea -4(%ebp),%ebp	// push reg on to return stack
+	movl \reg,(%ebp)
+	.endm
+
+	.macro POPRSP reg
+	mov (%ebp),\reg		// pop top of return stack to reg
+	lea 4(%ebp),%ebp
+	.endm
+
+/* ELF entry point. */
+	.text
+	.globl _start
+_start:
+	cld
+	mov $return_stack,%ebp	// Initialise the return stack.
+
+	mov $cold_start,%esi	// Initialise interpreter.
+	NEXT			// Run interpreter!
+
+	.section .rodata
+cold_start:			// High-level code without a codeword.
+	.int COLD
+
+/* DOCOL - the interpreter! */
+	.text
+	.align 4
+DOCOL:
+	PUSHRSP %esi		// push %esi on to the return stack
+	addl $4,%eax		// %eax points to codeword, so make
+	movl %eax,%esi		// %esi point to first data word
+	NEXT
+
+/*----------------------------------------------------------------------
+ * Fixed sized buffers for everything.
+ */
+	.bss
+
+/* FORTH return stack. */
+#define RETURN_STACK_SIZE 8192
+	.align 4096
+	.space RETURN_STACK_SIZE
+return_stack:
+
+/* Space for user-defined words. */
+#define USER_DEFS_SIZE 16384
+	.align 4096
+user_defs_start:
+	.space USER_DEFS_SIZE
+
+
+
+
+
+
+/*----------------------------------------------------------------------
+ * Built-in words defined the long way.
+ */
+#define F_IMMED 0x80
+#define F_HIDDEN 0x20
+
+	// Store the chain of links.
+	.set link,0
+
+	.macro defcode name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int code_\label	// codeword
+	.text
+	.align 4
+	.globl code_\label
+code_\label :			// assembler code follows
+	.endm
+
+	.macro defword name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int DOCOL		// codeword - the interpreter
+	// list of word pointers follow
+	.endm
+
+	.macro defvar name, namelen, flags=0, label, initial=0
+	defcode \name,\namelen,\flags,\label
+	push $var_\name
+	NEXT
+	.data
+	.align 4
+var_\name :
+	.int \initial
+	.endm
+
+	// Some easy ones, written in assembly for speed
+	defcode "DROP",4,,DROP
+	pop %eax		// drop top of stack
+	NEXT
+
+	defcode "DUP",3,,DUP
+	pop %eax		// duplicate top of stack
+	push %eax
+	push %eax
+	NEXT
+
+	defcode "SWAP",4,,SWAP
+	pop %eax		// swap top of stack
+	pop %ebx
+	push %eax
+	push %ebx
+	NEXT
+
+	defcode "OVER",4,,OVER
+	mov 4(%esp),%eax	// get the second element of stack
+	push %eax		// and push it on top
+	NEXT
+
+	defcode "1+",2,,INCR
+	incl (%esp)		// increment top of stack
+	NEXT
+
+	defcode "1-",2,,DECR
+	decl (%esp)		// decrement top of stack
+	NEXT
+
+	defcode "+",1,,ADD
+	pop %eax
+	addl %eax,(%esp)
+	NEXT
+
+	defcode "-",1,,SUB
+	pop %eax
+	subl %eax,(%esp)
+	NEXT
+
+	defcode "*",1,,MUL
+	pop %eax
+	pop %ebx
+	imull %ebx,%eax
+	push %eax		// ignore overflow
+	NEXT
+
+	defcode "/",1,,DIV
+	xor %edx,%edx
+	pop %ebx
+	pop %eax
+	idivl %ebx
+	push %eax		// push quotient
+	NEXT
+
+	defcode "MOD",3,,MOD
+	xor %edx,%edx
+	pop %ebx
+	pop %eax
+	idivl %ebx
+	push %edx		// push remainder
+	NEXT
+
+	defcode "=",1,,EQU	// top two words are equal?
+	pop %eax
+	pop %ebx
+	cmp %ebx,%eax
+	je 1f
+	pushl $0
+	NEXT
+1:	pushl $1
+	NEXT
+
+	defcode "<>",2,,NEQU	// top two words are not equal?
+	pop %eax
+	pop %ebx
+	cmp %ebx,%eax
+	je 1f
+	pushl $1
+	NEXT
+1:	pushl $0
+	NEXT
+
+	defcode "0=",2,,ZEQU	// top of stack equals 0?
+	pop %eax
+	test %eax,%eax
+	jz 1f
+	pushl $0
+	NEXT
+1:	pushl $1
+	NEXT
+
+	defcode "AND",3,,AND
+	pop %eax
+	andl %eax,(%esp)
+	NEXT
+
+	defcode "OR",2,,OR
+	pop %eax
+	orl %eax,(%esp)
+	NEXT
+
+	defcode "INVERT",6,,INVERT
+	notl (%esp)
+	NEXT
+
+	// COLD must not return (ie. must not call EXIT).
+	defword "COLD",4,,COLD
+	// XXX reinitialisation of the interpreter
+	.int INTERPRETER	// call the interpreter loop (never returns)
+	.int LIT,1,SYSEXIT	// hmmm, but in case it does, exit(1).
+
+	defcode "EXIT",4,,EXIT
+	POPRSP %esi		// pop return stack into %esi
+	NEXT
+
+	defcode "LIT",3,,LIT
+	// %esi points to the next command, but in this case it points to the next
+	// literal 32 bit integer.  Get that literal into %eax and increment %esi.
+	// On x86, it's a convenient single byte instruction!  (cf. NEXT macro)
+	lodsl
+	push %eax		// push the literal number on to stack
+	NEXT
+
+	defcode "LITSTRING",9,,LITSTRING
+	lodsl			// get the length of the string
+	push %eax		// push it on the stack
+	push %esi		// push the address of the start of the string
+	addl %eax,%esi		// skip past the string
+	addl $3,%esi		// but round up to next 4 byte boundary
+	andl $~3,%esi
+	NEXT
+
+	defcode "BRANCH",6,,BRANCH
+	add (%esi),%esi		// add the offset to the instruction pointer
+	NEXT
+
+	defcode "0BRANCH",7,,ZBRANCH
+	pop %eax
+	test %eax,%eax		// top of stack is zero?
+	jz code_BRANCH		// if so, jump back to the branch function above
+	lodsl			// otherwise we need to skip the offset
+	NEXT
+
+	defcode "!",1,,STORE
+	pop %ebx		// address to store at
+	pop %eax		// data to store there
+	mov %eax,(%ebx)		// store it
+	NEXT
+
+	defcode "@",1,,FETCH
+	pop %ebx		// address to fetch
+	mov (%ebx),%eax		// fetch it
+	push %eax		// push value onto stack
+	NEXT
+
+/* ! and @ (STORE and FETCH) store 32-bit words.  It's also useful to be able to read and write bytes.
+ * I don't know whether FORTH has these words, so I invented my own, called !b and @b.
+ * Byte-oriented operations only work on architectures which permit them (i386 is one of those).
+ */
+	defcode "!b",2,,STOREBYTE
+	pop %ebx		// address to store at
+	pop %eax		// data to store there
+	movb %al,(%ebx)		// store it
+	NEXT
+
+	defcode "@b",2,,FETCHBYTE
+	pop %ebx		// address to fetch
+	xor %eax,%eax
+	movb (%ebx),%al		// fetch it
+	push %eax		// push value onto stack
+	NEXT
+
+	// The STATE variable is 0 for execute mode, != 0 for compile mode
+	defvar "STATE",5,,STATE
+
+	// This points to where compiled words go.
+	defvar "HERE",4,,HERE,user_defs_start
+
+	// This is the last definition in the dictionary.
+	defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary
+
+	// _X, _Y and _Z are scratch variables used by standard words.
+	defvar "_X",2,,TX
+	defvar "_Y",2,,TY
+	defvar "_Z",2,,TZ
+
+	defcode "DSP@",4,,DSPFETCH
+	mov %esp,%eax
+	push %eax
+	NEXT
+
+	defcode "DSP!",4,,DSPSTORE
+	pop %esp
+	NEXT
+
+	defcode ">R",2,,TOR
+	pop %eax		// pop parameter stack into %eax
+	PUSHRSP %eax		// push it on to the return stack
+	NEXT
+
+	defcode "R>",2,,FROMR
+	POPRSP %eax		// pop return stack on to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+
+	defcode "RSP@",4,,RSPFETCH
+	push %ebp
+	NEXT
+
+	defcode "RSP!",4,,RSPSTORE
+	pop %ebp
+	NEXT
+
+	defcode "RDROP",5,,RDROP
+	lea 4(%ebp),%ebp	// pop return stack and throw away
+	NEXT
+
+	defcode "KEY",3,,KEY
+	call _KEY
+	push %eax		// push return value on stack
+	NEXT
+_KEY:
+	mov (currkey),%ebx
+	cmp (bufftop),%ebx
+	jge 1f
+	xor %eax,%eax
+	mov (%ebx),%al
+	inc %ebx
+	mov %ebx,(currkey)
+	ret
+
+1:	// out of input; use read(2) to fetch more input from stdin
+	xor %ebx,%ebx		// 1st param: stdin
+	mov $buffer,%ecx	// 2nd param: buffer
+	mov %ecx,currkey
+	mov $buffend-buffer,%edx // 3rd param: max length
+	mov $__NR_read,%eax	// syscall: read
+	int $0x80
+	test %eax,%eax		// If %eax <= 0, then exit.
+	jbe 2f
+	addl %eax,%ecx		// buffer+%eax = bufftop
+	mov %ecx,bufftop
+	jmp _KEY
+
+2:	// error or out of input: exit
+	xor %ebx,%ebx
+	mov $__NR_exit,%eax	// syscall: exit
+	int $0x80
+
+	defcode "EMIT",4,,EMIT
+	pop %eax
+	call _EMIT
+	NEXT
+_EMIT:
+	mov $1,%ebx		// 1st param: stdout
+
+	// write needs the address of the byte to write
+	mov %al,(2f)
+	mov $2f,%ecx		// 2nd param: address
+
+	mov $1,%edx		// 3rd param: nbytes = 1
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+	ret
+
+	.bss
+2:	.space 1		// scratch used by EMIT
+
+	defcode "WORD",4,,WORD
+	call _WORD
+	push %ecx		// push length
+	push %edi		// push base address
+	NEXT
+
+_WORD:
+	/* Search for first non-blank character.  Also skip \ comments. */
+1:
+	call _KEY		// get next key, returned in %eax
+	cmpb $'\\',%al		// start of a comment?
+	je 3f			// if so, skip the comment
+	cmpb $' ',%al
+	jbe 1b			// if so, keep looking
+
+	/* Search for the end of the word, storing chars as we go. */
+	mov $5f,%edi		// pointer to return buffer
+2:
+	stosb			// add character to return buffer
+	call _KEY		// get next key, returned in %al
+	cmpb $' ',%al		// is blank?
+	ja 2b			// if not, keep looping
+
+	/* Return the word (well, the static buffer) and length. */
+	sub $5f,%edi
+	mov %edi,%ecx		// return length of the word
+	mov $5f,%edi		// return address of the word
+	ret
+
+	/* Code to skip \ comments to end of the current line. */
+3:
+	call _KEY
+	cmpb $'\n',%al		// end of line yet?
+	jne 3b
+	jmp 1b
+
+	.bss
+	// A static buffer where WORD returns.  Subsequent calls
+	// overwrite this buffer.  Maximum word length is 32 chars.
+5:	.space 32
+
+	defcode "EMITSTRING",10,,EMITSTRING
+	mov $1,%ebx		// 1st param: stdout
+	pop %ecx		// 2nd param: address of string
+	pop %edx		// 3rd param: length of string
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	defcode ".",1,,DOT
+	pop %eax		// Get the number to print into %eax
+	call _DOT		// Easier to do this recursively ...
+	NEXT
+_DOT:
+	mov $10,%ecx		// Base 10
+1:
+	cmp %ecx,%eax
+	jb 2f
+	xor %edx,%edx		// %edx:%eax / %ecx -> quotient %eax, remainder %edx
+	idivl %ecx
+	pushl %edx
+	call _DOT
+	popl %eax
+	jmp 1b
+2:
+	xor %ah,%ah
+	aam $10
+	cwde
+	addl $'0',%eax
+	call _EMIT
+	ret
+
+	// Parse a number from a string on the stack -- almost the opposite of . (DOT)
+	// Note that there is absolutely no error checking.  In particular the length of the
+	// string must be >= 1 bytes.
+	defcode "SNUMBER",7,,SNUMBER
+	pop %edi
+	pop %ecx
+	call _SNUMBER
+	push %eax
+	NEXT
+_SNUMBER:
+	xor %eax,%eax
+	xor %ebx,%ebx
+1:
+	imull $10,%eax		// %eax *= 10
+	movb (%edi),%bl
+	inc %edi
+	subb $'0',%bl		// ASCII -> digit
+	add %ebx,%eax
+	dec %ecx
+	jnz 1b
+	ret
+
+	defcode "FIND",4,,FIND
+	pop %edi		// %edi = address
+	pop %ecx		// %ecx = length
+	call _FIND
+	push %eax
+	NEXT
+
+_FIND:
+	push %esi		// Save %esi so we can use it in string comparison.
+
+	// Now we start searching backwards through the dictionary for this word.
+	mov var_LATEST,%edx	// LATEST points to name header of the latest word in the dictionary
+1:
+	test %edx,%edx		// NULL pointer?  (end of the linked list)
+	je 4f
+
+	// Compare the length expected and the length of the word.
+	// Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
+	// this won't pick the word (the length will appear to be wrong).
+	xor %eax,%eax
+	movb 4(%edx),%al	// %al = flags+length field
+	andb $(F_HIDDEN|0x1f),%al // %al = name length
+	cmpb %cl,%al		// Length is the same?
+	jne 2f
+
+	// Compare the strings in detail.
+	push %ecx		// Save the length
+	push %edi		// Save the address (repe cmpsb will move this pointer)
+	lea 5(%edx),%esi	// Dictionary string we are checking against.
+	repe cmpsb		// Compare the strings.
+	pop %edi
+	pop %ecx
+	jne 2f			// Not the same.
+
+	// The strings are the same - return the header pointer in %eax
+	pop %esi
+	mov %edx,%eax
+	ret
+
+2:
+	mov (%edx),%edx		// Move back through the link field to the previous word
+	jmp 1b			// .. and loop.
+
+4:	// Not found.
+	pop %esi
+	xor %eax,%eax		// Return zero to indicate not found.
+	ret
+
+	defcode ">CFA",4,,TCFA	// DEA -> Codeword address
+	pop %edi
+	call _TCFA
+	push %edi
+	NEXT
+_TCFA:
+	xor %eax,%eax
+	add $4,%edi		// Skip link pointer.
+	movb (%edi),%al		// Load flags+len into %al.
+	inc %edi		// Skip flags+len byte.
+	andb $0x1f,%al		// Just the length, not the flags.
+	add %eax,%edi		// Skip the name.
+	addl $3,%edi		// The codeword is 4-byte aligned.
+	andl $~3,%edi
+	ret
+
+	defcode "CHAR",4,,CHAR
+	call _WORD		// Returns %ecx = length, %edi = pointer to word.
+	xor %eax,%eax
+	movb (%edi),%al		// Get the first character of the word.
+	push %eax		// Push it onto the stack.
+	NEXT
+
+	defcode ":",1,,COLON
+
+	// Get the word and create a dictionary entry header for it.
+	call _WORD		// Returns %ecx = length, %edi = pointer to word.
+	mov %edi,%ebx		// %ebx = address of the word
+
+	movl var_HERE,%edi	// %edi is the address of the header
+	movl var_LATEST,%eax	// Get link pointer
+	stosl			// and store it in the header.
+
+	mov %cl,%al		// Get the length.
+	orb $F_HIDDEN,%al	// Set the HIDDEN flag on this entry.
+	stosb			// Store the length/flags byte.
+	push %esi
+	mov %ebx,%esi		// %esi = word
+	rep movsb		// Copy the word
+	pop %esi
+	addl $3,%edi		// Align to next 4 byte boundary.
+	andl $~3,%edi
+
+	movl $DOCOL,%eax	// The codeword for user-created words is always DOCOL (the interpreter)
+	stosl
+
+	// Header built, so now update LATEST and HERE.
+	// We'll be compiling words and putting them HERE.
+	movl var_HERE,%eax
+	movl %eax,var_LATEST
+	movl %edi,var_HERE
+
+	// And go into compile mode by setting STATE to 1.
+	movl $1,var_STATE
+	NEXT
+
+	defcode ",",1,,COMMA
+	pop %eax		// Code pointer to store.
+	call _COMMA
+	NEXT
+_COMMA:
+	movl var_HERE,%edi	// HERE
+	stosl			// Store it.
+	movl %edi,var_HERE	// Update HERE (incremented)
+	ret
+
+	defcode "HIDDEN",6,,HIDDEN
+	call _HIDDEN
+	NEXT
+_HIDDEN:
+	movl var_LATEST,%edi	// LATEST word.
+	addl $4,%edi		// Point to name/flags byte.
+	xorb $F_HIDDEN,(%edi)	// Toggle the HIDDEN bit.
+	ret
+
+	defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE
+	call _IMMEDIATE
+	NEXT
+_IMMEDIATE:
+	movl var_LATEST,%edi	// LATEST word.
+	addl $4,%edi		// Point to name/flags byte.
+	xorb $F_IMMED,(%edi)	// Toggle the IMMED bit.
+	ret
+
+	defcode ";",1,F_IMMED,SEMICOLON
+	movl $EXIT,%eax		// EXIT is the final codeword in compiled words.
+	call _COMMA		// Store it.
+	call _HIDDEN		// Toggle the HIDDEN flag (unhides the new word).
+	xor %eax,%eax		// Set STATE to 0 (back to execute mode).
+	movl %eax,var_STATE
+	NEXT
+
+/* This definiton of ' (TICK) is strictly cheating - it also only works in compiled code. */
+	defcode "'",1,,TICK
+	lodsl			// Get the address of the next word and skip it.
+	pushl %eax		// Push it on the stack.
+	NEXT
+
+/* This interpreter is pretty simple, but remember that in FORTH you can always override
+ * it later with a more powerful one!
+ */
+	defword "INTERPRETER",11,,INTERPRETER
+	.int INTERPRET,RDROP,INTERPRETER
+
+	defcode "INTERPRET",9,,INTERPRET
+	call _WORD		// Returns %ecx = length, %edi = pointer to word.
+
+	// Is it in the dictionary?
+	xor %eax,%eax
+	movl %eax,interpret_is_lit // Not a literal number (not yet anyway ...)
+	call _FIND		// Returns %eax = pointer to header or 0 if not found.
+	test %eax,%eax		// Found?
+	jz 1f
+
+	// In the dictionary.  Is it an IMMEDIATE codeword?
+	mov %eax,%edi		// %edi = dictionary entry
+	movb 4(%edi),%al	// Get name+flags.
+	push %ax		// Just save it for now.
+	call _TCFA		// Convert dictionary entry (in %edi) to codeword pointer.
+	pop %ax
+	andb $F_IMMED,%al	// Is IMMED flag set?
+	mov %edi,%eax
+	jnz 4f			// If IMMED, jump straight to executing.
+
+	jmp 2f
+
+1:	// Not in the dictionary (not a word) so assume it's a literal number.
+	incl interpret_is_lit
+	call _SNUMBER		// Returns the parsed number in %eax
+	mov %eax,%ebx
+	mov $LIT,%eax		// The word is LIT
+
+2:	// Are we compiling or executing?
+	movl var_STATE,%edx
+	test %edx,%edx
+	jz 4f			// Jump if executing.
+
+	// Compiling - just append the word to the current dictionary definition.
+	call _COMMA
+	mov interpret_is_lit,%ecx // Was it a literal?
+	test %ecx,%ecx
+	jz 3f
+	mov %ebx,%eax		// Yes, so LIT is followed by a number.
+	call _COMMA
+3:	NEXT
+
+4:	// Executing - run it!
+	mov interpret_is_lit,%ecx // Literal?
+	test %ecx,%ecx		// Literal?
+	jnz 5f
+
+	// Not a literal, execute it now.  This never returns, but the codeword will
+	// eventually call NEXT which will reenter the loop in INTERPRETER.
+	jmp *(%eax)
+
+5:	// Executing a literal, which means push it on the stack.
+	push %ebx
+	NEXT
+
+	.data
+	.align 4
+interpret_is_lit:
+	.int 0			// Flag used to record if reading a literal
+
+	// NB: SYSEXIT must be the last entry in the built-in dictionary.
+	defcode SYSEXIT,7,,SYSEXIT
+	pop %ebx
+	mov $__NR_exit,%eax
+	int $0x80
+
+/*----------------------------------------------------------------------
+ * Input buffer & initial input.
+ */
+	.data
+	.align 4096
+buffer:
+	// XXX gives 'Warning: unterminated string; newline inserted' messages which you can ignore
+	.ascii "\
+\\ Define some character constants
+: '\\n'   10 ;
+: 'SPACE' 32 ;
+: '\"'    34 ;
+: ':'     58 ;
+
+\\ CR prints a carriage return
+: CR '\\n' EMIT ;
+
+\\ SPACE prints a space
+: SPACE 'SPACE' EMIT ;
+
+\\ Primitive . (DOT) function doesn't follow with a blank, so redefine it to behave like FORTH.
+\\ Notice how we can trivially redefine existing functions.
+: . . SPACE ;
+
+\\ DUP, DROP are defined in assembly for speed, but this is how you might define them
+\\ in FORTH.  Notice use of the scratch variables _X and _Y.
+\\ : DUP _X ! _X @ _X @ ;
+\\ : DROP _X ! ;
+
+\\ [ and ] allow you to break into immediate mode while compiling a word.
+: [ IMMEDIATE		\\ define [ as an immediate word
+	0 STATE !	\\ go into immediate mode
+	;
+
+: ]
+	1 STATE !	\\ go back to compile mode
+	;
+
+\\ LITERAL takes whatever is on the stack and compiles LIT <foo>
+: LITERAL IMMEDIATE
+	' LIT ,		\\ compile LIT
+	,		\\ compile the literal itself (from the stack)
+	;
+
+\\ condition IF true-part THEN rest
+\\   compiles to:
+\\ condition 0BRANCH OFFSET true-part rest
+\\   where OFFSET is the offset of 'rest'
+\\ condition IF true-part ELSE false-part THEN
+\\   compiles to:
+\\ condition 0BRANCH OFFSET true-part BRANCH OFFSET2 false-part rest
+\\   where OFFSET if the offset of false-part and OFFSET2 is the offset of rest
+
+\\ IF is an IMMEDIATE word which compiles 0BRANCH followed by a dummy offset, and places
+\\ the address of the 0BRANCH on the stack.  Later when we see THEN, we pop that address
+\\ off the stack, calculate the offset, and back-fill the offset.
+: IF IMMEDIATE
+	' 0BRANCH ,	\\ compile 0BRANCH
+	HERE @		\\ save location of the offset on the stack
+	0 ,		\\ compile a dummy offset
+;
+
+: THEN IMMEDIATE
+	DUP
+	HERE @ SWAP -	\\ calculate the offset from the address saved on the stack
+	SWAP !		\\ store the offset in the back-filled location
+;
+
+: ELSE IMMEDIATE
+	' BRANCH ,	\\ definite branch to just over the false-part
+	HERE @		\\ save location of the offset on the stack
+	0 ,		\\ compile a dummy offset
+	SWAP		\\ now back-fill the original (IF) offset
+	DUP		\\ same as for THEN word above
+	HERE @ SWAP -
+	SWAP !
+;
+
+\\ BEGIN loop-part condition UNTIL
+\\   compiles to:
+\\ loop-part condition 0BRANCH OFFSET
+\\   where OFFSET points back to the loop-part
+\\ This is like do { loop-part } while (condition) in the C language
+: BEGIN IMMEDIATE
+	HERE @		\\ save location on the stack
+;
+
+: UNTIL IMMEDIATE
+	' 0BRANCH ,	\\ compile 0BRANCH
+	HERE @ -	\\ calculate the offset from the address saved on the stack
+	,		\\ compile the offset here
+;
+
+\\ BEGIN loop-part AGAIN
+\\   compiles to:
+\\ loop-part BRANCH OFFSET
+\\   where OFFSET points back to the loop-part
+\\ In other words, an infinite loop which can only be returned from with EXIT
+: AGAIN IMMEDIATE
+	' BRANCH ,	\\ compile BRANCH
+	HERE @ -	\\ calculate the offset back
+	,		\\ compile the offset here
+;
+
+\\ BEGIN condition WHILE loop-part REPEAT
+\\   compiles to:
+\\ condition 0BRANCH OFFSET2 loop-part BRANCH OFFSET
+\\   where OFFSET points back to condition (the beginning) and OFFSET2 points to after the whole piece of code
+\\ So this is like a while (condition) { loop-part } loop in the C language
+: WHILE IMMEDIATE
+	' 0BRANCH ,	\\ compile 0BRANCH
+	HERE @		\\ save location of the offset2 on the stack
+	0 ,		\\ compile a dummy offset2
+;
+
+: REPEAT IMMEDIATE
+	' BRANCH ,	\\ compile BRANCH
+	SWAP		\\ get the original offset (from BEGIN)
+	HERE @ - ,	\\ and compile it after BRANCH
+	DUP
+	HERE @ SWAP -	\\ calculate the offset2
+	SWAP !		\\ and back-fill it in the original location
+;
+
+\\ With the looping constructs, we can now write SPACES, which writes n spaces to stdout.
+: SPACES
+	BEGIN
+		SPACE	\\ print a space
+		1-	\\ until we count down to 0
+		DUP 0=
+	UNTIL
+;
+
+\\ .\" is the print string operator in FORTH.  Example: .\" Something to print\"
+\\ The space after the operator is the ordinary space required between words.
+\\ This is tricky to define because it has to do different things depending on whether
+\\ we are compiling or in immediate mode.  (Thus the word is marked IMMEDIATE so it can
+\\ detect this and do different things).
+\\ In immediate mode we just keep reading characters and printing them until we get to
+\\ the next double quote.
+\\ In compile mode we have the problem of where we're going to store the string (remember
+\\ that the input buffer where the string comes from may be overwritten by the time we
+\\ come round to running the function).  We store the string in the compiled function
+\\ like this:
+\\   LITSTRING, string length, string rounded up to 4 bytes, EMITSTRING, ...
+: .\" IMMEDIATE
+	STATE @		\\ compiling?
+	IF
+		' LITSTRING ,	\\ compile LITSTRING
+		HERE @		\\ save the address of the length word on the stack
+		0 ,		\\ dummy length - we don't know what it is yet
+		BEGIN
+			KEY 		\\ get next character of the string
+			DUP '\"' <>
+		WHILE
+			HERE @ !b	\\ store the character in the compiled image
+			HERE @ 1+ HERE ! \\ increment HERE pointer by 1 byte
+		REPEAT
+		DROP		\\ drop the double quote character at the end
+		DUP		\\ get the saved address of the length word
+		HERE @ SWAP -	\\ calculate the length
+		4 -		\\ subtract 4 (because we measured from the start of the length word)
+		SWAP !		\\ and back-fill the length location
+		HERE @		\\ round up to next multiple of 4 bytes for the remaining code
+		3 +
+		3 INVERT AND
+		HERE !
+		' EMITSTRING ,	\\ compile the final EMITSTRING
+	ELSE
+		\\ In immediate mode, just read characters and print them until we get
+		\\ to the ending double quote.  Much simpler!
+		BEGIN
+			KEY
+			DUP '\"' = IF EXIT THEN
+			EMIT
+		AGAIN
+	THEN
+;
+
+: TEST .\" hello, world..!\" CR ;
+
+
+\\ Finally print the welcome prompt.
+.\" OK \"
+"
+
+_initbufftop:
+	.align 4096
+buffend:
+
+currkey:
+	.int buffer
+bufftop:
+	.int _initbufftop
diff --git a/jonesforth.S.2 b/jonesforth.S.2
new file mode 100644
index 0000000..f7b8d17
--- /dev/null
+++ b/jonesforth.S.2
@@ -0,0 +1,222 @@
+/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
+ * By Richard W.M. Jones <rich@annexia.org>
+ *
+ * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
+ */
+
+#include <asm-i386/unistd.h>
+
+/* NEXT macro. */
+	.macro NEXT
+	lodsl
+	jmp *(%eax)
+	.endm
+
+/* ELF entry point. */
+	.text
+	.globl _start
+_start:
+	cld
+	mov $return_stack,%ebp	// Initialise the return stack.
+
+	mov $cold_start,%esi	// Initialise interpreter.
+	NEXT			// Run interpreter!
+
+	.section .rodata
+cold_start:			// High-level code without a codeword.
+	.int COLD
+
+/* DOCOL - the interpreter! */
+	.text
+DOCOL:
+	lea -4(%ebp),%ebp	// push %esi on to the return stack
+	movl %esi,(%ebp)
+
+	addl $4,%eax		// %eax points to codeword, so make
+	movl %eax,%esi		// %esi point to first data word
+	NEXT
+
+
+
+/*----------------------------------------------------------------------
+ * Fixed sized buffers for everything.
+ */
+	.bss
+
+/* FORTH return stack. */
+#define RETURN_STACK_SIZE 8192
+	.align 4096
+	.space RETURN_STACK_SIZE
+return_stack:
+
+/* Space for user-defined words. */
+#define USER_DEFS_SIZE 16384
+	.align 4096
+user_defs_start:
+	.space USER_DEFS_SIZE
+
+
+
+
+
+
+/*----------------------------------------------------------------------
+ * Built-in words defined the long way.
+ */
+#define F_IMMED 0x80
+#define F_HIDDEN 0x40
+
+	// Store the chain of links.
+	.set link,0
+
+	.macro defcode name, namelen, flags=0, label
+	.section .rodata
+N_\label :
+	.align 4
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.int link		// link
+	.set link,N_\label
+\label :
+	.int 1f			// codeword
+	.text
+1:				// assembler code follows
+	.endm
+
+	.macro defword name, namelen, flags=0, label
+	.section .rodata
+N_\label :
+	.align 4
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.int link		// link
+	.set link,N_\label
+\label :
+	.int DOCOL		// codeword - the interpreter
+	// list of word pointers follow
+	.endm
+
+	defword "COLD",4,,COLD
+	.int KEY,ECHO,RDROP,COLD
+
+	defcode "KEY",3,,KEY
+	mov (currkey),%ebx
+	cmp (bufftop),%ebx
+	jge 1f
+	xor %eax,%eax
+	mov (%ebx),%al
+	push %ax
+	inc %ebx
+	mov %ebx,(currkey)
+	NEXT
+1:
+	mov $0,%ebx		// out of input, exit (0)
+	mov $__NR_exit,%eax
+	int $0x80
+
+	defcode "ECHO",4,,ECHO
+	mov $1,%ebx		// 1st param: stdout
+
+	// write needs the address of the byte to write
+	pop %eax
+	mov %al,(_echo_tmp)
+	mov $_echo_tmp,%ecx	// 2nd param: address
+
+	mov $1,%edx		// 3rd param: nbytes = 1
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	.bss
+_echo_tmp: .space 1
+
+	defcode "RDROP",5,,RDROP
+	lea 4(%ebp),%ebp	// pop the return stack
+	NEXT
+
+#if 0
+	defcode "R>",2,,FROMR
+
+	defcode ">R",2,,TOR
+
+	defcode ":",1,,COLON
+	call nextword		// get next word, the procedure name
+	// The next word is returned in %ebx and has length %ecx bytes.
+
+	// Save the current value of VOCAB.
+	mov v_vocab,%eax
+	push %eax
+
+	// Change VOCAB to point to our new word's header (at LATEST).
+	mov v_latest,%edi
+	mov %edi,v_vocab
+
+	// We'll start by writing the word's header at LATEST; the header
+	// is just length byte, the word itself, link pointer.
+	mov %ecx,(%edi)		// Length byte
+	inc %edi
+	mov %ebx,%esi		// Copy the string.
+	rep movsb
+	// Round up to the next multiple of 4 so that the link pointer
+	// is aligned.
+	or $3,%edi
+	inc %edi
+	pop %eax		// Link pointer, points to old VOCAB.
+	mov %eax,(%edi)
+	add $4,%edi
+	// Write the codeword, which for user-defined words is always a
+	// pointer to the FORTH indirect threaded interpreter.
+	movl $DOCOL,(%edi)
+	add $4,%edi
+
+	// Finally, update LATEST.  As we go along compiling, we'll be
+	// writing compiled codewords to the LATEST pointer (and moving
+	// it along each time).
+	mov %edi,v_latest
+
+	movl $1,v_state		// go into compiling mode
+	ret
+
+	defcode ";",1,F_IMMED,SEMICOLON
+	// XXX
+
+#endif
+
+	defcode SYSEXIT,7,,SYSEXIT
+	pop %ebx
+	mov $__NR_exit,%eax
+	int $0x80
+
+/*----------------------------------------------------------------------
+ * Variables containing the interpreter's state.
+ */
+	.data
+
+	.align 4
+v_state:
+	.int 0		// 0 = immediate, 1 = compiling
+v_vocab:
+	.int N_SYSEXIT	// last word in the dictionary
+v_latest:
+	.int user_defs_start	// pointer to next space for user definition or current compiled def
+
+/*----------------------------------------------------------------------
+ * Input buffer & initial input.
+ */
+	.data
+	.align 4096
+buffer:
+	.ascii "TEST OF READING WORDS   1 2 3"
+
+_initbufftop:
+	.align 4096
+buffend:
+
+currkey:
+	.int buffer
+bufftop:
+	.int _initbufftop
diff --git a/jonesforth.S.3 b/jonesforth.S.3
new file mode 100644
index 0000000..29e31b0
--- /dev/null
+++ b/jonesforth.S.3
@@ -0,0 +1,250 @@
+/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
+ * By Richard W.M. Jones <rich@annexia.org>
+ *
+ * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
+ */
+
+#include <asm-i386/unistd.h>
+
+/* NEXT macro. */
+	.macro NEXT
+	lodsl
+	jmp *(%eax)
+	.endm
+
+/* ELF entry point. */
+	.text
+	.globl _start
+_start:
+	cld
+	mov $return_stack,%ebp	// Initialise the return stack.
+
+	mov $cold_start,%esi	// Initialise interpreter.
+	NEXT			// Run interpreter!
+
+	.section .rodata
+cold_start:			// High-level code without a codeword.
+	.int COLD
+
+/* DOCOL - the interpreter! */
+	.text
+DOCOL:
+	lea -4(%ebp),%ebp	// push %esi on to the return stack
+	movl %esi,(%ebp)
+
+	addl $4,%eax		// %eax points to codeword, so make
+	movl %eax,%esi		// %esi point to first data word
+	NEXT
+
+
+
+/*----------------------------------------------------------------------
+ * Fixed sized buffers for everything.
+ */
+	.bss
+
+/* FORTH return stack. */
+#define RETURN_STACK_SIZE 8192
+	.align 4096
+	.space RETURN_STACK_SIZE
+return_stack:
+
+/* Space for user-defined words. */
+#define USER_DEFS_SIZE 16384
+	.align 4096
+user_defs_start:
+	.space USER_DEFS_SIZE
+
+
+
+
+
+
+/*----------------------------------------------------------------------
+ * Built-in words defined the long way.
+ */
+#define F_IMMED 0x80
+#define F_HIDDEN 0x40
+
+	// Store the chain of links.
+	.set link,0
+
+	.macro defcode name, namelen, flags=0, label
+	.section .rodata
+N_\label :
+	.align 4
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.int link		// link
+	.set link,N_\label
+\label :
+	.int 1f			// codeword
+	.text
+1:				// assembler code follows
+	.endm
+
+	.macro defword name, namelen, flags=0, label
+	.section .rodata
+N_\label :
+	.align 4
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.int link		// link
+	.set link,N_\label
+\label :
+	.int DOCOL		// codeword - the interpreter
+	// list of word pointers follow
+	.endm
+
+	defword "COLD",4,,COLD
+	.int KEY,KEY,SWAP,ECHO,ECHO,RDROP,COLD
+
+	defcode "DUP",3,,DUP
+	pop %eax		// duplicate top of stack
+	push %eax
+	push %eax
+	NEXT
+
+	defcode "DROP",3,,DROP
+	pop %eax		// drop top of stack
+	NEXT
+
+	defcode "SWAP",4,,SWAP
+	pop %eax		// swap top of stack
+	pop %ebx
+	push %eax
+	push %ebx
+	NEXT
+
+	defcode "KEY",3,,KEY
+	mov (currkey),%ebx
+	cmp (bufftop),%ebx
+	jge 1f
+	xor %eax,%eax
+	mov (%ebx),%al
+	push %eax
+	inc %ebx
+	mov %ebx,(currkey)
+	NEXT
+1:
+	mov $0,%ebx		// out of input, exit (0)
+	mov $__NR_exit,%eax
+	int $0x80
+
+	defcode "ECHO",4,,ECHO
+	mov $1,%ebx		// 1st param: stdout
+
+	// write needs the address of the byte to write
+	pop %eax
+	mov %al,(2f)
+	mov $2f,%ecx		// 2nd param: address
+
+	mov $1,%edx		// 3rd param: nbytes = 1
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	.bss
+2:	.space 1		// scratch used by ECHO
+
+	defcode "RDROP",5,,RDROP
+	lea 4(%ebp),%ebp	// pop the return stack
+	NEXT
+
+#if 0
+	defcode "R>",2,,FROMR
+
+	defcode ">R",2,,TOR
+
+	defcode ":",1,,COLON
+	call nextword		// get next word, the procedure name
+	// The next word is returned in %ebx and has length %ecx bytes.
+
+	// Save the current value of VOCAB.
+	mov v_vocab,%eax
+	push %eax
+
+	// Change VOCAB to point to our new word's header (at LATEST).
+	mov v_latest,%edi
+	mov %edi,v_vocab
+
+	// We'll start by writing the word's header at LATEST; the header
+	// is just length byte, the word itself, link pointer.
+	mov %ecx,(%edi)		// Length byte
+	inc %edi
+	mov %ebx,%esi		// Copy the string.
+	rep movsb
+	// Round up to the next multiple of 4 so that the link pointer
+	// is aligned.
+	or $3,%edi
+	inc %edi
+	pop %eax		// Link pointer, points to old VOCAB.
+	mov %eax,(%edi)
+	add $4,%edi
+	// Write the codeword, which for user-defined words is always a
+	// pointer to the FORTH indirect threaded interpreter.
+	movl $DOCOL,(%edi)
+	add $4,%edi
+
+	// Finally, update LATEST.  As we go along compiling, we'll be
+	// writing compiled codewords to the LATEST pointer (and moving
+	// it along each time).
+	mov %edi,v_latest
+
+	movl $1,v_state		// go into compiling mode
+	ret
+
+	defcode ";",1,F_IMMED,SEMICOLON
+	// XXX
+
+#endif
+
+	defcode SYSEXIT,7,,SYSEXIT
+	pop %ebx
+	mov $__NR_exit,%eax
+	int $0x80
+
+/*----------------------------------------------------------------------
+ * Variables containing the interpreter's state.
+ */
+	.data
+
+	.align 4
+v_state:
+	.int 0		// 0 = immediate, 1 = compiling
+v_vocab:
+	.int N_SYSEXIT	// last word in the dictionary
+v_latest:
+	.int user_defs_start	// pointer to next space for user definition or current compiled def
+
+/*----------------------------------------------------------------------
+ * Input buffer & initial input.
+ */
+	.data
+	.align 4096
+buffer:
+	.ascii "		\n\
+\\ Define some constants	\n\
+: '\\n'   10 ;			\n\
+: ')'     41 ;			\n\
+: 'space' 32 ;			\n\
+: '\"'    34 ;			\n\
+: '-'     45 ;			\n\
+: '0'     48 ;			\n\
+				\n\
+\\ CR command			\n\
+: CR '\\n' ECHO ;		\n\
+"
+
+_initbufftop:
+	.align 4096
+buffend:
+
+currkey:
+	.int buffer
+bufftop:
+	.int _initbufftop
diff --git a/jonesforth.S.4 b/jonesforth.S.4
new file mode 100644
index 0000000..839d52c
--- /dev/null
+++ b/jonesforth.S.4
@@ -0,0 +1,310 @@
+/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
+ * By Richard W.M. Jones <rich@annexia.org>
+ *
+ * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
+ */
+
+#include <asm-i386/unistd.h>
+
+/* NEXT macro. */
+	.macro NEXT
+	lodsl
+	jmp *(%eax)
+	.endm
+
+/* Macros to deal with the return stack. */
+	.macro PUSHRSP reg
+	.endm
+
+	.macro POPRSP reg
+	.endm
+
+/* ELF entry point. */
+	.text
+	.globl _start
+_start:
+	cld
+	mov $return_stack,%ebp	// Initialise the return stack.
+
+	mov $cold_start,%esi	// Initialise interpreter.
+	NEXT			// Run interpreter!
+
+	.section .rodata
+cold_start:			// High-level code without a codeword.
+	.int COLD
+
+/* DOCOL - the interpreter! */
+	.text
+	.align 4
+DOCOL:
+	lea -4(%ebp),%ebp	// push %esi on to the return stack
+	movl %esi,(%ebp)
+
+	addl $4,%eax		// %eax points to codeword, so make
+	movl %eax,%esi		// %esi point to first data word
+	NEXT
+
+/*----------------------------------------------------------------------
+ * Fixed sized buffers for everything.
+ */
+	.bss
+
+/* FORTH return stack. */
+#define RETURN_STACK_SIZE 8192
+	.align 4096
+	.space RETURN_STACK_SIZE
+return_stack:
+
+/* Space for user-defined words. */
+#define USER_DEFS_SIZE 16384
+	.align 4096
+user_defs_start:
+	.space USER_DEFS_SIZE
+
+
+
+
+
+
+/*----------------------------------------------------------------------
+ * Built-in words defined the long way.
+ */
+#define F_IMMED 0x80
+#define F_HIDDEN 0x40
+
+	// Store the chain of links.
+	.set link,0
+
+	.macro defcode name, namelen, flags=0, label
+	.section .rodata
+N_\label :
+	.align 4
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.int link		// link
+	.set link,N_\label
+\label :
+	.int 1f			// codeword
+	.text
+	.align 4
+1:				// assembler code follows
+	.endm
+
+	.macro defword name, namelen, flags=0, label
+	.section .rodata
+N_\label :
+	.align 4
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.int link		// link
+	.set link,N_\label
+\label :
+	.int DOCOL		// codeword - the interpreter
+	// list of word pointers follow
+	.endm
+
+	/* COLD must not return (ie. must not call EXIT). */
+	defword "COLD",4,,COLD
+	.int KEY,ECHO,RDROP,COLD
+
+	defcode "EXIT",4,,EXIT
+	movl (%ebp),%esi	// pop return stack into %esi
+	lea 4(%ebp),%ebp
+	NEXT
+
+	defcode "!",1,,STORE
+	pop %ebx		// address to store at
+	pop %eax		// data to store there
+	mov %eax,(%ebx)		// store it
+	NEXT
+
+	defcode "@",1,,FETCH
+	pop %ebx		// address to fetch
+	mov (%ebx),%eax		// fetch it
+	push %eax		// push value onto stack
+	NEXT
+
+	defcode "STATE",5,,STATE
+	push $v_state
+	NEXT
+
+	defcode "HERE",4,,HERE
+	push $v_here
+	NEXT
+
+	defcode "LATEST",6,,LATEST
+	push $v_latest
+	NEXT
+
+	defcode ">R",2,,TOR
+	pop %eax		// pop parameter stack into %eax
+	lea -4(%ebp),%ebp	// push %eax on to return stack
+	movl %eax,(%ebp)
+	NEXT
+
+	defcode "R>",2,,FROMR
+	mov (%ebp),%eax		// pop top of return stack to %eax
+	lea 4(%ebp),%ebp
+	push %eax		// and push on to parameter stack
+	NEXT
+
+#if 0 /* This definition is wrong. */
+	defcode "R",1,,R
+	mov %(ebp),%eax		// copy (don't pop) top of return stack to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+#endif
+
+	defcode "RSP@",4,,RSPFETCH
+	push %ebp
+	NEXT
+
+	defcode "RSP!",4,,RSPSTORE
+	pop %ebp
+	NEXT
+
+	defcode "RDROP",5,,RDROP
+	lea 4(%ebp),%ebp	// pop the return stack
+	NEXT
+
+	defcode "KEY",3,,KEY
+	mov (currkey),%ebx
+	cmp (bufftop),%ebx
+	jge 1f
+	xor %eax,%eax
+	mov (%ebx),%al
+	push %eax
+	inc %ebx
+	mov %ebx,(currkey)
+	NEXT
+1:
+	mov $0,%ebx		// out of input, exit (0)
+	mov $__NR_exit,%eax
+	int $0x80
+
+	defcode "ECHO",4,,ECHO
+	mov $1,%ebx		// 1st param: stdout
+
+	// write needs the address of the byte to write
+	pop %eax
+	mov %al,(2f)
+	mov $2f,%ecx		// 2nd param: address
+
+	mov $1,%edx		// 3rd param: nbytes = 1
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	.bss
+2:	.space 1		// scratch used by ECHO
+
+	defcode "DUP",3,,DUP
+	pop %eax		// duplicate top of stack
+	push %eax
+	push %eax
+	NEXT
+
+	defcode "DROP",3,,DROP
+	pop %eax		// drop top of stack
+	NEXT
+
+	defcode "SWAP",4,,SWAP
+	pop %eax		// swap top of stack
+	pop %ebx
+	push %eax
+	push %ebx
+	NEXT
+
+#if 0
+	defcode ":",1,,COLON
+	call nextword		// get next word, the procedure name
+	// The next word is returned in %ebx and has length %ecx bytes.
+
+	// Save the current value of VOCAB.
+	mov v_vocab,%eax
+	push %eax
+
+	// Change VOCAB to point to our new word's header (at LATEST).
+	mov v_latest,%edi
+	mov %edi,v_vocab
+
+	// We'll start by writing the word's header at LATEST; the header
+	// is just length byte, the word itself, link pointer.
+	mov %ecx,(%edi)		// Length byte
+	inc %edi
+	mov %ebx,%esi		// Copy the string.
+	rep movsb
+	// Round up to the next multiple of 4 so that the link pointer
+	// is aligned.
+	or $3,%edi
+	inc %edi
+	pop %eax		// Link pointer, points to old VOCAB.
+	mov %eax,(%edi)
+	add $4,%edi
+	// Write the codeword, which for user-defined words is always a
+	// pointer to the FORTH indirect threaded interpreter.
+	movl $DOCOL,(%edi)
+	add $4,%edi
+
+	// Finally, update LATEST.  As we go along compiling, we'll be
+	// writing compiled codewords to the LATEST pointer (and moving
+	// it along each time).
+	mov %edi,v_latest
+
+	movl $1,v_state		// go into compiling mode
+	ret
+
+	defcode ";",1,F_IMMED,SEMICOLON
+	// XXX
+
+#endif
+
+	defcode SYSEXIT,7,,SYSEXIT
+	pop %ebx
+	mov $__NR_exit,%eax
+	int $0x80
+
+/*----------------------------------------------------------------------
+ * Variables containing the interpreter's state.
+ */
+	.data
+
+	.align 4
+v_state:
+	.int 0		// 0 = immediate, non-zero = compiling
+v_latest:
+	.int N_SYSEXIT	// last word in the dictionary
+v_here:
+	.int user_defs_start	// pointer to next space for user definition or current compiled def
+
+/*----------------------------------------------------------------------
+ * Input buffer & initial input.
+ */
+	.data
+	.align 4096
+buffer:
+	.ascii "		\n\
+\\ Define some constants	\n\
+: '\\n'   10 ;			\n\
+: ')'     41 ;			\n\
+: 'space' 32 ;			\n\
+: '\"'    34 ;			\n\
+: '-'     45 ;			\n\
+: '0'     48 ;			\n\
+				\n\
+\\ CR command			\n\
+: CR '\\n' ECHO ;		\n\
+"
+
+_initbufftop:
+	.align 4096
+buffend:
+
+currkey:
+	.int buffer
+bufftop:
+	.int _initbufftop
diff --git a/jonesforth.S.5 b/jonesforth.S.5
new file mode 100644
index 0000000..c5c61d9
--- /dev/null
+++ b/jonesforth.S.5
@@ -0,0 +1,370 @@
+/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
+ * By Richard W.M. Jones <rich@annexia.org>
+ *
+ * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
+ */
+
+#include <asm-i386/unistd.h>
+
+/* NEXT macro. */
+	.macro NEXT
+	lodsl
+	jmp *(%eax)
+	.endm
+
+/* Macros to deal with the return stack. */
+	.macro PUSHRSP reg
+	lea -4(%ebp),%ebp	// push reg on to return stack
+	movl \reg,(%ebp)
+	.endm
+
+	.macro POPRSP reg
+	mov (%ebp),\reg		// pop top of return stack to reg
+	lea 4(%ebp),%ebp
+	.endm
+
+/* ELF entry point. */
+	.text
+	.globl _start
+_start:
+	cld
+	mov $return_stack,%ebp	// Initialise the return stack.
+
+	mov $cold_start,%esi	// Initialise interpreter.
+	NEXT			// Run interpreter!
+
+	.section .rodata
+cold_start:			// High-level code without a codeword.
+	.int COLD
+
+/* DOCOL - the interpreter! */
+	.text
+	.align 4
+DOCOL:
+	PUSHRSP %esi		// push %esi on to the return stack
+	addl $4,%eax		// %eax points to codeword, so make
+	movl %eax,%esi		// %esi point to first data word
+	NEXT
+
+/*----------------------------------------------------------------------
+ * Fixed sized buffers for everything.
+ */
+	.bss
+
+/* FORTH return stack. */
+#define RETURN_STACK_SIZE 8192
+	.align 4096
+	.space RETURN_STACK_SIZE
+return_stack:
+
+/* Space for user-defined words. */
+#define USER_DEFS_SIZE 16384
+	.align 4096
+user_defs_start:
+	.space USER_DEFS_SIZE
+
+
+
+
+
+
+/*----------------------------------------------------------------------
+ * Built-in words defined the long way.
+ */
+#define F_IMMED 0x80
+#define F_HIDDEN 0x40
+
+	// Store the chain of links.
+	.set link,0
+
+	.macro defcode name, namelen, flags=0, label
+	.section .rodata
+	.globl name_\label
+name_\label :
+	.align 4
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.int link		// link
+	.set link,name_\label
+	.globl \label
+\label :
+	.int code_\label	// codeword
+	.text
+	.align 4
+	.globl code_\label
+code_\label :			// assembler code follows
+	.endm
+
+	.macro defword name, namelen, flags=0, label
+	.section .rodata
+	.globl name_\label
+name_\label :
+	.align 4
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.int link		// link
+	.set link,name_\label
+	.globl \label
+\label :
+	.int DOCOL		// codeword - the interpreter
+	// list of word pointers follow
+	.endm
+
+	/* COLD must not return (ie. must not call EXIT). */
+	defword "COLD",4,,COLD
+	.int LIT,'<',ECHO,WORD,ECHOWORD,LIT,'>',ECHO,LIT,10,ECHO,RDROP,COLD
+
+	defcode "EXIT",4,,EXIT
+	POPRSP %esi		// pop return stack into %esi
+	NEXT
+
+	defcode "LIT",3,,LIT
+	// %esi points to the next command, but in this case it points to the next
+	// literal 32 bit integer.  Get that literal into %eax and increment %esi.
+	// On x86, it's a convenient single byte instruction!  (cf. NEXT macro)
+	lodsl
+	push %eax		// push %eax on to stack
+	NEXT
+
+	defcode "!",1,,STORE
+	pop %ebx		// address to store at
+	pop %eax		// data to store there
+	mov %eax,(%ebx)		// store it
+	NEXT
+
+	defcode "@",1,,FETCH
+	pop %ebx		// address to fetch
+	mov (%ebx),%eax		// fetch it
+	push %eax		// push value onto stack
+	NEXT
+
+	defcode "STATE",5,,STATE
+	push $v_state
+	NEXT
+
+	defcode "HERE",4,,HERE
+	push $v_here
+	NEXT
+
+	defcode "LATEST",6,,LATEST
+	push $v_latest
+	NEXT
+
+	defcode ">R",2,,TOR
+	pop %eax		// pop parameter stack into %eax
+	PUSHRSP %eax		// push it on to the return stack
+	NEXT
+
+	defcode "R>",2,,FROMR
+	POPRSP %eax		// pop return stack on to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+
+#if 0 /* This definition is wrong. */
+	defcode "R",1,,R
+	mov %(ebp),%eax		// copy (don't pop) top of return stack to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+#endif
+
+	defcode "RSP@",4,,RSPFETCH
+	push %ebp
+	NEXT
+
+	defcode "RSP!",4,,RSPSTORE
+	pop %ebp
+	NEXT
+
+	defcode "RDROP",5,,RDROP
+	lea 4(%ebp),%ebp	// pop return stack and throw away
+	NEXT
+
+	defcode "KEY",3,,KEY
+	call _KEY
+	push %eax		// push return value on stack
+	NEXT
+_KEY:
+	mov (currkey),%ebx
+	cmp (bufftop),%ebx
+	jge 1f
+	xor %eax,%eax
+	mov (%ebx),%al
+	inc %ebx
+	mov %ebx,(currkey)
+	ret
+1:
+	mov $0,%ebx		// out of input, exit (0)
+	mov $__NR_exit,%eax
+	int $0x80
+
+	defcode "WORD",4,,WORD
+	/* Search for first non-blank character.  Also skip \ comments. */
+1:
+	call _KEY		// get next key, returned in %eax
+	cmpb $'\\',%al		// start of a comment?
+	je 3f			// if so, skip the comment
+	cmpb $' ',%al
+	jbe 1b			// if so, keep looking
+
+	/* Search for the end of the word, storing chars as we go. */
+	mov $5f,%edi		// pointer to return buffer
+2:
+	stosb			// add character to return buffer
+	call _KEY		// get next key, returned in %al
+	cmpb $' ',%al		// is blank?
+	ja 2b			// if not, keep looping
+
+	/* Return the word (well, the static buffer) and length. */
+	sub $5f,%edi
+	push %edi		// push length
+	push $5f		// push base address
+	NEXT
+
+	/* Code to skip \ comments to end of the current line. */
+3:
+	call _KEY
+	cmpb $'\n',%al		// end of line yet?
+	jne 3b
+	jmp 1b
+
+	.bss
+	// A static buffer where WORD returns.  Subsequent calls
+	// overwrite this buffer.  Maximum word length is 32 chars.
+5:	.space 32
+
+	defcode "ECHO",4,,ECHO
+	mov $1,%ebx		// 1st param: stdout
+
+	// write needs the address of the byte to write
+	pop %eax
+	mov %al,(2f)
+	mov $2f,%ecx		// 2nd param: address
+
+	mov $1,%edx		// 3rd param: nbytes = 1
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	.bss
+2:	.space 1		// scratch used by ECHO
+
+	defcode "ECHOWORD",8,,ECHOWORD
+	mov $1,%ebx		// 1st param: stdout
+	pop %ecx		// 2nd param: address of string
+	pop %edx		// 3rd param: length of string
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	defcode "DUP",3,,DUP
+	pop %eax		// duplicate top of stack
+	push %eax
+	push %eax
+	NEXT
+
+	defcode "DROP",3,,DROP
+	pop %eax		// drop top of stack
+	NEXT
+
+	defcode "SWAP",4,,SWAP
+	pop %eax		// swap top of stack
+	pop %ebx
+	push %eax
+	push %ebx
+	NEXT
+
+#if 0
+	defcode ":",1,,COLON
+	call nextword		// get next word, the procedure name
+	// The next word is returned in %ebx and has length %ecx bytes.
+
+	// Save the current value of VOCAB.
+	mov v_vocab,%eax
+	push %eax
+
+	// Change VOCAB to point to our new word's header (at LATEST).
+	mov v_latest,%edi
+	mov %edi,v_vocab
+
+	// We'll start by writing the word's header at LATEST; the header
+	// is just length byte, the word itself, link pointer.
+	mov %ecx,(%edi)		// Length byte
+	inc %edi
+	mov %ebx,%esi		// Copy the string.
+	rep movsb
+	// Round up to the next multiple of 4 so that the link pointer
+	// is aligned.
+	or $3,%edi
+	inc %edi
+	pop %eax		// Link pointer, points to old VOCAB.
+	mov %eax,(%edi)
+	add $4,%edi
+	// Write the codeword, which for user-defined words is always a
+	// pointer to the FORTH indirect threaded interpreter.
+	movl $DOCOL,(%edi)
+	add $4,%edi
+
+	// Finally, update LATEST.  As we go along compiling, we'll be
+	// writing compiled codewords to the LATEST pointer (and moving
+	// it along each time).
+	mov %edi,v_latest
+
+	movl $1,v_state		// go into compiling mode
+	ret
+
+	defcode ";",1,F_IMMED,SEMICOLON
+	// XXX
+
+#endif
+
+	defcode SYSEXIT,7,,SYSEXIT
+	pop %ebx
+	mov $__NR_exit,%eax
+	int $0x80
+
+/*----------------------------------------------------------------------
+ * Variables containing the interpreter's state.
+ */
+	.data
+
+	.align 4
+v_state:
+	.int 0			// 0 = immediate, non-zero = compiling
+v_latest:
+	.int name_SYSEXIT	// last word in the dictionary
+v_here:
+	.int user_defs_start	// pointer to next space for user definition or current compiled def
+
+/*----------------------------------------------------------------------
+ * Input buffer & initial input.
+ */
+	.data
+	.align 4096
+buffer:
+	.ascii "		\n\
+\\ Define some constants	\n\
+: '\\n'   10 ;			\n\
+: ')'     41 ;			\n\
+: 'space' 32 ;			\n\
+: '\"'    34 ;			\n\
+: '-'     45 ;			\n\
+: '0'     48 ;			\n\
+				\n\
+\\ CR command			\n\
+: CR '\\n' ECHO ;		\n\
+"
+
+_initbufftop:
+	.align 4096
+buffend:
+
+currkey:
+	.int buffer
+bufftop:
+	.int _initbufftop
diff --git a/jonesforth.S.6 b/jonesforth.S.6
new file mode 100644
index 0000000..d3e02fc
--- /dev/null
+++ b/jonesforth.S.6
@@ -0,0 +1,435 @@
+/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
+ * By Richard W.M. Jones <rich@annexia.org>
+ *
+ * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
+ */
+
+#include <asm-i386/unistd.h>
+
+/* NOTES----------------------------------------------------------------------
+
+Need to say something about $ before constants.
+
+And about je/jne/ja/jb/jbe/etc
+
+
+
+
+*/
+
+/* NEXT macro. */
+	.macro NEXT
+	lodsl
+	jmp *(%eax)
+	.endm
+
+/* Macros to deal with the return stack. */
+	.macro PUSHRSP reg
+	lea -4(%ebp),%ebp	// push reg on to return stack
+	movl \reg,(%ebp)
+	.endm
+
+	.macro POPRSP reg
+	mov (%ebp),\reg		// pop top of return stack to reg
+	lea 4(%ebp),%ebp
+	.endm
+
+/* ELF entry point. */
+	.text
+	.globl _start
+_start:
+	cld
+	mov $return_stack,%ebp	// Initialise the return stack.
+
+	mov $cold_start,%esi	// Initialise interpreter.
+	NEXT			// Run interpreter!
+
+	.section .rodata
+cold_start:			// High-level code without a codeword.
+	.int COLD
+
+/* DOCOL - the interpreter! */
+	.text
+	.align 4
+DOCOL:
+	PUSHRSP %esi		// push %esi on to the return stack
+	addl $4,%eax		// %eax points to codeword, so make
+	movl %eax,%esi		// %esi point to first data word
+	NEXT
+
+/*----------------------------------------------------------------------
+ * Fixed sized buffers for everything.
+ */
+	.bss
+
+/* FORTH return stack. */
+#define RETURN_STACK_SIZE 8192
+	.align 4096
+	.space RETURN_STACK_SIZE
+return_stack:
+
+/* Space for user-defined words. */
+#define USER_DEFS_SIZE 16384
+	.align 4096
+user_defs_start:
+	.space USER_DEFS_SIZE
+
+
+
+
+
+
+/*----------------------------------------------------------------------
+ * Built-in words defined the long way.
+ */
+#define F_IMMED 0x80
+#define F_HIDDEN 0x40
+
+	// Store the chain of links.
+	.set link,0
+
+	.macro defcode name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int code_\label	// codeword
+	.text
+	.align 4
+	.globl code_\label
+code_\label :			// assembler code follows
+	.endm
+
+	.macro defword name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int DOCOL		// codeword - the interpreter
+	// list of word pointers follow
+	.endm
+
+	/* Some easy ones .... */
+	defcode "DUP",3,,DUP
+	pop %eax		// duplicate top of stack
+	push %eax
+	push %eax
+	NEXT
+
+	defcode "DROP",3,,DROP
+	pop %eax		// drop top of stack
+	NEXT
+
+	defcode "SWAP",4,,SWAP
+	pop %eax		// swap top of stack
+	pop %ebx
+	push %eax
+	push %ebx
+	NEXT
+
+	/* COLD must not return (ie. must not call EXIT). */
+	defword "COLD",4,,COLD
+	.int LIT,'<',ECHO,WORD,ECHOWORD,LIT,'>',ECHO,LIT,10,ECHO,RDROP,COLD
+
+/*
+This prints out each word in the input as <word>\n
+	defword "COLD",4,,COLD
+	.int LIT,'<',ECHO,WORD,ECHOWORD,LIT,'>',ECHO,LIT,10,ECHO,RDROP,COLD
+*/
+
+	defcode "EXIT",4,,EXIT
+	POPRSP %esi		// pop return stack into %esi
+	NEXT
+
+	defcode "LIT",3,,LIT
+	// %esi points to the next command, but in this case it points to the next
+	// literal 32 bit integer.  Get that literal into %eax and increment %esi.
+	// On x86, it's a convenient single byte instruction!  (cf. NEXT macro)
+	lodsl
+	push %eax		// push %eax on to stack
+	NEXT
+
+	defcode "!",1,,STORE
+	pop %ebx		// address to store at
+	pop %eax		// data to store there
+	mov %eax,(%ebx)		// store it
+	NEXT
+
+	defcode "@",1,,FETCH
+	pop %ebx		// address to fetch
+	mov (%ebx),%eax		// fetch it
+	push %eax		// push value onto stack
+	NEXT
+
+	defcode "STATE",5,,STATE
+	push $v_state
+	NEXT
+
+	defcode "HERE",4,,HERE
+	push $v_here
+	NEXT
+
+	defcode "LATEST",6,,LATEST
+	push $v_latest
+	NEXT
+
+	defcode ">R",2,,TOR
+	pop %eax		// pop parameter stack into %eax
+	PUSHRSP %eax		// push it on to the return stack
+	NEXT
+
+	defcode "R>",2,,FROMR
+	POPRSP %eax		// pop return stack on to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+
+#if 0 /* This definition is wrong. */
+	defcode "R",1,,R
+	mov %(ebp),%eax		// copy (don't pop) top of return stack to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+#endif
+
+	defcode "RSP@",4,,RSPFETCH
+	push %ebp
+	NEXT
+
+	defcode "RSP!",4,,RSPSTORE
+	pop %ebp
+	NEXT
+
+	defcode "RDROP",5,,RDROP
+	lea 4(%ebp),%ebp	// pop return stack and throw away
+	NEXT
+
+	defcode "KEY",3,,KEY
+	call _KEY
+	push %eax		// push return value on stack
+	NEXT
+_KEY:
+	mov (currkey),%ebx
+	cmp (bufftop),%ebx
+	jge 1f
+	xor %eax,%eax
+	mov (%ebx),%al
+	inc %ebx
+	mov %ebx,(currkey)
+	ret
+1:
+	mov $0,%ebx		// out of input, exit (0)
+	mov $__NR_exit,%eax
+	int $0x80
+
+	defcode "ECHO",4,,ECHO
+	mov $1,%ebx		// 1st param: stdout
+
+	// write needs the address of the byte to write
+	pop %eax
+	mov %al,(2f)
+	mov $2f,%ecx		// 2nd param: address
+
+	mov $1,%edx		// 3rd param: nbytes = 1
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	.bss
+2:	.space 1		// scratch used by ECHO
+
+	defcode "WORD",4,,WORD
+	call _WORD
+	push %eax		// push length
+	push %ebx		// push base address
+	NEXT
+
+_WORD:
+	/* Search for first non-blank character.  Also skip \ comments. */
+1:
+	call _KEY		// get next key, returned in %eax
+	cmpb $'\\',%al		// start of a comment?
+	je 3f			// if so, skip the comment
+	cmpb $' ',%al
+	jbe 1b			// if so, keep looking
+
+	/* Search for the end of the word, storing chars as we go. */
+	mov $5f,%edi		// pointer to return buffer
+2:
+	stosb			// add character to return buffer
+	call _KEY		// get next key, returned in %al
+	cmpb $' ',%al		// is blank?
+	ja 2b			// if not, keep looping
+
+	/* Return the word (well, the static buffer) and length. */
+	sub $5f,%edi
+	mov %edi,%eax		// return length of the word
+	mov $5f,%ebx		// return address of the word
+	ret
+
+	/* Code to skip \ comments to end of the current line. */
+3:
+	call _KEY
+	cmpb $'\n',%al		// end of line yet?
+	jne 3b
+	jmp 1b
+
+	.bss
+	// A static buffer where WORD returns.  Subsequent calls
+	// overwrite this buffer.  Maximum word length is 32 chars.
+5:	.space 32
+
+	defcode "ECHOWORD",8,,ECHOWORD
+	mov $1,%ebx		// 1st param: stdout
+	pop %ecx		// 2nd param: address of string
+	pop %edx		// 3rd param: length of string
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	defcode "'",1,,TICK
+	call _WORD		// returns %ebx = address of next word, %eax = length in bytes
+	mov %ebx,%edi		// %edi = address
+	mov %eax,%ecx		// %ecx = length
+
+	push %esi		// Save %esi so we can use it in string comparison.
+
+	// Now we start searching backwards through the dictionary for this word.
+	mov v_latest,%edx	// LATEST points to name header of the latest word in the dictionary
+1:
+	cmp %edx,%edx		// NULL pointer?  (end of the linked list)
+	je 4f
+
+	xor %eax,%eax
+	movb 4(%edx),%al	// %al = flags+length field
+	andb $0x1f,%al		// %al = name length
+	cmpb %cl,%al		// Length is the same?
+	jne 2f
+
+	// Compare the strings in detail.
+	push %ecx		// Save the length
+	lea 5(%edx),%esi	// Dictionary string we are checking against.
+	repe cmpsb		// Compare the strings.
+	pop %ecx
+	jne 2f			// Not the same.
+
+	// The strings are the same - return the header pointer on the stack.
+	pop %esi
+	push %edx
+	NEXT
+
+2:
+	mov (%edx),%edx		// Move back through the link field to the previous word
+	jmp 1b			// .. and loop.
+
+4:	// Not found.
+	pop %esi
+	xor %eax,%eax		// Push zero on to the stack to indicate not found.
+	push %eax
+	NEXT
+
+#if 0
+	defcode ":",1,,COLON
+	call nextword		// get next word, the procedure name
+	// The next word is returned in %ebx and has length %ecx bytes.
+
+	// Save the current value of VOCAB.
+	mov v_vocab,%eax
+	push %eax
+
+	// Change VOCAB to point to our new word's header (at LATEST).
+	mov v_latest,%edi
+	mov %edi,v_vocab
+
+	// We'll start by writing the word's header at LATEST; the header
+	// is just length byte, the word itself, link pointer.
+	mov %ecx,(%edi)		// Length byte
+	inc %edi
+	mov %ebx,%esi		// Copy the string.
+	rep movsb
+	// Round up to the next multiple of 4 so that the link pointer
+	// is aligned.
+	or $3,%edi
+	inc %edi
+	pop %eax		// Link pointer, points to old VOCAB.
+	mov %eax,(%edi)
+	add $4,%edi
+	// Write the codeword, which for user-defined words is always a
+	// pointer to the FORTH indirect threaded interpreter.
+	movl $DOCOL,(%edi)
+	add $4,%edi
+
+	// Finally, update LATEST.  As we go along compiling, we'll be
+	// writing compiled codewords to the LATEST pointer (and moving
+	// it along each time).
+	mov %edi,v_latest
+
+	movl $1,v_state		// go into compiling mode
+	ret
+
+	defcode ";",1,F_IMMED,SEMICOLON
+	// XXX
+
+#endif
+
+	defcode SYSEXIT,7,,SYSEXIT
+	pop %ebx
+	mov $__NR_exit,%eax
+	int $0x80
+
+/*----------------------------------------------------------------------
+ * Variables containing the interpreter's state.
+ */
+	.data
+
+	.align 4
+v_state:
+	.int 0			// 0 = immediate, non-zero = compiling
+v_latest:
+	.int name_SYSEXIT	// last word in the dictionary
+v_here:
+	.int user_defs_start	// pointer to next space for user definition or current compiled def
+
+/*----------------------------------------------------------------------
+ * Input buffer & initial input.
+ */
+	.data
+	.align 4096
+buffer:
+	.ascii "		\n\
+\\ Define some constants	\n\
+: '\\n'   10 ;			\n\
+: ')'     41 ;			\n\
+: 'space' 32 ;			\n\
+: '\"'    34 ;			\n\
+: '-'     45 ;			\n\
+: '0'     48 ;			\n\
+				\n\
+\\ CR command			\n\
+: CR '\\n' ECHO ;		\n\
+"
+
+_initbufftop:
+	.align 4096
+buffend:
+
+currkey:
+	.int buffer
+bufftop:
+	.int _initbufftop
diff --git a/jonesforth.S.7 b/jonesforth.S.7
new file mode 100644
index 0000000..ede6edd
--- /dev/null
+++ b/jonesforth.S.7
@@ -0,0 +1,475 @@
+/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
+ * By Richard W.M. Jones <rich@annexia.org>
+ *
+ * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
+ */
+
+#include <asm-i386/unistd.h>
+
+/* NOTES----------------------------------------------------------------------
+
+Need to say something about $ before constants.
+
+And about je/jne/ja/jb/jbe/etc
+
+
+
+
+*/
+
+/* NEXT macro. */
+	.macro NEXT
+	lodsl
+	jmp *(%eax)
+	.endm
+
+/* Macros to deal with the return stack. */
+	.macro PUSHRSP reg
+	lea -4(%ebp),%ebp	// push reg on to return stack
+	movl \reg,(%ebp)
+	.endm
+
+	.macro POPRSP reg
+	mov (%ebp),\reg		// pop top of return stack to reg
+	lea 4(%ebp),%ebp
+	.endm
+
+/* ELF entry point. */
+	.text
+	.globl _start
+_start:
+	cld
+	mov $return_stack,%ebp	// Initialise the return stack.
+
+	mov $cold_start,%esi	// Initialise interpreter.
+	NEXT			// Run interpreter!
+
+	.section .rodata
+cold_start:			// High-level code without a codeword.
+	.int COLD
+
+/* DOCOL - the interpreter! */
+	.text
+	.align 4
+DOCOL:
+	PUSHRSP %esi		// push %esi on to the return stack
+	addl $4,%eax		// %eax points to codeword, so make
+	movl %eax,%esi		// %esi point to first data word
+	NEXT
+
+/*----------------------------------------------------------------------
+ * Fixed sized buffers for everything.
+ */
+	.bss
+
+/* FORTH return stack. */
+#define RETURN_STACK_SIZE 8192
+	.align 4096
+	.space RETURN_STACK_SIZE
+return_stack:
+
+/* Space for user-defined words. */
+#define USER_DEFS_SIZE 16384
+	.align 4096
+user_defs_start:
+	.space USER_DEFS_SIZE
+
+
+
+
+
+
+/*----------------------------------------------------------------------
+ * Built-in words defined the long way.
+ */
+#define F_IMMED 0x80
+#define F_HIDDEN 0x40
+
+	// Store the chain of links.
+	.set link,0
+
+	.macro defcode name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int code_\label	// codeword
+	.text
+	.align 4
+	.globl code_\label
+code_\label :			// assembler code follows
+	.endm
+
+	.macro defword name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int DOCOL		// codeword - the interpreter
+	// list of word pointers follow
+	.endm
+
+	/* Some easy ones .... */
+	defcode "DUP",3,,DUP
+	pop %eax		// duplicate top of stack
+	push %eax
+	push %eax
+	NEXT
+
+	defcode "DROP",4,,DROP
+	pop %eax		// drop top of stack
+	NEXT
+
+	defcode "SWAP",4,,SWAP
+	pop %eax		// swap top of stack
+	pop %ebx
+	push %eax
+	push %ebx
+	NEXT
+
+	defcode "OVER",4,,OVER
+	mov 4(%esp),%eax	// get the second element of stack
+	push %eax		// and push it on top
+	NEXT
+
+	/* COLD must not return (ie. must not call EXIT). */
+	defword "COLD",4,,COLD
+	.int LIT,'<',ECHO
+	.int WORD,OVER,OVER,ECHOWORD,LIT,'=',ECHO,FIND,DOT
+	.int LIT,'>',ECHO
+	.int LIT,10,ECHO
+	.int RDROP,COLD
+
+/*
+This prints out each word in the input as <word>\n
+	defword "COLD",4,,COLD
+	.int LIT,'<',ECHO,WORD,ECHOWORD,LIT,'>',ECHO,LIT,10,ECHO,RDROP,COLD
+*/
+
+	defcode "EXIT",4,,EXIT
+	POPRSP %esi		// pop return stack into %esi
+	NEXT
+
+	defcode "LIT",3,,LIT
+	// %esi points to the next command, but in this case it points to the next
+	// literal 32 bit integer.  Get that literal into %eax and increment %esi.
+	// On x86, it's a convenient single byte instruction!  (cf. NEXT macro)
+	lodsl
+	push %eax		// push %eax on to stack
+	NEXT
+
+	defcode "!",1,,STORE
+	pop %ebx		// address to store at
+	pop %eax		// data to store there
+	mov %eax,(%ebx)		// store it
+	NEXT
+
+	defcode "@",1,,FETCH
+	pop %ebx		// address to fetch
+	mov (%ebx),%eax		// fetch it
+	push %eax		// push value onto stack
+	NEXT
+
+	defcode "STATE",5,,STATE
+	push $v_state
+	NEXT
+
+	defcode "HERE",4,,HERE
+	push $v_here
+	NEXT
+
+	defcode "LATEST",6,,LATEST
+	push $v_latest
+	NEXT
+
+	defcode ">R",2,,TOR
+	pop %eax		// pop parameter stack into %eax
+	PUSHRSP %eax		// push it on to the return stack
+	NEXT
+
+	defcode "R>",2,,FROMR
+	POPRSP %eax		// pop return stack on to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+
+#if 0 /* This definition is wrong. */
+	defcode "R",1,,R
+	mov %(ebp),%eax		// copy (don't pop) top of return stack to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+#endif
+
+	defcode "RSP@",4,,RSPFETCH
+	push %ebp
+	NEXT
+
+	defcode "RSP!",4,,RSPSTORE
+	pop %ebp
+	NEXT
+
+	defcode "RDROP",5,,RDROP
+	lea 4(%ebp),%ebp	// pop return stack and throw away
+	NEXT
+
+	defcode "KEY",3,,KEY
+	call _KEY
+	push %eax		// push return value on stack
+	NEXT
+_KEY:
+	mov (currkey),%ebx
+	cmp (bufftop),%ebx
+	jge 1f
+	xor %eax,%eax
+	mov (%ebx),%al
+	inc %ebx
+	mov %ebx,(currkey)
+	ret
+1:
+	mov $0,%ebx		// out of input, exit (0)
+	mov $__NR_exit,%eax
+	int $0x80
+
+	defcode "ECHO",4,,ECHO
+	pop %eax
+	call _ECHO
+	NEXT
+_ECHO:
+	mov $1,%ebx		// 1st param: stdout
+
+	// write needs the address of the byte to write
+	mov %al,(2f)
+	mov $2f,%ecx		// 2nd param: address
+
+	mov $1,%edx		// 3rd param: nbytes = 1
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+	ret
+
+	.bss
+2:	.space 1		// scratch used by ECHO
+
+	defcode "WORD",4,,WORD
+	call _WORD
+	push %eax		// push length
+	push %ebx		// push base address
+	NEXT
+
+_WORD:
+	/* Search for first non-blank character.  Also skip \ comments. */
+1:
+	call _KEY		// get next key, returned in %eax
+	cmpb $'\\',%al		// start of a comment?
+	je 3f			// if so, skip the comment
+	cmpb $' ',%al
+	jbe 1b			// if so, keep looking
+
+	/* Search for the end of the word, storing chars as we go. */
+	mov $5f,%edi		// pointer to return buffer
+2:
+	stosb			// add character to return buffer
+	call _KEY		// get next key, returned in %al
+	cmpb $' ',%al		// is blank?
+	ja 2b			// if not, keep looping
+
+	/* Return the word (well, the static buffer) and length. */
+	sub $5f,%edi
+	mov %edi,%eax		// return length of the word
+	mov $5f,%ebx		// return address of the word
+	ret
+
+	/* Code to skip \ comments to end of the current line. */
+3:
+	call _KEY
+	cmpb $'\n',%al		// end of line yet?
+	jne 3b
+	jmp 1b
+
+	.bss
+	// A static buffer where WORD returns.  Subsequent calls
+	// overwrite this buffer.  Maximum word length is 32 chars.
+5:	.space 32
+
+	defcode "ECHOWORD",8,,ECHOWORD
+	mov $1,%ebx		// 1st param: stdout
+	pop %ecx		// 2nd param: address of string
+	pop %edx		// 3rd param: length of string
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	defcode ".",1,,DOT
+	pop %eax		// Get the number to print into %eax
+	call _DOT		// Easier to do this recursively ...
+	NEXT
+
+/*	if eax >= 10 then print (eax / 10)
+	r = eax mod 10
+	echo r */
+_DOT:
+	mov $10,%ecx		// Base 10
+	cmp %ecx,%eax
+	jb 1f
+	pushl %eax
+	xor %edx,%edx
+	idivl %ecx
+	call _DOT
+	popl %eax
+1:
+	aam $10
+	cbw
+	cwde
+	addl $'0',%eax
+	call _ECHO
+	ret
+
+	defcode "FIND",4,,FIND
+	pop %edi		// %edi = address
+	pop %ecx		// %ecx = length
+
+	push %esi		// Save %esi so we can use it in string comparison.
+
+	// Now we start searching backwards through the dictionary for this word.
+	mov v_latest,%edx	// LATEST points to name header of the latest word in the dictionary
+1:
+	test %edx,%edx		// NULL pointer?  (end of the linked list)
+	je 4f
+
+	xor %eax,%eax
+	movb 4(%edx),%al	// %al = flags+length field
+	andb $0x1f,%al		// %al = name length
+	cmpb %cl,%al		// Length is the same?
+	jne 2f
+
+	// Compare the strings in detail.
+	push %ecx		// Save the length
+	push %edi		// Save the address (repe cmpsb will move this pointer)
+	lea 5(%edx),%esi	// Dictionary string we are checking against.
+	repe cmpsb		// Compare the strings.
+	pop %edi
+	pop %ecx
+	jne 2f			// Not the same.
+
+	// The strings are the same - return the header pointer on the stack.
+	pop %esi
+	push %edx
+	NEXT
+
+2:
+	mov (%edx),%edx		// Move back through the link field to the previous word
+	jmp 1b			// .. and loop.
+
+4:	// Not found.
+	pop %esi
+	xor %eax,%eax		// Push zero on to the stack to indicate not found.
+	push %eax
+	NEXT
+
+#if 0
+	defcode ":",1,,COLON
+	call nextword		// get next word, the procedure name
+	// The next word is returned in %ebx and has length %ecx bytes.
+
+	// Save the current value of VOCAB.
+	mov v_vocab,%eax
+	push %eax
+
+	// Change VOCAB to point to our new word's header (at LATEST).
+	mov v_latest,%edi
+	mov %edi,v_vocab
+
+	// We'll start by writing the word's header at LATEST; the header
+	// is just length byte, the word itself, link pointer.
+	mov %ecx,(%edi)		// Length byte
+	inc %edi
+	mov %ebx,%esi		// Copy the string.
+	rep movsb
+	// Round up to the next multiple of 4 so that the link pointer
+	// is aligned.
+	or $3,%edi
+	inc %edi
+	pop %eax		// Link pointer, points to old VOCAB.
+	mov %eax,(%edi)
+	add $4,%edi
+	// Write the codeword, which for user-defined words is always a
+	// pointer to the FORTH indirect threaded interpreter.
+	movl $DOCOL,(%edi)
+	add $4,%edi
+
+	// Finally, update LATEST.  As we go along compiling, we'll be
+	// writing compiled codewords to the LATEST pointer (and moving
+	// it along each time).
+	mov %edi,v_latest
+
+	movl $1,v_state		// go into compiling mode
+	ret
+
+	defcode ";",1,F_IMMED,SEMICOLON
+	// XXX
+
+#endif
+
+	defcode SYSEXIT,7,,SYSEXIT
+	pop %ebx
+	mov $__NR_exit,%eax
+	int $0x80
+
+/*----------------------------------------------------------------------
+ * Variables containing the interpreter's state.
+ */
+	.data
+
+	.align 4
+v_state:
+	.int 0			// 0 = immediate, non-zero = compiling
+v_latest:
+				// XXX should use 'link', but how to join it with name_?
+	.int name_SYSEXIT	// last word in the dictionary
+v_here:
+	.int user_defs_start	// pointer to next space for user definition or current compiled def
+
+/*----------------------------------------------------------------------
+ * Input buffer & initial input.
+ */
+	.data
+	.align 4096
+buffer:
+	.ascii "		\n\
+\\ Define some constants	\n\
+: '\\n'   10 ;			\n\
+: ')'     41 ;			\n\
+: 'space' 32 ;			\n\
+: '\"'    34 ;			\n\
+: '-'     45 ;			\n\
+: '0'     48 ;			\n\
+				\n\
+\\ CR command			\n\
+: CR '\\n' ECHO ;		\n\
+				\n\
+ECHO DUP DROP OVER		\n\
+"
+
+_initbufftop:
+	.align 4096
+buffend:
+
+currkey:
+	.int buffer
+bufftop:
+	.int _initbufftop
diff --git a/jonesforth.S.8 b/jonesforth.S.8
new file mode 100644
index 0000000..c894342
--- /dev/null
+++ b/jonesforth.S.8
@@ -0,0 +1,598 @@
+/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
+ * By Richard W.M. Jones <rich@annexia.org>
+ *
+ * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
+ */
+
+#include <asm-i386/unistd.h>
+
+/* NOTES-------------------------------------------------------------------------------------------------------------------
+
+Need to say something about $ before constants.
+
+And about je/jne/ja/jb/jbe/etc
+
+
+
+
+*/
+
+/* NEXT macro. */
+	.macro NEXT
+	lodsl
+	jmp *(%eax)
+	.endm
+
+/* Macros to deal with the return stack. */
+	.macro PUSHRSP reg
+	lea -4(%ebp),%ebp	// push reg on to return stack
+	movl \reg,(%ebp)
+	.endm
+
+	.macro POPRSP reg
+	mov (%ebp),\reg		// pop top of return stack to reg
+	lea 4(%ebp),%ebp
+	.endm
+
+/* ELF entry point. */
+	.text
+	.globl _start
+_start:
+	cld
+	mov $return_stack,%ebp	// Initialise the return stack.
+
+	mov $cold_start,%esi	// Initialise interpreter.
+	NEXT			// Run interpreter!
+
+	.section .rodata
+cold_start:			// High-level code without a codeword.
+	.int COLD
+
+/* DOCOL - the interpreter! */
+	.text
+	.align 4
+DOCOL:
+	PUSHRSP %esi		// push %esi on to the return stack
+	addl $4,%eax		// %eax points to codeword, so make
+	movl %eax,%esi		// %esi point to first data word
+	NEXT
+
+/*----------------------------------------------------------------------
+ * Fixed sized buffers for everything.
+ */
+	.bss
+
+/* FORTH return stack. */
+#define RETURN_STACK_SIZE 8192
+	.align 4096
+	.space RETURN_STACK_SIZE
+return_stack:
+
+/* Space for user-defined words. */
+#define USER_DEFS_SIZE 16384
+	.align 4096
+user_defs_start:
+	.space USER_DEFS_SIZE
+
+
+
+
+
+
+/*----------------------------------------------------------------------
+ * Built-in words defined the long way.
+ */
+#define F_IMMED 0x80
+#define F_HIDDEN 0x20
+
+	// Store the chain of links.
+	.set link,0
+
+	.macro defcode name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int code_\label	// codeword
+	.text
+	.align 4
+	.globl code_\label
+code_\label :			// assembler code follows
+	.endm
+
+	.macro defword name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int DOCOL		// codeword - the interpreter
+	// list of word pointers follow
+	.endm
+
+	.macro defvar name, namelen, flags=0, label, initial=0
+	defcode \name,\namelen,\flags,\label
+	push $var_\name
+	NEXT
+	.data
+	.align 4
+var_\name :
+	.int \initial
+	.endm
+
+	/* Some easy ones .... */
+	defcode "DUP",3,,DUP
+	pop %eax		// duplicate top of stack
+	push %eax
+	push %eax
+	NEXT
+
+	defcode "DROP",4,,DROP
+	pop %eax		// drop top of stack
+	NEXT
+
+	defcode "SWAP",4,,SWAP
+	pop %eax		// swap top of stack
+	pop %ebx
+	push %eax
+	push %ebx
+	NEXT
+
+	defcode "OVER",4,,OVER
+	mov 4(%esp),%eax	// get the second element of stack
+	push %eax		// and push it on top
+	NEXT
+
+	// COLD must not return (ie. must not call EXIT).
+	defword "COLD",4,,COLD
+	// XXX reinitialisation of the interpreter
+	.int INTERPRETER	// call the interpreter loop (never returns)
+	.int LIT,1,SYSEXIT	// hmmm, but in case it does, exit(1).
+
+	defcode "EXIT",4,,EXIT
+	POPRSP %esi		// pop return stack into %esi
+	NEXT
+
+	defcode "LIT",3,,LIT
+	// %esi points to the next command, but in this case it points to the next
+	// literal 32 bit integer.  Get that literal into %eax and increment %esi.
+	// On x86, it's a convenient single byte instruction!  (cf. NEXT macro)
+	lodsl
+	push %eax		// push %eax on to stack
+	NEXT
+
+	defcode "0SKIP",5,,ZSKIP
+	// If the top of stack is zero, skip the next instruction.
+	pop %eax
+	test %eax,%eax
+	jnz 1f
+	lodsl			// this does the skip
+1:	NEXT
+
+	defcode "!",1,,STORE
+	pop %ebx		// address to store at
+	pop %eax		// data to store there
+	mov %eax,(%ebx)		// store it
+	NEXT
+
+	defcode "@",1,,FETCH
+	pop %ebx		// address to fetch
+	mov (%ebx),%eax		// fetch it
+	push %eax		// push value onto stack
+	NEXT
+
+	// The STATE variable is 0 for execute mode, != 0 for compile mode
+	defvar "STATE",5,,STATE
+
+	// This points to where compiled words go.
+	defvar "HERE",4,,HERE,user_defs_start
+
+	// This is the last definition in the dictionary.
+	defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary
+
+	defcode ">R",2,,TOR
+	pop %eax		// pop parameter stack into %eax
+	PUSHRSP %eax		// push it on to the return stack
+	NEXT
+
+	defcode "R>",2,,FROMR
+	POPRSP %eax		// pop return stack on to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+
+#if 0 /* This definition is wrong. */
+	defcode "R",1,,R
+	mov %(ebp),%eax		// copy (don't pop) top of return stack to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+#endif
+
+	defcode "RSP@",4,,RSPFETCH
+	push %ebp
+	NEXT
+
+	defcode "RSP!",4,,RSPSTORE
+	pop %ebp
+	NEXT
+
+	defcode "RDROP",5,,RDROP
+	lea 4(%ebp),%ebp	// pop return stack and throw away
+	NEXT
+
+	defcode "KEY",3,,KEY
+	call _KEY
+	push %eax		// push return value on stack
+	NEXT
+_KEY:
+	mov (currkey),%ebx
+	cmp (bufftop),%ebx
+	jge 1f
+	xor %eax,%eax
+	mov (%ebx),%al
+	inc %ebx
+	mov %ebx,(currkey)
+	ret
+1:
+	mov $0,%ebx		// out of input, exit (0)
+	mov $__NR_exit,%eax
+	int $0x80
+
+	defcode "EMIT",4,,EMIT
+	pop %eax
+	call _EMIT
+	NEXT
+_EMIT:
+	mov $1,%ebx		// 1st param: stdout
+
+	// write needs the address of the byte to write
+	mov %al,(2f)
+	mov $2f,%ecx		// 2nd param: address
+
+	mov $1,%edx		// 3rd param: nbytes = 1
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+	ret
+
+	.bss
+2:	.space 1		// scratch used by EMIT
+
+	defcode "WORD",4,,WORD
+	call _WORD
+	push %ecx		// push length
+	push %edi		// push base address
+	NEXT
+
+_WORD:
+	/* Search for first non-blank character.  Also skip \ comments. */
+1:
+	call _KEY		// get next key, returned in %eax
+	cmpb $'\\',%al		// start of a comment?
+	je 3f			// if so, skip the comment
+	cmpb $' ',%al
+	jbe 1b			// if so, keep looking
+
+	/* Search for the end of the word, storing chars as we go. */
+	mov $5f,%edi		// pointer to return buffer
+2:
+	stosb			// add character to return buffer
+	call _KEY		// get next key, returned in %al
+	cmpb $' ',%al		// is blank?
+	ja 2b			// if not, keep looping
+
+	/* Return the word (well, the static buffer) and length. */
+	sub $5f,%edi
+	mov %edi,%ecx		// return length of the word
+	mov $5f,%edi		// return address of the word
+	ret
+
+	/* Code to skip \ comments to end of the current line. */
+3:
+	call _KEY
+	cmpb $'\n',%al		// end of line yet?
+	jne 3b
+	jmp 1b
+
+	.bss
+	// A static buffer where WORD returns.  Subsequent calls
+	// overwrite this buffer.  Maximum word length is 32 chars.
+5:	.space 32
+
+	defcode "EMITWORD",8,,EMITWORD
+	mov $1,%ebx		// 1st param: stdout
+	pop %ecx		// 2nd param: address of string
+	pop %edx		// 3rd param: length of string
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	defcode ".",1,,DOT
+	pop %eax		// Get the number to print into %eax
+	call _DOT		// Easier to do this recursively ...
+	NEXT
+_DOT:
+	mov $10,%ecx		// Base 10
+	cmp %ecx,%eax
+	jb 1f
+	pushl %eax
+	xor %edx,%edx
+	idivl %ecx
+	call _DOT
+	popl %eax
+1:
+	aam $10
+	cbw
+	cwde
+	addl $'0',%eax
+	call _EMIT
+	ret
+
+	// Parse a number from a string on the stack -- almost the opposite of . (DOT)
+	// Note that there is absolutely no error checking.  In particular the length of the
+	// string must be >= 1 bytes.
+	defcode "SNUMBER",7,,SNUMBER
+	pop %edi
+	pop %ecx
+	call _SNUMBER
+	push %eax
+	NEXT
+_SNUMBER:
+	xor %eax,%eax
+	xor %ebx,%ebx
+1:
+	imull $10,%eax		// %eax *= 10
+	movb (%edi),%bl
+	inc %edi
+	subb $'0',%bl		// ASCII -> digit
+	add %ebx,%eax
+	dec %ecx
+	jnz 1b
+	ret
+
+	defcode "FIND",4,,FIND
+	pop %edi		// %edi = address
+	pop %ecx		// %ecx = length
+	call _FIND
+	push %eax
+	NEXT
+
+_FIND:
+	push %esi		// Save %esi so we can use it in string comparison.
+
+	// Now we start searching backwards through the dictionary for this word.
+	mov var_LATEST,%edx	// LATEST points to name header of the latest word in the dictionary
+1:
+	test %edx,%edx		// NULL pointer?  (end of the linked list)
+	je 4f
+
+	// Compare the length expected and the length of the word.
+	// Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
+	// this won't pick the word (the length will appear to be wrong).
+	xor %eax,%eax
+	movb 4(%edx),%al	// %al = flags+length field
+	andb $(F_HIDDEN|0x1f),%al // %al = name length
+	cmpb %cl,%al		// Length is the same?
+	jne 2f
+
+	// Compare the strings in detail.
+	push %ecx		// Save the length
+	push %edi		// Save the address (repe cmpsb will move this pointer)
+	lea 5(%edx),%esi	// Dictionary string we are checking against.
+	repe cmpsb		// Compare the strings.
+	pop %edi
+	pop %ecx
+	jne 2f			// Not the same.
+
+	// The strings are the same - return the header pointer in %eax
+	pop %esi
+	mov %edx,%eax
+	ret
+
+2:
+	mov (%edx),%edx		// Move back through the link field to the previous word
+	jmp 1b			// .. and loop.
+
+4:	// Not found.
+	pop %esi
+	xor %eax,%eax		// Return zero to indicate not found.
+	ret
+
+	defcode ">CFA",4,,TCFA	// DEA -> Codeword address
+	pop %edi
+	call _TCFA
+	push %edi
+	NEXT
+_TCFA:
+	xor %eax,%eax
+	add $4,%edi		// Skip link pointer.
+	movb (%edi),%al		// Load flags+len into %al.
+	inc %edi		// Skip flags+len byte.
+	andb $0x1f,%al		// Just the length, not the flags.
+	add %eax,%edi		// Skip the name.
+	orl $3,%edi		// The codeword is 4-byte aligned.
+	inc %edi
+	ret
+
+	defword "'",1,,TICK
+	.int WORD		// Get the following word.
+	.int FIND		// Look it up in the dictionary.
+	.int DUP		// If not found, skip >CFA (TCFA) instruction.
+	.int ZSKIP
+	.int TCFA		// Convert to a codeword pointer.
+	.int EXIT		// Return.
+
+	defcode ":",1,,COLON
+
+	// Get the word and create a dictionary entry header for it.
+	call _WORD		// Returns %ecx = length, %edi = pointer to word.
+	mov %edi,%ebx		// %ebx = address of the word
+
+	movl var_HERE,%edi	// %edi is the address of the header
+	movl var_LATEST,%eax	// Get link pointer
+	stosl			// and store it in the header.
+
+	mov %cl,%al		// Get the length.
+	orb $F_HIDDEN,%al	// Set the HIDDEN flag on this entry.
+	stosb			// Store the length/flags byte.
+	push %esi
+	mov %ebx,%esi		// %esi = word
+	rep movsb		// Copy the word
+	pop %esi
+	orl $3,%edi		// Align to next 4 byte boundary.
+	inc %edi
+
+	movl $DOCOL,%eax	// The codeword for user-created words is always DOCOL (the interpreter)
+	stosl
+
+	// Header built, so now update LATEST and HERE.
+	// We'll be compiling words and putting them HERE.
+	movl var_HERE,%eax
+	movl %eax,var_LATEST
+	movl %edi,var_HERE
+
+	// And go into compile mode by setting STATE to 1.
+	xor %eax,%eax
+	inc %eax
+	mov %eax,var_STATE
+	NEXT
+
+	defcode ",",1,,COMMA
+	pop %eax		// Code pointer to store.
+	call _COMMA
+	NEXT
+_COMMA:
+	movl var_HERE,%edi	// HERE
+	stosl			// Store it.
+	movl %edi,var_HERE	// Update HERE (incremented)
+	ret
+
+	defcode "HIDDEN",6,,HIDDEN
+	call _HIDDEN
+	NEXT
+_HIDDEN:
+	movl var_LATEST,%edi	// LATEST word.
+	addl $4,%edi		// Point to name/flags byte.
+	xorb $F_HIDDEN,(%edi)	// Toggle the HIDDEN bit.
+	ret
+
+	defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE
+	call _IMMEDIATE
+	NEXT
+_IMMEDIATE:
+	movl var_LATEST,%edi	// LATEST word.
+	addl $4,%edi		// Point to name/flags byte.
+	xorb $F_IMMED,(%edi)	// Toggle the IMMED bit.
+	ret
+
+	defcode ";",1,F_IMMED,SEMICOLON
+	movl $EXIT,%eax		// EXIT is the final codeword in compiled words.
+	call _COMMA		// Store it.
+	call _HIDDEN		// Toggle the HIDDEN flag (unhides the new word).
+	xor %eax,%eax		// Set STATE to 0 (back to execute mode).
+	movl %eax,var_STATE
+	NEXT
+
+/* This interpreter is pretty simple, but remember that in FORTH you can always override
+ * it later with a more powerful one!
+ */
+	defword "INTERPRETER",11,,INTERPRETER
+	.int INTERPRET,RDROP,INTERPRETER
+
+	defcode "INTERPRET",9,,INTERPRET
+	call _WORD		// Returns %ecx = length, %edi = pointer to word.
+
+	// Is it in the dictionary?
+	call _FIND		// Returns %eax = pointer to header or 0 if not found.
+	test %eax,%eax		// Found?
+	jz 1f
+
+	// In the dictionary.  Is it an IMMEDIATE codeword?
+	mov %eax,%edi		// %edi = dictionary entry
+	movb 4(%edi),%al	// Get name+flags.
+	push %ax		// Just save it for now.
+	call _TCFA		// Convert dictionary entry (in %edi) to codeword pointer.
+	pop %ax
+	andb $F_IMMED,%al	// Is IMMED flag set?
+	mov %edi,%eax
+	jnz 3f			// If IMMED, jump straight to executing.
+
+	jmp 2f
+
+1:	// Not in the dictionary (not a word) so assume it's a number.
+	call _SNUMBER		// Returns the parsed number in %eax
+	mov %eax,%ebx
+	mov $LIT,%eax		// The word is LIT
+
+2:	// Are we compiling or executing?
+	movl var_STATE,%edx
+	test %edx,%edx
+	jz 3f			// Jump if executing.
+
+	// Compiling - just append the word to the current dictionary definition.
+	call _COMMA
+	cmp $LIT,%eax		// Was it LIT?
+	jne 4f
+	mov %ebx,%eax		// Yes, so LIT is followed by a number.
+	call _COMMA
+	NEXT
+
+3:	// Executing - run it!
+	cmp $LIT,%eax		// Literal?
+	je 4f
+	// Not a literal, execute it now.  This never returns, but the codeword will
+	// eventually call NEXT which will reenter the loop in INTERPRETER.
+	jmp *(%eax)
+
+4:	// Executing a literal, which means push it on the stack.
+	push %ebx
+	NEXT
+
+	// NB: SYSEXIT must be the last entry in the built-in dictionary.
+	defcode SYSEXIT,7,,SYSEXIT
+	pop %ebx
+	mov $__NR_exit,%eax
+	int $0x80
+
+/*----------------------------------------------------------------------
+ * Input buffer & initial input.
+ */
+	.data
+	.align 4096
+buffer:
+	.ascii "\n\
+: '\\n'   10 ;			\n\
+: ')'     41 ;			\n\
+: 'space' 32 ;			\n\
+: '\"'    34 ;			\n\
+: '-'     45 ;			\n\
+: '0'     48 ;			\n\
+				\n\
+\\ CR command			\n\
+: CR '\\n' EMIT ;		\n\
+				\n\
+CR CR '0' EMIT CR CR		\n\
+"
+
+_initbufftop:
+	.align 4096
+buffend:
+
+currkey:
+	.int buffer
+bufftop:
+	.int _initbufftop
diff --git a/jonesforth.S.9 b/jonesforth.S.9
new file mode 100644
index 0000000..5267bb9
--- /dev/null
+++ b/jonesforth.S.9
@@ -0,0 +1,630 @@
+/* A minimal FORTH interpreter for Linux / i386 systems. -*- asm -*-
+ * By Richard W.M. Jones <rich@annexia.org>
+ *
+ * gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S
+ */
+
+#include <asm-i386/unistd.h>
+
+/* NOTES-------------------------------------------------------------------------------------------------------------------
+
+Need to say something about $ before constants.
+
+And about je/jne/ja/jb/jbe/etc
+
+
+
+
+*/
+
+/* NEXT macro. */
+	.macro NEXT
+	lodsl
+	jmp *(%eax)
+	.endm
+
+/* Macros to deal with the return stack. */
+	.macro PUSHRSP reg
+	lea -4(%ebp),%ebp	// push reg on to return stack
+	movl \reg,(%ebp)
+	.endm
+
+	.macro POPRSP reg
+	mov (%ebp),\reg		// pop top of return stack to reg
+	lea 4(%ebp),%ebp
+	.endm
+
+/* ELF entry point. */
+	.text
+	.globl _start
+_start:
+	cld
+	mov $return_stack,%ebp	// Initialise the return stack.
+
+	mov $cold_start,%esi	// Initialise interpreter.
+	NEXT			// Run interpreter!
+
+	.section .rodata
+cold_start:			// High-level code without a codeword.
+	.int COLD
+
+/* DOCOL - the interpreter! */
+	.text
+	.align 4
+DOCOL:
+	PUSHRSP %esi		// push %esi on to the return stack
+	addl $4,%eax		// %eax points to codeword, so make
+	movl %eax,%esi		// %esi point to first data word
+	NEXT
+
+/*----------------------------------------------------------------------
+ * Fixed sized buffers for everything.
+ */
+	.bss
+
+/* FORTH return stack. */
+#define RETURN_STACK_SIZE 8192
+	.align 4096
+	.space RETURN_STACK_SIZE
+return_stack:
+
+/* Space for user-defined words. */
+#define USER_DEFS_SIZE 16384
+	.align 4096
+user_defs_start:
+	.space USER_DEFS_SIZE
+
+
+
+
+
+
+/*----------------------------------------------------------------------
+ * Built-in words defined the long way.
+ */
+#define F_IMMED 0x80
+#define F_HIDDEN 0x20
+
+	// Store the chain of links.
+	.set link,0
+
+	.macro defcode name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int code_\label	// codeword
+	.text
+	.align 4
+	.globl code_\label
+code_\label :			// assembler code follows
+	.endm
+
+	.macro defword name, namelen, flags=0, label
+	.section .rodata
+	.align 4
+	.globl name_\label
+name_\label :
+	.int link		// link
+	.set link,name_\label
+	.byte \flags+\namelen	// flags + length byte
+	.ascii "\name"		// the name
+	.align 4
+	.globl \label
+\label :
+	.int DOCOL		// codeword - the interpreter
+	// list of word pointers follow
+	.endm
+
+	.macro defvar name, namelen, flags=0, label, initial=0
+	defcode \name,\namelen,\flags,\label
+	push $var_\name
+	NEXT
+	.data
+	.align 4
+var_\name :
+	.int \initial
+	.endm
+
+	// Some easy ones, written in assembly for speed
+	defcode "DROP",4,,DROP
+	pop %eax		// drop top of stack
+	NEXT
+
+	defcode "DUP",3,,DUP
+	pop %eax		// duplicate top of stack
+	push %eax
+	push %eax
+	NEXT
+
+	defcode "SWAP",4,,SWAP
+	pop %eax		// swap top of stack
+	pop %ebx
+	push %eax
+	push %ebx
+	NEXT
+
+	defcode "OVER",4,,OVER
+	mov 4(%esp),%eax	// get the second element of stack
+	push %eax		// and push it on top
+	NEXT
+
+	// COLD must not return (ie. must not call EXIT).
+	defword "COLD",4,,COLD
+	// XXX reinitialisation of the interpreter
+	.int INTERPRETER	// call the interpreter loop (never returns)
+	.int LIT,1,SYSEXIT	// hmmm, but in case it does, exit(1).
+
+	defcode "EXIT",4,,EXIT
+	POPRSP %esi		// pop return stack into %esi
+	NEXT
+
+	defcode "LIT",3,,LIT
+	// %esi points to the next command, but in this case it points to the next
+	// literal 32 bit integer.  Get that literal into %eax and increment %esi.
+	// On x86, it's a convenient single byte instruction!  (cf. NEXT macro)
+	lodsl
+	push %eax		// push the literal number on to stack
+	NEXT
+
+#if 0
+	defcode "0SKIP",5,,ZSKIP
+	// If the top of stack is zero, skip the next instruction.
+	pop %eax
+	test %eax,%eax
+	jnz 1f
+	lodsl			// this does the skip
+1:	NEXT
+#endif
+
+	defcode "!",1,,STORE
+	pop %ebx		// address to store at
+	pop %eax		// data to store there
+	mov %eax,(%ebx)		// store it
+	NEXT
+
+	defcode "@",1,,FETCH
+	pop %ebx		// address to fetch
+	mov (%ebx),%eax		// fetch it
+	push %eax		// push value onto stack
+	NEXT
+
+	// The STATE variable is 0 for execute mode, != 0 for compile mode
+	defvar "STATE",5,,STATE
+
+	// This points to where compiled words go.
+	defvar "HERE",4,,HERE,user_defs_start
+
+	// This is the last definition in the dictionary.
+	defvar "LATEST",6,,LATEST,name_SYSEXIT // SYSEXIT must be last in built-in dictionary
+
+	// _X, _Y and _Z are scratch variables used by standard words.
+	defvar "_X",2,,TX
+	defvar "_Y",2,,TY
+	defvar "_Z",2,,TZ
+
+	defcode ">R",2,,TOR
+	pop %eax		// pop parameter stack into %eax
+	PUSHRSP %eax		// push it on to the return stack
+	NEXT
+
+	defcode "R>",2,,FROMR
+	POPRSP %eax		// pop return stack on to %eax
+	push %eax		// and push on to parameter stack
+	NEXT
+
+	defcode "RSP@",4,,RSPFETCH
+	push %ebp
+	NEXT
+
+	defcode "RSP!",4,,RSPSTORE
+	pop %ebp
+	NEXT
+
+	defcode "RDROP",5,,RDROP
+	lea 4(%ebp),%ebp	// pop return stack and throw away
+	NEXT
+
+	defcode "KEY",3,,KEY
+	call _KEY
+	push %eax		// push return value on stack
+	NEXT
+_KEY:
+	mov (currkey),%ebx
+	cmp (bufftop),%ebx
+	jge 1f
+	xor %eax,%eax
+	mov (%ebx),%al
+	inc %ebx
+	mov %ebx,(currkey)
+	ret
+
+1:	// out of input; use read(2) to fetch more input from stdin
+	xor %ebx,%ebx		// 1st param: stdin
+	mov $buffer,%ecx	// 2nd param: buffer
+	mov %ecx,currkey
+	mov $buffend-buffer,%edx // 3rd param: max length
+	mov $__NR_read,%eax	// syscall: read
+	int $0x80
+	test %eax,%eax		// If %eax <= 0, then exit.
+	jbe 2f
+	addl %eax,%ecx		// buffer+%eax = bufftop
+	mov %ecx,bufftop
+	jmp _KEY
+
+2:	// error or out of input: exit
+	xor %ebx,%ebx
+	mov $__NR_exit,%eax	// syscall: exit
+	int $0x80
+
+	defcode "EMIT",4,,EMIT
+	pop %eax
+	call _EMIT
+	NEXT
+_EMIT:
+	mov $1,%ebx		// 1st param: stdout
+
+	// write needs the address of the byte to write
+	mov %al,(2f)
+	mov $2f,%ecx		// 2nd param: address
+
+	mov $1,%edx		// 3rd param: nbytes = 1
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+	ret
+
+	.bss
+2:	.space 1		// scratch used by EMIT
+
+	defcode "WORD",4,,WORD
+	call _WORD
+	push %ecx		// push length
+	push %edi		// push base address
+	NEXT
+
+_WORD:
+	/* Search for first non-blank character.  Also skip \ comments. */
+1:
+	call _KEY		// get next key, returned in %eax
+	cmpb $'\\',%al		// start of a comment?
+	je 3f			// if so, skip the comment
+	cmpb $' ',%al
+	jbe 1b			// if so, keep looking
+
+	/* Search for the end of the word, storing chars as we go. */
+	mov $5f,%edi		// pointer to return buffer
+2:
+	stosb			// add character to return buffer
+	call _KEY		// get next key, returned in %al
+	cmpb $' ',%al		// is blank?
+	ja 2b			// if not, keep looping
+
+	/* Return the word (well, the static buffer) and length. */
+	sub $5f,%edi
+	mov %edi,%ecx		// return length of the word
+	mov $5f,%edi		// return address of the word
+	ret
+
+	/* Code to skip \ comments to end of the current line. */
+3:
+	call _KEY
+	cmpb $'\n',%al		// end of line yet?
+	jne 3b
+	jmp 1b
+
+	.bss
+	// A static buffer where WORD returns.  Subsequent calls
+	// overwrite this buffer.  Maximum word length is 32 chars.
+5:	.space 32
+
+	defcode "EMITWORD",8,,EMITWORD
+	mov $1,%ebx		// 1st param: stdout
+	pop %ecx		// 2nd param: address of string
+	pop %edx		// 3rd param: length of string
+
+	mov $__NR_write,%eax	// write syscall
+	int $0x80
+
+	NEXT
+
+	defcode ".",1,,DOT
+	pop %eax		// Get the number to print into %eax
+	call _DOT		// Easier to do this recursively ...
+	NEXT
+_DOT:
+	mov $10,%ecx		// Base 10
+	cmp %ecx,%eax
+	jb 1f
+	pushl %eax
+	xor %edx,%edx
+	idivl %ecx
+	call _DOT
+	popl %eax
+1:
+	aam $10
+	cbw
+	cwde
+	addl $'0',%eax
+	call _EMIT
+	ret
+
+	// Parse a number from a string on the stack -- almost the opposite of . (DOT)
+	// Note that there is absolutely no error checking.  In particular the length of the
+	// string must be >= 1 bytes.
+	defcode "SNUMBER",7,,SNUMBER
+	pop %edi
+	pop %ecx
+	call _SNUMBER
+	push %eax
+	NEXT
+_SNUMBER:
+	xor %eax,%eax
+	xor %ebx,%ebx
+1:
+	imull $10,%eax		// %eax *= 10
+	movb (%edi),%bl
+	inc %edi
+	subb $'0',%bl		// ASCII -> digit
+	add %ebx,%eax
+	dec %ecx
+	jnz 1b
+	ret
+
+	defcode "FIND",4,,FIND
+	pop %edi		// %edi = address
+	pop %ecx		// %ecx = length
+	call _FIND
+	push %eax
+	NEXT
+
+_FIND:
+	push %esi		// Save %esi so we can use it in string comparison.
+
+	// Now we start searching backwards through the dictionary for this word.
+	mov var_LATEST,%edx	// LATEST points to name header of the latest word in the dictionary
+1:
+	test %edx,%edx		// NULL pointer?  (end of the linked list)
+	je 4f
+
+	// Compare the length expected and the length of the word.
+	// Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery
+	// this won't pick the word (the length will appear to be wrong).
+	xor %eax,%eax
+	movb 4(%edx),%al	// %al = flags+length field
+	andb $(F_HIDDEN|0x1f),%al // %al = name length
+	cmpb %cl,%al		// Length is the same?
+	jne 2f
+
+	// Compare the strings in detail.
+	push %ecx		// Save the length
+	push %edi		// Save the address (repe cmpsb will move this pointer)
+	lea 5(%edx),%esi	// Dictionary string we are checking against.
+	repe cmpsb		// Compare the strings.
+	pop %edi
+	pop %ecx
+	jne 2f			// Not the same.
+
+	// The strings are the same - return the header pointer in %eax
+	pop %esi
+	mov %edx,%eax
+	ret
+
+2:
+	mov (%edx),%edx		// Move back through the link field to the previous word
+	jmp 1b			// .. and loop.
+
+4:	// Not found.
+	pop %esi
+	xor %eax,%eax		// Return zero to indicate not found.
+	ret
+
+	defcode ">CFA",4,,TCFA	// DEA -> Codeword address
+	pop %edi
+	call _TCFA
+	push %edi
+	NEXT
+_TCFA:
+	xor %eax,%eax
+	add $4,%edi		// Skip link pointer.
+	movb (%edi),%al		// Load flags+len into %al.
+	inc %edi		// Skip flags+len byte.
+	andb $0x1f,%al		// Just the length, not the flags.
+	add %eax,%edi		// Skip the name.
+	addl $3,%edi		// The codeword is 4-byte aligned.
+	andl $~3,%edi
+	ret
+
+#if 0
+	defword "'",1,,TICK
+	.int WORD		// Get the following word.
+	.int FIND		// Look it up in the dictionary.
+	.int DUP		// If not found, skip >CFA (TCFA) instruction.
+	.int ZSKIP
+	.int TCFA		// Convert to a codeword pointer.
+	.int EXIT		// Return.
+#endif
+
+	defcode ":",1,,COLON
+
+	// Get the word and create a dictionary entry header for it.
+	call _WORD		// Returns %ecx = length, %edi = pointer to word.
+	mov %edi,%ebx		// %ebx = address of the word
+
+	movl var_HERE,%edi	// %edi is the address of the header
+	movl var_LATEST,%eax	// Get link pointer
+	stosl			// and store it in the header.
+
+	mov %cl,%al		// Get the length.
+	orb $F_HIDDEN,%al	// Set the HIDDEN flag on this entry.
+	stosb			// Store the length/flags byte.
+	push %esi
+	mov %ebx,%esi		// %esi = word
+	rep movsb		// Copy the word
+	pop %esi
+	addl $3,%edi		// Align to next 4 byte boundary.
+	andl $~3,%edi
+
+	movl $DOCOL,%eax	// The codeword for user-created words is always DOCOL (the interpreter)
+	stosl
+
+	// Header built, so now update LATEST and HERE.
+	// We'll be compiling words and putting them HERE.
+	movl var_HERE,%eax
+	movl %eax,var_LATEST
+	movl %edi,var_HERE
+
+	// And go into compile mode by setting STATE to 1.
+	xor %eax,%eax
+	inc %eax
+	mov %eax,var_STATE
+	NEXT
+
+	defcode ",",1,,COMMA
+	pop %eax		// Code pointer to store.
+	call _COMMA
+	NEXT
+_COMMA:
+	movl var_HERE,%edi	// HERE
+	stosl			// Store it.
+	movl %edi,var_HERE	// Update HERE (incremented)
+	ret
+
+	defcode "HIDDEN",6,,HIDDEN
+	call _HIDDEN
+	NEXT
+_HIDDEN:
+	movl var_LATEST,%edi	// LATEST word.
+	addl $4,%edi		// Point to name/flags byte.
+	xorb $F_HIDDEN,(%edi)	// Toggle the HIDDEN bit.
+	ret
+
+	defcode "IMMEDIATE",9,F_IMMED,IMMEDIATE
+	call _IMMEDIATE
+	NEXT
+_IMMEDIATE:
+	movl var_LATEST,%edi	// LATEST word.
+	addl $4,%edi		// Point to name/flags byte.
+	xorb $F_IMMED,(%edi)	// Toggle the IMMED bit.
+	ret
+
+	defcode ";",1,F_IMMED,SEMICOLON
+	movl $EXIT,%eax		// EXIT is the final codeword in compiled words.
+	call _COMMA		// Store it.
+	call _HIDDEN		// Toggle the HIDDEN flag (unhides the new word).
+	xor %eax,%eax		// Set STATE to 0 (back to execute mode).
+	movl %eax,var_STATE
+	NEXT
+
+/* This interpreter is pretty simple, but remember that in FORTH you can always override
+ * it later with a more powerful one!
+ */
+	defword "INTERPRETER",11,,INTERPRETER
+	.int INTERPRET,RDROP,INTERPRETER
+
+	defcode "INTERPRET",9,,INTERPRET
+	call _WORD		// Returns %ecx = length, %edi = pointer to word.
+
+	// Is it in the dictionary?
+	call _FIND		// Returns %eax = pointer to header or 0 if not found.
+	test %eax,%eax		// Found?
+	jz 1f
+
+	// In the dictionary.  Is it an IMMEDIATE codeword?
+	mov %eax,%edi		// %edi = dictionary entry
+	movb 4(%edi),%al	// Get name+flags.
+	push %ax		// Just save it for now.
+	call _TCFA		// Convert dictionary entry (in %edi) to codeword pointer.
+	pop %ax
+	andb $F_IMMED,%al	// Is IMMED flag set?
+	mov %edi,%eax
+	jnz 3f			// If IMMED, jump straight to executing.
+
+	jmp 2f
+
+1:	// Not in the dictionary (not a word) so assume it's a number.
+	call _SNUMBER		// Returns the parsed number in %eax
+	mov %eax,%ebx
+	mov $LIT,%eax		// The word is LIT
+
+2:	// Are we compiling or executing?
+	movl var_STATE,%edx
+	test %edx,%edx
+	jz 3f			// Jump if executing.
+
+	// Compiling - just append the word to the current dictionary definition.
+	call _COMMA
+	cmp $LIT,%eax		// Was it LIT?
+	jne 4f
+	mov %ebx,%eax		// Yes, so LIT is followed by a number.
+	call _COMMA
+	NEXT
+
+3:	// Executing - run it!
+	cmp $LIT,%eax		// Literal?
+	je 4f
+	// Not a literal, execute it now.  This never returns, but the codeword will
+	// eventually call NEXT which will reenter the loop in INTERPRETER.
+	jmp *(%eax)
+
+4:	// Executing a literal, which means push it on the stack.
+	push %ebx
+	NEXT
+
+	// NB: SYSEXIT must be the last entry in the built-in dictionary.
+	defcode SYSEXIT,7,,SYSEXIT
+	pop %ebx
+	mov $__NR_exit,%eax
+	int $0x80
+
+/*----------------------------------------------------------------------
+ * Input buffer & initial input.
+ */
+	.data
+	.align 4096
+buffer:
+	// XXX gives 'Warning: unterminated string; newline inserted' messages which you can ignore
+	.ascii "\
+\\ Define some character constants
+: '\\n'   10 ;
+: 'SPACE' 32 ;
+: '\"'    34 ;
+
+\\ CR prints a carriage return
+: CR '\\n' EMIT ;
+
+\\ SPACE prints a space
+: SPACE 'SPACE' EMIT ;
+
+\\ Primitive . (DOT) function doesn't follow with a blank, so redefine it to behave like FORTH.
+\\ Notice how we can trivially redefine existing functions.
+: . . SPACE ;
+
+\\ XXX SPACES
+
+\\ DUP, DROP are defined in assembly for speed, but this is how you might define them
+\\ in FORTH.  Notice use of the scratch variables _X and _Y.
+\\ : DUP _X ! _X @ _X @ ;
+\\ : DROP _X ! ;
+
+
+
+\\ Finally print the welcome prompt.
+79 EMIT 75 EMIT 'SPACE' EMIT
+"
+
+_initbufftop:
+	.align 4096
+buffend:
+
+currkey:
+	.int buffer
+bufftop:
+	.int _initbufftop