X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=jonesforth.S;h=681f9a89fcb319f1292ffbef962d7db9c4283c96;hb=194e3f36f539ff90540bfb780cfbd424d7f6aaec;hp=de5bbe3001f594391e570c2fbab5d4c7f7f5cb15;hpb=31a2023bf2670b9a15629f873cbb0ef2ae28bcd3;p=jonesforth.git diff --git a/jonesforth.S b/jonesforth.S index de5bbe3..681f9a8 100644 --- a/jonesforth.S +++ b/jonesforth.S @@ -1,11 +1,11 @@ /* A sometimes minimal FORTH compiler and tutorial for Linux / i386 systems. -*- asm -*- By Richard W.M. Jones http://annexia.org/forth This is PUBLIC DOMAIN (see public domain release statement below). - $Id: jonesforth.S,v 1.28 2007-09-24 00:18:19 rich Exp $ + $Id: jonesforth.S,v 1.33 2007-09-26 22:47:49 rich Exp $ gcc -m32 -nostdlib -static -Wl,-Ttext,0 -o jonesforth jonesforth.S */ - .set JONES_VERSION,28 + .set JONES_VERSION,32 /* INTRODUCTION ---------------------------------------------------------------------- @@ -45,7 +45,8 @@ over every other element in a list of numbers? You can add it to the language. What about an operator which pulls in variables directly from a configuration file and makes them available as FORTH variables? Or how about adding Makefile-like dependencies to - the language? No problem in FORTH. This concept isn't common in programming languages, + the language? No problem in FORTH. How about modifying the FORTH compiler to allow + complex inlining strategies -- simple. This concept isn't common in programming languages, but it has a name (in fact two names): "macros" (by which I mean LISP-style macros, not the lame C preprocessor) and "domain specific languages" (DSLs). @@ -74,8 +75,14 @@ This code draws heavily on the design of LINA FORTH (http://home.hccnet.nl/a.w.m.van.der.horst/lina.html) by Albert van der Horst. Any similarities in the code are probably not accidental. - Also I used this document (http://ftp.funet.fi/pub/doc/IOCCC/1992/buzzard.2.design) which really - defies easy explanation. + Some parts of this FORTH are also based on this IOCCC entry from 1992: + http://ftp.funet.fi/pub/doc/IOCCC/1992/buzzard.2.design. + I was very proud when Sean Barrett, the original author of the IOCCC entry, commented in the LtU thread + http://lambda-the-ultimate.org/node/2452#comment-36818 about this FORTH. + + And finally I'd like to acknowledge the (possibly forgotten?) authors of ARTIC FORTH because their + original program which I still have on original cassette tape kept nagging away at me all these years. + http://en.wikipedia.org/wiki/Artic_Software PUBLIC DOMAIN ---------------------------------------------------------------------- @@ -600,7 +607,7 @@ bufftop: /* BUILT-IN WORDS ---------------------------------------------------------------------- - Remember our dictionary entries (headers). Let's bring those together with the codeword + Remember our dictionary entries (headers)? Let's bring those together with the codeword and data words to see how : DOUBLE DUP + ; really looks in memory. pointer to previous word @@ -783,20 +790,18 @@ code_\label : // assembler code follows push %eax // ignore overflow NEXT - defcode "/",1,,DIV - xor %edx,%edx - pop %ebx - pop %eax - idivl %ebx - push %eax // push quotient - NEXT +/* + In this FORTH, only /MOD is primitive. Later we will define the / and MOD words in + terms of the primitive /MOD. +*/ - defcode "MOD",3,,MOD + defcode "/MOD",4,,DIVMOD xor %edx,%edx pop %ebx pop %eax idivl %ebx push %edx // push remainder + push %eax // push quotient NEXT defcode "=",1,,EQU // top two words are equal? @@ -877,7 +882,7 @@ code_\label : // assembler code follows 1: pushl $1 NEXT - defcode "0<",2,,ZLT + defcode "0<",2,,ZLT // comparisons with 0 pop %eax test %eax,%eax jl 1f @@ -913,22 +918,22 @@ code_\label : // assembler code follows 1: pushl $1 NEXT - defcode "AND",3,,AND + defcode "AND",3,,AND // bitwise AND pop %eax andl %eax,(%esp) NEXT - defcode "OR",2,,OR + defcode "OR",2,,OR // bitwise OR pop %eax orl %eax,(%esp) NEXT - defcode "XOR",3,,XOR + defcode "XOR",3,,XOR // bitwise XOR pop %eax xorl %eax,(%esp) NEXT - defcode "INVERT",6,,INVERT // this is the FORTH bitwise "NOT" function + defcode "INVERT",6,,INVERT // this is the FORTH bitwise "NOT" function (cf. NEGATE) notl (%esp) NEXT @@ -977,7 +982,7 @@ code_\label : // assembler code follows | addr of EXIT | +------------------+ - And NEXT just completes the job by, well in this case just by calling DOUBLE again :-) + And NEXT just completes the job by, well, in this case just by calling DOUBLE again :-) LITERALS ---------------------------------------------------------------------- @@ -1045,18 +1050,20 @@ code_\label : // assembler code follows subl %eax,(%ebx) // add it NEXT -/* ! and @ (STORE and FETCH) store 32-bit words. It's also useful to be able to read and write bytes. - * I don't know whether FORTH has these words, so I invented my own, called !b and @b. - * Byte-oriented operations only work on architectures which permit them (i386 is one of those). - * UPDATE: writing a byte to the dictionary pointer is called C, in FORTH. +/* + ! and @ (STORE and FETCH) store 32-bit words. It's also useful to be able to read and write bytes + so we also define standard words C@ and C!. + + Byte-oriented operations only work on architectures which permit them (i386 is one of those). */ - defcode "!b",2,,STOREBYTE + + defcode "C!",2,,STOREBYTE pop %ebx // address to store at pop %eax // data to store there movb %al,(%ebx) // store it NEXT - defcode "@b",2,,FETCHBYTE + defcode "C@",2,,FETCHBYTE pop %ebx // address to fetch xor %eax,%eax movb (%ebx),%al // fetch it @@ -1123,7 +1130,7 @@ var_\name : DOCOL Pointer to DOCOL. F_IMMED The IMMEDIATE flag's actual value. F_HIDDEN The HIDDEN flag's actual value. - F_LENMASK The length mask. + F_LENMASK The length mask in the flags/len byte. */ .macro defconst name, namelen, flags=0, label, value @@ -1339,36 +1346,9 @@ _WORD: 5: .space 32 /* - . (also called DOT) prints the top of the stack as an integer in the current BASE. -*/ - - defcode ".",1,,DOT - pop %eax // Get the number to print into %eax - call _DOT // Easier to do this recursively ... - NEXT -_DOT: - mov var_BASE,%ecx // Get current BASE -1: - cmp %ecx,%eax // %eax < BASE? If so jump to print immediately. - jb 2f - xor %edx,%edx // %edx:%eax / %ecx -> quotient %eax, remainder %edx - idivl %ecx - pushl %edx // Print quotient (top half) first ... - call _DOT - popl %eax // ... then loop to print remainder - jmp 1b -2: // %eax < BASE so print immediately. - movl $digits,%edx - addl %eax,%edx - movb (%edx),%al // Note top bits are already zero. - call _EMIT - ret - .section .rodata -digits: .ascii "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" - -/* - Almost the opposite of DOT (but not quite), SNUMBER parses a numeric string such as one returned - by WORD and pushes the number on the parameter stack. + As well as reading in words we'll need to read in numbers and for that we are using a function + called SNUMBER. This parses a numeric string such as one returned by WORD and pushes the + number on the parameter stack. This function does absolutely no error checking, and in particular the length of the string must be >= 1 bytes, and should contain only digits 0-9. If it doesn't you'll get random results.