--- /dev/null
+# Mark objects as 'ancient' so they are taken out of the OCaml heap.
+# $Id: Makefile,v 1.1 2006-09-27 12:07:07 rich Exp $
+
+CC := gcc
+CFLAGS := -g -fPIC -Wall -Werror
+
+OCAMLCFLAGS := -g
+OCAMLCPACKAGES :=
+OCAMLCLIBS :=
+
+OCAMLOPTFLAGS :=
+OCAMLOPTPACKAGES := $(OCAMLCPACKAGES)
+OCAMLOPTLIBS :=
+
+OCAMLDOCFLAGS := -html -stars -sort $(OCAMLCPACKAGES)
+
+all: ancient.cma ancient.cmxa test_ancient.opt META
+
+ancient.cma: ancient.cmo ancient_c.o
+ ocamlmklib -o ancient $^
+
+ancient.cmxa: ancient.cmx ancient_c.o
+ ocamlmklib -o ancient $^
+
+test_ancient.opt: ancient.cmxa test_ancient.cmx
+ LIBRARY_PATH=.:$$LIBRARY_PATH \
+ ocamlfind ocamlopt $(OCAMLOPTFLAGS) $(OCAMLOPTLIBS) -o $@ $^
+
+# Common rules for building OCaml objects.
+
+.mli.cmi:
+ ocamlfind ocamlc $(OCAMLCFLAGS) $(OCAMLCINCS) $(OCAMLCPACKAGES) -c $<
+.ml.cmo:
+ ocamlfind ocamlc $(OCAMLCFLAGS) $(OCAMLCINCS) $(OCAMLCPACKAGES) -c $<
+.ml.cmx:
+ ocamlfind ocamlopt $(OCAMLOPTFLAGS) $(OCAMLOPTINCS) $(OCAMLOPTPACKAGES) -c $<
+
+# Findlib META file.
+
+META: META.in Makefile.config
+ $(SED) -e 's/@PACKAGE@/$(PACKAGE)/' \
+ -e 's/@VERSION@/$(VERSION)/' \
+ < $< > $@
+
+# Clean.
+
+clean:
+ rm -f *.cmi *.cmo *.cmx *.cma *.cmxa *.o *.a *.so *~ core META *.opt
+
+# Dependencies.
+
+depend: .depend
+
+.depend: $(wildcard *.mli) $(wildcard *.ml)
+ rm -f .depend
+ ocamldep $^ > $@
+
+ifeq ($(wildcard .depend),.depend)
+include .depend
+endif
+
+# Install.
+
+install:
+ rm -rf $(DESTDIR)$(OCAMLLIBDIR)/ancient
+ install -c -m 0755 -d $(DESTDIR)$(OCAMLLIBDIR)/weblogs
+ install -c -m 0644 *.cmi *.mli *.cma *.cmxa *.a META \
+ $(DESTDIR)$(OCAMLLIBDIR)/ancient
+
+# Distribution.
+
+dist:
+ $(MAKE) check-manifest
+ rm -rf $(PACKAGE)-$(VERSION)
+ mkdir $(PACKAGE)-$(VERSION)
+ tar -cf - -T MANIFEST | tar -C $(PACKAGE)-$(VERSION) -xf -
+ tar zcf $(PACKAGE)-$(VERSION).tar.gz $(PACKAGE)-$(VERSION)
+ rm -rf $(PACKAGE)-$(VERSION)
+ ls -l $(PACKAGE)-$(VERSION).tar.gz
+
+check-manifest:
+ @for d in `find -type d -name CVS | grep -v '^\./debian/'`; \
+ do \
+ b=`dirname $$d`/; \
+ awk -F/ '$$1 != "D" {print $$2}' $$d/Entries | \
+ sed -e "s|^|$$b|" -e "s|^\./||"; \
+ done | sort > .check-manifest; \
+ sort MANIFEST > .orig-manifest; \
+ diff -u .orig-manifest .check-manifest; rv=$$?; \
+ rm -f .orig-manifest .check-manifest; \
+ exit $$rv
+
+# Debian packages.
+
+dpkg:
+ @if [ 0 != `cvs -q update | wc -l` ]; then \
+ echo Please commit all changes to CVS first.; \
+ exit 1; \
+ fi
+ $(MAKE) dist
+ rm -rf /tmp/dbuild
+ mkdir /tmp/dbuild
+ cp $(PACKAGE)-$(VERSION).tar.gz \
+ /tmp/dbuild/$(PACKAGE)_$(VERSION).orig.tar.gz
+ export CVSROOT=`cat CVS/Root`; \
+ cd /tmp/dbuild && \
+ cvs export \
+ -d $(PACKAGE)-$(VERSION) \
+ -D now merjis/freeware/ancient
+ cd /tmp/dbuild/$(PACKAGE)-$(VERSION) && dpkg-buildpackage -rfakeroot
+ rm -rf /tmp/dbuild/$(PACKAGE)-$(VERSION)
+ ls -l /tmp/dbuild
+
+# Developer documentation (in html/ subdirectory).
+
+doc:
+ rm -rf html
+ mkdir html
+ -ocamlfind ocamldoc $(OCAMLDOCFLAGS) -d html ancient.ml{i,}
+
+.PHONY: depend dist check-manifest dpkg doc
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli
--- /dev/null
+/* Mark objects as 'ancient' so they are taken out of the OCaml heap.
+ * $Id: ancient_c.c,v 1.1 2006-09-27 12:07:07 rich Exp $
+ */
+
+#include <string.h>
+#include <assert.h>
+
+#include <caml/config.h>
+#include <caml/memory.h>
+#include <caml/alloc.h>
+#include <caml/mlvalues.h>
+#include <caml/fail.h>
+
+// From byterun/misc.h:
+typedef char * addr;
+
+// From byterun/major_gc.h:
+#ifdef __alpha
+typedef int page_table_entry;
+#else
+typedef char page_table_entry;
+#endif
+CAMLextern char *caml_heap_start;
+CAMLextern char *caml_heap_end;
+CAMLextern page_table_entry *caml_page_table;
+extern asize_t caml_page_low, caml_page_high;
+
+#define In_heap 1
+#define Not_in_heap 0
+#define Page(p) ((uintnat) (p) >> Page_log)
+#define Is_in_heap(p) \
+ (assert (Is_block ((value) (p))), \
+ (addr)(p) >= (addr)caml_heap_start && (addr)(p) < (addr)caml_heap_end \
+ && caml_page_table [Page (p)])
+
+// Area is an expandable buffer, allocated on the C heap.
+typedef struct area {
+ void *ptr; // Start of area.
+ size_t n; // Current position.
+ size_t size; // Allocated size.
+} area;
+
+static inline void
+area_init (area *a)
+{
+ a->ptr = 0;
+ a->n =
+ a->size = 0;
+}
+
+static inline int
+area_append (area *a, const void *obj, size_t size)
+{
+ while (a->n + size > a->size) {
+ if (a->size == 0) a->size = 256; else a->size <<= 1;
+ a->ptr = realloc (a->ptr, a->size);
+ if (a->ptr == 0) return -1; // Out of memory.
+ }
+ memcpy (a->ptr + a->n, obj, size);
+ a->n += size;
+ return 0;
+}
+
+static inline void
+area_shrink (area *a)
+{
+ if (a->n != a->size) {
+ a->size = a->n;
+ a->ptr = realloc (a->ptr, a->size);
+ assert (a->ptr); // Getting smaller, so shouldn't really fail.
+ }
+}
+
+static inline void
+area_free (area *a)
+{
+ free (a->ptr);
+ a->n =
+ a->size = 0;
+}
+
+struct restore_item {
+ char *header;
+ value field_zero;
+};
+
+// When a block is visited, we overwrite the header with all 1's.
+// This is not quite an impossible value - one could imagine an
+// enormous custom block where the header could take on this
+// value. (XXX)
+static header_t visited = (unsigned long) -1;
+
+// The general plan here:
+//
+// 1. Starting at [obj], copy it to our out-of-heap memory area
+// defined by [ptr].
+// 2. Recursively visit subnodes of [obj] and do the same.
+// 3. As we copy each object, we avoid circularity by setting that
+// object's header to a special 'visited' value. However since these
+// are objects in the Caml heap we have to restore the original
+// headers at the end, which is the purpose of the [restore] area.
+//
+// XXX Recursive function will probably fall over once we apply it to
+// large, deeply recursive structures. Should be replaced with something
+// iterative.
+static size_t
+mark (value obj, area *ptr, area *restore)
+{
+ char *header = Hp_val (obj);
+ assert (Wosize_hp (header) > 0); // Always true? (XXX)
+
+ // We can't handle out-of-heap objects.
+ // XXX Since someone might try to mark an ancient object, they
+ // might get this error, so we should try to do better here.
+ assert (Is_in_heap (obj));
+
+ // If we've already visited this object, just return its offset
+ // in the out-of-heap memory.
+ if (memcmp (header, &visited, sizeof visited) == 0)
+ return (Long_val (Field (obj, 0)));
+
+ // Offset where we will store this object in the out-of-heap memory.
+ size_t offset = ptr->n;
+
+ // Copy the object out of the OCaml heap.
+ size_t bytes = Bhsize_hp (header);
+ if (area_append (ptr, header, bytes) == -1)
+ return -1; // Error out of memory.
+
+ // Scan the fields looking for pointers to blocks.
+ int can_scan = Tag_val (obj) < No_scan_tag;
+ if (can_scan) {
+ mlsize_t nr_words = Wosize_hp (header);
+ mlsize_t i;
+
+ for (i = 0; i < nr_words; ++i) {
+ value field = Field (obj, i);
+
+ if (Is_block (field)) {
+ size_t field_offset = mark (field, ptr, restore);
+ if (field_offset == -1) return -1; // Propagate out of memory errors.
+
+ // Since the recursive call to mark above can reallocate the
+ // area, we need to recompute these each time round the loop.
+ char *obj_copy_header = ptr->ptr + offset;
+ value obj_copy = Val_hp (obj_copy_header);
+
+ // Don't store absolute pointers yet because realloc will
+ // move the memory around. Store a fake pointer instead.
+ // We'll fix up these fake pointers afterwards.
+ Field (obj_copy, i) = (field_offset + sizeof (header_t)) << 2;
+ }
+ }
+ }
+
+ // Mark this object as having been "visited", but keep track of
+ // what was there before so it can be restored. We also need to
+ // record the offset.
+ // Observations:
+ // (1) What was in the header before is kept in the out-of-heap
+ // copy, so we don't explicitly need to remember that.
+ // (2) We can keep the offset in the zeroth field, but since
+ // the code above might have modified the copy, we need to remember
+ // what was in that field before.
+ // (3) We can overwrite the header with all 1's to indicate that
+ // we've visited (but see notes on 'static header_t visited' above).
+ // (4) All objects in OCaml are at least one word long (we hope!).
+ struct restore_item restore_item;
+ restore_item.header = header;
+ restore_item.field_zero = Field (obj, 0);
+ area_append (restore, &restore_item, sizeof restore_item);
+
+ memcpy (header, (void *)&visited, sizeof visited);
+ Field (obj, 0) = Val_long (offset);
+
+ return offset;
+}
+
+// See comments immediately above.
+static void
+do_restore (area *ptr, area *restore)
+{
+ mlsize_t i;
+ for (i = 0; i < restore->n; i += sizeof (struct restore_item))
+ {
+ struct restore_item *restore_item =
+ (struct restore_item *)(restore->ptr + i);
+ assert (memcmp (restore_item->header, &visited, sizeof visited) == 0);
+
+ value obj = Val_hp (restore_item->header);
+ size_t offset = Long_val (Field (obj, 0));
+
+ char *obj_copy_header = ptr->ptr + offset;
+ //value obj_copy = Val_hp (obj_copy_header);
+
+ // Restore the original header.
+ memcpy (restore_item->header, obj_copy_header, sizeof visited);
+
+ // Restore the original zeroth field.
+ Field (obj, 0) = restore_item->field_zero;
+ }
+}
+
+CAMLprim value
+ancient_mark (value obj)
+{
+ CAMLparam1 (obj);
+ CAMLlocal1 (proxy);
+
+ area ptr; // This will be the out of heap area.
+ area_init (&ptr);
+ area restore; // Headers to be fixed up after.
+ area_init (&restore);
+
+ if (mark (obj, &ptr, &restore) == -1) {
+ // Ran out of memory. Recover and throw an exception.
+ do_restore (&ptr, &restore);
+ area_free (&restore);
+ area_free (&ptr);
+ caml_failwith ("out of memory");
+ }
+ area_shrink (&ptr);
+
+ // Restore Caml heap structures.
+ do_restore (&ptr, &restore);
+ area_free (&restore);
+
+ // Update all fake pointers in the out of heap area to make them real
+ // pointers.
+ size_t i;
+ for (i = 0; i < ptr.n; )
+ {
+ // Out of heap area is: header, fields, header, fields, ...
+ // The header of each object tells us how many fields it has.
+ char *header = ptr.ptr + i;
+ size_t bytes = Bhsize_hp (header);
+ value obj = Val_hp (header);
+
+ int can_scan = Tag_val (obj) < No_scan_tag;
+ if (can_scan) {
+ mlsize_t nr_words = Wosize_hp (header);
+ mlsize_t j;
+
+ for (j = 0; j < nr_words; ++j) {
+ value field = Field (obj, j);
+
+ if (Is_block (field)) {
+ size_t field_offset = field >> 2;
+ void *field_ptr = ptr.ptr + field_offset;
+ Field (obj, j) = (value) field_ptr;
+ }
+ }
+ }
+
+ i += bytes; // Skip to next object.
+ }
+
+ // Replace obj with a proxy.
+ proxy = caml_alloc (1, Abstract_tag);
+ Field (proxy, 0) = (value) ptr.ptr;
+
+ CAMLreturn (proxy);
+}
+
+CAMLprim value
+ancient_follow (value obj)
+{
+ CAMLparam1 (obj);
+ CAMLlocal1 (v);
+
+ v = Field (obj, 0);
+ if (Is_long (v)) caml_invalid_argument ("deleted");
+ v = Val_hp (v); // v points to the header; make it point to the object.
+
+ CAMLreturn (v);
+}
+
+CAMLprim value
+ancient_delete (value obj)
+{
+ CAMLparam1 (obj);
+ CAMLlocal1 (v);
+
+ v = Field (obj, 0);
+ if (Is_long (v)) caml_invalid_argument ("deleted");
+
+ // Otherwise v is a pointer to the out of heap malloc'd object.
+ assert (!Is_in_heap (v));
+ free ((void *) v);
+
+ // Replace the proxy (a pointer) with an int 0 so we know it's
+ // been deleted in future.
+ Field (obj, 0) = Val_long (0);
+
+ CAMLreturn (Val_unit);
+}
--- /dev/null
+(* Very basic tests of Ancient module.
+ * $Id: test_ancient.ml,v 1.1 2006-09-27 12:07:07 rich Exp $
+ *)
+
+open Printf
+
+type item = {
+ name : string;
+ dob : string;
+ address : string;
+ phone : string option;
+ marital_status : marital_status;
+ id : int;
+}
+and marital_status = Single | Married | Divorced
+
+let gc_compact () =
+ eprintf "compacting ... %!";
+ Gc.compact ();
+ let stat = Gc.stat () in
+ let live_words = stat.Gc.live_words in
+ eprintf "live words = %d (%d MB)\n%!"
+ live_words (live_words * 8 / 1024 / 1024)
+
+let random_string () =
+ let n = 1 + Random.int 20 in
+ let str = String.create n in
+ for i = 0 to n-1 do
+ let c = 97 + Random.int 26 in
+ let c = Char.chr c in
+ str.[i] <- c
+ done;
+ str
+
+let random_string_option () =
+ if Random.int 3 = 1 then None else Some (random_string ())
+
+let random_marital_status () =
+ match Random.int 3 with
+ | 0 -> Single
+ | 1 -> Married
+ | _ -> Divorced
+
+let rec output_data chan data =
+ let n = Array.length data in
+ for i = 0 to n-1; do
+ output_item chan data.(i)
+ done
+
+and output_item chan item =
+ fprintf chan "id = %d\n%!" item.id;
+ fprintf chan "\tname = %s\n%!" item.name;
+ fprintf chan "\tdob = %s\n%!" item.dob;
+ fprintf chan "\taddress = %s\n%!" item.address;
+ fprintf chan "\tphone = %s\n%!"
+ (match item.phone with
+ | None -> "None"
+ | Some str -> "Some " ^ str);
+ fprintf chan "\tmarital_status = %s\n%!"
+ (string_of_marital_status item.marital_status)
+
+and string_of_marital_status status =
+ match status with
+ | Single -> "Single"
+ | Married -> "Married"
+ | Divorced -> "Divorced"
+
+let () =
+ eprintf "Before allocating data on OCaml heap ...\n";
+ gc_compact ();
+ let data =
+ Array.init 100000 (
+ fun id ->
+ { id = id;
+ name = random_string ();
+ dob = random_string ();
+ address = random_string ();
+ phone = random_string_option ();
+ marital_status = random_marital_status () }
+ ) in
+ eprintf "After allocating data on OCaml heap ...\n";
+ gc_compact ();
+
+ let chan = open_out "test_ancient.out1" in
+ output_data chan data;
+ close_out chan;
+
+ let data = Ancient.mark data in
+ eprintf "After marking data as ancient ...\n";
+ gc_compact ();
+
+ let data = Ancient.follow data in
+ eprintf "Number of elements in array = %d\n" (Array.length data);
+
+ let chan = open_out "test_ancient.out2" in
+ output_data chan data;
+ close_out chan;
+
+ eprintf "After writing out ancient data ...\n";
+ gc_compact ()