From f5645e2de53ff5935195cdb086d46feda4eff705 Mon Sep 17 00:00:00 2001 From: rich Date: Wed, 27 Sep 2006 12:07:07 +0000 Subject: [PATCH 1/1] 'Ancient' generation in garbage collector. --- .cvsignore | 8 ++ .depend | 4 + Makefile | 123 +++++++++++++++++++++++ ancient.ml | 11 +++ ancient.mli | 31 ++++++ ancient_c.c | 296 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ test_ancient.ml | 100 +++++++++++++++++++ 7 files changed, 573 insertions(+) create mode 100644 .cvsignore create mode 100644 .depend create mode 100644 Makefile create mode 100644 ancient.ml create mode 100644 ancient.mli create mode 100644 ancient_c.c create mode 100644 test_ancient.ml diff --git a/.cvsignore b/.cvsignore new file mode 100644 index 0000000..aaded14 --- /dev/null +++ b/.cvsignore @@ -0,0 +1,8 @@ +*.cmi +*.cmo +*.cmx +*.cma +*.cmxa +*.opt +test_ancient.out1 +test_ancient.out2 diff --git a/.depend b/.depend new file mode 100644 index 0000000..648baca --- /dev/null +++ b/.depend @@ -0,0 +1,4 @@ +ancient.cmo: ancient.cmi +ancient.cmx: ancient.cmi +test_ancient.cmo: ancient.cmi +test_ancient.cmx: ancient.cmx diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e9c818c --- /dev/null +++ b/Makefile @@ -0,0 +1,123 @@ +# Mark objects as 'ancient' so they are taken out of the OCaml heap. +# $Id: Makefile,v 1.1 2006-09-27 12:07:07 rich Exp $ + +CC := gcc +CFLAGS := -g -fPIC -Wall -Werror + +OCAMLCFLAGS := -g +OCAMLCPACKAGES := +OCAMLCLIBS := + +OCAMLOPTFLAGS := +OCAMLOPTPACKAGES := $(OCAMLCPACKAGES) +OCAMLOPTLIBS := + +OCAMLDOCFLAGS := -html -stars -sort $(OCAMLCPACKAGES) + +all: ancient.cma ancient.cmxa test_ancient.opt META + +ancient.cma: ancient.cmo ancient_c.o + ocamlmklib -o ancient $^ + +ancient.cmxa: ancient.cmx ancient_c.o + ocamlmklib -o ancient $^ + +test_ancient.opt: ancient.cmxa test_ancient.cmx + LIBRARY_PATH=.:$$LIBRARY_PATH \ + ocamlfind ocamlopt $(OCAMLOPTFLAGS) $(OCAMLOPTLIBS) -o $@ $^ + +# Common rules for building OCaml objects. + +.mli.cmi: + ocamlfind ocamlc $(OCAMLCFLAGS) $(OCAMLCINCS) $(OCAMLCPACKAGES) -c $< +.ml.cmo: + ocamlfind ocamlc $(OCAMLCFLAGS) $(OCAMLCINCS) $(OCAMLCPACKAGES) -c $< +.ml.cmx: + ocamlfind ocamlopt $(OCAMLOPTFLAGS) $(OCAMLOPTINCS) $(OCAMLOPTPACKAGES) -c $< + +# Findlib META file. + +META: META.in Makefile.config + $(SED) -e 's/@PACKAGE@/$(PACKAGE)/' \ + -e 's/@VERSION@/$(VERSION)/' \ + < $< > $@ + +# Clean. + +clean: + rm -f *.cmi *.cmo *.cmx *.cma *.cmxa *.o *.a *.so *~ core META *.opt + +# Dependencies. + +depend: .depend + +.depend: $(wildcard *.mli) $(wildcard *.ml) + rm -f .depend + ocamldep $^ > $@ + +ifeq ($(wildcard .depend),.depend) +include .depend +endif + +# Install. + +install: + rm -rf $(DESTDIR)$(OCAMLLIBDIR)/ancient + install -c -m 0755 -d $(DESTDIR)$(OCAMLLIBDIR)/weblogs + install -c -m 0644 *.cmi *.mli *.cma *.cmxa *.a META \ + $(DESTDIR)$(OCAMLLIBDIR)/ancient + +# Distribution. + +dist: + $(MAKE) check-manifest + rm -rf $(PACKAGE)-$(VERSION) + mkdir $(PACKAGE)-$(VERSION) + tar -cf - -T MANIFEST | tar -C $(PACKAGE)-$(VERSION) -xf - + tar zcf $(PACKAGE)-$(VERSION).tar.gz $(PACKAGE)-$(VERSION) + rm -rf $(PACKAGE)-$(VERSION) + ls -l $(PACKAGE)-$(VERSION).tar.gz + +check-manifest: + @for d in `find -type d -name CVS | grep -v '^\./debian/'`; \ + do \ + b=`dirname $$d`/; \ + awk -F/ '$$1 != "D" {print $$2}' $$d/Entries | \ + sed -e "s|^|$$b|" -e "s|^\./||"; \ + done | sort > .check-manifest; \ + sort MANIFEST > .orig-manifest; \ + diff -u .orig-manifest .check-manifest; rv=$$?; \ + rm -f .orig-manifest .check-manifest; \ + exit $$rv + +# Debian packages. + +dpkg: + @if [ 0 != `cvs -q update | wc -l` ]; then \ + echo Please commit all changes to CVS first.; \ + exit 1; \ + fi + $(MAKE) dist + rm -rf /tmp/dbuild + mkdir /tmp/dbuild + cp $(PACKAGE)-$(VERSION).tar.gz \ + /tmp/dbuild/$(PACKAGE)_$(VERSION).orig.tar.gz + export CVSROOT=`cat CVS/Root`; \ + cd /tmp/dbuild && \ + cvs export \ + -d $(PACKAGE)-$(VERSION) \ + -D now merjis/freeware/ancient + cd /tmp/dbuild/$(PACKAGE)-$(VERSION) && dpkg-buildpackage -rfakeroot + rm -rf /tmp/dbuild/$(PACKAGE)-$(VERSION) + ls -l /tmp/dbuild + +# Developer documentation (in html/ subdirectory). + +doc: + rm -rf html + mkdir html + -ocamlfind ocamldoc $(OCAMLDOCFLAGS) -d html ancient.ml{i,} + +.PHONY: depend dist check-manifest dpkg doc + +.SUFFIXES: .cmo .cmi .cmx .ml .mli diff --git a/ancient.ml b/ancient.ml new file mode 100644 index 0000000..09e3ee2 --- /dev/null +++ b/ancient.ml @@ -0,0 +1,11 @@ +(* Mark objects as 'ancient' so they are taken out of the OCaml heap. + * $Id: ancient.ml,v 1.1 2006-09-27 12:07:07 rich Exp $ + *) + +type 'a ancient + +external mark : 'a -> 'a ancient = "ancient_mark" + +external follow : 'a ancient -> 'a = "ancient_follow" + +external delete : 'a ancient -> unit = "ancient_delete" diff --git a/ancient.mli b/ancient.mli new file mode 100644 index 0000000..9f574db --- /dev/null +++ b/ancient.mli @@ -0,0 +1,31 @@ +(** Mark objects as 'ancient' so they are taken out of the OCaml heap. + * $Id: ancient.mli,v 1.1 2006-09-27 12:07:07 rich Exp $ + *) + +type 'a ancient + +val mark : 'a -> 'a ancient + (** [mark obj] copies [obj] and all objects referenced + * by [obj] out of the OCaml heap. It returns the proxy + * for [obj]. + * + * The copy of [obj] accessed through the proxy MUST NOT be mutated. + * + * If [obj] represents a large object, then it is a good + * idea to call {!Gc.compact} after marking to recover the + * OCaml heap memory. + *) + +val follow : 'a ancient -> 'a + (** Follow proxy link to out of heap object. + * + * @raise [Invalid_argument "deleted"] if the object has been deleted. + *) + +val delete : 'a ancient -> unit + (** [delete obj] deletes ancient object [obj]. + * + * @raise [Invalid_argument "deleted"] if the object has been deleted. + * + * Forgetting to delete an ancient object results in a memory leak. + *) diff --git a/ancient_c.c b/ancient_c.c new file mode 100644 index 0000000..8a73c0c --- /dev/null +++ b/ancient_c.c @@ -0,0 +1,296 @@ +/* Mark objects as 'ancient' so they are taken out of the OCaml heap. + * $Id: ancient_c.c,v 1.1 2006-09-27 12:07:07 rich Exp $ + */ + +#include +#include + +#include +#include +#include +#include +#include + +// From byterun/misc.h: +typedef char * addr; + +// From byterun/major_gc.h: +#ifdef __alpha +typedef int page_table_entry; +#else +typedef char page_table_entry; +#endif +CAMLextern char *caml_heap_start; +CAMLextern char *caml_heap_end; +CAMLextern page_table_entry *caml_page_table; +extern asize_t caml_page_low, caml_page_high; + +#define In_heap 1 +#define Not_in_heap 0 +#define Page(p) ((uintnat) (p) >> Page_log) +#define Is_in_heap(p) \ + (assert (Is_block ((value) (p))), \ + (addr)(p) >= (addr)caml_heap_start && (addr)(p) < (addr)caml_heap_end \ + && caml_page_table [Page (p)]) + +// Area is an expandable buffer, allocated on the C heap. +typedef struct area { + void *ptr; // Start of area. + size_t n; // Current position. + size_t size; // Allocated size. +} area; + +static inline void +area_init (area *a) +{ + a->ptr = 0; + a->n = + a->size = 0; +} + +static inline int +area_append (area *a, const void *obj, size_t size) +{ + while (a->n + size > a->size) { + if (a->size == 0) a->size = 256; else a->size <<= 1; + a->ptr = realloc (a->ptr, a->size); + if (a->ptr == 0) return -1; // Out of memory. + } + memcpy (a->ptr + a->n, obj, size); + a->n += size; + return 0; +} + +static inline void +area_shrink (area *a) +{ + if (a->n != a->size) { + a->size = a->n; + a->ptr = realloc (a->ptr, a->size); + assert (a->ptr); // Getting smaller, so shouldn't really fail. + } +} + +static inline void +area_free (area *a) +{ + free (a->ptr); + a->n = + a->size = 0; +} + +struct restore_item { + char *header; + value field_zero; +}; + +// When a block is visited, we overwrite the header with all 1's. +// This is not quite an impossible value - one could imagine an +// enormous custom block where the header could take on this +// value. (XXX) +static header_t visited = (unsigned long) -1; + +// The general plan here: +// +// 1. Starting at [obj], copy it to our out-of-heap memory area +// defined by [ptr]. +// 2. Recursively visit subnodes of [obj] and do the same. +// 3. As we copy each object, we avoid circularity by setting that +// object's header to a special 'visited' value. However since these +// are objects in the Caml heap we have to restore the original +// headers at the end, which is the purpose of the [restore] area. +// +// XXX Recursive function will probably fall over once we apply it to +// large, deeply recursive structures. Should be replaced with something +// iterative. +static size_t +mark (value obj, area *ptr, area *restore) +{ + char *header = Hp_val (obj); + assert (Wosize_hp (header) > 0); // Always true? (XXX) + + // We can't handle out-of-heap objects. + // XXX Since someone might try to mark an ancient object, they + // might get this error, so we should try to do better here. + assert (Is_in_heap (obj)); + + // If we've already visited this object, just return its offset + // in the out-of-heap memory. + if (memcmp (header, &visited, sizeof visited) == 0) + return (Long_val (Field (obj, 0))); + + // Offset where we will store this object in the out-of-heap memory. + size_t offset = ptr->n; + + // Copy the object out of the OCaml heap. + size_t bytes = Bhsize_hp (header); + if (area_append (ptr, header, bytes) == -1) + return -1; // Error out of memory. + + // Scan the fields looking for pointers to blocks. + int can_scan = Tag_val (obj) < No_scan_tag; + if (can_scan) { + mlsize_t nr_words = Wosize_hp (header); + mlsize_t i; + + for (i = 0; i < nr_words; ++i) { + value field = Field (obj, i); + + if (Is_block (field)) { + size_t field_offset = mark (field, ptr, restore); + if (field_offset == -1) return -1; // Propagate out of memory errors. + + // Since the recursive call to mark above can reallocate the + // area, we need to recompute these each time round the loop. + char *obj_copy_header = ptr->ptr + offset; + value obj_copy = Val_hp (obj_copy_header); + + // Don't store absolute pointers yet because realloc will + // move the memory around. Store a fake pointer instead. + // We'll fix up these fake pointers afterwards. + Field (obj_copy, i) = (field_offset + sizeof (header_t)) << 2; + } + } + } + + // Mark this object as having been "visited", but keep track of + // what was there before so it can be restored. We also need to + // record the offset. + // Observations: + // (1) What was in the header before is kept in the out-of-heap + // copy, so we don't explicitly need to remember that. + // (2) We can keep the offset in the zeroth field, but since + // the code above might have modified the copy, we need to remember + // what was in that field before. + // (3) We can overwrite the header with all 1's to indicate that + // we've visited (but see notes on 'static header_t visited' above). + // (4) All objects in OCaml are at least one word long (we hope!). + struct restore_item restore_item; + restore_item.header = header; + restore_item.field_zero = Field (obj, 0); + area_append (restore, &restore_item, sizeof restore_item); + + memcpy (header, (void *)&visited, sizeof visited); + Field (obj, 0) = Val_long (offset); + + return offset; +} + +// See comments immediately above. +static void +do_restore (area *ptr, area *restore) +{ + mlsize_t i; + for (i = 0; i < restore->n; i += sizeof (struct restore_item)) + { + struct restore_item *restore_item = + (struct restore_item *)(restore->ptr + i); + assert (memcmp (restore_item->header, &visited, sizeof visited) == 0); + + value obj = Val_hp (restore_item->header); + size_t offset = Long_val (Field (obj, 0)); + + char *obj_copy_header = ptr->ptr + offset; + //value obj_copy = Val_hp (obj_copy_header); + + // Restore the original header. + memcpy (restore_item->header, obj_copy_header, sizeof visited); + + // Restore the original zeroth field. + Field (obj, 0) = restore_item->field_zero; + } +} + +CAMLprim value +ancient_mark (value obj) +{ + CAMLparam1 (obj); + CAMLlocal1 (proxy); + + area ptr; // This will be the out of heap area. + area_init (&ptr); + area restore; // Headers to be fixed up after. + area_init (&restore); + + if (mark (obj, &ptr, &restore) == -1) { + // Ran out of memory. Recover and throw an exception. + do_restore (&ptr, &restore); + area_free (&restore); + area_free (&ptr); + caml_failwith ("out of memory"); + } + area_shrink (&ptr); + + // Restore Caml heap structures. + do_restore (&ptr, &restore); + area_free (&restore); + + // Update all fake pointers in the out of heap area to make them real + // pointers. + size_t i; + for (i = 0; i < ptr.n; ) + { + // Out of heap area is: header, fields, header, fields, ... + // The header of each object tells us how many fields it has. + char *header = ptr.ptr + i; + size_t bytes = Bhsize_hp (header); + value obj = Val_hp (header); + + int can_scan = Tag_val (obj) < No_scan_tag; + if (can_scan) { + mlsize_t nr_words = Wosize_hp (header); + mlsize_t j; + + for (j = 0; j < nr_words; ++j) { + value field = Field (obj, j); + + if (Is_block (field)) { + size_t field_offset = field >> 2; + void *field_ptr = ptr.ptr + field_offset; + Field (obj, j) = (value) field_ptr; + } + } + } + + i += bytes; // Skip to next object. + } + + // Replace obj with a proxy. + proxy = caml_alloc (1, Abstract_tag); + Field (proxy, 0) = (value) ptr.ptr; + + CAMLreturn (proxy); +} + +CAMLprim value +ancient_follow (value obj) +{ + CAMLparam1 (obj); + CAMLlocal1 (v); + + v = Field (obj, 0); + if (Is_long (v)) caml_invalid_argument ("deleted"); + v = Val_hp (v); // v points to the header; make it point to the object. + + CAMLreturn (v); +} + +CAMLprim value +ancient_delete (value obj) +{ + CAMLparam1 (obj); + CAMLlocal1 (v); + + v = Field (obj, 0); + if (Is_long (v)) caml_invalid_argument ("deleted"); + + // Otherwise v is a pointer to the out of heap malloc'd object. + assert (!Is_in_heap (v)); + free ((void *) v); + + // Replace the proxy (a pointer) with an int 0 so we know it's + // been deleted in future. + Field (obj, 0) = Val_long (0); + + CAMLreturn (Val_unit); +} diff --git a/test_ancient.ml b/test_ancient.ml new file mode 100644 index 0000000..7d68961 --- /dev/null +++ b/test_ancient.ml @@ -0,0 +1,100 @@ +(* Very basic tests of Ancient module. + * $Id: test_ancient.ml,v 1.1 2006-09-27 12:07:07 rich Exp $ + *) + +open Printf + +type item = { + name : string; + dob : string; + address : string; + phone : string option; + marital_status : marital_status; + id : int; +} +and marital_status = Single | Married | Divorced + +let gc_compact () = + eprintf "compacting ... %!"; + Gc.compact (); + let stat = Gc.stat () in + let live_words = stat.Gc.live_words in + eprintf "live words = %d (%d MB)\n%!" + live_words (live_words * 8 / 1024 / 1024) + +let random_string () = + let n = 1 + Random.int 20 in + let str = String.create n in + for i = 0 to n-1 do + let c = 97 + Random.int 26 in + let c = Char.chr c in + str.[i] <- c + done; + str + +let random_string_option () = + if Random.int 3 = 1 then None else Some (random_string ()) + +let random_marital_status () = + match Random.int 3 with + | 0 -> Single + | 1 -> Married + | _ -> Divorced + +let rec output_data chan data = + let n = Array.length data in + for i = 0 to n-1; do + output_item chan data.(i) + done + +and output_item chan item = + fprintf chan "id = %d\n%!" item.id; + fprintf chan "\tname = %s\n%!" item.name; + fprintf chan "\tdob = %s\n%!" item.dob; + fprintf chan "\taddress = %s\n%!" item.address; + fprintf chan "\tphone = %s\n%!" + (match item.phone with + | None -> "None" + | Some str -> "Some " ^ str); + fprintf chan "\tmarital_status = %s\n%!" + (string_of_marital_status item.marital_status) + +and string_of_marital_status status = + match status with + | Single -> "Single" + | Married -> "Married" + | Divorced -> "Divorced" + +let () = + eprintf "Before allocating data on OCaml heap ...\n"; + gc_compact (); + let data = + Array.init 100000 ( + fun id -> + { id = id; + name = random_string (); + dob = random_string (); + address = random_string (); + phone = random_string_option (); + marital_status = random_marital_status () } + ) in + eprintf "After allocating data on OCaml heap ...\n"; + gc_compact (); + + let chan = open_out "test_ancient.out1" in + output_data chan data; + close_out chan; + + let data = Ancient.mark data in + eprintf "After marking data as ancient ...\n"; + gc_compact (); + + let data = Ancient.follow data in + eprintf "Number of elements in array = %d\n" (Array.length data); + + let chan = open_out "test_ancient.out2" in + output_data chan data; + close_out chan; + + eprintf "After writing out ancient data ...\n"; + gc_compact () -- 1.8.3.1