X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=ancient_c.c;h=d5b11af7bcecb7316da087d55cec1b1116b1921a;hb=b082c2b2bd7990ed0cfaa07f18aae9597c5a6892;hp=8a73c0ce75592ec6b5725a2678f6c744e8720124;hpb=f5645e2de53ff5935195cdb086d46feda4eff705;p=ocaml-ancient.git diff --git a/ancient_c.c b/ancient_c.c index 8a73c0c..d5b11af 100644 --- a/ancient_c.c +++ b/ancient_c.c @@ -1,5 +1,4 @@ /* Mark objects as 'ancient' so they are taken out of the OCaml heap. - * $Id: ancient_c.c,v 1.1 2006-09-27 12:07:07 rich Exp $ */ #include @@ -11,9 +10,32 @@ #include #include +#include "mmalloc/mmalloc.h" + +// uintnat, intnat only appeared in Caml 3.09.x. +#if OCAML_VERSION_MAJOR == 3 && OCAML_VERSION_MINOR < 9 +typedef unsigned long uintnat; +typedef long intnat; +#endif + +/* We need the macro 'Is_in_young_or_heap' which tell us if a block + * address is within the OCaml minor or major heaps. This comes out + * of the guts of OCaml. + */ + +#if OCAML_VERSION_MAJOR == 3 && OCAML_VERSION_MINOR <= 10 +// Up to OCaml 3.10 there was a single contiguous page table. + // From byterun/misc.h: typedef char * addr; +// From byterun/minor_gc.h: +CAMLextern char *caml_young_start; +CAMLextern char *caml_young_end; +#define Is_young(val) \ + (assert (Is_block (val)), \ + (addr)(val) < (addr)caml_young_end && (addr)(val) > (addr)caml_young_start) + // From byterun/major_gc.h: #ifdef __alpha typedef int page_table_entry; @@ -23,7 +45,6 @@ typedef char page_table_entry; CAMLextern char *caml_heap_start; CAMLextern char *caml_heap_end; CAMLextern page_table_entry *caml_page_table; -extern asize_t caml_page_low, caml_page_high; #define In_heap 1 #define Not_in_heap 0 @@ -33,11 +54,57 @@ extern asize_t caml_page_low, caml_page_high; (addr)(p) >= (addr)caml_heap_start && (addr)(p) < (addr)caml_heap_end \ && caml_page_table [Page (p)]) +#define Is_in_heap_or_young(p) (Is_young (p) || Is_in_heap (p)) + +#else /* OCaml >= 3.11 */ + +// GC was rewritten in OCaml 3.11 so there is no longer a +// single contiguous page table. + +// From byterun/memory.h: +#define Not_in_heap 0 +#define In_heap 1 +#define In_young 2 +#define In_static_data 4 +#define In_code_area 8 + +#ifdef ARCH_SIXTYFOUR + +/* 64 bits: Represent page table as a sparse hash table */ +int caml_page_table_lookup(void * addr); +#define Classify_addr(a) (caml_page_table_lookup((void *)(a))) + +#else + +/* 32 bits: Represent page table as a 2-level array */ +#define Pagetable2_log 11 +#define Pagetable2_size (1 << Pagetable2_log) +#define Pagetable1_log (Page_log + Pagetable2_log) +#define Pagetable1_size (1 << (32 - Pagetable1_log)) +CAMLextern unsigned char * caml_page_table[Pagetable1_size]; + +#define Pagetable_index1(a) (((uintnat)(a)) >> Pagetable1_log) +#define Pagetable_index2(a) \ + ((((uintnat)(a)) >> Page_log) & (Pagetable2_size - 1)) +#define Classify_addr(a) \ + caml_page_table[Pagetable_index1(a)][Pagetable_index2(a)] + +#endif + +#define Is_in_heap_or_young(a) (Classify_addr(a) & (In_heap | In_young)) + +#endif /* OCaml >= 3.11 */ + // Area is an expandable buffer, allocated on the C heap. typedef struct area { void *ptr; // Start of area. size_t n; // Current position. size_t size; // Allocated size. + + // If this area requires custom realloc function, these will be non-null. + void *(*realloc)(void *data, void *ptr, size_t size); + void (*free)(void *data, void *ptr); + void *data; } area; static inline void @@ -46,6 +113,21 @@ area_init (area *a) a->ptr = 0; a->n = a->size = 0; + a->realloc = 0; + a->free = 0; + a->data = 0; +} + +static inline void +area_init_custom (area *a, + void *(*realloc)(void *data, void *ptr, size_t size), + void (*free)(void *data, void *ptr), + void *data) +{ + area_init (a); + a->realloc = realloc; + a->free = free; + a->data = data; } static inline int @@ -53,7 +135,10 @@ area_append (area *a, const void *obj, size_t size) { while (a->n + size > a->size) { if (a->size == 0) a->size = 256; else a->size <<= 1; - a->ptr = realloc (a->ptr, a->size); + a->ptr = + a->realloc + ? a->realloc (a->data, a->ptr, a->size) + : realloc (a->ptr, a->size); if (a->ptr == 0) return -1; // Out of memory. } memcpy (a->ptr + a->n, obj, size); @@ -66,7 +151,10 @@ area_shrink (area *a) { if (a->n != a->size) { a->size = a->n; - a->ptr = realloc (a->ptr, a->size); + a->ptr = + a->realloc + ? a->realloc (a->data, a->ptr, a->size) + : realloc (a->ptr, a->size); assert (a->ptr); // Getting smaller, so shouldn't really fail. } } @@ -74,7 +162,8 @@ area_shrink (area *a) static inline void area_free (area *a) { - free (a->ptr); + if (a->free) a->free (a->data, a->ptr); + else free (a->ptr); a->n = a->size = 0; } @@ -99,26 +188,33 @@ static header_t visited = (unsigned long) -1; // object's header to a special 'visited' value. However since these // are objects in the Caml heap we have to restore the original // headers at the end, which is the purpose of the [restore] area. +// 4. We use realloc to allocate the memory for the copy, but because +// the memory can move around, we cannot store absolute pointers. +// Instead we store offsets and fix them up later. This is the +// purpose of the [fixups] area. // -// XXX Recursive function will probably fall over once we apply it to -// large, deeply recursive structures. Should be replaced with something -// iterative. +// XXX Large, deeply recursive structures cause a stack overflow. +// Temporary solution: 'ulimit -s unlimited'. This function should +// be replaced with something iterative. static size_t -mark (value obj, area *ptr, area *restore) +_mark (value obj, area *ptr, area *restore, area *fixups) { - char *header = Hp_val (obj); - assert (Wosize_hp (header) > 0); // Always true? (XXX) + // XXX This assertion might fail if someone tries to mark an object + // which is already ancient. + assert (Is_in_heap_or_young (obj)); - // We can't handle out-of-heap objects. - // XXX Since someone might try to mark an ancient object, they - // might get this error, so we should try to do better here. - assert (Is_in_heap (obj)); + char *header = Hp_val (obj); // If we've already visited this object, just return its offset // in the out-of-heap memory. if (memcmp (header, &visited, sizeof visited) == 0) return (Long_val (Field (obj, 0))); + // XXX Actually this fails if you try to persist a zero-length + // array. Needs to be fixed, but it breaks some rather important + // functions below. + assert (Wosize_hp (header) > 0); + // Offset where we will store this object in the out-of-heap memory. size_t offset = ptr->n; @@ -136,8 +232,9 @@ mark (value obj, area *ptr, area *restore) for (i = 0; i < nr_words; ++i) { value field = Field (obj, i); - if (Is_block (field)) { - size_t field_offset = mark (field, ptr, restore); + if (Is_block (field) && + Is_in_heap_or_young (field)) { + size_t field_offset = _mark (field, ptr, restore, fixups); if (field_offset == -1) return -1; // Propagate out of memory errors. // Since the recursive call to mark above can reallocate the @@ -147,8 +244,11 @@ mark (value obj, area *ptr, area *restore) // Don't store absolute pointers yet because realloc will // move the memory around. Store a fake pointer instead. - // We'll fix up these fake pointers afterwards. - Field (obj_copy, i) = (field_offset + sizeof (header_t)) << 2; + // We'll fix up these fake pointers afterwards in do_fixups. + Field (obj_copy, i) = field_offset + sizeof (header_t); + + size_t fixup = (void *)&Field(obj_copy, i) - ptr->ptr; + area_append (fixups, &fixup, sizeof fixup); } } } @@ -164,7 +264,8 @@ mark (value obj, area *ptr, area *restore) // what was in that field before. // (3) We can overwrite the header with all 1's to indicate that // we've visited (but see notes on 'static header_t visited' above). - // (4) All objects in OCaml are at least one word long (we hope!). + // (4) All objects in OCaml are at least one word long (XXX - actually + // this is not true). struct restore_item restore_item; restore_item.header = header; restore_item.field_zero = Field (obj, 0); @@ -201,19 +302,38 @@ do_restore (area *ptr, area *restore) } } -CAMLprim value -ancient_mark (value obj) +// Fixup fake pointers. +static void +do_fixups (area *ptr, area *fixups) { - CAMLparam1 (obj); - CAMLlocal1 (proxy); + long i; + + for (i = 0; i < fixups->n; i += sizeof (size_t)) + { + size_t fixup = *(size_t *)(fixups->ptr + i); + size_t offset = *(size_t *)(ptr->ptr + fixup); + void *real_ptr = ptr->ptr + offset; + *(value *)(ptr->ptr + fixup) = (value) real_ptr; + } +} +static void * +mark (value obj, + void *(*realloc)(void *data, void *ptr, size_t size), + void (*free)(void *data, void *ptr), + void *data, + size_t *r_size) +{ area ptr; // This will be the out of heap area. - area_init (&ptr); + area_init_custom (&ptr, realloc, free, data); area restore; // Headers to be fixed up after. area_init (&restore); + area fixups; // List of fake pointers to be fixed up. + area_init (&fixups); - if (mark (obj, &ptr, &restore) == -1) { + if (_mark (obj, &ptr, &restore, &fixups) == -1) { // Ran out of memory. Recover and throw an exception. + area_free (&fixups); do_restore (&ptr, &restore); area_free (&restore); area_free (&ptr); @@ -227,39 +347,47 @@ ancient_mark (value obj) // Update all fake pointers in the out of heap area to make them real // pointers. - size_t i; - for (i = 0; i < ptr.n; ) - { - // Out of heap area is: header, fields, header, fields, ... - // The header of each object tells us how many fields it has. - char *header = ptr.ptr + i; - size_t bytes = Bhsize_hp (header); - value obj = Val_hp (header); - - int can_scan = Tag_val (obj) < No_scan_tag; - if (can_scan) { - mlsize_t nr_words = Wosize_hp (header); - mlsize_t j; - - for (j = 0; j < nr_words; ++j) { - value field = Field (obj, j); - - if (Is_block (field)) { - size_t field_offset = field >> 2; - void *field_ptr = ptr.ptr + field_offset; - Field (obj, j) = (value) field_ptr; - } - } - } + do_fixups (&ptr, &fixups); + area_free (&fixups); - i += bytes; // Skip to next object. - } + if (r_size) *r_size = ptr.size; + return ptr.ptr; +} - // Replace obj with a proxy. +static void * +my_realloc (void *data __attribute__((unused)), void *ptr, size_t size) +{ + return realloc (ptr, size); +} + +static void +my_free (void *data __attribute__((unused)), void *ptr) +{ + return free (ptr); +} + +CAMLprim value +ancient_mark_info (value obj) +{ + CAMLparam1 (obj); + CAMLlocal3 (proxy, info, rv); + + size_t size; + void *ptr = mark (obj, my_realloc, my_free, 0, &size); + + // Make the proxy. proxy = caml_alloc (1, Abstract_tag); - Field (proxy, 0) = (value) ptr.ptr; + Field (proxy, 0) = (value) ptr; - CAMLreturn (proxy); + // Make the info struct. + info = caml_alloc (1, 0); + Field (info, 0) = Val_long (size); + + rv = caml_alloc (2, 0); + Field (rv, 0) = proxy; + Field (rv, 1) = info; + + CAMLreturn (rv); } CAMLprim value @@ -285,7 +413,7 @@ ancient_delete (value obj) if (Is_long (v)) caml_invalid_argument ("deleted"); // Otherwise v is a pointer to the out of heap malloc'd object. - assert (!Is_in_heap (v)); + assert (!Is_in_heap_or_young (v)); free ((void *) v); // Replace the proxy (a pointer) with an int 0 so we know it's @@ -294,3 +422,146 @@ ancient_delete (value obj) CAMLreturn (Val_unit); } + +CAMLprim value +ancient_is_ancient (value obj) +{ + CAMLparam1 (obj); + CAMLlocal1 (v); + + v = Is_in_heap_or_young (obj) ? Val_false : Val_true; + + CAMLreturn (v); +} + +CAMLprim value +ancient_address_of (value obj) +{ + CAMLparam1 (obj); + CAMLlocal1 (v); + + if (Is_block (obj)) v = caml_copy_nativeint ((intnat) obj); + else v = caml_copy_nativeint (0); + + CAMLreturn (v); +} + +CAMLprim value +ancient_attach (value fdv, value baseaddrv) +{ + CAMLparam2 (fdv, baseaddrv); + CAMLlocal1 (mdv); + + int fd = Int_val (fdv); + void *baseaddr = (void *) Nativeint_val (baseaddrv); + void *md = mmalloc_attach (fd, baseaddr); + if (md == 0) { + perror ("mmalloc_attach"); + caml_failwith ("mmalloc_attach"); + } + + mdv = caml_alloc (1, Abstract_tag); + Field (mdv, 0) = (value) md; + + CAMLreturn (mdv); +} + +CAMLprim value +ancient_detach (value mdv) +{ + CAMLparam1 (mdv); + + void *md = (void *) Field (mdv, 0); + + if (mmalloc_detach (md) != 0) { + perror ("mmalloc_detach"); + caml_failwith ("mmalloc_detach"); + } + + CAMLreturn (Val_unit); +} + +struct keytable { + void **keys; + int allocated; +}; + +CAMLprim value +ancient_share_info (value mdv, value keyv, value obj) +{ + CAMLparam3 (mdv, keyv, obj); + CAMLlocal3 (proxy, info, rv); + + void *md = (void *) Field (mdv, 0); + int key = Int_val (keyv); + + // Get the key table. + struct keytable *keytable = mmalloc_getkey (md, 0); + if (keytable == 0) { + keytable = mmalloc (md, sizeof (struct keytable)); + if (keytable == 0) caml_failwith ("out of memory"); + keytable->keys = 0; + keytable->allocated = 0; + mmalloc_setkey (md, 0, keytable); + } + + // Existing key exists? Free it. + if (key < keytable->allocated && keytable->keys[key] != 0) { + mfree (md, keytable->keys[key]); + keytable->keys[key] = 0; + } + + // Keytable large enough? If not, realloc it. + if (key >= keytable->allocated) { + int allocated = keytable->allocated == 0 ? 32 : keytable->allocated * 2; + void **keys = mrealloc (md, keytable->keys, allocated * sizeof (void *)); + if (keys == 0) caml_failwith ("out of memory"); + int i; + for (i = keytable->allocated; i < allocated; ++i) keys[i] = 0; + keytable->keys = keys; + keytable->allocated = allocated; + } + + // Do the mark. + size_t size; + void *ptr = mark (obj, mrealloc, mfree, md, &size); + + // Add the key to the keytable. + keytable->keys[key] = ptr; + + // Make the proxy. + proxy = caml_alloc (1, Abstract_tag); + Field (proxy, 0) = (value) ptr; + + // Make the info struct. + info = caml_alloc (1, 0); + Field (info, 0) = Val_long (size); + + rv = caml_alloc (2, 0); + Field (rv, 0) = proxy; + Field (rv, 1) = info; + + CAMLreturn (rv); +} + +CAMLprim value +ancient_get (value mdv, value keyv) +{ + CAMLparam2 (mdv, keyv); + CAMLlocal1 (proxy); + + void *md = (void *) Field (mdv, 0); + int key = Int_val (keyv); + + // Key exists? + struct keytable *keytable = mmalloc_getkey (md, 0); + if (keytable == 0 || key >= keytable->allocated || keytable->keys[key] == 0) + caml_raise_not_found (); + void *ptr = keytable->keys[key]; + + // Return the proxy. + proxy = caml_alloc (1, Abstract_tag); + Field (proxy, 0) = (value) ptr; + + CAMLreturn (proxy); +}