From 2f2d5af5cf03640650c8b49933c36665fdf52d61 Mon Sep 17 00:00:00 2001 From: rich Date: Mon, 9 Oct 2006 12:18:05 +0000 Subject: [PATCH] Added: Ancient.is_ancient Ancient.max_key for use by Weblogs. --- MANIFEST | 7 ++++--- README.txt | 46 +++++++++++++++++++++++++++++++++++++++++++++- ancient.ml | 6 +++++- ancient.mli | 13 ++++++++++--- ancient_c.c | 23 +++++++++++++++++++---- 5 files changed, 83 insertions(+), 12 deletions(-) diff --git a/MANIFEST b/MANIFEST index a5bbdbe..d5b0530 100644 --- a/MANIFEST +++ b/MANIFEST @@ -35,6 +35,7 @@ mmalloc/mrealloc.c mmalloc/mvalloc.c mmalloc/sbrk-sup.c mmalloc/TODO -test_ancient.ml -test_ancient_shared.ml -test_ancient_weblogs.ml +README.txt +test_ancient_dict_read.ml +test_ancient_dict_verify.ml +test_ancient_dict_write.ml diff --git a/README.txt b/README.txt index d0abe58..67bf422 100644 --- a/README.txt +++ b/README.txt @@ -1,6 +1,6 @@ 'Ancient' module for OCaml ---------------------------------------------------------------------- -$Id: README.txt,v 1.1 2006-10-06 15:03:47 rich Exp $ +$Id: README.txt,v 1.2 2006-10-09 12:18:05 rich Exp $ What does this module do? ---------------------------------------------------------------------- @@ -192,6 +192,50 @@ which avoids loading unused fields at all. In some circumstances we have shown that this could make a huge difference to performance, but we are not sure how to implement this cleanly in the current library. +(7) [Advanced topic] Certain techniques such as Address Space +Randomisation (http://lwn.net/Articles/121845/) are probably not +compatible with the Ancient module and shared files. Because the +ancient data structures contain real pointers, these pointers would be +invalidated if the shared file was not mapped in at precisely the same +base address in all processes which are sharing the file. + +One solution might be to use private mappings and a list of fixups. 
+In fact, the code already builds a list of fixups while +marking, because it needs to deal with precisely this issue (during +marking, memory is allocated with realloc, which might move the memory +segment; thus real pointers cannot be stored while marking, but need +to be fixed up afterwards). The list of fixups would need to be +stored alongside the memory segment (currently it is discarded after +marking), and the file would need to be mapped in using MAP_PRIVATE +(see below). + +A possible problem with this is that, because OCaml objects tend to be +small and contain a lot of pointers, it is likely that fixing up the +pointers would result in every page in the memory segment becoming +dirty, which would basically cancel out any benefit of using shared +mappings in the first place. However, it is likely that some users of +this module have large amounts of opaque data and few pointers, and +for them this would be worthwhile. + +(8) Currently mmalloc is implemented so that the file is mapped in +PROT_READ|PROT_WRITE and MAP_SHARED. Ancient data structures are +supposed to be immutable, so strictly speaking write access shouldn't +be required. It may be worthwhile modifying mmalloc to allow +read-only mappings and private mappings. + +(9) The library assumes that every OCaml object is at least one word +long. This seemed like a good assumption up until I found that +zero-length arrays are valid zero-word objects. At the moment you +cannot mark structures which contain zero-length arrays -- you will +get an assertion failure in the _mark function. + +Possibly there are other types of OCaml structure which are zero-word +objects and also cannot be marked. I'm not sure what these will be: +for example, empty strings are stored as one-word OCaml objects, so +they are OK. + +The solution to this bug is non-trivial. 
+ Authors ---------------------------------------------------------------------- diff --git a/ancient.ml b/ancient.ml index dfce30c..f8b7088 100644 --- a/ancient.ml +++ b/ancient.ml @@ -1,5 +1,5 @@ (* Mark objects as 'ancient' so they are taken out of the OCaml heap. - * $Id: ancient.ml,v 1.4 2006-09-28 12:40:07 rich Exp $ + * $Id: ancient.ml,v 1.5 2006-10-09 12:18:05 rich Exp $ *) type 'a ancient @@ -10,6 +10,8 @@ external follow : 'a ancient -> 'a = "ancient_follow" external delete : 'a ancient -> unit = "ancient_delete" +external is_ancient : 'a -> bool = "ancient_is_ancient" + type md external attach : Unix.file_descr -> nativeint -> md = "ancient_attach" @@ -19,3 +21,5 @@ external detach : md -> unit = "ancient_detach" external share : md -> int -> 'a -> 'a ancient = "ancient_share" external get : md -> int -> 'a ancient = "ancient_get" + +let max_key = 1023 (* MMALLOC_KEYS-1. See mmprivate.h *) diff --git a/ancient.mli b/ancient.mli index 6bf689f..f77a4b4 100644 --- a/ancient.mli +++ b/ancient.mli @@ -1,5 +1,5 @@ (** Mark objects as 'ancient' so they are taken out of the OCaml heap. - * $Id: ancient.mli,v 1.5 2006-10-06 15:03:47 rich Exp $ + * $Id: ancient.mli,v 1.6 2006-10-09 12:18:05 rich Exp $ *) type 'a ancient @@ -30,6 +30,11 @@ val delete : 'a ancient -> unit * Forgetting to delete an ancient object results in a memory leak. *) +val is_ancient : 'a -> bool + (** [is_ancient ptr] returns true if [ptr] is an object on the ancient + * heap. + *) + (** {6 Shared memory mappings} *) type md @@ -73,7 +78,7 @@ val share : md -> int -> 'a -> 'a ancient * file. See {!Ancient.attach}, {!Ancient.detach}. * * More than one object can be stored in a file. They are - * indexed using integers in the range [0..1023] (the limit + * indexed using integers in the range [0..max_key] (the limit * is hard-coded in [mmalloc/mmprivate.h]). The [key] parameter * controls which object is written/overwritten by [share]. 
* If you do not wish to use this feature, just pass [0] @@ -96,7 +101,7 @@ val get : md -> int -> 'a ancient (** [get md key] returns the object indexed by [key] in the * attached file. * - * The key is in the range [0..1023] (the limit is hard-coded in + * The key is in the range [0..max_key] (the limit is hard-coded in * [mmalloc/mmprivate.h]). If you do not wish to use this feature, * just pass [0] as the key when sharing / getting. * @@ -108,3 +113,5 @@ val get : md -> int -> 'a ancient * * @raises [Not_found] if no object is associated with the key. *) + +val max_key : int diff --git a/ancient_c.c b/ancient_c.c index 52886c9..95383be 100644 --- a/ancient_c.c +++ b/ancient_c.c @@ -1,5 +1,5 @@ /* Mark objects as 'ancient' so they are taken out of the OCaml heap. - * $Id: ancient_c.c,v 1.7 2006-10-06 12:25:20 rich Exp $ + * $Id: ancient_c.c,v 1.8 2006-10-09 12:18:05 rich Exp $ */ #include @@ -145,18 +145,22 @@ static header_t visited = (unsigned long) -1; static size_t _mark (value obj, area *ptr, area *restore, area *fixups) { - char *header = Hp_val (obj); - assert (Wosize_hp (header) > 0); // Always true? (XXX) - // XXX This assertion might fail if someone tries to mark an object // which is already ancient. assert (Is_young (obj) || Is_in_heap (obj)); + char *header = Hp_val (obj); + // If we've already visited this object, just return its offset // in the out-of-heap memory. if (memcmp (header, &visited, sizeof visited) == 0) return (Long_val (Field (obj, 0))); + // XXX Actually this fails if you try to persist a zero-length + // array. Needs to be fixed, but it breaks some rather important + // functions below. + assert (Wosize_hp (header) > 0); + // Offset where we will store this object in the out-of-heap memory. size_t offset = ptr->n; @@ -354,6 +358,17 @@ ancient_delete (value obj) } CAMLprim value +ancient_is_ancient (value obj) +{ + CAMLparam1 (obj); + CAMLlocal1 (v); + // Immediate (unboxed) values are never ancient; only blocks can be. + v = Val_bool (Is_block (obj) && 
!Is_young (obj) && !Is_in_heap (obj)); + + CAMLreturn (v); +} + +CAMLprim value ancient_attach (value fdv, value baseaddrv) { CAMLparam2 (fdv, baseaddrv); -- 1.8.3.1