2 * By Richard W.M. Jones <rich@annexia.org>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the Free
16 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 * $Id: hash.c,v 1.4 2002/11/26 19:47:29 rich Exp $
52 #define HASH_NR_BUCKETS 32
54 /* This is the hashing function -- the same one as used by Perl. */
55 static inline unsigned
56 HASH (const void *key, size_t key_size, int nr_buckets)
59 const char *s = (const char *) key;
64 return h & (nr_buckets - 1);
67 /*----- HASHes -----*/
69 struct hash_bucket_entry
76 _hash_new (pool pool, size_t key_size, size_t value_size)
81 h = pmalloc (pool, sizeof *h);
83 h->key_size = key_size;
84 h->value_size = value_size;
85 h->buckets = new_vector (pool, sizeof (vector));
86 vector_fill (h->buckets, null, HASH_NR_BUCKETS);
91 /* NB. This only copies the hash, NOT the structures or whatever
92 * which might be pointed to by keys/values in the hash. Beware.
95 copy_hash (pool pool, hash h)
100 new_h = pmalloc (pool, sizeof *new_h);
102 new_h->key_size = h->key_size;
103 new_h->value_size = h->value_size;
104 new_h->buckets = copy_vector (pool, h->buckets);
106 /* Copy the buckets. */
107 for (b = 0; b < vector_size (new_h->buckets); ++b)
111 vector_get (new_h->buckets, b, v);
115 v = copy_vector (pool, v);
116 vector_replace (new_h->buckets, b, v);
118 /* Copy the keys/values in this vector. */
119 for (i = 0; i < vector_size (v); ++i)
121 struct hash_bucket_entry entry;
123 vector_get (v, i, entry);
124 entry.key = pmemdup (pool, entry.key, h->key_size);
125 entry.value = pmemdup (pool, entry.value, h->value_size);
126 vector_replace (v, i, entry);
135 _hash_get (hash h, const void *key, void *value)
139 ptr = _hash_get_ptr (h, key);
140 if (ptr == 0) return 0;
142 if (value) memcpy (value, ptr, h->value_size);
147 _hash_get_ptr (hash h, const void *key)
152 /* Find the appropriate bucket. */
153 b = HASH (key, h->key_size, vector_size (h->buckets));
154 vector_get (h->buckets, b, bucket);
159 /* Search this bucket linearly. */
160 for (i = 0; i < vector_size (bucket); ++i)
162 struct hash_bucket_entry entry;
164 vector_get (bucket, i, entry);
165 if (memcmp (entry.key, key, h->key_size) == 0)
173 _hash_insert (hash h, const void *key, const void *value)
177 struct hash_bucket_entry entry;
179 /* Find the appropriate bucket. */
180 b = HASH (key, h->key_size, vector_size (h->buckets));
181 vector_get (h->buckets, b, bucket);
183 /* If there is no bucket there, we have to allocate a fresh vector. */
186 bucket = new_vector (h->pool, struct hash_bucket_entry);
187 vector_replace (h->buckets, b, bucket);
190 /* Search this bucket linearly. */
191 for (i = 0; i < vector_size (bucket); ++i)
193 vector_get (bucket, i, entry);
194 if (memcmp (entry.key, key, h->key_size) == 0)
196 memcpy (entry.value, value, h->value_size);
197 /*entry.value = pmemdup (h->pool, value, h->value_size);*/
199 /* Replace this entry. */
200 vector_replace (bucket, i, entry);
206 /* Append to this bucket. */
207 entry.key = pmemdup (h->pool, key, h->key_size);
208 entry.value = pmemdup (h->pool, value, h->value_size);
210 vector_push_back (bucket, entry);
215 _hash_erase (hash h, const void *key)
219 struct hash_bucket_entry entry;
221 /* Find the appropriate bucket. */
222 b = HASH (key, h->key_size, vector_size (h->buckets));
223 vector_get (h->buckets, b, bucket);
225 if (bucket == 0) return 0;
227 /* Search this bucket linearly. */
228 for (i = 0; i < vector_size (bucket); ++i)
230 vector_get (bucket, i, entry);
231 if (memcmp (entry.key, key, h->key_size) == 0)
233 /* Remove this entry. */
234 vector_erase (bucket, i);
244 hash_keys_in_pool (hash h, pool p)
248 struct hash_bucket_entry entry;
250 keys = _vector_new (p, h->key_size);
252 for (i = 0; i < vector_size (h->buckets); ++i)
254 vector_get (h->buckets, i, bucket);
257 for (j = 0; j < vector_size (bucket); ++j)
259 vector_get (bucket, j, entry);
260 _vector_push_back (keys, entry.key);
270 return hash_keys_in_pool (h, h->pool);
274 hash_values_in_pool (hash h, pool p)
277 vector bucket, values;
278 struct hash_bucket_entry entry;
280 values = _vector_new (p, h->value_size);
282 for (i = 0; i < vector_size (h->buckets); ++i)
284 vector_get (h->buckets, i, bucket);
287 for (j = 0; j < vector_size (bucket); ++j)
289 vector_get (bucket, j, entry);
290 _vector_push_back (values, entry.value);
300 return hash_values_in_pool (h, h->pool);
309 for (i = 0; i < vector_size (h->buckets); ++i)
311 vector_get (h->buckets, i, bucket);
312 n += bucket ? vector_size (bucket) : 0;
319 hash_get_buckets_used (hash h)
324 for (i = 0; i < vector_size (h->buckets); ++i)
326 vector_get (h->buckets, i, bucket);
334 hash_get_buckets_allocated (hash h)
336 return vector_size (h->buckets);
340 hash_set_buckets_allocated (hash h, int new_size)
344 /* The user has been warned not to call this after elements have been
345 * inserted into the hash, and to make NEW_SIZE a power of 2.
347 if (vector_size (h->buckets) > new_size)
348 vector_erase_range (h->buckets, new_size, vector_size (h->buckets));
349 else if (vector_size (h->buckets) < new_size)
350 vector_fill (h->buckets, null, new_size - vector_size (h->buckets));
353 /*----- SASHes -----*/
355 struct sash_bucket_entry
368 h = pmalloc (pool, sizeof *h);
370 h->buckets = new_vector (pool, sizeof (vector));
371 vector_fill (h->buckets, null, HASH_NR_BUCKETS);
376 /* NB. Unlike copy_hash, this does copy the strings into the new
377 * pool. So a copy_sash is really a deep copy of the string hash and the
381 copy_sash (pool pool, sash h)
386 new_h = pmalloc (pool, sizeof *new_h);
388 new_h->buckets = copy_vector (pool, h->buckets);
390 /* Copy the buckets. */
391 for (b = 0; b < vector_size (new_h->buckets); ++b)
395 vector_get (new_h->buckets, b, v);
399 v = copy_vector (pool, v);
400 vector_replace (new_h->buckets, b, v);
402 /* Copy the string keys/values in this vector. */
403 for (i = 0; i < vector_size (v); ++i)
405 struct sash_bucket_entry entry;
407 vector_get (v, i, entry);
408 entry.key = pstrdup (pool, entry.key);
409 entry.value = pstrdup (pool, entry.value);
410 vector_replace (v, i, entry);
419 _sash_get (sash h, const char *key, const char **ptr)
424 /* Find the appropriate bucket. */
425 b = HASH (key, strlen (key), vector_size (h->buckets));
426 vector_get (h->buckets, b, bucket);
434 /* Search this bucket linearly. */
435 for (i = 0; i < vector_size (bucket); ++i)
437 struct sash_bucket_entry entry;
439 vector_get (bucket, i, entry);
440 if (strcmp (entry.key, key) == 0)
442 if (ptr) *ptr = entry.value;
452 sash_insert (sash h, const char *key, const char *value)
454 int b, i, len = strlen (value);
456 struct sash_bucket_entry entry;
458 /* Find the appropriate bucket. */
459 b = HASH (key, strlen (key), vector_size (h->buckets));
460 vector_get (h->buckets, b, bucket);
462 /* If there is no bucket there, we have to allocate a fresh vector. */
465 bucket = new_vector (h->pool, struct sash_bucket_entry);
466 vector_replace (h->buckets, b, bucket);
469 /* Search this bucket linearly. */
470 for (i = 0; i < vector_size (bucket); ++i)
472 vector_get (bucket, i, entry);
473 if (strcmp (entry.key, key) == 0)
475 /* To avoid unnecessarily allocating more memory, we try to
476 * be clever here. If the existing allocation is large enough
477 * to store the new string, use it. Otherwise reallocate it
480 if (len < entry.value_allocated)
481 memcpy (entry.value, value, len + 1);
484 entry.value = prealloc (h->pool, entry.value, len + 1);
485 memcpy (entry.value, value, len + 1);
486 entry.value_allocated = len + 1;
489 /* Replace this entry. */
490 vector_replace (bucket, i, entry);
496 /* Append to this bucket. */
497 entry.key = pstrdup (h->pool, key);
498 entry.value = pstrdup (h->pool, value);
499 entry.value_allocated = len + 1;
501 vector_push_back (bucket, entry);
506 sash_erase (sash h, const char *key)
510 struct sash_bucket_entry entry;
512 /* Find the appropriate bucket. */
513 b = HASH (key, strlen (key), vector_size (h->buckets));
514 vector_get (h->buckets, b, bucket);
516 if (bucket == 0) return 0;
518 /* Search this bucket linearly. */
519 for (i = 0; i < vector_size (bucket); ++i)
521 vector_get (bucket, i, entry);
522 if (strcmp (entry.key, key) == 0)
524 /* Remove this entry. */
525 vector_erase (bucket, i);
535 sash_keys_in_pool (sash h, pool p)
539 struct sash_bucket_entry entry;
541 keys = new_vector (p, char *);
543 for (i = 0; i < vector_size (h->buckets); ++i)
545 vector_get (h->buckets, i, bucket);
548 for (j = 0; j < vector_size (bucket); ++j)
552 vector_get (bucket, j, entry);
553 key = pstrdup (p, entry.key);
554 vector_push_back (keys, key);
564 return sash_keys_in_pool (h, h->pool);
568 sash_values_in_pool (sash h, pool p)
571 vector bucket, values;
572 struct sash_bucket_entry entry;
574 values = new_vector (p, char *);
576 for (i = 0; i < vector_size (h->buckets); ++i)
578 vector_get (h->buckets, i, bucket);
581 for (j = 0; j < vector_size (bucket); ++j)
585 vector_get (bucket, j, entry);
586 value = pstrdup (p, entry.value);
587 vector_push_back (values, value);
597 return sash_values_in_pool (h, h->pool);
606 for (i = 0; i < vector_size (h->buckets); ++i)
608 vector_get (h->buckets, i, bucket);
609 n += bucket ? vector_size (bucket) : 0;
616 sash_get_buckets_used (sash h)
621 for (i = 0; i < vector_size (h->buckets); ++i)
623 vector_get (h->buckets, i, bucket);
631 sash_get_buckets_allocated (sash h)
633 return vector_size (h->buckets);
637 sash_set_buckets_allocated (sash h, int new_size)
641 /* The user has been warned not to call this after elements have been
642 * inserted into the sash, and to make NEW_SIZE a power of 2.
644 if (vector_size (h->buckets) > new_size)
645 vector_erase_range (h->buckets, new_size, vector_size (h->buckets));
646 else if (vector_size (h->buckets) < new_size)
647 vector_fill (h->buckets, null, new_size - vector_size (h->buckets));
650 /*----- SHASHes -----*/
652 struct shash_bucket_entry
659 _shash_new (pool pool, size_t value_size)
664 h = pmalloc (pool, sizeof *h);
666 h->value_size = value_size;
667 h->buckets = new_vector (pool, sizeof (vector));
668 vector_fill (h->buckets, null, HASH_NR_BUCKETS);
673 /* NB. Unlike copy_hash, this does copy the strings into the new
674 * pool. So a copy_shash is really a deep copy of the shash and the
678 copy_shash (pool pool, shash h)
683 new_h = pmalloc (pool, sizeof *new_h);
685 new_h->value_size = h->value_size;
686 new_h->buckets = copy_vector (pool, h->buckets);
688 /* Copy the buckets. */
689 for (b = 0; b < vector_size (new_h->buckets); ++b)
693 vector_get (new_h->buckets, b, v);
697 v = copy_vector (pool, v);
698 vector_replace (new_h->buckets, b, v);
700 /* Copy the string keys in this vector. */
701 for (i = 0; i < vector_size (v); ++i)
703 struct shash_bucket_entry entry;
705 vector_get (v, i, entry);
706 entry.key = pstrdup (pool, entry.key);
707 entry.value = pmemdup (pool, entry.value, h->value_size);
708 vector_replace (v, i, entry);
717 _shash_get (shash h, const char *key, void *value)
721 ptr = _shash_get_ptr (h, key);
722 if (ptr == 0) return 0;
724 if (value) memcpy (value, ptr, h->value_size);
729 _shash_get_ptr (shash h, const char *key)
734 /* Find the appropriate bucket. */
735 b = HASH (key, strlen (key), vector_size (h->buckets));
736 vector_get (h->buckets, b, bucket);
741 /* Search this bucket linearly. */
742 for (i = 0; i < vector_size (bucket); ++i)
744 struct shash_bucket_entry entry;
746 vector_get (bucket, i, entry);
747 if (strcmp (entry.key, key) == 0)
755 _shash_insert (shash h, const char *key, const void *value)
759 struct shash_bucket_entry entry;
761 /* Find the appropriate bucket. */
762 b = HASH (key, strlen (key), vector_size (h->buckets));
763 vector_get (h->buckets, b, bucket);
765 /* If there is no bucket there, we have to allocate a fresh vector. */
768 bucket = new_vector (h->pool, struct shash_bucket_entry);
769 vector_replace (h->buckets, b, bucket);
772 /* Search this bucket linearly. */
773 for (i = 0; i < vector_size (bucket); ++i)
775 vector_get (bucket, i, entry);
776 if (strcmp (entry.key, key) == 0)
778 memcpy (entry.value, value, h->value_size);
779 /*entry.value = pmemdup (h->pool, value, h->value_size);*/
781 /* Replace this entry. */
782 vector_replace (bucket, i, entry);
788 /* Append to this bucket. */
789 entry.key = pstrdup (h->pool, key);
790 entry.value = pmemdup (h->pool, value, h->value_size);
792 vector_push_back (bucket, entry);
797 shash_erase (shash h, const char *key)
801 struct shash_bucket_entry entry;
803 /* Find the appropriate bucket. */
804 b = HASH (key, strlen (key), vector_size (h->buckets));
805 vector_get (h->buckets, b, bucket);
807 if (bucket == 0) return 0;
809 /* Search this bucket linearly. */
810 for (i = 0; i < vector_size (bucket); ++i)
812 vector_get (bucket, i, entry);
813 if (strcmp (entry.key, key) == 0)
815 /* Remove this entry. */
816 vector_erase (bucket, i);
826 shash_keys_in_pool (shash h, pool p)
830 struct shash_bucket_entry entry;
832 keys = new_vector (p, char *);
834 for (i = 0; i < vector_size (h->buckets); ++i)
836 vector_get (h->buckets, i, bucket);
839 for (j = 0; j < vector_size (bucket); ++j)
843 vector_get (bucket, j, entry);
844 key = pstrdup (p, entry.key);
845 vector_push_back (keys, key);
855 return shash_keys_in_pool (h, h->pool);
859 shash_values_in_pool (shash h, pool p)
862 vector bucket, values;
863 struct shash_bucket_entry entry;
865 values = _vector_new (p, h->value_size);
867 for (i = 0; i < vector_size (h->buckets); ++i)
869 vector_get (h->buckets, i, bucket);
872 for (j = 0; j < vector_size (bucket); ++j)
874 vector_get (bucket, j, entry);
875 _vector_push_back (values, entry.value);
883 shash_values (shash h)
885 return shash_values_in_pool (h, h->pool);
894 for (i = 0; i < vector_size (h->buckets); ++i)
896 vector_get (h->buckets, i, bucket);
897 n += bucket ? vector_size (bucket) : 0;
904 shash_get_buckets_used (shash h)
909 for (i = 0; i < vector_size (h->buckets); ++i)
911 vector_get (h->buckets, i, bucket);
919 shash_get_buckets_allocated (shash h)
921 return vector_size (h->buckets);
925 shash_set_buckets_allocated (shash h, int new_size)
929 /* The user has been warned not to call this after elements have been
930 * inserted into the shash, and to make NEW_SIZE a power of 2.
932 if (vector_size (h->buckets) > new_size)
933 vector_erase_range (h->buckets, new_size, vector_size (h->buckets));
934 else if (vector_size (h->buckets) < new_size)
935 vector_fill (h->buckets, null, new_size - vector_size (h->buckets));