/* A hash class. * By Richard W.M. Jones * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * $Id: hash.c,v 1.4 2002/11/26 19:47:29 rich Exp $ */ #include "config.h" #ifdef HAVE_STRING_H #include #endif #include #include #include struct hash { pool pool; size_t key_size; size_t value_size; vector buckets; }; struct sash { pool pool; vector buckets; }; struct shash { pool pool; size_t value_size; vector buckets; }; #define HASH_NR_BUCKETS 32 /* This is the hashing function -- the same one as used by Perl. */ static inline unsigned HASH (const void *key, size_t key_size, int nr_buckets) { unsigned h = 0; const char *s = (const char *) key; while (key_size--) h = h * 33 + *s++; return h & (nr_buckets - 1); } /*----- HASHes -----*/ struct hash_bucket_entry { void *key; void *value; }; hash _hash_new (pool pool, size_t key_size, size_t value_size) { hash h; vector null = 0; h = pmalloc (pool, sizeof *h); h->pool = pool; h->key_size = key_size; h->value_size = value_size; h->buckets = new_vector (pool, sizeof (vector)); vector_fill (h->buckets, null, HASH_NR_BUCKETS); return h; } /* NB. This only copies the hash, NOT the structures or whatever * which might be pointed to by keys/values in the hash. Beware. */ hash copy_hash (pool pool, hash h) { hash new_h; int b, i; new_h = pmalloc (pool, sizeof *new_h); new_h->pool = pool; new_h->key_size = h->key_size; new_h->value_size = h->value_size; new_h->buckets = copy_vector (pool, h->buckets); /* Copy the buckets. */ for (b = 0; b < vector_size (new_h->buckets); ++b) { vector v; vector_get (new_h->buckets, b, v); if (v) { v = copy_vector (pool, v); vector_replace (new_h->buckets, b, v); /* Copy the keys/values in this vector. */ for (i = 0; i < vector_size (v); ++i) { struct hash_bucket_entry entry; vector_get (v, i, entry); entry.key = pmemdup (pool, entry.key, h->key_size); entry.value = pmemdup (pool, entry.value, h->value_size); vector_replace (v, i, entry); } } } return new_h; } int _hash_get (hash h, const void *key, void *value) { const void *ptr; ptr = _hash_get_ptr (h, key); if (ptr == 0) return 0; if (value) memcpy (value, ptr, h->value_size); return 1; } const void * _hash_get_ptr (hash h, const void *key) { int b, i; vector bucket; /* Find the appropriate bucket. */ b = HASH (key, h->key_size, vector_size (h->buckets)); vector_get (h->buckets, b, bucket); if (bucket == 0) return 0; /* Search this bucket linearly. */ for (i = 0; i < vector_size (bucket); ++i) { struct hash_bucket_entry entry; vector_get (bucket, i, entry); if (memcmp (entry.key, key, h->key_size) == 0) return entry.value; } return 0; } int _hash_insert (hash h, const void *key, const void *value) { int b, i; vector bucket; struct hash_bucket_entry entry; /* Find the appropriate bucket. */ b = HASH (key, h->key_size, vector_size (h->buckets)); vector_get (h->buckets, b, bucket); /* If there is no bucket there, we have to allocate a fresh vector. */ if (bucket == 0) { bucket = new_vector (h->pool, struct hash_bucket_entry); vector_replace (h->buckets, b, bucket); } /* Search this bucket linearly. */ for (i = 0; i < vector_size (bucket); ++i) { vector_get (bucket, i, entry); if (memcmp (entry.key, key, h->key_size) == 0) { memcpy (entry.value, value, h->value_size); /*entry.value = pmemdup (h->pool, value, h->value_size);*/ /* Replace this entry. */ vector_replace (bucket, i, entry); return 1; } } /* Append to this bucket. */ entry.key = pmemdup (h->pool, key, h->key_size); entry.value = pmemdup (h->pool, value, h->value_size); vector_push_back (bucket, entry); return 0; } int _hash_erase (hash h, const void *key) { int b, i; vector bucket; struct hash_bucket_entry entry; /* Find the appropriate bucket. */ b = HASH (key, h->key_size, vector_size (h->buckets)); vector_get (h->buckets, b, bucket); if (bucket == 0) return 0; /* Search this bucket linearly. */ for (i = 0; i < vector_size (bucket); ++i) { vector_get (bucket, i, entry); if (memcmp (entry.key, key, h->key_size) == 0) { /* Remove this entry. */ vector_erase (bucket, i); return 1; } } return 0; } inline vector hash_keys_in_pool (hash h, pool p) { int i, j; vector bucket, keys; struct hash_bucket_entry entry; keys = _vector_new (p, h->key_size); for (i = 0; i < vector_size (h->buckets); ++i) { vector_get (h->buckets, i, bucket); if (bucket) for (j = 0; j < vector_size (bucket); ++j) { vector_get (bucket, j, entry); _vector_push_back (keys, entry.key); } } return keys; } vector hash_keys (hash h) { return hash_keys_in_pool (h, h->pool); } inline vector hash_values_in_pool (hash h, pool p) { int i, j; vector bucket, values; struct hash_bucket_entry entry; values = _vector_new (p, h->value_size); for (i = 0; i < vector_size (h->buckets); ++i) { vector_get (h->buckets, i, bucket); if (bucket) for (j = 0; j < vector_size (bucket); ++j) { vector_get (bucket, j, entry); _vector_push_back (values, entry.value); } } return values; } vector hash_values (hash h) { return hash_values_in_pool (h, h->pool); } int hash_size (hash h) { vector bucket; int n = 0, i; for (i = 0; i < vector_size (h->buckets); ++i) { vector_get (h->buckets, i, bucket); n += bucket ? vector_size (bucket) : 0; } return n; } int hash_get_buckets_used (hash h) { vector bucket; int n = 0, i; for (i = 0; i < vector_size (h->buckets); ++i) { vector_get (h->buckets, i, bucket); n += bucket ? 1 : 0; } return n; } int hash_get_buckets_allocated (hash h) { return vector_size (h->buckets); } void hash_set_buckets_allocated (hash h, int new_size) { vector null = 0; /* The user has been warned not to call this after elements have been * inserted into the hash, and to make NEW_SIZE a power of 2. */ if (vector_size (h->buckets) > new_size) vector_erase_range (h->buckets, new_size, vector_size (h->buckets)); else if (vector_size (h->buckets) < new_size) vector_fill (h->buckets, null, new_size - vector_size (h->buckets)); } /*----- SASHes -----*/ struct sash_bucket_entry { char *key; char *value; int value_allocated; }; sash new_sash (pool pool) { sash h; vector null = 0; h = pmalloc (pool, sizeof *h); h->pool = pool; h->buckets = new_vector (pool, sizeof (vector)); vector_fill (h->buckets, null, HASH_NR_BUCKETS); return h; } /* NB. Unlike copy_hash, this does copy the strings into the new * pool. So a copy_sash is really a deep copy of the string hash and the * strings. */ sash copy_sash (pool pool, sash h) { sash new_h; int b, i; new_h = pmalloc (pool, sizeof *new_h); new_h->pool = pool; new_h->buckets = copy_vector (pool, h->buckets); /* Copy the buckets. */ for (b = 0; b < vector_size (new_h->buckets); ++b) { vector v; vector_get (new_h->buckets, b, v); if (v) { v = copy_vector (pool, v); vector_replace (new_h->buckets, b, v); /* Copy the string keys/values in this vector. */ for (i = 0; i < vector_size (v); ++i) { struct sash_bucket_entry entry; vector_get (v, i, entry); entry.key = pstrdup (pool, entry.key); entry.value = pstrdup (pool, entry.value); vector_replace (v, i, entry); } } } return new_h; } int _sash_get (sash h, const char *key, const char **ptr) { int b, i; vector bucket; /* Find the appropriate bucket. */ b = HASH (key, strlen (key), vector_size (h->buckets)); vector_get (h->buckets, b, bucket); if (bucket == 0) { if (ptr) *ptr = 0; return 0; } /* Search this bucket linearly. */ for (i = 0; i < vector_size (bucket); ++i) { struct sash_bucket_entry entry; vector_get (bucket, i, entry); if (strcmp (entry.key, key) == 0) { if (ptr) *ptr = entry.value; return 1; } } if (ptr) *ptr = 0; return 0; } int sash_insert (sash h, const char *key, const char *value) { int b, i, len = strlen (value); vector bucket; struct sash_bucket_entry entry; /* Find the appropriate bucket. */ b = HASH (key, strlen (key), vector_size (h->buckets)); vector_get (h->buckets, b, bucket); /* If there is no bucket there, we have to allocate a fresh vector. */ if (bucket == 0) { bucket = new_vector (h->pool, struct sash_bucket_entry); vector_replace (h->buckets, b, bucket); } /* Search this bucket linearly. */ for (i = 0; i < vector_size (bucket); ++i) { vector_get (bucket, i, entry); if (strcmp (entry.key, key) == 0) { /* To avoid unnecessarily allocating more memory, we try to * be clever here. If the existing allocation is large enough * to store the new string, use it. Otherwise reallocate it * to make it bigger. */ if (len < entry.value_allocated) memcpy (entry.value, value, len + 1); else { entry.value = prealloc (h->pool, entry.value, len + 1); memcpy (entry.value, value, len + 1); entry.value_allocated = len + 1; } /* Replace this entry. */ vector_replace (bucket, i, entry); return 1; } } /* Append to this bucket. */ entry.key = pstrdup (h->pool, key); entry.value = pstrdup (h->pool, value); entry.value_allocated = len + 1; vector_push_back (bucket, entry); return 0; } int sash_erase (sash h, const char *key) { int b, i; vector bucket; struct sash_bucket_entry entry; /* Find the appropriate bucket. */ b = HASH (key, strlen (key), vector_size (h->buckets)); vector_get (h->buckets, b, bucket); if (bucket == 0) return 0; /* Search this bucket linearly. */ for (i = 0; i < vector_size (bucket); ++i) { vector_get (bucket, i, entry); if (strcmp (entry.key, key) == 0) { /* Remove this entry. */ vector_erase (bucket, i); return 1; } } return 0; } inline vector sash_keys_in_pool (sash h, pool p) { int i, j; vector bucket, keys; struct sash_bucket_entry entry; keys = new_vector (p, char *); for (i = 0; i < vector_size (h->buckets); ++i) { vector_get (h->buckets, i, bucket); if (bucket) for (j = 0; j < vector_size (bucket); ++j) { char *key; vector_get (bucket, j, entry); key = pstrdup (p, entry.key); vector_push_back (keys, key); } } return keys; } vector sash_keys (sash h) { return sash_keys_in_pool (h, h->pool); } inline vector sash_values_in_pool (sash h, pool p) { int i, j; vector bucket, values; struct sash_bucket_entry entry; values = new_vector (p, char *); for (i = 0; i < vector_size (h->buckets); ++i) { vector_get (h->buckets, i, bucket); if (bucket) for (j = 0; j < vector_size (bucket); ++j) { char *value; vector_get (bucket, j, entry); value = pstrdup (p, entry.value); vector_push_back (values, value); } } return values; } vector sash_values (sash h) { return sash_values_in_pool (h, h->pool); } int sash_size (sash h) { vector bucket; int n = 0, i; for (i = 0; i < vector_size (h->buckets); ++i) { vector_get (h->buckets, i, bucket); n += bucket ? vector_size (bucket) : 0; } return n; } int sash_get_buckets_used (sash h) { vector bucket; int n = 0, i; for (i = 0; i < vector_size (h->buckets); ++i) { vector_get (h->buckets, i, bucket); n += bucket ? 1 : 0; } return n; } int sash_get_buckets_allocated (sash h) { return vector_size (h->buckets); } void sash_set_buckets_allocated (sash h, int new_size) { vector null = 0; /* The user has been warned not to call this after elements have been * inserted into the sash, and to make NEW_SIZE a power of 2. */ if (vector_size (h->buckets) > new_size) vector_erase_range (h->buckets, new_size, vector_size (h->buckets)); else if (vector_size (h->buckets) < new_size) vector_fill (h->buckets, null, new_size - vector_size (h->buckets)); } /*----- SHASHes -----*/ struct shash_bucket_entry { char *key; void *value; }; shash _shash_new (pool pool, size_t value_size) { shash h; vector null = 0; h = pmalloc (pool, sizeof *h); h->pool = pool; h->value_size = value_size; h->buckets = new_vector (pool, sizeof (vector)); vector_fill (h->buckets, null, HASH_NR_BUCKETS); return h; } /* NB. Unlike copy_hash, this does copy the strings into the new * pool. So a copy_shash is really a deep copy of the shash and the * strings. */ shash copy_shash (pool pool, shash h) { shash new_h; int b, i; new_h = pmalloc (pool, sizeof *new_h); new_h->pool = pool; new_h->value_size = h->value_size; new_h->buckets = copy_vector (pool, h->buckets); /* Copy the buckets. */ for (b = 0; b < vector_size (new_h->buckets); ++b) { vector v; vector_get (new_h->buckets, b, v); if (v) { v = copy_vector (pool, v); vector_replace (new_h->buckets, b, v); /* Copy the string keys in this vector. */ for (i = 0; i < vector_size (v); ++i) { struct shash_bucket_entry entry; vector_get (v, i, entry); entry.key = pstrdup (pool, entry.key); entry.value = pmemdup (pool, entry.value, h->value_size); vector_replace (v, i, entry); } } } return new_h; } int _shash_get (shash h, const char *key, void *value) { const void *ptr; ptr = _shash_get_ptr (h, key); if (ptr == 0) return 0; if (value) memcpy (value, ptr, h->value_size); return 1; } const void * _shash_get_ptr (shash h, const char *key) { int b, i; vector bucket; /* Find the appropriate bucket. */ b = HASH (key, strlen (key), vector_size (h->buckets)); vector_get (h->buckets, b, bucket); if (bucket == 0) return 0; /* Search this bucket linearly. */ for (i = 0; i < vector_size (bucket); ++i) { struct shash_bucket_entry entry; vector_get (bucket, i, entry); if (strcmp (entry.key, key) == 0) return entry.value; } return 0; } int _shash_insert (shash h, const char *key, const void *value) { int b, i; vector bucket; struct shash_bucket_entry entry; /* Find the appropriate bucket. */ b = HASH (key, strlen (key), vector_size (h->buckets)); vector_get (h->buckets, b, bucket); /* If there is no bucket there, we have to allocate a fresh vector. */ if (bucket == 0) { bucket = new_vector (h->pool, struct shash_bucket_entry); vector_replace (h->buckets, b, bucket); } /* Search this bucket linearly. */ for (i = 0; i < vector_size (bucket); ++i) { vector_get (bucket, i, entry); if (strcmp (entry.key, key) == 0) { memcpy (entry.value, value, h->value_size); /*entry.value = pmemdup (h->pool, value, h->value_size);*/ /* Replace this entry. */ vector_replace (bucket, i, entry); return 1; } } /* Append to this bucket. */ entry.key = pstrdup (h->pool, key); entry.value = pmemdup (h->pool, value, h->value_size); vector_push_back (bucket, entry); return 0; } int shash_erase (shash h, const char *key) { int b, i; vector bucket; struct shash_bucket_entry entry; /* Find the appropriate bucket. */ b = HASH (key, strlen (key), vector_size (h->buckets)); vector_get (h->buckets, b, bucket); if (bucket == 0) return 0; /* Search this bucket linearly. */ for (i = 0; i < vector_size (bucket); ++i) { vector_get (bucket, i, entry); if (strcmp (entry.key, key) == 0) { /* Remove this entry. */ vector_erase (bucket, i); return 1; } } return 0; } inline vector shash_keys_in_pool (shash h, pool p) { int i, j; vector bucket, keys; struct shash_bucket_entry entry; keys = new_vector (p, char *); for (i = 0; i < vector_size (h->buckets); ++i) { vector_get (h->buckets, i, bucket); if (bucket) for (j = 0; j < vector_size (bucket); ++j) { char *key; vector_get (bucket, j, entry); key = pstrdup (p, entry.key); vector_push_back (keys, key); } } return keys; } vector shash_keys (shash h) { return shash_keys_in_pool (h, h->pool); } inline vector shash_values_in_pool (shash h, pool p) { int i, j; vector bucket, values; struct shash_bucket_entry entry; values = _vector_new (p, h->value_size); for (i = 0; i < vector_size (h->buckets); ++i) { vector_get (h->buckets, i, bucket); if (bucket) for (j = 0; j < vector_size (bucket); ++j) { vector_get (bucket, j, entry); _vector_push_back (values, entry.value); } } return values; } vector shash_values (shash h) { return shash_values_in_pool (h, h->pool); } int shash_size (shash h) { vector bucket; int n = 0, i; for (i = 0; i < vector_size (h->buckets); ++i) { vector_get (h->buckets, i, bucket); n += bucket ? vector_size (bucket) : 0; } return n; } int shash_get_buckets_used (shash h) { vector bucket; int n = 0, i; for (i = 0; i < vector_size (h->buckets); ++i) { vector_get (h->buckets, i, bucket); n += bucket ? 1 : 0; } return n; } int shash_get_buckets_allocated (shash h) { return vector_size (h->buckets); } void shash_set_buckets_allocated (shash h, int new_size) { vector null = 0; /* The user has been warned not to call this after elements have been * inserted into the shash, and to make NEW_SIZE a power of 2. */ if (vector_size (h->buckets) > new_size) vector_erase_range (h->buckets, new_size, vector_size (h->buckets)); else if (vector_size (h->buckets) < new_size) vector_fill (h->buckets, null, new_size - vector_size (h->buckets)); }