#include "precompile.h" #include "memutil.h" #include "hashset.h" #include "list.h" #include "hash.h" #include "dbgutil.h" #include /* double hash , same with .netframework hashtable */ // Table of prime numbers to use as hash table sizes. // The entry used for capacity is the smallest prime number in this aaray // that is larger than twice the previous capacity. /* Integer square root by Halleck's method, with Legalize's speedup */ static int isqrt (int x) { int squaredbit, remainder, root; if (x<1) return 0; /* Load the binary constant 01 00 00 ... 00, where the number * of zero bits to the right of the single one bit * is even, and the one bit is as far left as is consistant * with that condition.) */ squaredbit = (int) ((((unsigned int) ~0L) >> 1) & ~(((unsigned int) ~0L) >> 2)); /* This portable load replaces the loop that used to be * here, and was donated by legalize@xmission.com */ /* Form bits of the answer. */ remainder = x; root = 0; while (squaredbit > 0) { if (remainder >= (squaredbit | root)) { remainder -= (squaredbit | root); root >>= 1; root |= squaredbit; } else { root >>= 1; } squaredbit >>= 2; } return root; } static int is_prime(int candidate) { if ((candidate & 1) != 0) { int divisor = 3; int limit = isqrt(candidate); while (divisor <= limit) { if ((candidate % divisor) == 0) return 0; divisor+=2; } } return candidate == 2; } TOOLKIT_API int hash_get_prime(int min) { const int primes[] = { 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919, 1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591, 17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437, 187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263, 1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369 }; int i; TOOLKIT_ASSERT(min >= 0); for (i = 0; i < sizeof(primes) / sizeof(int); ++i) { int prime = primes[i]; if (prime >= min) return prime; } //outside of our predefined table. //compute the hard way. for (i = (min | 1); i < INT_MAX;i+=2) { if (is_prime(i)) return i; } return min; } TOOLKIT_API hashset_t *hashset_create(int node_offset, hashset_getkey getkey_fn, hashset_hash hash_fn, hashset_cmpkey cmp_fn) { return hashset_create2(0, 1.0, node_offset, getkey_fn, hash_fn, cmp_fn); } TOOLKIT_API hashset_t *hashset_create2(int init_size, float load_factor, int node_offset, hashset_getkey getkey_fn, hashset_hash hash_fn, hashset_cmpkey cmp_fn) { hashset_t *hs; TOOLKIT_ASSERT(node_offset >= 0); TOOLKIT_ASSERT(load_factor > 0.0f && load_factor <= 1.0f); TOOLKIT_ASSERT(getkey_fn); TOOLKIT_ASSERT(hash_fn); TOOLKIT_ASSERT(cmp_fn); hs = MALLOC_T(hashset_t); init_size = hash_get_prime(init_size <= 0 ? HASH_DEFAULT_INIT_HASH_SIZE : init_size); hs->buckets = ZALLOC(init_size*sizeof(struct hlist_head)); if (!hs->buckets) return NULL; hs->size = init_size; hs->node_offset = node_offset; hs->load_factor = load_factor * HASH_DEFAULT_LOAD_FACTOR; hs->count = 0; hs->cmp_fn = cmp_fn; hs->getkey_fn = getkey_fn; hs->hash_fn = hash_fn; hs->default_iterator.ht = hs; hashset_iterator_reset(&hs->default_iterator); return hs; } static int hashset_check_expand(hashset_t *ht) { if ((ht->count+1)/(float)ht->size > ht->load_factor) { /* expand */ int i; int new_size = hash_get_prime(ht->size * 2); struct hlist_head *new_buckets = (struct hlist_head *)ZALLOC(new_size*sizeof(struct hlist_head)); if (!new_buckets) return -1; for (i = 0; i < ht->size; ++i) { struct hlist_node *n = ht->buckets[i].first; struct hlist_node *nn = n ? n->next : NULL; while (n) { void *o = ((char*)(n) + ht->node_offset); unsigned int new_slot = ht->hash_fn(ht->getkey_fn(o)) % new_size; hlist_del(n); hlist_add_head(n, &new_buckets[new_slot]); n = nn; nn = n ? n->next : NULL; } } free(ht->buckets); ht->buckets = new_buckets; ht->size = new_size; } return 0; } TOOLKIT_API void* hashset_find(hashset_t *ht, const void *key) { unsigned int slot; TOOLKIT_ASSERT(ht); slot = ht->hash_fn(key) % ht->size; if (ht->buckets[slot].first) { struct hlist_node *n = ht->buckets[slot].first; while (n) { void *o = ((char*)(n) - ht->node_offset); if (ht->cmp_fn(ht->getkey_fn(o), key) == 0) return o; n = n->next; } } return NULL; } TOOLKIT_API int hashset_add(hashset_t *ht, void *obj) { unsigned int slot; TOOLKIT_ASSERT(ht); TOOLKIT_ASSERT(obj); if (hashset_find(ht, ht->getkey_fn(obj))) return -1; /* already exist */ if (hashset_check_expand(ht) != 0) return -1; slot = ht->hash_fn(ht->getkey_fn(obj)) % ht->size; hlist_add_head((struct hlist_node*)((char*)obj+ht->node_offset), &ht->buckets[slot]); ht->count++; return 0; } TOOLKIT_API int hashset_count(hashset_t *ht) { TOOLKIT_ASSERT(ht); return ht->count; } TOOLKIT_API void* hashset_remove(hashset_t *ht, const void *key) { void *obj; TOOLKIT_ASSERT(ht); //TOOLKIT_ASSERT(key); obj = hashset_find(ht, key); if (obj) { struct hlist_node *n = (struct hlist_node *)((char*)obj + ht->node_offset); hlist_del(n); ht->count--; } return obj; } TOOLKIT_API void hashset_destroy(hashset_t *ht) { TOOLKIT_ASSERT(ht); TOOLKIT_ASSERT(ht->count == 0); free(ht->buckets); free(ht); } TOOLKIT_API hashset_iterator *hashset_default_iterator(hashset_t *ht) { return hashset_iterator_reset(&ht->default_iterator); } TOOLKIT_API hashset_iterator *hashset_iteraor_create(hashset_t *ht) { hashset_iterator *iter; TOOLKIT_ASSERT(ht); iter = MALLOC_T(hashset_iterator); iter->ht = ht; iter->slot = -1; iter->pos = NULL; iter->pos_next = NULL; return iter; } TOOLKIT_API void hashset_iterator_destroy(hashset_iterator *iter) { TOOLKIT_ASSERT(iter); free(iter); } TOOLKIT_API int hashset_iterator_next(hashset_iterator *iter) { hashset_t *ht; TOOLKIT_ASSERT(iter); ht = iter->ht; if (iter->slot == -1) { while (++iter->slot < ht->size) { if (!hlist_empty(&ht->buckets[iter->slot])) { iter->pos = ht->buckets[iter->slot].first; iter->pos_next = iter->pos->next; return 0; } } } else { iter->pos = iter->pos_next; if (iter->pos == NULL) { while (iter->slot < ht->size) { if (iter->pos && iter->pos->next) { iter->pos = iter->pos->next; iter->pos_next = iter->pos->next; return 0; } else { ++iter->slot; if (iter->slot >= ht->size) break; iter->pos = ht->buckets[iter->slot].first; if (iter->pos) { iter->pos_next = iter->pos->next; return 0; } } } } else { iter->pos_next = iter->pos->next; return 0; } } return -1; } TOOLKIT_API void *hashset_iterator_get_object(hashset_iterator *iter) { hashset_t *ht; TOOLKIT_ASSERT(iter); ht = iter->ht; return (void*)((char*)iter->pos - ht->node_offset); } TOOLKIT_API hashset_iterator * hashset_iterator_reset(hashset_iterator *iter) { TOOLKIT_ASSERT(iter); iter->pos = iter->pos_next = NULL; iter->slot = -1; return iter; } /* map */ #define DEFAULT_MAP_SIZE 15 struct stringmap_kv_pair { struct hlist_node hnode; char *key; void *value; unsigned int h; }; struct stringmap_iterator { stringmap_t *map; int slot; struct hlist_node *pos, *pos_next; }; struct stringmap_t { struct hlist_head *buckets; int count; int size; stringmap_iterator default_iterator; }; TOOLKIT_API const char* stringmap_kv_pair_get_key(stringmap_kv_pair *kv) { TOOLKIT_ASSERT(kv); return kv->key; } TOOLKIT_API void *stringmap_kv_pair_get_value(stringmap_kv_pair *kv) { TOOLKIT_ASSERT(kv); return kv->value; } TOOLKIT_API void *stringmap_kv_pair_set_value(stringmap_kv_pair *kv, void *newval) { void *old; TOOLKIT_ASSERT(kv); old = kv->value; kv->value = newval; return old; } TOOLKIT_API stringmap_t *stringmap_create(int init_size) { stringmap_t *map; if (init_size <= 0) { init_size = DEFAULT_MAP_SIZE; } else { int i = DEFAULT_MAP_SIZE; while (init_size > i) i = i * 2 + 1; init_size = i; } map = MALLOC_T(stringmap_t); map->count = 0; map->size = init_size; map->default_iterator.map = map; stringmap_iterator_reset(&map->default_iterator); map->buckets = (struct hlist_head*)ZALLOC(sizeof(struct hlist_head)*init_size); return map; } TOOLKIT_API void stringmap_destroy(stringmap_t *map) { TOOLKIT_ASSERT(map); if (map->count) { TOOLKIT_ASSERT(0); } TOOLKIT_ASSERT(map->count == 0); free(map->buckets); free(map); } // bool TOOLKIT_API stringmap_kv_pair *stringmap_find(stringmap_t *map, const char *key) { unsigned int slot, h; stringmap_kv_pair *tpos; struct hlist_node *pos; TOOLKIT_ASSERT(map); TOOLKIT_ASSERT(key); h = hash32_str(key, HASH32_STR_INIT); slot = h % map->size; hlist_for_each_entry(tpos, pos, &map->buckets[slot], stringmap_kv_pair, hnode) { if (tpos->h == h && !strcmp(key, tpos->key)) return tpos; } return NULL; /* not found */ } static void stringmap_check_expand(stringmap_t *map) { if ((map->count+1)/(float)map->size > HASH_DEFAULT_LOAD_FACTOR) { /* expand */ int i; int new_size = map->size * 2 + 1; struct hlist_head *new_buckets = (struct hlist_head *)ZALLOC(new_size*sizeof(struct hlist_head)); for (i = 0; i < map->size; ++i) { stringmap_kv_pair *tpos; struct hlist_node *pos, *n; hlist_for_each_entry_safe(tpos, pos, n, &map->buckets[i], stringmap_kv_pair, hnode) { unsigned int new_slot = tpos->h % new_size; hlist_del(pos); hlist_add_head(pos, &new_buckets[new_slot]); } } free(map->buckets); map->buckets = new_buckets; map->size = new_size; } } TOOLKIT_API int stringmap_add(stringmap_t *map, const char *key, void *value) { unsigned int slot, h; stringmap_kv_pair *tpos; struct hlist_node *pos; TOOLKIT_ASSERT(map); TOOLKIT_ASSERT(key); h = hash32_str(key, HASH32_STR_INIT); slot = h % map->size; hlist_for_each_entry(tpos, pos, &map->buckets[slot], stringmap_kv_pair, hnode) { if (tpos->h == h && !strcmp(key, tpos->key)) return -1; /* already exist */ } stringmap_check_expand(map); tpos = MALLOC_T(stringmap_kv_pair); tpos->h = h; tpos->key = _strdup(key); tpos->value = value; slot = h % map->size; hlist_add_head(&tpos->hnode, &map->buckets[slot]); map->count ++; return 0; } TOOLKIT_API void *stringmap_remove(stringmap_t *map, const char *key) { unsigned int h, slot; stringmap_kv_pair *tpos; struct hlist_node *pos, *n; TOOLKIT_ASSERT(map); TOOLKIT_ASSERT(key); h = hash32_str(key, HASH32_STR_INIT); slot = h % map->size; hlist_for_each_entry_safe(tpos, pos, n, &map->buckets[slot], stringmap_kv_pair, hnode) { if (tpos->h == h && !strcmp(key, tpos->key)) { void *value = tpos->value; hlist_del(pos); free(tpos->key); free(tpos); map->count--; return value; } } return NULL; } TOOLKIT_API int stringmap_count(stringmap_t *map) { TOOLKIT_ASSERT(map); return map->count; } TOOLKIT_API stringmap_iterator *stringmap_default_iterator(stringmap_t *map) { return stringmap_iterator_reset(&map->default_iterator); } TOOLKIT_API stringmap_iterator *stringmap_iterator_create(stringmap_t *map) { stringmap_iterator *iter; TOOLKIT_ASSERT(map); iter = MALLOC_T(stringmap_iterator); iter->map = map; iter->slot = -1; iter->pos = NULL; iter->pos_next = NULL; return iter; } TOOLKIT_API void stringmap_iterator_destroy(stringmap_iterator *iter) { TOOLKIT_ASSERT(iter); free(iter); } TOOLKIT_API int stringmap_iterator_next(stringmap_iterator *iter) { stringmap_t *map; TOOLKIT_ASSERT(iter); map = iter->map; if (iter->slot == -1) { while (++iter->slot < map->size) { if (!hlist_empty(&map->buckets[iter->slot])) { iter->pos = map->buckets[iter->slot].first; iter->pos_next = iter->pos->next; return 0; } } } else { iter->pos = iter->pos_next; if (iter->pos == NULL) { while (iter->slot < map->size) { if (iter->pos && iter->pos->next) { iter->pos = iter->pos->next; iter->pos_next = iter->pos->next; return 0; } else { ++iter->slot; if (iter->slot >= map->size) break; iter->pos = map->buckets[iter->slot].first; if (iter->pos) { iter->pos_next = iter->pos->next; return 0; } } } } else { iter->pos_next = iter->pos->next; return 0; } } return -1; } TOOLKIT_API const char *stringmap_iterator_get_key(stringmap_iterator *iter) { stringmap_kv_pair *pair; TOOLKIT_ASSERT(iter); pair = hlist_entry(iter->pos, stringmap_kv_pair, hnode); return pair->key; } TOOLKIT_API void *stringmap_iterator_get_value(stringmap_iterator *iter) { stringmap_kv_pair *pair; TOOLKIT_ASSERT(iter); pair = hlist_entry(iter->pos, stringmap_kv_pair, hnode); return pair->value; } TOOLKIT_API stringmap_iterator * stringmap_iterator_reset(stringmap_iterator *iter) { TOOLKIT_ASSERT(iter); iter->pos = iter->pos_next = NULL; iter->slot = -1; return iter; } TOOLKIT_API int htable_init(htable_t *ht, int init_size) { init_size = hash_get_prime(init_size <= 0 ? HASH_DEFAULT_INIT_HASH_SIZE : init_size); ht->size = init_size; ht->buckets = ZALLOC(init_size*sizeof(struct hlist_head)); if (!ht->buckets) return -1; ht->count = 0; ht->size = init_size; return 0; } TOOLKIT_API void htable_term(htable_t *ht) { if (ht) { ht->size = ht->count = 0; free(ht->buckets); ht->buckets = NULL; } } TOOLKIT_API htable_t *htable_create(int init_size) { htable_t *ht = MALLOC_T(htable_t); if (ht) { if (htable_init(ht, init_size)) { free(ht); ht = NULL; } } return ht; } TOOLKIT_API void htable_destroy(htable_t *ht) { htable_term(ht); free(ht); } /* // test #include #include typedef struct student_data { char name[32]; int score; struct hlist_node node; }student_data; static student_data* new_student() { return MALLOC_T(student_data); } static void delete_student(student_data *stu) { free(stu); } static const void* student_getkey(const void *obj) { student_data *stu = (student_data *)obj; return stu->name; } static unsigned int student_hash(const void *key) { return hash_str((const char*)key); } static int student_cmpkey(const void *key_x, const void *key_y) { const char *x = key_x; const char *y = key_y; return strcmp(x, y); } int test() // int main() { hashset_t hs; student_data *stu_a, *stu_b, *stu_c; if (hashset_create(offset_of(student_data, node), student_getkey, student_hash, student_cmpkey, &hs) != 0) return -1; stu_a = new_student(); strcpy(stu_a->name, "stu_a"); TOOLKIT_ASSERT(hashset_add(&hs, stu_a) == 0); stu_b = new_student(); strcpy(stu_b->name, "stu_b"); TOOLKIT_ASSERT(hashset_add(&hs, stu_b) == 0); stu_c = new_student(); strcpy(stu_c->name, "stu_c"); TOOLKIT_ASSERT(hashset_add(&hs, stu_c) == 0); printf("hashset_count = %d\n", hashset_count(&hs)); { student_data *tpos; struct hlist_node *pos, *n; int slot; hashset_for_each_safe(tpos, pos, n, slot, &hs, student_data) { printf("%s ", tpos->name); hashset_remove(&hs, tpos->name); delete_student(tpos); } printf("\n"); } printf("hashset_count = %d\n", hashset_count(&hs)); getchar(); return 0; } */ /* int main() { stringmap_t *map; stringmap_iterator *it; map = stringmap_create(10); TOOLKIT_ASSERT(map); stringmap_add(map, "1", "1-23321"); stringmap_add(map, "2", "2-23321"); stringmap_add(map, "3", "3-23321"); printf("map size = %d\n", stringmap_count(map)); it = stringmap_iterator_create(map); while (stringmap_iterator_next(it) == 0) { const char *key = stringmap_iterator_get_key(it); const char *value = stringmap_iterator_get_value(it); printf("key: %s, value: %s\n", key, value); } stringmap_iterator_reset(it); while (stringmap_iterator_next(it) == 0) { const char *key = stringmap_iterator_get_key(it); stringmap_remove(map, key); } stringmap_iterator_destroy(it); stringmap_destroy(map); _CrtDumpMemoryLeaks(); return 0; } */