libStatGen Software 1
khash.h
1/* The MIT License
2
3 Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
4
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
12
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 SOFTWARE.
24*/
25
26/*
27 An example:
28
29#include "khash.h"
30KHASH_MAP_INIT_INT(32, char)
31int main() {
32 int ret, is_missing;
33 khiter_t k;
34 khash_t(32) *h = kh_init(32);
35 k = kh_put(32, h, 5, &ret);
36 if (!ret) kh_del(32, h, k);
37 kh_value(h, k) = 10;
38 k = kh_get(32, h, 10);
39 is_missing = (k == kh_end(h));
40 k = kh_get(32, h, 5);
41 kh_del(32, h, k);
42 for (k = kh_begin(h); k != kh_end(h); ++k)
43 if (kh_exist(h, k)) kh_value(h, k) = 1;
44 kh_destroy(32, h);
45 return 0;
46}
47*/
48
49/*
50 2011-02-14 (0.2.5):
51
52 * Allow to declare global functions.
53
54 2009-09-26 (0.2.4):
55
56 * Improve portability
57
58 2008-09-19 (0.2.3):
59
60 * Corrected the example
61 * Improved interfaces
62
63 2008-09-11 (0.2.2):
64
65 * Improved speed a little in kh_put()
66
67 2008-09-10 (0.2.1):
68
69 * Added kh_clear()
70 * Fixed a compiling error
71
72 2008-09-02 (0.2.0):
73
74 * Changed to token concatenation which increases flexibility.
75
76 2008-08-31 (0.1.2):
77
78 * Fixed a bug in kh_get(), which has not been tested previously.
79
80 2008-08-31 (0.1.1):
81
82 * Added destructor
83*/
84
85
86#ifndef __AC_KHASH_H
87#define __AC_KHASH_H
88
89/*!
90 @header
91
92 Generic hash table library.
93
94 @copyright Heng Li
95 */
96
97#define AC_VERSION_KHASH_H "0.2.5"
98
99#include <stdlib.h>
100#include <string.h>
101#include <limits.h>
102
103/* compiler specific configuration */
104
105#if UINT_MAX == 0xffffffffu
106typedef unsigned int khint32_t;
107#elif ULONG_MAX == 0xffffffffu
108typedef unsigned long khint32_t;
109#endif
110
111#if ULONG_MAX == ULLONG_MAX
112typedef unsigned long khint64_t;
113#else
114typedef unsigned long long khint64_t;
115#endif
116
117#ifdef _MSC_VER
118#define inline __inline
119#endif
120
121#ifdef _WIN32
122#define inline __inline
123#endif
124
125typedef khint32_t khint_t;
126typedef khint_t khiter_t;
127
128#define __ac_HASH_PRIME_SIZE 32
129static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
130{
131 0ul, 3ul, 11ul, 23ul, 53ul,
132 97ul, 193ul, 389ul, 769ul, 1543ul,
133 3079ul, 6151ul, 12289ul, 24593ul, 49157ul,
134 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul,
135 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul,
136 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,
137 3221225473ul, 4294967291ul
138};
139
140#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
141#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
142#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
143#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
144#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
145#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
146#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
147
148static const double __ac_HASH_UPPER = 0.77;
149
150#define KHASH_DECLARE(name, khkey_t, khval_t) \
151 typedef struct { \
152 khint_t n_buckets, size, n_occupied, upper_bound; \
153 khint32_t *flags; \
154 khkey_t *keys; \
155 khval_t *vals; \
156 } kh_##name##_t; \
157 extern kh_##name##_t *kh_init_##name(); \
158 extern void kh_destroy_##name(kh_##name##_t *h); \
159 extern void kh_clear_##name(kh_##name##_t *h); \
160 extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
161 extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
162 extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
163 extern void kh_del_##name(kh_##name##_t *h, khint_t x);
164
165#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
166 typedef struct { \
167 khint_t n_buckets, size, n_occupied, upper_bound; \
168 khint32_t *flags; \
169 khkey_t *keys; \
170 khval_t *vals; \
171 } kh_##name##_t; \
172 SCOPE kh_##name##_t *kh_init_##name() { \
173 return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
174 } \
175 SCOPE void kh_destroy_##name(kh_##name##_t *h) \
176 { \
177 if (h) { \
178 free(h->keys); free(h->flags); \
179 free(h->vals); \
180 free(h); \
181 } \
182 } \
183 SCOPE void kh_clear_##name(kh_##name##_t *h) \
184 { \
185 if (h && h->flags) { \
186 memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(khint32_t)); \
187 h->size = h->n_occupied = 0; \
188 } \
189 } \
190 SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
191 { \
192 if (h->n_buckets) { \
193 khint_t inc, k, i, last; \
194 k = __hash_func(key); i = k % h->n_buckets; \
195 inc = 1 + k % (h->n_buckets - 1); last = i; \
196 while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
197 if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
198 else i += inc; \
199 if (i == last) return h->n_buckets; \
200 } \
201 return __ac_iseither(h->flags, i)? h->n_buckets : i; \
202 } else return 0; \
203 } \
204 SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
205 { \
206 khint32_t *new_flags = 0; \
207 khint_t j = 1; \
208 { \
209 khint_t t = __ac_HASH_PRIME_SIZE - 1; \
210 while (__ac_prime_list[t] > new_n_buckets) --t; \
211 new_n_buckets = __ac_prime_list[t+1]; \
212 if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \
213 else { \
214 new_flags = (khint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \
215 memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \
216 if (h->n_buckets < new_n_buckets) { \
217 h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
218 if (kh_is_map) \
219 h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
220 } \
221 } \
222 } \
223 if (j) { \
224 for (j = 0; j != h->n_buckets; ++j) { \
225 if (__ac_iseither(h->flags, j) == 0) { \
226 khkey_t key = h->keys[j]; \
227 khval_t val; \
228 if (kh_is_map) val = h->vals[j]; \
229 __ac_set_isdel_true(h->flags, j); \
230 while (1) { \
231 khint_t inc, k, i; \
232 k = __hash_func(key); \
233 i = k % new_n_buckets; \
234 inc = 1 + k % (new_n_buckets - 1); \
235 while (!__ac_isempty(new_flags, i)) { \
236 if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
237 else i += inc; \
238 } \
239 __ac_set_isempty_false(new_flags, i); \
240 if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
241 { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
242 if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
243 __ac_set_isdel_true(h->flags, i); \
244 } else { \
245 h->keys[i] = key; \
246 if (kh_is_map) h->vals[i] = val; \
247 break; \
248 } \
249 } \
250 } \
251 } \
252 if (h->n_buckets > new_n_buckets) { \
253 h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
254 if (kh_is_map) \
255 h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
256 } \
257 free(h->flags); \
258 h->flags = new_flags; \
259 h->n_buckets = new_n_buckets; \
260 h->n_occupied = h->size; \
261 h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
262 } \
263 } \
264 SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
265 { \
266 khint_t x; \
267 if (h->n_occupied >= h->upper_bound) { \
268 if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \
269 else kh_resize_##name(h, h->n_buckets + 1); \
270 } \
271 { \
272 khint_t inc, k, i, site, last; \
273 x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \
274 if (__ac_isempty(h->flags, i)) x = i; \
275 else { \
276 inc = 1 + k % (h->n_buckets - 1); last = i; \
277 while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
278 if (__ac_isdel(h->flags, i)) site = i; \
279 if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
280 else i += inc; \
281 if (i == last) { x = site; break; } \
282 } \
283 if (x == h->n_buckets) { \
284 if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
285 else x = i; \
286 } \
287 } \
288 } \
289 if (__ac_isempty(h->flags, x)) { \
290 h->keys[x] = key; \
291 __ac_set_isboth_false(h->flags, x); \
292 ++h->size; ++h->n_occupied; \
293 *ret = 1; \
294 } else if (__ac_isdel(h->flags, x)) { \
295 h->keys[x] = key; \
296 __ac_set_isboth_false(h->flags, x); \
297 ++h->size; \
298 *ret = 2; \
299 } else *ret = 0; \
300 return x; \
301 } \
302 SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
303 { \
304 if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
305 __ac_set_isdel_true(h->flags, x); \
306 --h->size; \
307 } \
308 }
309
310#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
311 KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
312
313/* --- BEGIN OF HASH FUNCTIONS --- */
314
315/*! @function
316 @abstract Integer hash function
317 @param key The integer [khint32_t]
318 @return The hash value [khint_t]
319 */
320#define kh_int_hash_func(key) (khint32_t)(key)
321/*! @function
322 @abstract Integer comparison function
323 */
324#define kh_int_hash_equal(a, b) ((a) == (b))
325/*! @function
326 @abstract 64-bit integer hash function
327 @param key The integer [khint64_t]
328 @return The hash value [khint_t]
329 */
330#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
331/*! @function
332 @abstract 64-bit integer comparison function
333 */
334#define kh_int64_hash_equal(a, b) ((a) == (b))
335/*! @function
336 @abstract const char* hash function
337 @param s Pointer to a null terminated string
338 @return The hash value
339 */
340static inline khint_t __ac_X31_hash_string(const char *s)
341{
342 khint_t h = *s;
343 if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
344 return h;
345}
346/*! @function
347 @abstract Another interface to const char* hash function
348 @param key Pointer to a null terminated string [const char*]
349 @return The hash value [khint_t]
350 */
351#define kh_str_hash_func(key) __ac_X31_hash_string(key)
352/*! @function
353 @abstract Const char* comparison function
354 */
355#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
356
357/* --- END OF HASH FUNCTIONS --- */
358
359/* Other necessary macros... */
360
361/*!
362 @abstract Type of the hash table.
363 @param name Name of the hash table [symbol]
364 */
365#define khash_t(name) kh_##name##_t
366
367/*! @function
368 @abstract Initiate a hash table.
369 @param name Name of the hash table [symbol]
370 @return Pointer to the hash table [khash_t(name)*]
371 */
372#define kh_init(name) kh_init_##name()
373
374/*! @function
375 @abstract Destroy a hash table.
376 @param name Name of the hash table [symbol]
377 @param h Pointer to the hash table [khash_t(name)*]
378 */
379#define kh_destroy(name, h) kh_destroy_##name(h)
380
381/*! @function
382 @abstract Reset a hash table without deallocating memory.
383 @param name Name of the hash table [symbol]
384 @param h Pointer to the hash table [khash_t(name)*]
385 */
386#define kh_clear(name, h) kh_clear_##name(h)
387
388/*! @function
389 @abstract Resize a hash table.
390 @param name Name of the hash table [symbol]
391 @param h Pointer to the hash table [khash_t(name)*]
392 @param s New size [khint_t]
393 */
394#define kh_resize(name, h, s) kh_resize_##name(h, s)
395
396/*! @function
397 @abstract Insert a key to the hash table.
398 @param name Name of the hash table [symbol]
399 @param h Pointer to the hash table [khash_t(name)*]
400 @param k Key [type of keys]
401 @param r Extra return code: 0 if the key is present in the hash table;
402 1 if the bucket is empty (never used); 2 if the element in
403 the bucket has been deleted [int*]
404 @return Iterator to the inserted element [khint_t]
405 */
406#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
407
408/*! @function
409 @abstract Retrieve a key from the hash table.
410 @param name Name of the hash table [symbol]
411 @param h Pointer to the hash table [khash_t(name)*]
412 @param k Key [type of keys]
413 @return Iterator to the found element, or kh_end(h) is the element is absent [khint_t]
414 */
415#define kh_get(name, h, k) kh_get_##name(h, k)
416
417/*! @function
418 @abstract Remove a key from the hash table.
419 @param name Name of the hash table [symbol]
420 @param h Pointer to the hash table [khash_t(name)*]
421 @param k Iterator to the element to be deleted [khint_t]
422 */
423#define kh_del(name, h, k) kh_del_##name(h, k)
424
425
426/*! @function
427 @abstract Test whether a bucket contains data.
428 @param h Pointer to the hash table [khash_t(name)*]
429 @param x Iterator to the bucket [khint_t]
430 @return 1 if containing data; 0 otherwise [int]
431 */
432#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
433
434/*! @function
435 @abstract Get key given an iterator
436 @param h Pointer to the hash table [khash_t(name)*]
437 @param x Iterator to the bucket [khint_t]
438 @return Key [type of keys]
439 */
440#define kh_key(h, x) ((h)->keys[x])
441
442/*! @function
443 @abstract Get value given an iterator
444 @param h Pointer to the hash table [khash_t(name)*]
445 @param x Iterator to the bucket [khint_t]
446 @return Value [type of values]
447 @discussion For hash sets, calling this results in segfault.
448 */
449#define kh_val(h, x) ((h)->vals[x])
450
451/*! @function
452 @abstract Alias of kh_val()
453 */
454#define kh_value(h, x) ((h)->vals[x])
455
456/*! @function
457 @abstract Get the start iterator
458 @param h Pointer to the hash table [khash_t(name)*]
459 @return The start iterator [khint_t]
460 */
461#define kh_begin(h) (khint_t)(0)
462
463/*! @function
464 @abstract Get the end iterator
465 @param h Pointer to the hash table [khash_t(name)*]
466 @return The end iterator [khint_t]
467 */
468#define kh_end(h) ((h)->n_buckets)
469
470/*! @function
471 @abstract Get the number of elements in the hash table
472 @param h Pointer to the hash table [khash_t(name)*]
473 @return Number of elements in the hash table [khint_t]
474 */
475#define kh_size(h) ((h)->size)
476
477/*! @function
478 @abstract Get the number of buckets in the hash table
479 @param h Pointer to the hash table [khash_t(name)*]
480 @return Number of buckets in the hash table [khint_t]
481 */
482#define kh_n_buckets(h) ((h)->n_buckets)
483
484/* More conenient interfaces */
485
486/*! @function
487 @abstract Instantiate a hash set containing integer keys
488 @param name Name of the hash table [symbol]
489 */
490#define KHASH_SET_INIT_INT(name) \
491 KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
492
493/*! @function
494 @abstract Instantiate a hash map containing integer keys
495 @param name Name of the hash table [symbol]
496 @param khval_t Type of values [type]
497 */
498#define KHASH_MAP_INIT_INT(name, khval_t) \
499 KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
500
501/*! @function
502 @abstract Instantiate a hash map containing 64-bit integer keys
503 @param name Name of the hash table [symbol]
504 */
505#define KHASH_SET_INIT_INT64(name) \
506 KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
507
508/*! @function
509 @abstract Instantiate a hash map containing 64-bit integer keys
510 @param name Name of the hash table [symbol]
511 @param khval_t Type of values [type]
512 */
513#define KHASH_MAP_INIT_INT64(name, khval_t) \
514 KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
515
516typedef const char *kh_cstr_t;
517/*! @function
518 @abstract Instantiate a hash map containing const char* keys
519 @param name Name of the hash table [symbol]
520 */
521#define KHASH_SET_INIT_STR(name) \
522 KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
523
524/*! @function
525 @abstract Instantiate a hash map containing const char* keys
526 @param name Name of the hash table [symbol]
527 @param khval_t Type of values [type]
528 */
529#define KHASH_MAP_INIT_STR(name, khval_t) \
530 KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
531
532#endif /* __AC_KHASH_H */