ghash.c 76.4 KB
Newer Older
Owen Taylor's avatar
Owen Taylor committed
1 2 3 4
/* GLIB - Library of useful routines for C programming
 * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
 *
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
Owen Taylor's avatar
Owen Taylor committed
6
 * License as published by the Free Software Foundation; either
7
 * version 2.1 of the License, or (at your option) any later version.
Owen Taylor's avatar
Owen Taylor committed
8 9 10
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
Owen Taylor's avatar
Owen Taylor committed
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
Owen Taylor's avatar
Owen Taylor committed
16
 */
17

18
/*
19
 * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
20 21
 * file for a list of people on the GLib Team.  See the ChangeLog
 * files for a list of changes.  These files are distributed with
22
 * GLib at ftp://ftp.gtk.org/pub/gtk/.
23 24
 */

25
/*
26 27 28
 * MT safe
 */

29
#include "config.h"
30

31 32
#include <string.h>  /* memset */

Matthias Clasen's avatar
Matthias Clasen committed
33
#include "ghash.h"
34
#include "gmacros.h"
35
#include "glib-private.h"
36
#include "gstrfuncs.h"
37
#include "gstrfuncsprivate.h"
38
#include "gatomic.h"
Matthias Clasen's avatar
Matthias Clasen committed
39
#include "gtestutils.h"
40
#include "gslice.h"
41
#include "grefcount.h"
42
#include "gvalgrind.h"
Owen Taylor's avatar
Owen Taylor committed
43

44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
/* The following #pragma is here so we can do this...
 *
 *   #ifndef USE_SMALL_ARRAYS
 *     is_big = TRUE;
 *   #endif
 *     return is_big ? *(((gpointer *) a) + index) : GUINT_TO_POINTER (*(((guint *) a) + index));
 *
 * ...instead of this...
 *
 *   #ifndef USE_SMALL_ARRAYS
 *     return *(((gpointer *) a) + index);
 *   #else
 *     return is_big ? *(((gpointer *) a) + index) : GUINT_TO_POINTER (*(((guint *) a) + index));
 *   #endif
 *
 * ...and still compile successfully when -Werror=duplicated-branches is passed. */

61
#if defined(__GNUC__) && __GNUC__ > 6
62
#pragma GCC diagnostic ignored "-Wduplicated-branches"
63
#endif
64

65
/**
Johan Dahlin's avatar
Johan Dahlin committed
66
 * SECTION:hash_tables
67 68
 * @title: Hash Tables
 * @short_description: associations between keys and values so that
Matthias Clasen's avatar
Matthias Clasen committed
69
 *     given a key the value can be found quickly
70 71
 *
 * A #GHashTable provides associations between keys and values which is
72 73 74
 * optimized so that given a key, the associated value can be found,
 * inserted or removed in amortized O(1). All operations going through
 * each element take O(n) time (list all keys/values, table resize, etc.).
75 76 77 78
 *
 * Note that neither keys nor values are copied when inserted into the
 * #GHashTable, so they must exist for the lifetime of the #GHashTable.
 * This means that the use of static strings is OK, but temporary
79
 * strings (i.e. those created in buffers and those returned by GTK
80 81 82 83 84 85 86 87 88 89 90 91 92 93
 * widgets) should be copied with g_strdup() before being inserted.
 *
 * If keys or values are dynamically allocated, you must be careful to
 * ensure that they are freed when they are removed from the
 * #GHashTable, and also when they are overwritten by new insertions
 * into the #GHashTable. It is also not advisable to mix static strings
 * and dynamically-allocated strings in a #GHashTable, because it then
 * becomes difficult to determine whether the string should be freed.
 *
 * To create a #GHashTable, use g_hash_table_new().
 *
 * To insert a key and value into a #GHashTable, use
 * g_hash_table_insert().
 *
94
 * To look up a value corresponding to a given key, use
95 96
 * g_hash_table_lookup() and g_hash_table_lookup_extended().
 *
97 98 99
 * g_hash_table_lookup_extended() can also be used to simply
 * check if a key is present in the hash table.
 *
100 101 102
 * To remove a key and value, use g_hash_table_remove().
 *
 * To call a function for each key and value pair use
103
 * g_hash_table_foreach() or use an iterator to iterate over the
104 105 106
 * key/value pairs in the hash table, see #GHashTableIter. The iteration order
 * of a hash table is not defined, and you must not rely on iterating over
 * keys/values in the same order as they were inserted.
107 108
 *
 * To destroy a #GHashTable use g_hash_table_destroy().
109
 *
Dan Winship's avatar
Dan Winship committed
110 111
 * A common use-case for hash tables is to store information about a
 * set of keys, without associating any particular value with each
112 113 114
 * key. GHashTable optimizes one way of doing so: If you store only
 * key-value pairs where key == value, then GHashTable does not
 * allocate memory to store the values, which can be a considerable
Dan Winship's avatar
Dan Winship committed
115 116 117
 * space saving, if your set is large. The functions
 * g_hash_table_add() and g_hash_table_contains() are designed to be
 * used when using #GHashTable this way.
118 119 120 121
 *
 * #GHashTable is not designed to be statically initialised with keys and
 * values known at compile time. To build a static hash table, use a tool such
 * as [gperf](https://www.gnu.org/software/gperf/).
122
 */
123 124 125 126 127

/**
 * GHashTable:
 *
 * The #GHashTable struct is an opaque data structure to represent a
Matthias Clasen's avatar
Matthias Clasen committed
128 129
 * [Hash Table][glib-Hash-Tables]. It should only be accessed via the
 * following functions.
Matthias Clasen's avatar
Matthias Clasen committed
130
 */
131 132 133

/**
 * GHashFunc:
Matthias Clasen's avatar
Matthias Clasen committed
134
 * @key: a key
135 136 137 138 139 140
 *
 * Specifies the type of the hash function which is passed to
 * g_hash_table_new() when a #GHashTable is created.
 *
 * The function is passed a key and should return a #guint hash value.
 * The functions g_direct_hash(), g_int_hash() and g_str_hash() provide
141
 * hash functions which can be used when the key is a #gpointer, #gint*,
142 143
 * and #gchar* respectively.
 *
144
 * g_direct_hash() is also the appropriate hash function for keys
Matthias Clasen's avatar
Matthias Clasen committed
145
 * of the form `GINT_TO_POINTER (n)` (or similar macros).
146
 *
147
 * A good hash functions should produce
148 149 150 151 152 153 154 155 156 157 158
 * hash values that are evenly distributed over a fairly large range.
 * The modulus is taken with the hash table size (a prime number) to
 * find the 'bucket' to place each key into. The function should also
 * be very fast, since it is called for each key lookup.
 *
 * Note that the hash functions provided by GLib have these qualities,
 * but are not particularly robust against manufactured keys that
 * cause hash collisions. Therefore, you should consider choosing
 * a more secure hash function when using a GHashTable with keys
 * that originate in untrusted data (such as HTTP requests).
 * Using g_str_hash() in that situation might make your application
159
 * vulnerable to
160
 * [Algorithmic Complexity Attacks](https://lwn.net/Articles/474912/).
Matthias Clasen's avatar
Matthias Clasen committed
161
 *
162 163 164 165 166
 * The key to choosing a good hash is unpredictability.  Even
 * cryptographic hashes are very easy to find collisions for when the
 * remainder is taken modulo a somewhat predictable prime number.  There
 * must be an element of randomness that an attacker is unable to guess.
 *
Matthias Clasen's avatar
Matthias Clasen committed
167 168
 * Returns: the hash value corresponding to the key
 */
169 170 171

/**
 * GHFunc:
Matthias Clasen's avatar
Matthias Clasen committed
172 173 174
 * @key: a key
 * @value: the value corresponding to the key
 * @user_data: user data passed to g_hash_table_foreach()
175 176 177 178
 *
 * Specifies the type of the function passed to g_hash_table_foreach().
 * It is called with each key/value pair, together with the @user_data
 * parameter which is passed to g_hash_table_foreach().
Matthias Clasen's avatar
Matthias Clasen committed
179
 */
180 181 182

/**
 * GHRFunc:
Matthias Clasen's avatar
Matthias Clasen committed
183 184 185
 * @key: a key
 * @value: the value associated with the key
 * @user_data: user data passed to g_hash_table_remove()
186 187 188 189 190 191
 *
 * Specifies the type of the function passed to
 * g_hash_table_foreach_remove(). It is called with each key/value
 * pair, together with the @user_data parameter passed to
 * g_hash_table_foreach_remove(). It should return %TRUE if the
 * key/value pair should be removed from the #GHashTable.
Matthias Clasen's avatar
Matthias Clasen committed
192 193 194 195
 *
 * Returns: %TRUE if the key/value pair should be removed from the
 *     #GHashTable
 */
196 197 198

/**
 * GEqualFunc:
Matthias Clasen's avatar
Matthias Clasen committed
199 200
 * @a: a value
 * @b: a value to compare with
201 202 203 204
 *
 * Specifies the type of a function used to test two values for
 * equality. The function should return %TRUE if both values are equal
 * and %FALSE otherwise.
Matthias Clasen's avatar
Matthias Clasen committed
205 206 207
 *
 * Returns: %TRUE if @a = @b; %FALSE otherwise
 */
208 209 210 211 212 213 214 215

/**
 * GHashTableIter:
 *
 * A GHashTableIter structure represents an iterator that can be used
 * to iterate over the elements of a #GHashTable. GHashTableIter
 * structures are typically allocated on the stack and then initialized
 * with g_hash_table_iter_init().
216 217 218
 *
 * The iteration order of a #GHashTableIter over the keys/values in a hash
 * table is not defined.
Matthias Clasen's avatar
Matthias Clasen committed
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
 */

/**
 * g_hash_table_freeze:
 * @hash_table: a #GHashTable
 *
 * This function is deprecated and will be removed in the next major
 * release of GLib. It does nothing.
 */

/**
 * g_hash_table_thaw:
 * @hash_table: a #GHashTable
 *
 * This function is deprecated and will be removed in the next major
 * release of GLib. It does nothing.
 */
236

237
#define HASH_TABLE_MIN_SHIFT 3  /* 1 << 3 == 8 buckets */
Owen Taylor's avatar
Owen Taylor committed
238

239 240 241 242
#define UNUSED_HASH_VALUE 0
#define TOMBSTONE_HASH_VALUE 1
#define HASH_IS_UNUSED(h_) ((h_) == UNUSED_HASH_VALUE)
#define HASH_IS_TOMBSTONE(h_) ((h_) == TOMBSTONE_HASH_VALUE)
243 244
#define HASH_IS_REAL(h_) ((h_) >= 2)

245 246 247 248 249 250 251 252 253 254 255 256
/* If int is smaller than void * on our arch, we start out with
 * int-sized keys and values and resize to pointer-sized entries as
 * needed. This saves a good amount of memory when the HT is being
 * used with e.g. GUINT_TO_POINTER(). */

#define BIG_ENTRY_SIZE (SIZEOF_VOID_P)
#define SMALL_ENTRY_SIZE (SIZEOF_INT)

#if SMALL_ENTRY_SIZE < BIG_ENTRY_SIZE
# define USE_SMALL_ARRAYS
#endif

257
struct _GHashTable
Owen Taylor's avatar
Owen Taylor committed
258
{
259
  gsize            size;
260 261
  gint             mod;
  guint            mask;
262
  gint             nnodes;
263
  gint             noccupied;  /* nnodes + tombstones */
264

265 266 267 268
  guint            have_big_keys : 1;
  guint            have_big_values : 1;

  gpointer         keys;
269
  guint           *hashes;
270
  gpointer         values;
271

272 273
  GHashFunc        hash_func;
  GEqualFunc       key_equal_func;
274
  gatomicrefcount  ref_count;
275 276 277 278 279 280 281 282
#ifndef G_DISABLE_ASSERT
  /*
   * Tracks the structure of the hash table, not its contents: is only
   * incremented when a node is added or removed (is not incremented
   * when the key or data of a node is modified).
   */
  int              version;
#endif
283 284
  GDestroyNotify   key_destroy_func;
  GDestroyNotify   value_destroy_func;
Owen Taylor's avatar
Owen Taylor committed
285 286
};

287 288
typedef struct
{
289 290 291
  GHashTable  *hash_table;
  gpointer     dummy1;
  gpointer     dummy2;
292
  gint         position;
293
  gboolean     dummy3;
294
  gint         version;
295 296
} RealIter;

297
G_STATIC_ASSERT (sizeof (GHashTableIter) == sizeof (RealIter));
298
G_STATIC_ASSERT (G_ALIGNOF (GHashTableIter) >= G_ALIGNOF (RealIter));
299

300 301 302
/* Each table size has an associated prime modulo (the first prime
 * lower than the table size) used to find the initial bucket. Probing
 * then works modulo 2^n. The prime modulo is necessary to get a
Matthias Clasen's avatar
Matthias Clasen committed
303 304
 * good distribution with poor hash functions.
 */
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
static const gint prime_mod [] =
{
  1,          /* For 1 << 0 */
  2,
  3,
  7,
  13,
  31,
  61,
  127,
  251,
  509,
  1021,
  2039,
  4093,
  8191,
  16381,
  32749,
  65521,      /* For 1 << 16 */
  131071,
  262139,
  524287,
  1048573,
  2097143,
  4194301,
  8388593,
  16777213,
  33554393,
  67108859,
  134217689,
  268435399,
  536870909,
  1073741789,
  2147483647  /* For 1 << 31 */
};

static void
g_hash_table_set_shift (GHashTable *hash_table, gint shift)
{
  hash_table->size = 1 << shift;
  hash_table->mod  = prime_mod [shift];

347 348 349
  /* hash_table->size is always a power of two, so we can calculate the mask
   * by simply subtracting 1 from it. The leading assertion ensures that
   * we're really dealing with a power of two. */
350

351 352
  g_assert ((hash_table->size & (hash_table->size - 1)) == 0);
  hash_table->mask = hash_table->size - 1;
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376
}

static gint
g_hash_table_find_closest_shift (gint n)
{
  gint i;

  for (i = 0; n; i++)
    n >>= 1;

  return i;
}

static void
g_hash_table_set_shift_from_size (GHashTable *hash_table, gint size)
{
  gint shift;

  shift = g_hash_table_find_closest_shift (size);
  shift = MAX (shift, HASH_TABLE_MIN_SHIFT);

  g_hash_table_set_shift (hash_table, shift);
}

377
static inline gpointer
378
g_hash_table_realloc_key_or_value_array (gpointer a, guint size, G_GNUC_UNUSED gboolean is_big)
379 380 381 382 383 384 385 386
{
#ifdef USE_SMALL_ARRAYS
  return g_realloc (a, size * (is_big ? BIG_ENTRY_SIZE : SMALL_ENTRY_SIZE));
#else
  return g_renew (gpointer, a, size);
#endif
}

387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
static inline gpointer
g_hash_table_fetch_key_or_value (gpointer a, guint index, gboolean is_big)
{
#ifndef USE_SMALL_ARRAYS
  is_big = TRUE;
#endif
  return is_big ? *(((gpointer *) a) + index) : GUINT_TO_POINTER (*(((guint *) a) + index));
}

static inline void
g_hash_table_assign_key_or_value (gpointer a, guint index, gboolean is_big, gpointer v)
{
#ifndef USE_SMALL_ARRAYS
  is_big = TRUE;
#endif
  if (is_big)
    *(((gpointer *) a) + index) = v;
  else
    *(((guint *) a) + index) = GPOINTER_TO_UINT (v);
}

408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427
static inline gpointer
g_hash_table_evict_key_or_value (gpointer a, guint index, gboolean is_big, gpointer v)
{
#ifndef USE_SMALL_ARRAYS
  is_big = TRUE;
#endif
  if (is_big)
    {
      gpointer r = *(((gpointer *) a) + index);
      *(((gpointer *) a) + index) = v;
      return r;
    }
  else
    {
      gpointer r = GUINT_TO_POINTER (*(((guint *) a) + index));
      *(((guint *) a) + index) = GPOINTER_TO_UINT (v);
      return r;
    }
}

428 429 430 431 432 433 434 435 436 437
static inline guint
g_hash_table_hash_to_index (GHashTable *hash_table, guint hash)
{
  /* Multiply the hash by a small prime before applying the modulo. This
   * prevents the table from becoming densely packed, even with a poor hash
   * function. A densely packed table would have poor performance on
   * workloads with many failed lookups or a high degree of churn. */
  return (hash * 11) % hash_table->mod;
}

438
/*
439 440
 * g_hash_table_lookup_node:
 * @hash_table: our #GHashTable
441
 * @key: the key to look up against
442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457
 * @hash_return: key hash return location
 *
 * Performs a lookup in the hash table, preserving extra information
 * usually needed for insertion.
 *
 * This function first computes the hash value of the key using the
 * user's hash function.
 *
 * If an entry in the table matching @key is found then this function
 * returns the index of that entry in the table, and if not, the
 * index of an unused node (empty or tombstone) where the key can be
 * inserted.
 *
 * The computed hash value is returned in the variable pointed to
 * by @hash_return. This is to save insertions from having to compute
 * the hash record again for the new record.
Matthias Clasen's avatar
Matthias Clasen committed
458 459
 *
 * Returns: index of the described node
460 461
 */
static inline guint
462
g_hash_table_lookup_node (GHashTable    *hash_table,
463 464
                          gconstpointer  key,
                          guint         *hash_return)
465 466
{
  guint node_index;
Matthias Clasen's avatar
Matthias Clasen committed
467
  guint node_hash;
468
  guint hash_value;
Dan Winship's avatar
Dan Winship committed
469
  guint first_tombstone = 0;
470 471 472
  gboolean have_tombstone = FALSE;
  guint step = 0;

Matthias Clasen's avatar
Matthias Clasen committed
473
  hash_value = hash_table->hash_func (key);
474
  if (G_UNLIKELY (!HASH_IS_REAL (hash_value)))
475 476 477 478
    hash_value = 2;

  *hash_return = hash_value;

479
  node_index = g_hash_table_hash_to_index (hash_table, hash_value);
Matthias Clasen's avatar
Matthias Clasen committed
480
  node_hash = hash_table->hashes[node_index];
481

Matthias Clasen's avatar
Matthias Clasen committed
482
  while (!HASH_IS_UNUSED (node_hash))
483
    {
Matthias Clasen's avatar
Matthias Clasen committed
484 485 486
      /* We first check if our full hash values
       * are equal so we can avoid calling the full-blown
       * key equality function in most cases.
487
       */
488
      if (node_hash == hash_value)
489
        {
490
          gpointer node_key = g_hash_table_fetch_key_or_value (hash_table->keys, node_index, hash_table->have_big_keys);
491

492 493
          if (hash_table->key_equal_func)
            {
494
              if (hash_table->key_equal_func (node_key, key))
495 496
                return node_index;
            }
497
          else if (node_key == key)
498 499 500 501
            {
              return node_index;
            }
        }
Matthias Clasen's avatar
Matthias Clasen committed
502
      else if (HASH_IS_TOMBSTONE (node_hash) && !have_tombstone)
503 504 505
        {
          first_tombstone = node_index;
          have_tombstone = TRUE;
506
        }
507 508 509 510

      step++;
      node_index += step;
      node_index &= hash_table->mask;
Matthias Clasen's avatar
Matthias Clasen committed
511
      node_hash = hash_table->hashes[node_index];
512 513
    }

514 515 516 517
  if (have_tombstone)
    return first_tombstone;

  return node_index;
518 519
}

520
/*
521 522
 * g_hash_table_remove_node:
 * @hash_table: our #GHashTable
523
 * @node: pointer to node to remove
524 525
 * @notify: %TRUE if the destroy notify handlers are to be called
 *
526 527
 * Removes a node from the hash table and updates the node count.
 * The node is replaced by a tombstone. No table resize is performed.
528 529 530
 *
 * If @notify is %TRUE then the destroy notify functions are called
 * for the key and value of the hash node.
531
 */
532 533
static void
g_hash_table_remove_node (GHashTable   *hash_table,
Matthias Clasen's avatar
Matthias Clasen committed
534
                          gint          i,
535 536
                          gboolean      notify)
{
537 538
  gpointer key;
  gpointer value;
539

540 541
  key = g_hash_table_fetch_key_or_value (hash_table->keys, i, hash_table->have_big_keys);
  value = g_hash_table_fetch_key_or_value (hash_table->values, i, hash_table->have_big_values);
542

543
  /* Erect tombstone */
544
  hash_table->hashes[i] = TOMBSTONE_HASH_VALUE;
545 546

  /* Be GC friendly */
547 548
  g_hash_table_assign_key_or_value (hash_table->keys, i, hash_table->have_big_keys, NULL);
  g_hash_table_assign_key_or_value (hash_table->values, i, hash_table->have_big_values, NULL);
549 550

  hash_table->nnodes--;
551 552 553 554 555 556 557

  if (notify && hash_table->key_destroy_func)
    hash_table->key_destroy_func (key);

  if (notify && hash_table->value_destroy_func)
    hash_table->value_destroy_func (value);

558 559
}

560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595
/*
 * g_hash_table_setup_storage:
 * @hash_table: our #GHashTable
 *
 * Initialise the hash table size, mask, mod, and arrays.
 */
static void
g_hash_table_setup_storage (GHashTable *hash_table)
{
  gboolean small;

  /* We want to use small arrays only if:
   *   - we are running on a system where that makes sense (64 bit); and
   *   - we are not running under valgrind.
   */
  small = FALSE;

#ifdef USE_SMALL_ARRAYS
  small = TRUE;

# ifdef ENABLE_VALGRIND
  if (RUNNING_ON_VALGRIND)
    small = FALSE;
# endif
#endif

  g_hash_table_set_shift (hash_table, HASH_TABLE_MIN_SHIFT);

  hash_table->have_big_keys = !small;
  hash_table->have_big_values = !small;

  hash_table->keys   = g_hash_table_realloc_key_or_value_array (NULL, hash_table->size, hash_table->have_big_keys);
  hash_table->values = hash_table->keys;
  hash_table->hashes = g_new0 (guint, hash_table->size);
}

596
/*
597 598 599 600
 * g_hash_table_remove_all_nodes:
 * @hash_table: our #GHashTable
 * @notify: %TRUE if the destroy notify handlers are to be called
 *
601
 * Removes all nodes from the table.
602 603 604
 *
 * If @notify is %TRUE then the destroy notify functions are called
 * for the key and value of the hash node.
605 606 607 608 609 610 611 612
 *
 * Since this may be a precursor to freeing the table entirely, we'd
 * ideally perform no resize, and we can indeed avoid that in some
 * cases.  However: in the case that we'll be making callbacks to user
 * code (via destroy notifies) we need to consider that the user code
 * might call back into the table again.  In this case, we setup a new
 * set of arrays so that any callers will see an empty (but valid)
 * table.
613
 */
614 615
static void
g_hash_table_remove_all_nodes (GHashTable *hash_table,
616 617
                               gboolean    notify,
                               gboolean    destruction)
618 619
{
  int i;
620 621
  gpointer key;
  gpointer value;
622 623 624 625
  gint old_size;
  gpointer *old_keys;
  gpointer *old_values;
  guint    *old_hashes;
626 627
  gboolean  old_have_big_keys;
  gboolean  old_have_big_values;
628 629 630 631

  /* If the hash table is already empty, there is nothing to be done. */
  if (hash_table->nnodes == 0)
    return;
632 633 634

  hash_table->nnodes = 0;
  hash_table->noccupied = 0;
635

636
  /* Easy case: no callbacks, so we just zero out the arrays */
637 638 639
  if (!notify ||
      (hash_table->key_destroy_func == NULL &&
       hash_table->value_destroy_func == NULL))
640
    {
641 642 643
      if (!destruction)
        {
          memset (hash_table->hashes, 0, hash_table->size * sizeof (guint));
644 645 646 647 648

#ifdef USE_SMALL_ARRAYS
          memset (hash_table->keys, 0, hash_table->size * (hash_table->have_big_keys ? BIG_ENTRY_SIZE : SMALL_ENTRY_SIZE));
          memset (hash_table->values, 0, hash_table->size * (hash_table->have_big_values ? BIG_ENTRY_SIZE : SMALL_ENTRY_SIZE));
#else
649 650
          memset (hash_table->keys, 0, hash_table->size * sizeof (gpointer));
          memset (hash_table->values, 0, hash_table->size * sizeof (gpointer));
651
#endif
652
        }
653

654
      return;
655 656
    }

657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
  /* Hard case: we need to do user callbacks.  There are two
   * possibilities here:
   *
   *   1) there are no outstanding references on the table and therefore
   *   nobody should be calling into it again (destroying == true)
   *
   *   2) there are outstanding references, and there may be future
   *   calls into the table, either after we return, or from the destroy
   *   notifies that we're about to do (destroying == false)
   *
   * We handle both cases by taking the current state of the table into
   * local variables and replacing it with something else: in the "no
   * outstanding references" cases we replace it with a bunch of
   * null/zero values so that any access to the table will fail.  In the
   * "may receive future calls" case, we reinitialise the struct to
   * appear like a newly-created empty table.
   *
   * In both cases, we take over the references for the current state,
   * freeing them below.
   */
677
  old_size = hash_table->size;
678 679
  old_have_big_keys = hash_table->have_big_keys;
  old_have_big_values = hash_table->have_big_values;
680 681 682
  old_keys   = g_steal_pointer (&hash_table->keys);
  old_values = g_steal_pointer (&hash_table->values);
  old_hashes = g_steal_pointer (&hash_table->hashes);
683 684

  if (!destruction)
685
    /* Any accesses will see an empty table */
686
    g_hash_table_setup_storage (hash_table);
687
  else
688
    /* Will cause a quick crash on any attempted access */
689
    hash_table->size = hash_table->mod = hash_table->mask = 0;
690

691
  /* Now do the actual destroy notifies */
692 693 694
  for (i = 0; i < old_size; i++)
    {
      if (HASH_IS_REAL (old_hashes[i]))
695
        {
696 697
          key = g_hash_table_fetch_key_or_value (old_keys, i, old_have_big_keys);
          value = g_hash_table_fetch_key_or_value (old_values, i, old_have_big_values);
698

699
          old_hashes[i] = UNUSED_HASH_VALUE;
700

701 702
          g_hash_table_assign_key_or_value (old_keys, i, old_have_big_keys, NULL);
          g_hash_table_assign_key_or_value (old_values, i, old_have_big_values, NULL);
703 704 705 706 707 708 709 710

          if (hash_table->key_destroy_func != NULL)
            hash_table->key_destroy_func (key);

          if (hash_table->value_destroy_func != NULL)
            hash_table->value_destroy_func (value);
        }
    }
711 712 713 714 715 716 717

  /* Destroy old storage space. */
  if (old_keys != old_values)
    g_free (old_values);

  g_free (old_keys);
  g_free (old_hashes);
718 719
}

720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748
static void
realloc_arrays (GHashTable *hash_table, gboolean is_a_set)
{
  hash_table->hashes = g_renew (guint, hash_table->hashes, hash_table->size);
  hash_table->keys = g_hash_table_realloc_key_or_value_array (hash_table->keys, hash_table->size, hash_table->have_big_keys);

  if (is_a_set)
    hash_table->values = hash_table->keys;
  else
    hash_table->values = g_hash_table_realloc_key_or_value_array (hash_table->values, hash_table->size, hash_table->have_big_values);
}

/* When resizing the table in place, we use a temporary bit array to keep
 * track of which entries have been assigned a proper location in the new
 * table layout.
 *
 * Each bit corresponds to a bucket. A bit is set if an entry was assigned
 * its corresponding location during the resize and thus should not be
 * evicted. The array starts out cleared to zero. */

static inline gboolean
get_status_bit (const guint32 *bitmap, guint index)
{
  return (bitmap[index / 32] >> (index % 32)) & 1;
}

static inline void
set_status_bit (guint32 *bitmap, guint index)
{
749
  bitmap[index / 32] |= 1U << (index % 32);
750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838
}

/* By calling dedicated resize functions for sets and maps, we avoid 2x
 * test-and-branch per key in the inner loop. This yields a small
 * performance improvement at the cost of a bit of macro gunk. */

#define DEFINE_RESIZE_FUNC(fname) \
static void fname (GHashTable *hash_table, guint old_size, guint32 *reallocated_buckets_bitmap) \
{                                                                       \
  guint i;                                                              \
                                                                        \
  for (i = 0; i < old_size; i++)                                        \
    {                                                                   \
      guint node_hash = hash_table->hashes[i];                          \
      gpointer key, value G_GNUC_UNUSED;                                \
                                                                        \
      if (!HASH_IS_REAL (node_hash))                                    \
        {                                                               \
          /* Clear tombstones */                                        \
          hash_table->hashes[i] = UNUSED_HASH_VALUE;                    \
          continue;                                                     \
        }                                                               \
                                                                        \
      /* Skip entries relocated through eviction */                     \
      if (get_status_bit (reallocated_buckets_bitmap, i))               \
        continue;                                                       \
                                                                        \
      hash_table->hashes[i] = UNUSED_HASH_VALUE;                        \
      EVICT_KEYVAL (hash_table, i, NULL, NULL, key, value);             \
                                                                        \
      for (;;)                                                          \
        {                                                               \
          guint hash_val;                                               \
          guint replaced_hash;                                          \
          guint step = 0;                                               \
                                                                        \
          hash_val = g_hash_table_hash_to_index (hash_table, node_hash); \
                                                                        \
          while (get_status_bit (reallocated_buckets_bitmap, hash_val)) \
            {                                                           \
              step++;                                                   \
              hash_val += step;                                         \
              hash_val &= hash_table->mask;                             \
            }                                                           \
                                                                        \
          set_status_bit (reallocated_buckets_bitmap, hash_val);        \
                                                                        \
          replaced_hash = hash_table->hashes[hash_val];                 \
          hash_table->hashes[hash_val] = node_hash;                     \
          if (!HASH_IS_REAL (replaced_hash))                            \
            {                                                           \
              ASSIGN_KEYVAL (hash_table, hash_val, key, value);         \
              break;                                                    \
            }                                                           \
                                                                        \
          node_hash = replaced_hash;                                    \
          EVICT_KEYVAL (hash_table, hash_val, key, value, key, value);  \
        }                                                               \
    }                                                                   \
}

#define ASSIGN_KEYVAL(ht, index, key, value) G_STMT_START{ \
    g_hash_table_assign_key_or_value ((ht)->keys, (index), (ht)->have_big_keys, (key)); \
    g_hash_table_assign_key_or_value ((ht)->values, (index), (ht)->have_big_values, (value)); \
  }G_STMT_END

#define EVICT_KEYVAL(ht, index, key, value, outkey, outvalue) G_STMT_START{ \
    (outkey) = g_hash_table_evict_key_or_value ((ht)->keys, (index), (ht)->have_big_keys, (key)); \
    (outvalue) = g_hash_table_evict_key_or_value ((ht)->values, (index), (ht)->have_big_values, (value)); \
  }G_STMT_END

DEFINE_RESIZE_FUNC (resize_map)

#undef ASSIGN_KEYVAL
#undef EVICT_KEYVAL

#define ASSIGN_KEYVAL(ht, index, key, value) G_STMT_START{ \
    g_hash_table_assign_key_or_value ((ht)->keys, (index), (ht)->have_big_keys, (key)); \
  }G_STMT_END

#define EVICT_KEYVAL(ht, index, key, value, outkey, outvalue) G_STMT_START{ \
    (outkey) = g_hash_table_evict_key_or_value ((ht)->keys, (index), (ht)->have_big_keys, (key)); \
  }G_STMT_END

DEFINE_RESIZE_FUNC (resize_set)

#undef ASSIGN_KEYVAL
#undef EVICT_KEYVAL

839
/*
840 841 842 843
 * g_hash_table_resize:
 * @hash_table: our #GHashTable
 *
 * Resizes the hash table to the optimal size based on the number of
Matthias Clasen's avatar
Matthias Clasen committed
844 845 846
 * nodes currently held. If you call this function then a resize will
 * occur, even if one does not need to occur.
 * Use g_hash_table_maybe_resize() instead.
847 848 849 850
 *
 * This function may "resize" the hash table to its current size, with
 * the side effect of cleaning up tombstones and otherwise optimizing
 * the probe sequences.
851
 */
852 853 854
static void
g_hash_table_resize (GHashTable *hash_table)
{
855
  guint32 *reallocated_buckets_bitmap;
856
  gsize old_size;
857
  gboolean is_a_set;
858

859
  old_size = hash_table->size;
860
  is_a_set = hash_table->keys == hash_table->values;
861

862 863 864 865 866 867 868 869 870 871 872 873
  /* The outer checks in g_hash_table_maybe_resize() will only consider
   * cleanup/resize when the load factor goes below .25 (1/4, ignoring
   * tombstones) or above .9375 (15/16, including tombstones).
   *
   * Once this happens, tombstones will always be cleaned out. If our
   * load sans tombstones is greater than .75 (1/1.333, see below), we'll
   * take this opportunity to grow the table too.
   *
   * Immediately after growing, the load factor will be in the range
   * .375 .. .469. After shrinking, it will be exactly .5. */

  g_hash_table_set_shift_from_size (hash_table, hash_table->nnodes * 1.333);
874

875
  if (hash_table->size > old_size)
876
    {
877 878
      realloc_arrays (hash_table, is_a_set);
      memset (&hash_table->hashes[old_size], 0, (hash_table->size - old_size) * sizeof (guint));
879

880 881 882 883 884
      reallocated_buckets_bitmap = g_new0 (guint32, (hash_table->size + 31) / 32);
    }
  else
    {
      reallocated_buckets_bitmap = g_new0 (guint32, (old_size + 31) / 32);
885
    }
886

887 888 889 890
  if (is_a_set)
    resize_set (hash_table, old_size, reallocated_buckets_bitmap);
  else
    resize_map (hash_table, old_size, reallocated_buckets_bitmap);
891

892
  g_free (reallocated_buckets_bitmap);
893

894 895
  if (hash_table->size < old_size)
    realloc_arrays (hash_table, is_a_set);
896

897
  hash_table->noccupied = hash_table->nnodes;
898
}
Owen Taylor's avatar
Owen Taylor committed
899

900
/*
901 902 903 904 905 906 907
 * g_hash_table_maybe_resize:
 * @hash_table: our #GHashTable
 *
 * Resizes the hash table, if needed.
 *
 * Essentially, calls g_hash_table_resize() if the table has strayed
 * too far from its ideal size for its number of nodes.
908
 */
909 910 911
static inline void
g_hash_table_maybe_resize (GHashTable *hash_table)
{
912
  gint noccupied = hash_table->noccupied;
913 914
  gint size = hash_table->size;

915 916
  if ((size > hash_table->nnodes * 4 && size > 1 << HASH_TABLE_MIN_SHIFT) ||
      (size <= noccupied + (noccupied / 16)))
917 918
    g_hash_table_resize (hash_table);
}
Owen Taylor's avatar
Owen Taylor committed
919

920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966
#ifdef USE_SMALL_ARRAYS

static inline gboolean
entry_is_big (gpointer v)
{
  return (((guintptr) v) >> ((BIG_ENTRY_SIZE - SMALL_ENTRY_SIZE) * 8)) != 0;
}

static inline gboolean
g_hash_table_maybe_make_big_keys_or_values (gpointer *a_p, gpointer v, gint ht_size)
{
  if (entry_is_big (v))
    {
      guint *a = (guint *) *a_p;
      gpointer *a_new;
      gint i;

      a_new = g_new (gpointer, ht_size);

      for (i = 0; i < ht_size; i++)
        {
          a_new[i] = GUINT_TO_POINTER (a[i]);
        }

      g_free (a);
      *a_p = a_new;
      return TRUE;
    }

  return FALSE;
}

#endif

static inline void
g_hash_table_ensure_keyval_fits (GHashTable *hash_table, gpointer key, gpointer value)
{
  gboolean is_a_set = (hash_table->keys == hash_table->values);

#ifdef USE_SMALL_ARRAYS

  /* Convert from set to map? */
  if (is_a_set)
    {
      if (hash_table->have_big_keys)
        {
          if (key != value)
967
            hash_table->values = g_memdup2 (hash_table->keys, sizeof (gpointer) * hash_table->size);
968 969 970 971 972 973 974
          /* Keys and values are both big now, so no need for further checks */
          return;
        }
      else
        {
          if (key != value)
            {
975
              hash_table->values = g_memdup2 (hash_table->keys, sizeof (guint) * hash_table->size);
976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002
              is_a_set = FALSE;
            }
        }
    }

  /* Make keys big? */
  if (!hash_table->have_big_keys)
    {
      hash_table->have_big_keys = g_hash_table_maybe_make_big_keys_or_values (&hash_table->keys, key, hash_table->size);

      if (is_a_set)
        {
          hash_table->values = hash_table->keys;
          hash_table->have_big_values = hash_table->have_big_keys;
        }
    }

  /* Make values big? */
  if (!is_a_set && !hash_table->have_big_values)
    {
      hash_table->have_big_values = g_hash_table_maybe_make_big_keys_or_values (&hash_table->values, value, hash_table->size);
    }

#else

  /* Just split if necessary */
  if (is_a_set && key != value)
1003
    hash_table->values = g_memdup2 (hash_table->keys, sizeof (gpointer) * hash_table->size);
1004 1005 1006 1007

#endif
}

1008 1009
/**
 * g_hash_table_new:
Matthias Clasen's avatar
Matthias Clasen committed
1010 1011
 * @hash_func: a function to create a hash value from a key
 * @key_equal_func: a function to check two keys for equality
1012
 *
1013
 * Creates a new #GHashTable with a reference count of 1.
1014
 *
Matthias Clasen's avatar
Matthias Clasen committed
1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025
 * Hash values returned by @hash_func are used to determine where keys
 * are stored within the #GHashTable data structure. The g_direct_hash(),
 * g_int_hash(), g_int64_hash(), g_double_hash() and g_str_hash()
 * functions are provided for some common types of keys.
 * If @hash_func is %NULL, g_direct_hash() is used.
 *
 * @key_equal_func is used when looking up keys in the #GHashTable.
 * The g_direct_equal(), g_int_equal(), g_int64_equal(), g_double_equal()
 * and g_str_equal() functions are provided for the most common types
 * of keys. If @key_equal_func is %NULL, keys are compared directly in
 * a similar fashion to g_direct_equal(), but without the overhead of
1026 1027 1028
 * a function call. @key_equal_func is called with the key from the hash table
 * as its first parameter, and the user-provided key to check against as
 * its second.
Matthias Clasen's avatar
Matthias Clasen committed
1029
 *
1030
 * Returns: a new #GHashTable
Matthias Clasen's avatar
Matthias Clasen committed
1031 1032 1033 1034
 */
GHashTable *
g_hash_table_new (GHashFunc  hash_func,
                  GEqualFunc key_equal_func)
1035 1036 1037 1038 1039 1040 1041
{
  return g_hash_table_new_full (hash_func, key_equal_func, NULL, NULL);
}


/**
 * g_hash_table_new_full:
Matthias Clasen's avatar
Matthias Clasen committed
1042 1043
 * @hash_func: a function to create a hash value from a key
 * @key_equal_func: a function to check two keys for equality
1044
 * @key_destroy_func: (nullable): a function to free the memory allocated for the key
Matthias Clasen's avatar
Matthias Clasen committed
1045 1046
 *     used when removing the entry from the #GHashTable, or %NULL
 *     if you don't want to supply such a function.
1047
 * @value_destroy_func: (nullable): a function to free the memory allocated for the
Matthias Clasen's avatar
Matthias Clasen committed
1048 1049 1050 1051 1052 1053 1054 1055
 *     value used when removing the entry from the #GHashTable, or %NULL
 *     if you don't want to supply such a function.
 *
 * Creates a new #GHashTable like g_hash_table_new() with a reference
 * count of 1 and allows to specify functions to free the memory
 * allocated for the key and value that get called when removing the
 * entry from the #GHashTable.
 *
1056 1057 1058 1059
 * Since version 2.42 it is permissible for destroy notify functions to
 * recursively remove further items from the hash table. This is only
 * permissible if the application still holds a reference to the hash table.
 * This means that you may need to ensure that the hash table is empty by
1060
 * calling g_hash_table_remove_all() before releasing the last reference using
1061
 * g_hash_table_unref().
1062
 *
1063
 * Returns: a new #GHashTable
Matthias Clasen's avatar
Matthias Clasen committed
1064 1065 1066 1067 1068 1069
 */
GHashTable *
g_hash_table_new_full (GHashFunc      hash_func,
                       GEqualFunc     key_equal_func,
                       GDestroyNotify key_destroy_func,
                       GDestroyNotify value_destroy_func)
Owen Taylor's avatar
Owen Taylor committed
1070
{
1071
  GHashTable *hash_table;
1072

1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083
  hash_table = g_slice_new (GHashTable);
  g_atomic_ref_count_init (&hash_table->ref_count);
  hash_table->nnodes             = 0;
  hash_table->noccupied          = 0;
  hash_table->hash_func          = hash_func ? hash_func : g_direct_hash;
  hash_table->key_equal_func     = key_equal_func;
#ifndef G_DISABLE_ASSERT
  hash_table->version            = 0;
#endif
  hash_table->key_destroy_func   = key_destroy_func;
  hash_table->value_destroy_func = value_destroy_func;
1084 1085

  g_hash_table_setup_storage (hash_table);
1086

1087
  return hash_table;
Owen Taylor's avatar
Owen Taylor committed
1088 1089
}

1090 1091
/**
 * g_hash_table_iter_init:
Matthias Clasen's avatar
Matthias Clasen committed
1092 1093
 * @iter: an uninitialized #GHashTableIter
 * @hash_table: a #GHashTable
1094 1095 1096 1097
 *
 * Initializes a key/value pair iterator and associates it with
 * @hash_table. Modifying the hash table after calling this function
 * invalidates the returned iterator.
1098 1099 1100 1101
 *
 * The iteration order of a #GHashTableIter over the keys/values in a hash
 * table is not defined.
 *
1102
 * |[<!-- language="C" -->
1103 1104 1105
 * GHashTableIter iter;
 * gpointer key, value;
 *
Matthias Clasen's avatar
Matthias Clasen committed
1106
 * g_hash_table_iter_init (&iter, hash_table);
Matthias Clasen's avatar
Matthias Clasen committed
1107
 * while (g_hash_table_iter_next (&iter, &key, &value))
Matthias Clasen's avatar
Matthias Clasen committed
1108
 *   {
Matthias Clasen's avatar
Matthias Clasen committed
1109
 *     // do something with key and value
Matthias Clasen's avatar
Matthias Clasen committed
1110 1111
 *   }
 * ]|
1112 1113
 *
 * Since: 2.16
Matthias Clasen's avatar
Matthias Clasen committed
1114