diff --git a/src/clue-matches.c b/src/clue-matches.c index a974eb43a1c5dd29e6004ac3c5909c69193e3be2..54507e1e71ce74d043d86b94b67718268519aa9a 100644 --- a/src/clue-matches.c +++ b/src/clue-matches.c @@ -20,6 +20,8 @@ #include "clue-matches.h" #include "word-list-misc.h" +#include "word-array.h" +#include "word-set.h" static gboolean filter_is_unconstrained (const gchar *filter) diff --git a/src/edit-word-list.c b/src/edit-word-list.c index 9449fd222561fbad4e9e422d50d5d6a69c453a6d..0e2f91964826c6220185b8d59869d165f040b0e9 100644 --- a/src/edit-word-list.c +++ b/src/edit-word-list.c @@ -31,6 +31,7 @@ #include "word-list.h" #include "word-list-model.h" #include "clue-matches.h" +#include "word-array.h" enum diff --git a/src/meson.build b/src/meson.build index bd8bfe0458ced95d9618a1dc8fc81d5b705cbca7..9676d2ebe162e9ddec6df810bd36e50954845807 100644 --- a/src/meson.build +++ b/src/meson.build @@ -237,6 +237,8 @@ edit_headers = files ( 'word-list.h', 'word-list-def.h', 'word-list-misc.h', + 'word-array.h', + 'word-set.h', 'word-list-model.h', 'word-list-index.h', 'word-list-resource.h', @@ -259,6 +261,8 @@ word_list_sources = files( 'word-list.c', 'word-list-def.c', 'word-list-misc.c', + 'word-array.c', + 'word-set.c', 'word-list-model.c', 'word-list-index.c', 'word-list-resource.c', diff --git a/src/word-array.c b/src/word-array.c new file mode 100644 index 0000000000000000000000000000000000000000..dd6e56621272fbd15ae44ad91e65bb9ed558012f --- /dev/null +++ b/src/word-array.c @@ -0,0 +1,197 @@ +/* word-array.c + * + * Copyright 2021 Jonathan Blandford + * Copyright 2025 Victor Ma + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#include "word-array.h" +#include "word-list.h" + +static gint +word_array_cmp (gconstpointer a, + gconstpointer b) +{ + WordIndex *word_a = (WordIndex *) a; + WordIndex *word_b = (WordIndex *) b; + + if (word_a->length == word_b->length) + return word_a->index - word_b->index; + return word_a->length - word_b->length; +} + +WordArray * +word_array_new (void) +{ + return (WordArray *) g_array_new (FALSE, FALSE, sizeof (WordIndex)); +} + +WordArray * +word_array_copy (WordArray *src) +{ + WordArray *dest; + + g_return_val_if_fail (src != NULL, NULL); + + dest = word_array_new (); + + g_array_set_size (dest, word_array_len (src)); + memcpy (((GArray*)dest)->data, ((GArray*)src)->data, word_array_len (src) * sizeof (WordIndex)); + + return dest; +} + +/* Not a macro so we can pass it into other functions */ +void +word_array_unref (WordArray *word_array) +{ + g_array_unref ((GArray *) word_array); +} + +gboolean +word_array_add (WordArray *word_array, + WordIndex word_index) +{ + guint out; + + g_return_val_if_fail (word_array != NULL, FALSE); + + if (g_array_binary_search ((GArray *) word_array, + &word_index, + word_array_cmp, + &out)) + return FALSE; + + g_array_append_val ((GArray *) word_array, word_index); + g_array_sort ((GArray*) word_array, word_array_cmp); + + return TRUE; +} + +gboolean +word_array_remove (WordArray *word_array, + WordIndex word_index) +{ + guint out; + + g_return_val_if_fail (word_array != NULL, FALSE); + + if (g_array_binary_search ((GArray *) word_array, + &word_index, + word_array_cmp, + &out)) + { + g_array_remove_index ((GArray *) word_array, out); + return TRUE; + } + return FALSE; +} + +gboolean +word_array_find (WordArray *word_array, + WordIndex word_index, + guint *out) +{ + g_return_val_if_fail (word_array != NULL, FALSE); + + return g_array_binary_search ((GArray *) word_array, + &word_index, + word_array_cmp, + out); +} + +static gint +word_index_comparator (gconstpointer a, + gconstpointer b) +{ + gint index_a = ((WordIndex *) a)->index; + gint index_b = ((WordIndex *) b)->index; + + return index_a - index_b; +} + +void +word_array_sort (WordArray *word_array) +{ + if (word_array == NULL) + return; + g_array_sort ((GArray *) word_array, word_index_comparator); +} + +gboolean +word_array_equals (WordArray *word_array1, WordArray *word_array2) +{ + if (word_array1->len != word_array2->len) + return FALSE; + for (guint i = 0; i < word_array1->len; i++) + { + gint index1 = (g_array_index (word_array1, WordIndex, i)).index; + gint index2 = (g_array_index (word_array2, WordIndex, i)).index; + if (index1 != index2) + return FALSE; + } + return TRUE; +} + +#define MAX_WORDS 10 +#define HALF_MAX MAX_WORDS / 2 +void +word_array_print_helper (WordList *word_list, WordArray *word_array) +{ + g_assert (word_array != NULL); + + const guint len = word_array_len (word_array); + const guint first_loop_end = MIN (HALF_MAX - 1, len - 1); + const guint second_loop_start = MAX (first_loop_end + 1, len - HALF_MAX); + const guint second_loop_end = len - 1; + + if (len == 0) + return; + + for (guint i = 0; i <= first_loop_end; i++) + { + WordIndex word_index = word_array_index (word_array, i); + g_message ("\t%s\t%d", + word_list_get_indexed_word (word_list, word_index), + word_index.index); + } + + if (first_loop_end == len - 1) + return; + else if (second_loop_start != first_loop_end + 1) + g_message ("\t..."); + + for (guint i = second_loop_start; i <= second_loop_end; i++) + { + WordIndex word_index = word_array_index (word_array, i); + g_message ("\t%s\t%d", + word_list_get_indexed_word (word_list, word_index), + word_index.index); + } +} + +void word_array_print (WordArray *word_array, WordList *word_list) +{ + if (word_array == NULL) + { + g_warning ("word_list_print_word_array: WordArray is NULL."); + return; + } + g_message (" "); + g_message ("WordArray %p:", word_array); + word_array_print_helper (word_list, word_array); +} \ No newline at end of file diff --git a/src/word-array.h b/src/word-array.h new file mode 100644 index 0000000000000000000000000000000000000000..25c2f15112e75f36b208681e5b78030fc763e3cd --- /dev/null +++ b/src/word-array.h @@ -0,0 +1,75 @@ +/* word-array.h + * + * Copyright 2021 Jonathan Blandford + * Copyright 2025 Victor Ma + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#pragma once + +#include "word-list-misc.h" + + +G_BEGIN_DECLS + +typedef struct _WordList WordList; + +/* A WordArray is a simple list of words represented as + * WordIndex. They are unique: inserting a word multiple times results + * in the word only existing once. It is used to keep a list of words + * we don't want to search through, as well as used internally within + * the word-list. + * + * It's possible to use the GArray functions instead of the WordArray + * functions. But if you do that, you need to manually keep the + * uniqueness invariant true. The WordList does this directly + * at times. + * + * It's not recommended that you use this structure unless you really + * know what you're doing. It's not a user-friendly structure. But it + * is useful. + */ +typedef GArray WordArray; + +/* A WordSet is a GHashTable that represents a set of WordIndex structs. */ +typedef GHashTable WordSet; + +WordArray *word_array_new (void); +WordArray *word_array_copy (WordArray *src); +void word_array_unref (WordArray *word_array); +gboolean word_array_add (WordArray *word_array, + WordIndex word_index); +gboolean word_array_remove (WordArray *word_array, + WordIndex word_index); +gboolean word_array_find (WordArray *word_array, + WordIndex word_index, + guint *out); +void word_array_sort (WordArray *word_array); +gboolean word_array_equals (WordArray *word_array1, + WordArray *word_array2); +void word_array_print_helper (WordList *word_list, + WordArray *word_array); +void word_array_print (WordArray *word_array, + WordList *word_list); + +#define word_array_len(wa) (((GArray*)wa)->len) +#define word_array_index(wa,i) (g_array_index((GArray*)wa,WordIndex,i)) +#define word_array_ref(wa) (g_array_ref((GArray*)wa)) + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(WordArray, word_array_unref) + +G_END_DECLS \ No newline at end of file diff --git a/src/word-list-misc.c b/src/word-list-misc.c index 024417e45aa4e1dd414db88f7c0eb6d68d8fc453..a3a8d41e324f5c553b097075741ffec16661e39c 100644 --- a/src/word-list-misc.c +++ b/src/word-list-misc.c @@ -212,177 +212,6 @@ scored_parse_word (const gchar *unparsed_word, return g_string_free_and_steal (parsed_word); } -/* Word Array */ - -static gint -word_array_cmp (gconstpointer a, - gconstpointer b) -{ - WordIndex *word_a = (WordIndex *) a; - WordIndex *word_b = (WordIndex *) b; - - if (word_a->length == word_b->length) - return word_a->index - word_b->index; - return word_a->length - word_b->length; -} - -WordArray * -word_array_new (void) -{ - return (WordArray *) g_array_new (FALSE, FALSE, sizeof (WordIndex)); -} - -WordArray * -word_array_copy (WordArray *src) -{ - WordArray *dest; - - g_return_val_if_fail (src != NULL, NULL); - - dest = word_array_new (); - - g_array_set_size (dest, word_array_len (src)); - memcpy (((GArray*)dest)->data, ((GArray*)src)->data, word_array_len (src) * sizeof (WordIndex)); - - return dest; -} - -/* Not a macro so we can pass it into other functions */ -void -word_array_unref (WordArray *word_array) -{ - g_array_unref ((GArray *) word_array); -} - -gboolean -word_array_add (WordArray *word_array, - WordIndex word_index) -{ - guint out; - - g_return_val_if_fail (word_array != NULL, FALSE); - - if (g_array_binary_search ((GArray *) word_array, - &word_index, - word_array_cmp, - &out)) - return FALSE; - - g_array_append_val ((GArray *) word_array, word_index); - g_array_sort ((GArray*) word_array, word_array_cmp); - - return TRUE; -} - -gboolean -word_array_remove (WordArray *word_array, - WordIndex word_index) -{ - guint out; - - g_return_val_if_fail (word_array != NULL, FALSE); - - if (g_array_binary_search ((GArray *) word_array, - &word_index, - word_array_cmp, - &out)) - { - g_array_remove_index ((GArray *) word_array, out); - return TRUE; - } - return FALSE; -} - -gboolean -word_array_find (WordArray *word_array, - WordIndex word_index, - guint *out) -{ - g_return_val_if_fail (word_array != NULL, FALSE); - - return g_array_binary_search ((GArray *) word_array, - &word_index, - word_array_cmp, - out); -} - -static gint -word_index_comparator (gconstpointer a, - gconstpointer b) -{ - gint index_a = ((WordIndex *) a)->index; - gint index_b = ((WordIndex *) b)->index; - - return index_a - index_b; -} - -void -word_array_sort (WordArray *word_array) -{ - if (word_array == NULL) - return; - g_array_sort ((GArray *) word_array, word_index_comparator); -} - -gboolean -word_array_equals (WordArray *word_array1, WordArray *word_array2) -{ - if (word_array1->len != word_array2->len) - return FALSE; - for (guint i = 0; i < word_array1->len; i++) - { - gint index1 = (g_array_index (word_array1, WordIndex, i)).index; - gint index2 = (g_array_index (word_array2, WordIndex, i)).index; - if (index1 != index2) - return FALSE; - } - return TRUE; -} - -static void -word_index_print (WordIndex word_index) -{ - g_message ("\t(index: %2d, length: %2d)", - word_index.index, - word_index.length); -} - -#define MAX_WORDS 10 -static void -word_array_print_helper (WordArray *word_array) -{ - /* FIXME(debugging): Print first 5 and last 5 words instead of first 10. */ - for (guint i = 0; i < MIN (word_array->len, MAX_WORDS); i++) - { - WordIndex word_index = word_array_index (word_array, i); - word_index_print (word_index); - } -} - -void -word_array_print (WordArray *word_array) -{ - if (word_array == NULL) - { - g_warning ("word_array_print: WordArray is NULL."); - return; - } - g_message ("WordArray %p:", word_array); - word_array_print_helper (word_array); -} - -void -word_set_print (WordSet *word_set) -{ - if (word_set == NULL) - { - g_warning ("word_set_print: WordSet is NULL."); - return; - } - g_message ("WordSet %p:", word_set); - word_array_print_helper (word_set_to_array (word_set)); -} - gboolean word_index_equals (WordIndex word_index1, WordIndex word_index2) @@ -391,83 +220,6 @@ word_index_equals (WordIndex word_index1, (word_index1.length == word_index2.length); } -static guint -word_index_hash_func (gconstpointer word_index_ptr) -{ - /* A WordSet should only contain words of the same length. - * So, WordIndex.index should be unique, and we can use it - * to create the hash. */ - WordIndex word_index = *(WordIndex *) word_index_ptr; - return g_int_hash (&(word_index.index)); -} - -static gboolean -word_index_key_equal_func (gconstpointer word_index1_ptr, - gconstpointer word_index2_ptr) -{ - WordIndex word_index1 = *(WordIndex *) word_index1_ptr; - WordIndex word_index2 = *(WordIndex *) word_index2_ptr; - return word_index_equals (word_index1, word_index2); -} - -WordSet * -word_set_new (void) -{ - return g_hash_table_new (word_index_hash_func, word_index_key_equal_func); -} - -static gboolean word_not_in_set (gpointer key, - gpointer value, - gpointer user_data) -{ - WordIndex *word_index = (WordIndex *) key; - WordSet *word_set = (WordSet *) user_data; - return !g_hash_table_contains (word_set, word_index); -} - -/* FIXME(lookahead): Attempt the optimization again. */ -void -word_set_remove_unique (WordSet *word_set1, WordSet *word_set2) -{ - /* Based on testing, it's not any faster to swap - * word_set1 and word_set2, when word_set2 is smaller. */ - g_hash_table_foreach_steal (word_set1, word_not_in_set, word_set2); -} - -void word_set_add_array (WordSet *word_set, WordArray *word_array) -{ - guint len = word_array_len (word_array); - for (guint i = 0; i < len; i++) - word_set_add (word_set, &word_array_index (word_array, i)); -} - -static void -foreach_add_to_array (gpointer key, gpointer value, gpointer user_data) -{ - WordArray *word_array = (WordArray *) user_data; - WordIndex word_index = *(WordIndex *) key; - g_array_append_val (word_array, word_index); -} - -WordArray * -word_set_to_array (WordSet *word_set) -{ - WordArray *word_array = word_array_new (); - if (word_set != NULL) - { - g_hash_table_foreach (word_set, foreach_add_to_array, word_array); - word_array_sort (word_array); - } - return word_array; -} - -/* Not a macro so we can pass it into other functions */ -void -word_set_destroy (WordSet *word_set) -{ - g_hash_table_destroy (word_set); -} - const gchar* clue_get_filter (const IpuzClue *clue, IpuzGrid *grid) { diff --git a/src/word-list-misc.h b/src/word-list-misc.h index 50536cf6626359282b6c176f640811e910a877e2..89d4a229b476dff542d24fa83c2e4615a78bf644 100644 --- a/src/word-list-misc.h +++ b/src/word-list-misc.h @@ -78,60 +78,6 @@ gchar *scored_parse_word (const gchar *unparsed_word, IpuzCharset *alphabet); -/* A WordArray is a simple list of words represented as - * WordIndex. They are unique: inserting a word multiple times results - * in the word only existing once. It is used to keep a list of words - * we don't want to search through, as well as used internally within - * the word-list. - * - * It's possible to use the GArray functions instead of the WordArray - * functions. But if you do that, you need to manually keep the - * uniqueness invariant true. The WordList does this directly - * at times. - * - * It's not recommended that you use this structure unless you really - * know what you're doing. It's not a user-friendly structure. But it - * is useful. - */ -typedef GArray WordArray; - -/* A WordSet is a GHashTable that represents a set of WordIndex structs. */ -typedef GHashTable WordSet; - -WordArray *word_array_new (void); -WordArray *word_array_copy (WordArray *src); -void word_array_unref (WordArray *word_array); -gboolean word_array_add (WordArray *word_array, - WordIndex word_index); -gboolean word_array_remove (WordArray *word_array, - WordIndex word_index); -gboolean word_array_find (WordArray *word_array, - WordIndex word_index, - guint *out); -void word_array_sort (WordArray *word_array); -gboolean word_array_equals (WordArray *word_array1, - WordArray *word_array2); -void word_array_print (WordArray *word_array); - -#define word_array_len(wa) (((GArray*)wa)->len) -#define word_array_index(wa,i) (g_array_index((GArray*)wa,WordIndex,i)) -#define word_array_ref(wa) (g_array_ref((GArray*)wa)) - -G_DEFINE_AUTOPTR_CLEANUP_FUNC(WordArray, word_array_unref) - -WordSet *word_set_new (void); -void word_set_destroy (WordSet *word_set); -void word_set_remove_unique (WordSet *word_set1, - WordSet *word_set2); -void word_set_add_array (WordSet *word_set, - WordArray *word_array); -WordArray *word_set_to_array (WordSet *word_set); -void word_set_print (WordSet *word_set); - -#define word_set_add(ws,wi) (g_hash_table_add(ws,wi)) - -G_DEFINE_AUTOPTR_CLEANUP_FUNC(WordSet, word_set_destroy); - const gchar *clue_get_filter (const IpuzClue *clue, IpuzGrid *grid); diff --git a/src/word-list.h b/src/word-list.h index 8d49c763528047ad2a998fd843f6a698e8949065..851e6939644eea7a10538c95eef36f3b7a86b6ac 100644 --- a/src/word-list.h +++ b/src/word-list.h @@ -23,6 +23,7 @@ #include "libipuz/libipuz.h" #include "word-list-misc.h" +#include "word-array.h" #include "word-list-resource.h" diff --git a/src/word-set.c b/src/word-set.c new file mode 100644 index 0000000000000000000000000000000000000000..2ea3fa4459eeb32fc4bdf3dda1e08f852957cb3b --- /dev/null +++ b/src/word-set.c @@ -0,0 +1,110 @@ +/* word-set.c + * + * Copyright 2025 Victor Ma + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#include "word-set.h" + +static guint +word_index_hash_func (gconstpointer word_index_ptr) +{ + /* A WordSet should only contain words of the same length. + * So, WordIndex.index should be unique, and we can use it + * to create the hash. */ + WordIndex word_index = *(WordIndex *) word_index_ptr; + return g_int_hash (&(word_index.index)); +} + +static gboolean +word_index_key_equal_func (gconstpointer word_index1_ptr, + gconstpointer word_index2_ptr) +{ + WordIndex word_index1 = *(WordIndex *) word_index1_ptr; + WordIndex word_index2 = *(WordIndex *) word_index2_ptr; + return word_index_equals (word_index1, word_index2); +} + +WordSet * +word_set_new (void) +{ + return g_hash_table_new (word_index_hash_func, word_index_key_equal_func); +} + +static gboolean word_not_in_set (gpointer key, + gpointer value, + gpointer user_data) +{ + WordIndex *word_index = (WordIndex *) key; + WordSet *word_set = (WordSet *) user_data; + return !g_hash_table_contains (word_set, word_index); +} + +/* FIXME(lookahead): Attempt the optimization again. */ +void +word_set_remove_unique (WordSet *word_set1, WordSet *word_set2) +{ + /* Based on testing, it's not any faster to swap + * word_set1 and word_set2, when word_set2 is smaller. */ + g_hash_table_foreach_steal (word_set1, word_not_in_set, word_set2); +} + +void word_set_add_array (WordSet *word_set, WordArray *word_array) +{ + guint len = word_array_len (word_array); + for (guint i = 0; i < len; i++) + word_set_add (word_set, &word_array_index (word_array, i)); +} + +static void +foreach_add_to_array (gpointer key, gpointer value, gpointer user_data) +{ + WordArray *word_array = (WordArray *) user_data; + WordIndex word_index = *(WordIndex *) key; + g_array_append_val (word_array, word_index); +} + +WordArray * +word_set_to_array (WordSet *word_set) +{ + WordArray *word_array = word_array_new (); + if (word_set != NULL) + { + g_hash_table_foreach (word_set, foreach_add_to_array, word_array); + word_array_sort (word_array); + } + return word_array; +} + +/* Not a macro so we can pass it into other functions */ +void +word_set_destroy (WordSet *word_set) +{ + g_hash_table_destroy (word_set); +} + +void word_set_print (WordSet *word_set, WordList *word_list) +{ + if (word_set == NULL) + { + g_warning ("word_list_print_word_set: WordSet is NULL."); + return; + } + g_message (" "); + g_message ("WordSet %p:", word_set); + word_array_print_helper (word_list, word_set_to_array (word_set)); +} \ No newline at end of file diff --git a/src/word-set.h b/src/word-set.h new file mode 100644 index 0000000000000000000000000000000000000000..20aeea61aaf9a6a2fe14b20b66d6c87c7fe056ad --- /dev/null +++ b/src/word-set.h @@ -0,0 +1,44 @@ +/* word-set.h + * + * Copyright 2025 Victor Ma + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#pragma once + +#include "word-array.h" + + +G_BEGIN_DECLS + +typedef struct _WordList WordList; + +WordSet *word_set_new (void); +void word_set_destroy (WordSet *word_set); +void word_set_remove_unique (WordSet *word_set1, + WordSet *word_set2); +void word_set_add_array (WordSet *word_set, + WordArray *word_array); +WordArray *word_set_to_array (WordSet *word_set); +void word_set_print (WordSet *word_set, + WordList *word_list); + +#define word_set_add(ws,wi) (g_hash_table_add(ws,wi)) + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(WordSet, word_set_destroy); + +G_END_DECLS \ No newline at end of file diff --git a/src/word-solver-task.h b/src/word-solver-task.h index 3220e19d6d600929ed05a1aca73088eb94b1c366..bdc87802a4369f3a7a6a4d965fbdc86dd5239b88 100644 --- a/src/word-solver-task.h +++ b/src/word-solver-task.h @@ -24,7 +24,7 @@ #include #include "cell-array.h" #include "puzzle-task.h" -#include "word-list-misc.h" +#include "word-array.h" #include "word-list-resource.h" G_BEGIN_DECLS