/* GIMP - The GNU Image Manipulation Program
 * Copyright (C) 1995 Spencer Kimball and Peter Mattis
 *
 * gimplanguagestore-parser.c
 * Copyright (C) 2008, 2009  Sven Neumann <sven@gimp.org>
 * Copyright (C) 2013  Jehan <jehan at girinstud.io>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#include "config.h"

24
#include <locale.h>
25 26 27 28
#include <string.h>

#include <gtk/gtk.h>

29
#include "libgimpbase/gimpbase.h"
30

31 32 33 34 35 36 37
#include "widgets-types.h"

#include "config/gimpxmlparser.h"

#include "gimplanguagestore.h"
#include "gimplanguagestore-parser.h"

38 39
#include "gimp-intl.h"

40 41 42 43 44 45 46 47 48 49 50 51 52 53

/* States of the markup parser that walks the iso_639.xml file. */
typedef enum
{
  ISO_CODES_START,       /* initial state, outside of <iso_639_entries>   */
  ISO_CODES_IN_ENTRIES,  /* inside the <iso_639_entries> element          */
  ISO_CODES_IN_ENTRY,    /* inside an <iso_639_entry> element             */
  ISO_CODES_IN_UNKNOWN   /* inside an element the parser does not handle  */
} IsoCodesParserState;

typedef struct
{
  IsoCodesParserState  state;
  IsoCodesParserState  last_known_state;
  gint                 unknown_depth;
54
  GHashTable          *base_lang_list;
55 56 57
} IsoCodesParser;


58 59
/*  Forward declarations of the private functions of this file.  */

static gboolean parse_iso_codes                 (GHashTable  *base_lang_list,
                                                 GError     **error);

#ifdef HAVE_ISO_CODES
static void     iso_codes_parser_init           (void);
static void     iso_codes_parser_entry          (IsoCodesParser       *parser,
                                                 const gchar         **names,
                                                 const gchar         **values);
static void     iso_codes_parser_start_element  (GMarkupParseContext  *context,
                                                 const gchar          *element_name,
                                                 const gchar         **attribute_names,
                                                 const gchar         **attribute_values,
                                                 gpointer              user_data,
                                                 GError              **error);
static void     iso_codes_parser_end_element    (GMarkupParseContext  *context,
                                                 const gchar          *element_name,
                                                 gpointer              user_data,
                                                 GError              **error);

static void     iso_codes_parser_start_unknown  (IsoCodesParser       *parser);
static void     iso_codes_parser_end_unknown    (IsoCodesParser       *parser);
#endif /* HAVE_ISO_CODES */
77 78 79 80 81 82 83 84

/*
 * Language lists that we want to generate only once at program startup
 * (see gimp_language_store_parser_init()):
 * @l10n_lang_list: maps the locale code of every available localization
 *                  to a "Name [code]" label, the name being localized
 *                  in the language itself when possible;
 * @all_lang_list:  maps the code of every language found in the ISO-639
 *                  list to its name in the user-selected language.
 *
 * Both tables own their keys and values, which are released with
 * g_free().
 */
static GHashTable *l10n_lang_list = NULL;
static GHashTable *all_lang_list = NULL;
85

86 87 88
/********************\
 * Public Functions *
\********************/
89

90 91 92 93 94 95 96 97
/*
 * Initialize and run the language listing parser. This call must be
 * made only once, at program initialization, but after language_init().
 */
void
gimp_language_store_parser_init (void)
{
  GHashTable     *base_lang_list;
98
  gchar          *current_env;
99
  GDir           *locales_dir;
100
  GError         *error = NULL;
101 102
  GHashTableIter  lang_iter;
  gpointer        key;
103

104 105 106 107 108
  if (l10n_lang_list != NULL)
    {
      g_warning ("gimp_language_store_parser_init() must be run only once.");
      return;
    }
109

110 111
  current_env = g_strdup (g_getenv ("LANGUAGE"));

112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
  l10n_lang_list = g_hash_table_new_full (g_str_hash, g_str_equal,
                                          (GDestroyNotify) g_free,
                                          (GDestroyNotify) g_free);
  all_lang_list = g_hash_table_new_full (g_str_hash, g_str_equal,
                                         (GDestroyNotify) g_free,
                                         (GDestroyNotify) g_free);
  base_lang_list = g_hash_table_new_full (g_str_hash, g_str_equal,
                                          (GDestroyNotify) g_free,
                                          (GDestroyNotify) g_free);

  /* Check all locales we have translations for. */
  locales_dir = g_dir_open (gimp_locale_directory (), 0, NULL);
  if (locales_dir)
    {
      const gchar *locale;
127

128 129 130 131 132 133 134 135 136
      while ((locale = g_dir_read_name (locales_dir)) != NULL)
        {
          gchar *filename = g_build_filename (gimp_locale_directory (),
                                              locale,
                                              "LC_MESSAGES",
                                              GETTEXT_PACKAGE ".mo",
                                              NULL);
          if (g_file_test (filename, G_FILE_TEST_EXISTS))
            {
137
              gchar *delimiter = NULL;
138 139
              gchar *base_code = NULL;

140 141
              delimiter = strchr (locale, '_');

142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
              if (delimiter)
                base_code = g_strndup (locale, delimiter - locale);
              else
                base_code = g_strdup (locale);

              delimiter = strchr (base_code, '@');

              if (delimiter)
                {
                  gchar *temp = base_code;
                  base_code = g_strndup (base_code, delimiter - base_code);
                  g_free (temp);
                }

              /* Save the full language code. */
              g_hash_table_insert (l10n_lang_list, g_strdup (locale), NULL);
              /* Save the base language code. */
              g_hash_table_insert (base_lang_list, base_code, NULL);
            }

          g_free (filename);
        }
164

165 166
      g_dir_close (locales_dir);
    }
167

168
  /* Parse ISO-639 file to get full list of language and their names. */
169
  parse_iso_codes (base_lang_list, &error);
170

171 172 173 174
  /* Generate the localized language names. */
  g_hash_table_iter_init (&lang_iter, l10n_lang_list);
  while (g_hash_table_iter_next (&lang_iter, &key, NULL))
    {
175 176 177 178 179 180 181
      gchar *code           = (gchar*) key;
      gchar *localized_name = NULL;
      gchar *english_name   = NULL;
      gchar *delimiter      = NULL;
      gchar *base_code      = NULL;

      delimiter = strchr (code, '_');
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196

      if (delimiter)
        base_code = g_strndup (code, delimiter - code);
      else
        base_code = g_strdup (code);

      delimiter = strchr (base_code, '@');

      if (delimiter)
        {
          gchar *temp = base_code;
          base_code = g_strndup (base_code, delimiter - base_code);
          g_free (temp);
        }

197
      english_name = (gchar*) (g_hash_table_lookup (base_lang_list, base_code));
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235

      if (english_name)
        {
          gchar *semicolon;

          /* If possible, we want to localize a language in itself.
           * If it fails, gettext fallbacks to C (en_US) itself.
           */
          g_setenv ("LANGUAGE", code, TRUE);
          setlocale (LC_ALL, "");

          localized_name = g_strdup (dgettext ("iso_639", english_name));

          /* If original and localized names are the same for other than English,
           * maybe localization failed. Try now in the main dialect. */
          if (g_strcmp0 (english_name, localized_name) == 0 &&
              g_strcmp0 (base_code, "en") != 0 &&
              g_strcmp0 (code, base_code) != 0)
            {
              g_free (localized_name);

              g_setenv ("LANGUAGE", base_code, TRUE);
              setlocale (LC_ALL, "");

              localized_name = g_strdup (dgettext ("iso_639", english_name));
            }

          /*  there might be several language names; use the first one  */
          semicolon = strchr (localized_name, ';');

          if (semicolon)
            {
              gchar *temp = localized_name;
              localized_name = g_strndup (localized_name, semicolon - localized_name);
              g_free (temp);
            }
        }

236 237 238 239 240 241
      g_hash_table_replace (l10n_lang_list, g_strdup(code),
                            g_strdup_printf ("%s [%s]",
                                             localized_name ?
                                             localized_name : "???",
                                             code));
      g_free (localized_name);
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
      g_free (base_code);
    }

  /*  Add special entries for system locale.
   *  We want the system locale to be localized in itself. */
  g_setenv ("LANGUAGE", setlocale (LC_ALL, NULL), TRUE);
  setlocale (LC_ALL, "");

  /* g_str_hash() does not accept NULL. I give an empty code instead.
   * Other solution would to create a custom hash. */
  g_hash_table_insert (l10n_lang_list, g_strdup(""),
                       g_strdup (_("System Language")));

  /* Go back to original localization. */
  if (current_env)
    {
      g_setenv ("LANGUAGE", current_env, TRUE);
      g_free (current_env);
    }
  else
    g_unsetenv ("LANGUAGE");
  setlocale (LC_ALL, "");

  /* Add special entry for C (en_US). */
  g_hash_table_insert (l10n_lang_list, g_strdup ("en_US"),
                       g_strdup ("English [en_US]"));

  g_hash_table_destroy (base_lang_list);
270 271
}

272 273
void
gimp_language_store_parser_clean (void)
274
{
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
  g_hash_table_destroy (l10n_lang_list);
  g_hash_table_destroy (all_lang_list);
}

/*
 * Gives access to one of the two language tables. Keys are language
 * codes and values are the matching language names from the ISO-639
 * standard.
 *
 * @localization_only == TRUE:  only the available GIMP localizations,
 *                              each name being translated in its own
 *                              locale.
 * @localization_only == FALSE: the full list of ISO-639 languages, with
 *                              names in the user-set locale.
 *
 * The returned table and its contents are owned by this file: do not
 * free the list or elements of the list.
 */
GHashTable *
gimp_language_store_parser_get_languages (gboolean localization_only)
{
  return localization_only ? l10n_lang_list : all_lang_list;
}

/*****************************\
 * Private Parsing Functions *
\*****************************/

/*
 * Parse the ISO-639 code list if available on this system, and fill
 * @base_lang_list with English names of all needed base codes.
 *
 * It will also fill the static @all_lang_list.
 *
 * A parsing error is reported with g_warning() and then cleared, so
 * *@error is never set when the function returns.
 *
 * Returns: the result of the XML parser, or TRUE when GIMP was built
 *          without HAVE_ISO_CODES (nothing is parsed in that case).
 */
static gboolean
parse_iso_codes (GHashTable  *base_lang_list,
                 GError     **error)
{
  gboolean success = TRUE;

#ifdef HAVE_ISO_CODES
  static const GMarkupParser markup_parser =
    {
      iso_codes_parser_start_element,
      iso_codes_parser_end_element,
      NULL,  /*  characters   */
      NULL,  /*  passthrough  */
      NULL   /*  error        */
    };

  GimpXmlParser  *xml_parser;
  GFile          *file;
  IsoCodesParser  parser = { 0, };

  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);

  iso_codes_parser_init ();

  /* Keep the table alive for as long as the parser context uses it. */
  parser.base_lang_list = g_hash_table_ref (base_lang_list);

  xml_parser = gimp_xml_parser_new (&markup_parser, &parser);

#if ENABLE_RELOCATABLE_RESOURCES
  file = gimp_installation_directory_file ("share", "xml", "iso-codes",
                                           "iso_639.xml", NULL);
#else
  file = g_file_new_for_path (ISO_CODES_LOCATION G_DIR_SEPARATOR_S
                              "iso_639.xml");
#endif

  success = gimp_xml_parser_parse_gfile (xml_parser, file, error);
  if (error && *error)
    {
      /* g_file_get_path() returns a newly allocated string (or NULL)
       * which has to be freed by the caller.
       */
      gchar *path = g_file_get_path (file);

      g_warning ("%s: error parsing '%s': %s\n",
                 G_STRFUNC, path ? path : "(unknown)",
                 (*error)->message);

      g_free (path);
      g_clear_error (error);
    }

  g_object_unref (file);

  gimp_xml_parser_free (xml_parser);
  g_hash_table_unref (parser.base_lang_list);

#endif /* HAVE_ISO_CODES */

  return success;
}

366
#ifdef HAVE_ISO_CODES
367 368 369 370 371 372 373 374 375 376 377 378 379
/*
 * Binds the "iso_639" text domain to its message catalogs and requests
 * UTF-8 output for it. Only the first call does any work.
 */
static void
iso_codes_parser_init (void)
{
  static gboolean initialized = FALSE;

  if (initialized)
    return;

#ifdef G_OS_WIN32
  /*  on Win32, assume iso-codes is installed in the same location as GIMP  */
  bindtextdomain ("iso_639", gimp_locale_directory ());
#else
  bindtextdomain ("iso_639", ISO_CODES_LOCALEDIR);
#endif

  bind_textdomain_codeset ("iso_639", "UTF-8");

  initialized = TRUE;
}

Sven Neumann's avatar
Sven Neumann committed
387 388 389 390 391 392 393 394 395 396 397
/*
 * Handles the attributes of one <iso_639_entry> element.
 *
 * @names and @values are the parallel, NULL-terminated attribute arrays
 * received by the start-element callback.
 *
 * When the entry has both a non-empty name and a non-empty code:
 *  - if the code is one of the base codes of @parser, the English name
 *    is stored for it;
 *  - the name translated in the user-set language is stored in the
 *    static @all_lang_list.
 */
static void
iso_codes_parser_entry (IsoCodesParser  *parser,
                        const gchar    **names,
                        const gchar    **values)
{
  const gchar *lang = NULL;
  const gchar *code = NULL;
  gchar       *localized_name;
  gchar       *semicolon;

  for (; *names && *values; names++, values++)
    {
      if (strcmp (*names, "name") == 0)
        {
          lang = *values;
        }
      else if (strcmp (*names, "iso_639_1_code") == 0)
        {
          /* 2-letter ISO 639-1 codes have priority: they always
           * override a 3-letter code seen earlier.
           */
          code = *values;
        }
      else if (code == NULL &&
               (strcmp (*names, "iso_639_2B_code") == 0 ||
                strcmp (*names, "iso_639_2T_code") == 0))
        {
          /* But some languages have no 2-letter code. Ex: Asturian (ast).
           * The first 3-letter code met is used as long as no other
           * code has been found.
           */
          code = *values;
        }
    }

  if (! (lang && *lang && code && *code))
    return;

  /* If the language is in our base table, we save its standard English name. */
  if (g_hash_table_contains (parser->base_lang_list, code))
    g_hash_table_replace (parser->base_lang_list,
                          g_strdup (code), g_strdup (lang));

  localized_name = g_strdup (dgettext ("iso_639", lang));

  /*  there might be several language names; use the first one  */
  semicolon = strchr (localized_name, ';');

  if (semicolon)
    {
      gchar *first_name = g_strndup (localized_name,
                                     semicolon - localized_name);

      g_free (localized_name);
      localized_name = first_name;
    }

  /* In any case, we save the name in user-set language for all lang.
   * The table takes ownership of @localized_name.
   */
  g_hash_table_insert (all_lang_list, g_strdup (code), localized_name);
}

436 437 438 439 440 441 442 443 444 445 446 447
/*
 * Start-element callback of the ISO-639 markup parser.
 *
 * @user_data is the IsoCodesParser context. Recognized elements move
 * the state machine forward; every other element is counted as unknown.
 * The cases fall through on purpose: an element not recognized in one
 * state is tested against the following cases, so an <iso_639_entry>
 * met in the start state is handled as an entry as well.
 */
static void
iso_codes_parser_start_element (GMarkupParseContext  *context,
                                const gchar          *element_name,
                                const gchar         **attribute_names,
                                const gchar         **attribute_values,
                                gpointer              user_data,
                                GError              **error)
{
  IsoCodesParser *parser = user_data;

  switch (parser->state)
    {
    case ISO_CODES_START:
      if (strcmp (element_name, "iso_639_entries") == 0)
        {
          parser->state = ISO_CODES_IN_ENTRIES;
          break;
        }
      /* fallthrough */

    case ISO_CODES_IN_ENTRIES:
      if (strcmp (element_name, "iso_639_entry") == 0)
        {
          parser->state = ISO_CODES_IN_ENTRY;
          iso_codes_parser_entry (parser, attribute_names, attribute_values);
          break;
        }
      /* fallthrough */

    case ISO_CODES_IN_ENTRY:
    case ISO_CODES_IN_UNKNOWN:
      iso_codes_parser_start_unknown (parser);
      break;
    }
}

/*
 * End-element callback of the ISO-639 markup parser.
 *
 * @user_data is the IsoCodesParser context. Closing an element moves
 * the state machine one level up, or closes one level of unknown
 * elements. Getting here in the start state is unexpected and only
 * produces a warning.
 */
static void
iso_codes_parser_end_element (GMarkupParseContext *context,
                              const gchar         *element_name,
                              gpointer             user_data,
                              GError             **error)
{
  IsoCodesParser *parser = user_data;

  switch (parser->state)
    {
    case ISO_CODES_START:
      g_warning ("%s: shouldn't get here", G_STRLOC);
      break;

    case ISO_CODES_IN_ENTRIES:
      parser->state = ISO_CODES_START;
      break;

    case ISO_CODES_IN_ENTRY:
      parser->state = ISO_CODES_IN_ENTRIES;
      break;

    case ISO_CODES_IN_UNKNOWN:
      iso_codes_parser_end_unknown (parser);
      break;
    }
}

/*
 * Enters an unknown element: one more nesting level is counted and the
 * parser switches to the unknown state. The state to come back to is
 * saved only when the outermost unknown element is entered.
 */
static void
iso_codes_parser_start_unknown (IsoCodesParser *parser)
{
  if (! parser->unknown_depth)
    parser->last_known_state = parser->state;

  parser->unknown_depth += 1;
  parser->state          = ISO_CODES_IN_UNKNOWN;
}

/*
 * Leaves an unknown element: one nesting level is dropped and, once the
 * outermost unknown element is closed, the state saved when it was
 * entered is restored.
 *
 * Must only be called while the parser is in the unknown state with at
 * least one unknown element open; this is asserted.
 */
static void
iso_codes_parser_end_unknown (IsoCodesParser *parser)
{
  gimp_assert (parser->unknown_depth > 0 &&
               parser->state == ISO_CODES_IN_UNKNOWN);

  parser->unknown_depth--;

  if (parser->unknown_depth == 0)
    parser->state = parser->last_known_state;
}
519
#endif /* HAVE_ISO_CODES */