uri.c 62.2 KB
Newer Older
1 2 3 4 5 6
/* GLIB - Library of useful routines for C programming
 * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
 * file for a list of people on the GLib Team.  See the ChangeLog
 * files for a list of changes.  These files are distributed with
 * GLib at ftp://ftp.gtk.org/pub/gtk/.
 */

#include <glib.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

30 31
#include "gstrfuncsprivate.h"

32 33 34 35 36 37
/* One filename -> URI conversion case: the two inputs plus the outcome
 * (either a URI string or a GConvertError code) expected for them.
 */
typedef struct
{
  char *filename;                /* Input: file name to convert */
  char *hostname;                /* Input: host name, may be NULL */
  char *expected_result;         /* Expected URI, NULL when an error is expected */
  GConvertError expected_error;  /* If failed */
} FileToUriTest;
39

40 41
FileToUriTest
file_to_uri_tests[] = {
42 43
  { "/etc", NULL, "file:///etc"},
  { "/etc", "", "file:///etc"},
44
  { "/etc", "otherhost", "file://otherhost/etc"},
45
#ifdef G_OS_WIN32
46 47 48 49 50 51
  { "/etc", "localhost", "file:///etc"},
  { "c:\\windows", NULL, "file:///c:/windows"},
  { "c:\\windows", "localhost", "file:///c:/windows"},
  { "c:\\windows", "otherhost", "file://otherhost/c:/windows"},
  { "\\\\server\\share\\dir", NULL, "file:////server/share/dir"},
  { "\\\\server\\share\\dir", "localhost", "file:////server/share/dir"},
52
#else
53
  { "/etc", "localhost", "file://localhost/etc"},
54 55 56
  { "c:\\windows", NULL, NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH}, /* it's important to get this error on Unix */
  { "c:\\windows", "localhost", NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
  { "c:\\windows", "otherhost", NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
57 58
#endif
  { "etc", "localhost", NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
59
#ifndef G_PLATFORM_WIN32
60
  { "/etc/\xE5\xE4\xF6", NULL, "file:///etc/%E5%E4%F6" },
61
  { "/etc/\xC3\xB6\xC3\xA4\xC3\xA5", NULL, "file:///etc/%C3%B6%C3%A4%C3%A5"},
62
#endif
63
  { "/etc", "\xC3\xB6\xC3\xA4\xC3\xA5", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
64
  { "/etc", "\xE5\xE4\xF6", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
65
  { "/etc/file with #%", NULL, "file:///etc/file%20with%20%23%25"},
66 67 68 69 70 71 72 73
  { "", NULL, NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
  { "", "", NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
  { "", "localhost", NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
  { "", "otherhost", NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
  { "/0123456789", NULL, "file:///0123456789"},
  { "/ABCDEFGHIJKLMNOPQRSTUVWXYZ", NULL, "file:///ABCDEFGHIJKLMNOPQRSTUVWXYZ"},
  { "/abcdefghijklmnopqrstuvwxyz", NULL, "file:///abcdefghijklmnopqrstuvwxyz"},
  { "/-_.!~*'()", NULL, "file:///-_.!~*'()"},
74 75 76 77 78
#ifdef G_OS_WIN32
  /* As '\\' is a path separator on Win32, it gets turned into '/' in the URI */
  { "/\"#%<>[\\]^`{|}\x7F", NULL, "file:///%22%23%25%3C%3E%5B/%5D%5E%60%7B%7C%7D%7F"},
#else
  /* On Unix, '\\' is a normal character in the file name */
79
  { "/\"#%<>[\\]^`{|}\x7F", NULL, "file:///%22%23%25%3C%3E%5B%5C%5D%5E%60%7B%7C%7D%7F"},
80
#endif
81
  { "/;@+$,", NULL, "file:///%3B@+$,"},
82 83 84 85
  /* This and some of the following are of course as such illegal file names on Windows,
   * and would not occur in real life.
   */
  { "/:", NULL, "file:///:"},
86
  { "/?&=", NULL, "file:///%3F&="},
87
  { "/", "0123456789-", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
88 89
  { "/", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "file://ABCDEFGHIJKLMNOPQRSTUVWXYZ/"},
  { "/", "abcdefghijklmnopqrstuvwxyz", "file://abcdefghijklmnopqrstuvwxyz/"},
90 91 92 93 94
  { "/", "_.!~*'()", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
  { "/", "\"#%<>[\\]^`{|}\x7F", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
  { "/", ";?&=+$,", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
  { "/", "/", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
  { "/", "@:", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
95
  { "/", "\x80\xFF", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
96
  { "/", "\xC3\x80\xC3\xBF", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
97 98 99 100 101 102 103 104 105
};


/* One URI -> filename conversion case: the input URI plus the file name,
 * host name and (on failure) GConvertError code expected for it.
 */
typedef struct
{
  char *uri;                     /* Input: URI to convert */
  char *expected_filename;       /* Expected file name, NULL when an error is expected */
  char *expected_hostname;       /* Expected host name, NULL when none is expected */
  GConvertError expected_error;  /* If failed */
} FileFromUriTest;
107

108 109
FileFromUriTest
file_from_uri_tests[] = {
110 111
  { "file:///etc", "/etc"},
  { "file:/etc", "/etc"},
112 113 114 115 116 117
#ifdef G_OS_WIN32
  /* On Win32 we don't return "localhost" hostames, just in case
   * it isn't recognized anyway.
   */
  { "file://localhost/etc", "/etc", NULL},
  { "file://localhost/etc/%23%25%20file", "/etc/#% file", NULL},
118 119
  { "file://localhost/\xE5\xE4\xF6", "/\xe5\xe4\xf6", NULL},
  { "file://localhost/%E5%E4%F6", "/\xe5\xe4\xf6", NULL},
120
#else
121 122
  { "file://localhost/etc", "/etc", "localhost"},
  { "file://localhost/etc/%23%25%20file", "/etc/#% file", "localhost"},
123 124
  { "file://localhost/\xE5\xE4\xF6", "/\xe5\xe4\xf6", "localhost"},
  { "file://localhost/%E5%E4%F6", "/\xe5\xe4\xf6", "localhost"},
125
#endif
126 127
  { "file://otherhost/etc", "/etc", "otherhost"},
  { "file://otherhost/etc/%23%25%20file", "/etc/#% file", "otherhost"},
128
  { "file://%C3%B6%C3%A4%C3%A5/etc", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
129
  { "file:////etc/%C3%B6%C3%C3%C3%A5", "//etc/\xc3\xb6\xc3\xc3\xc3\xa5", NULL},
130 131
  { "file://\xE5\xE4\xF6/etc", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file://%E5%E4%F6/etc", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
132 133 134 135 136
  { "file:///some/file#bad", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file://some", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file:test", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "http://www.yahoo.com/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
  { "file:////etc", "//etc"},
  { "file://///etc", "///etc"},
#ifdef G_OS_WIN32
  /* URIs with backslashes come from some nonstandard application, but accept them anyhow */
  { "file:///c:\\foo", "c:\\foo"},
  { "file:///c:/foo\\bar", "c:\\foo\\bar"},
  /* Accept also the old Netscape drive-letter-and-vertical bar convention */
  { "file:///c|/foo", "c:\\foo"},
  { "file:////server/share/dir", "\\\\server\\share\\dir"},
  { "file://localhost//server/share/foo", "\\\\server\\share\\foo"},
  { "file://otherhost//server/share/foo", "\\\\server\\share\\foo", "otherhost"},
#else
  { "file:///c:\\foo", "/c:\\foo"},
  { "file:///c:/foo", "/c:/foo"},
  { "file:////c:/foo", "//c:/foo"},
#endif
153
  { "file://0123456789/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
154 155
  { "file://ABCDEFGHIJKLMNOPQRSTUVWXYZ/", "/", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"},
  { "file://abcdefghijklmnopqrstuvwxyz/", "/", "abcdefghijklmnopqrstuvwxyz"},
156 157 158
  { "file://-_.!~*'()/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file://\"<>[\\]^`{|}\x7F/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file://;?&=+$,/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
159
  { "file://%C3%80%C3%BF/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
160 161
  { "file://@/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file://:/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
162
  { "file://#/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
163 164
  { "file://%23/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file://%2F/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
165 166 167
};

static void
168
run_file_to_uri_tests (void)
169 170 171 172
{
  int i;
  gchar *res;
  GError *error;
173

174
  for (i = 0; i < G_N_ELEMENTS (file_to_uri_tests); i++)
175 176
    {
      error = NULL;
177 178
      res = g_filename_to_uri (file_to_uri_tests[i].filename,
                               file_to_uri_tests[i].hostname,
179 180 181
                               &error);

      if (res)
182
        g_assert_cmpstr (res, ==, file_to_uri_tests[i].expected_result);
183
      else
184
        g_assert_error (error, G_CONVERT_ERROR, file_to_uri_tests[i].expected_error);
185

186
      g_free (res);
187
      g_clear_error (&error);
188 189 190 191
    }
}

static void
192
run_file_from_uri_tests (void)
193 194 195 196 197
{
  int i;
  gchar *res;
  gchar *hostname;
  GError *error;
198

199
  for (i = 0; i < G_N_ELEMENTS (file_from_uri_tests); i++)
200 201
    {
      error = NULL;
202
      res = g_filename_from_uri (file_from_uri_tests[i].uri,
203 204
                                 &hostname,
                                 &error);
205

206
#ifdef G_OS_WIN32
207
      if (file_from_uri_tests[i].expected_filename)
208 209
        {
          gchar *p, *slash;
210 211
          p = file_from_uri_tests[i].expected_filename =
            g_strdup (file_from_uri_tests[i].expected_filename);
212 213 214 215 216 217
          while ((slash = strchr (p, '/')) != NULL)
            {
              *slash = '\\';
              p = slash + 1;
            }
        }
218
#endif
219
      if (res)
220
        g_assert_cmpstr (res, ==, file_from_uri_tests[i].expected_filename);
221
      else
222 223
        g_assert_error (error, G_CONVERT_ERROR, file_from_uri_tests[i].expected_error);
      g_assert_cmpstr (hostname, ==, file_from_uri_tests[i].expected_hostname);
224 225 226 227

      g_free (res);
      g_free (hostname);
      g_clear_error (&error);
228 229 230
    }
}

231 232 233
/* NULL-tolerant file name comparison.
 *
 * Outside Windows this is just g_strcmp0().  On Windows two characters
 * also count as equal when both are directory separators.  Returns 0 for
 * a match, otherwise the difference of the first pair of characters that
 * does not match.
 */
static gint
safe_strcmp_filename (const gchar *a, const gchar *b)
{
#ifdef G_OS_WIN32
  if (a == NULL || b == NULL)
    return g_strcmp0 (a, b);

  for (; *a != '\0' && *b != '\0'; a++, b++)
    {
      gboolean both_separators = G_IS_DIR_SEPARATOR (*a) && G_IS_DIR_SEPARATOR (*b);

      if (!both_separators && *a != *b)
        break;
    }

  /* Either a real mismatch, or at least one string has ended */
  return (*a - *b);
#else
  return g_strcmp0 (a, b);
#endif
}

Tor Lillqvist's avatar
Tor Lillqvist committed
254 255 256
/* Host name comparison that treats NULL as the empty string.
 *
 * On Windows, "localhost" as @a additionally matches an empty @b.
 * Otherwise the result is that of strcmp().
 */
static gint
safe_strcmp_hostname (const gchar *a, const gchar *b)
{
  const gchar *lhs = (a != NULL) ? a : "";
  const gchar *rhs = (b != NULL) ? b : "";

#ifdef G_OS_WIN32
  if (*rhs == '\0' && strcmp (lhs, "localhost") == 0)
    return 0;
#endif

  return strcmp (lhs, rhs);
}

271
static void
272
run_file_roundtrip_tests (void)
273 274 275 276
{
  int i;
  gchar *uri, *hostname, *res;
  GError *error;
277

278
  for (i = 0; i < G_N_ELEMENTS (file_to_uri_tests); i++)
279
    {
280
      if (file_to_uri_tests[i].expected_error != 0)
281
        continue;
282 283

      error = NULL;
284 285
      uri = g_filename_to_uri (file_to_uri_tests[i].filename,
                               file_to_uri_tests[i].hostname,
286 287
                               &error);
      g_assert_no_error (error);
288

289 290 291
      hostname = NULL;
      res = g_filename_from_uri (uri, &hostname, &error);
      g_assert_no_error (error);
292

293 294
      g_assert_cmpint (safe_strcmp_filename (file_to_uri_tests[i].filename, res), ==, 0);
      g_assert_cmpint (safe_strcmp_hostname (file_to_uri_tests[i].hostname, hostname), ==, 0);
295 296 297
      g_free (res);
      g_free (uri);
      g_free (hostname);
298 299 300
    }
}

Matthias Clasen's avatar
Matthias Clasen committed
301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
static void
run_uri_list_tests (void)
{
  /* straight from the RFC */
  gchar *list =
    "# urn:isbn:0-201-08372-8\r\n"
    "http://www.huh.org/books/foo.html\r\n"
    "http://www.huh.org/books/foo.pdf   \r\n"
    "   ftp://ftp.foo.org/books/foo.txt\r\n";
  gchar *expected_uris[] = {
    "http://www.huh.org/books/foo.html",
    "http://www.huh.org/books/foo.pdf",
    "ftp://ftp.foo.org/books/foo.txt"
  };

  gchar **uris;
  gint j;

  uris = g_uri_list_extract_uris (list);
320 321
  g_assert_cmpint (g_strv_length (uris), ==, 3);

Matthias Clasen's avatar
Matthias Clasen committed
322
  for (j = 0; j < 3; j++)
323
    g_assert_cmpstr (uris[j], ==, expected_uris[j]);
Matthias Clasen's avatar
Matthias Clasen committed
324 325 326 327

  g_strfreev (uris);

  uris = g_uri_list_extract_uris ("# just hot air\r\n# more hot air");
328
  g_assert_cmpint (g_strv_length (uris), ==, 0);
329
  g_strfreev (uris);
Matthias Clasen's avatar
Matthias Clasen committed
330 331
}

Matthias Clasen's avatar
Matthias Clasen committed
332
static void
333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
test_uri_unescape_string (void)
{
  const struct
    {
      /* Inputs */
      const gchar *escaped;  /* (nullable) */
      const gchar *illegal_characters;  /* (nullable) */
      /* Outputs */
      const gchar *expected_unescaped;  /* (nullable) */
    }
  tests[] =
    {
      { "%2Babc %4F", NULL, "+abc O" },
      { "%2Babc %4F", "+", NULL },
      { "%00abc %4F", "+/", NULL },
348 349
      { "/cursors/none.png", "/", "/cursors/none.png" },
      { "/cursors%2fbad-subdir/none.png", "/", NULL },
350 351 352
      { "%0", NULL, NULL },
      { "%ra", NULL, NULL },
      { "%2r", NULL, NULL },
353
      { "Timm B\344der", NULL, "Timm B\344der" },
354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
      { NULL, NULL, NULL },  /* actually a valid test, not a delimiter */
    };
  gsize i;

  for (i = 0; i < G_N_ELEMENTS (tests); i++)
    {
      gchar *s = NULL;

      g_test_message ("Test %" G_GSIZE_FORMAT ": %s", i, tests[i].escaped);

      s = g_uri_unescape_string (tests[i].escaped, tests[i].illegal_characters);
      g_assert_cmpstr (s, ==, tests[i].expected_unescaped);
      g_free (s);
    }
}

static void
371
test_uri_unescape_bytes (gconstpointer test_data)
372
{
373
  GError *error = NULL;
374
  gboolean use_nul_terminated = GPOINTER_TO_INT (test_data);
375 376 377 378
  const struct
    {
      /* Inputs */
      const gchar *escaped;  /* (nullable) */
379
      const gchar *illegal;
380 381 382 383 384 385
      /* Outputs */
      gssize expected_unescaped_len;  /* -1 => error expected */
      const guint8 *expected_unescaped;  /* (nullable) */
    }
  tests[] =
    {
386
      { "%00%00", NULL, 2, (const guint8 *) "\x00\x00" },
387
      { "/cursors/none.png", "/", 17, (const guint8 *) "/cursors/none.png" },
388 389 390
      { "/cursors%2fbad-subdir/none.png", "/", -1, NULL },
      { "%%", NULL, -1, NULL },
      { "%", NULL, -1, NULL },
391 392 393 394 395
    };
  gsize i;

  for (i = 0; i < G_N_ELEMENTS (tests); i++)
    {
396 397
      gssize escaped_len = 0;
      gchar *escaped = NULL;
398 399 400 401
      GBytes *bytes = NULL;

      g_test_message ("Test %" G_GSIZE_FORMAT ": %s", i, tests[i].escaped);

402 403 404 405 406 407 408 409 410 411 412 413
      /* The tests get run twice: once with the length unspecified, using a
       * nul-terminated string; and once with the length specified and a copy of
       * the string with the trailing nul explicitly removed (to help catch
       * buffer overflows). */
      if (use_nul_terminated)
        {
          escaped_len = -1;
          escaped = g_strdup (tests[i].escaped);
        }
      else
        {
          escaped_len = strlen (tests[i].escaped);  /* no trailing nul */
414
          escaped = g_memdup2 (tests[i].escaped, escaped_len);
415 416
        }

417
      bytes = g_uri_unescape_bytes (escaped, escaped_len, tests[i].illegal, &error);
418 419 420 421

      if (tests[i].expected_unescaped_len < 0)
        {
          g_assert_null (bytes);
422
          g_assert_error (error, G_URI_ERROR, G_URI_ERROR_FAILED);
423
          g_clear_error (&error);
424 425 426
        }
      else
        {
427
          g_assert_no_error (error);
428 429 430 431 432 433 434
          g_assert_cmpmem (g_bytes_get_data (bytes, NULL),
                           g_bytes_get_size (bytes),
                           tests[i].expected_unescaped,
                           tests[i].expected_unescaped_len);
        }

      g_clear_pointer (&bytes, g_bytes_unref);
435
      g_free (escaped);
436 437 438 439 440
    }
}

static void
test_uri_unescape_segment (void)
Matthias Clasen's avatar
Matthias Clasen committed
441
{
442
  const gchar *escaped_segment = "%2Babc %4F---";
443
  gchar *s = NULL;
444

445 446 447
  s = g_uri_unescape_segment (escaped_segment, escaped_segment + 10, NULL);
  g_assert_cmpstr (s, ==, "+abc O");
  g_free (s);
448 449 450

  s = g_uri_unescape_segment ("%2Babc%00cde", NULL, NULL);
  g_assert_null (s);
Matthias Clasen's avatar
Matthias Clasen committed
451 452
}

453
static void
454
test_uri_escape_string (void)
455
{
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470
  const struct
    {
      /* Inputs */
      const gchar *unescaped;
      const gchar *reserved_chars_allowed;
      gboolean allow_utf8;
      /* Outputs */
      const gchar *expected_escaped;
    }
  tests[] =
    {
      { "abcdefgABCDEFG._~", NULL, FALSE, "abcdefgABCDEFG._~" },
      { ":+ \\?#", NULL, FALSE, "%3A%2B%20%5C%3F%23" },
      { "a+b:c", "+", FALSE, "a+b%3Ac" },
      { "a+b:c\303\234", "+", TRUE, "a+b%3Ac\303\234" },
471 472 473 474
      /* Incomplete UTF-8 sequence: */
      { "\xfc\x3b\xd2", NULL, TRUE, "%FC%3B%D2" },
      /* Invalid sequence: */
      { "\xc3\xb1\xc3\x28", NULL, TRUE, "ñ%C3%28" },
475 476
    };
  gsize i;
477

478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
  for (i = 0; i < G_N_ELEMENTS (tests); i++)
    {
      gchar *s = NULL;

      g_test_message ("Test %" G_GSIZE_FORMAT ": %s", i, tests[i].unescaped);

      s = g_uri_escape_string (tests[i].unescaped,
                               tests[i].reserved_chars_allowed,
                               tests[i].allow_utf8);
      g_assert_cmpstr (s, ==, tests[i].expected_escaped);
      g_free (s);
    }
}

static void
test_uri_escape_bytes (void)
{
  gchar *s = NULL;
496 497 498 499

  s = g_uri_escape_bytes ((guchar*)"\0\0", 2, NULL);
  g_assert_cmpstr (s, ==, "%00%00");
  g_free (s);
500 501 502 503 504
}

static void
test_uri_scheme (void)
{
505
  const gchar *s1, *s2;
506 507 508 509 510
  gchar *s;

  s = g_uri_parse_scheme ("ftp://ftp.gtk.org");
  g_assert_cmpstr (s, ==, "ftp");
  g_free (s);
511

512 513 514 515
  s = g_uri_parse_scheme ("good-scheme.but+weird:gtk.org");
  g_assert_cmpstr (s, ==, "good-scheme.but+weird");
  g_free (s);

516
  s = g_uri_parse_scheme ("1bad:");
517
  g_assert_null (s);
518
  s = g_uri_parse_scheme ("bad");
519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548
  g_assert_null (s);
  s = g_uri_parse_scheme ("99http://host/path");
  g_assert_null (s);
  s = g_uri_parse_scheme (".http://host/path");
  g_assert_null (s);
  s = g_uri_parse_scheme ("+http://host/path");
  g_assert_null (s);

  s1 = g_uri_peek_scheme ("ftp://ftp.gtk.org");
  g_assert_cmpstr (s1, ==, "ftp");
  s2 = g_uri_peek_scheme ("FTP://ftp.gtk.org");
  g_assert_cmpstr (s2, ==, "ftp");
  g_assert_true (s1 == s2);
  s1 = g_uri_peek_scheme ("1bad:");
  g_assert_null (s1);
  s1 = g_uri_peek_scheme ("bad");
  g_assert_null (s1);
}

/* The individual components of a URI, as compared against the GUri
 * accessors by the parsing tests.  String members may be NULL; the tables
 * in this file use -1 as the port when the URI names none.
 */
typedef struct
{
  const gchar *scheme;
  const gchar *userinfo;
  const gchar *host;
  gint port;
  const gchar *path;
  const gchar *query;
  const gchar *fragment;
} UriParts;

typedef struct {
549
  /* Inputs */
550
  const gchar *orig;
551
  GUriFlags flags;
552 553 554 555
  /* Outputs */
  gboolean expected_success;
  GUriError expected_error_code;  /* unused if @expected_success is true */
  const UriParts expected_parts;  /* unused if @expected_success is false */
556 557 558
} UriAbsoluteTest;

static const UriAbsoluteTest absolute_tests[] = {
559
  { "foo:", G_URI_FLAGS_NONE, TRUE, 0,
560 561
    { "foo", NULL, NULL, -1, "", NULL, NULL }
  },
562
  { "file:/dev/null", G_URI_FLAGS_NONE, TRUE, 0,
563 564
    { "file", NULL, NULL, -1, "/dev/null", NULL, NULL }
  },
565
  { "file:///dev/null", G_URI_FLAGS_NONE, TRUE, 0,
566 567
    { "file", NULL, "", -1, "/dev/null", NULL, NULL }
  },
568
  { "ftp://user@host/path", G_URI_FLAGS_NONE, TRUE, 0,
569 570
    { "ftp", "user", "host", -1, "/path", NULL, NULL }
  },
571
  { "ftp://user@host:9999/path", G_URI_FLAGS_NONE, TRUE, 0,
572 573
    { "ftp", "user", "host", 9999, "/path", NULL, NULL }
  },
574
  { "ftp://user:password@host/path", G_URI_FLAGS_NONE, TRUE, 0,
575 576
    { "ftp", "user:password", "host", -1, "/path", NULL, NULL }
  },
577
  { "ftp://user:password@host:9999/path", G_URI_FLAGS_NONE, TRUE, 0,
578 579
    { "ftp", "user:password", "host", 9999, "/path", NULL, NULL }
  },
580
  { "ftp://user:password@host", G_URI_FLAGS_NONE, TRUE, 0,
581 582
    { "ftp", "user:password", "host", -1, "", NULL, NULL }
  },
583
  { "http://us%65r@host", G_URI_FLAGS_NONE, TRUE, 0,
584 585
    { "http", "user", "host", -1, "", NULL, NULL }
  },
586
  { "http://us%40r@host", G_URI_FLAGS_NONE, TRUE, 0,
587 588
    { "http", "us@r", "host", -1, "", NULL, NULL }
  },
589
  { "http://us%3ar@host", G_URI_FLAGS_NONE, TRUE, 0,
590 591
    { "http", "us:r", "host", -1, "", NULL, NULL }
  },
592
  { "http://us%2fr@host", G_URI_FLAGS_NONE, TRUE, 0,
593 594
    { "http", "us/r", "host", -1, "", NULL, NULL }
  },
595
  { "http://us%3fr@host", G_URI_FLAGS_NONE, TRUE, 0,
596 597
    { "http", "us?r", "host", -1, "", NULL, NULL }
  },
598
  { "http://host?query", G_URI_FLAGS_NONE, TRUE, 0,
599 600
    { "http", NULL, "host", -1, "", "query", NULL }
  },
601
  { "http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fchildparam%3Dchildvalue&param=value", G_URI_FLAGS_NONE, TRUE, 0,
602 603
    { "http", NULL, "host", -1, "/path", "query=http://host/path?childparam=childvalue&param=value", NULL }
  },
604
  { "http://control-chars/%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F", G_URI_FLAGS_NONE, TRUE, 0,
605 606
    { "http", NULL, "control-chars", -1, "/\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x7F", NULL, NULL }
  },
607
  { "http://space/%20", G_URI_FLAGS_NONE, TRUE, 0,
608 609
    { "http", NULL, "space", -1, "/ ", NULL, NULL }
  },
610
  { "http://delims/%3C%3E%23%25%22", G_URI_FLAGS_NONE, TRUE, 0,
611 612
    { "http", NULL, "delims", -1, "/<>#%\"", NULL, NULL }
  },
613
  { "http://unwise-chars/%7B%7D%7C%5C%5E%5B%5D%60", G_URI_FLAGS_NONE, TRUE, 0,
614 615 616 617
    { "http", NULL, "unwise-chars", -1, "/{}|\\^[]`", NULL, NULL }
  },

  /* From RFC 2732 */
618
  { "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html", G_URI_FLAGS_NONE, TRUE, 0,
619 620
    { "http", NULL, "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210", 80, "/index.html", NULL, NULL }
  },
621
  { "http://[1080:0:0:0:8:800:200C:417A]/index.html", G_URI_FLAGS_NONE, TRUE, 0,
622 623
    { "http", NULL, "1080:0:0:0:8:800:200C:417A", -1, "/index.html", NULL, NULL }
  },
624
  { "http://[3ffe:2a00:100:7031::1]", G_URI_FLAGS_NONE, TRUE, 0,
625 626
    { "http", NULL, "3ffe:2a00:100:7031::1", -1, "", NULL, NULL }
  },
627
  { "http://[1080::8:800:200C:417A]/foo", G_URI_FLAGS_NONE, TRUE, 0,
628 629
    { "http", NULL, "1080::8:800:200C:417A", -1, "/foo", NULL, NULL }
  },
630
  { "http://[::192.9.5.5]/ipng", G_URI_FLAGS_NONE, TRUE, 0,
631 632
    { "http", NULL, "::192.9.5.5", -1, "/ipng", NULL, NULL }
  },
633
  { "http://[::FFFF:129.144.52.38]:80/index.html", G_URI_FLAGS_NONE, TRUE, 0,
634 635
    { "http", NULL, "::FFFF:129.144.52.38", 80, "/index.html", NULL, NULL }
  },
636
  { "http://[2010:836B:4179::836B:4179]", G_URI_FLAGS_NONE, TRUE, 0,
637 638 639 640
    { "http", NULL, "2010:836B:4179::836B:4179", -1, "", NULL, NULL }
  },

  /* some problematic URIs that are handled differently in libsoup */
641
  { "http://host/path with spaces", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
642 643
    { "http", NULL, "host", -1, "/path with spaces", NULL, NULL }
  },
644
  { "  http://host/path", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
645 646
    { "http", NULL, "host", -1, "/path", NULL, NULL }
  },
647
  { "http://host/path  ", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
648 649
    { "http", NULL, "host", -1, "/path", NULL, NULL }
  },
650
  { "http://host  ", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
651 652
    { "http", NULL, "host", -1, "", NULL, NULL }
  },
653
  { "http://host:999  ", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
654 655
    { "http", NULL, "host", 999, "", NULL, NULL }
  },
656
  { "http://host/pa\nth", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
657 658
    { "http", NULL, "host", -1, "/path", NULL, NULL }
  },
659
  { "http:\r\n//host/path", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
660 661
    { "http", NULL, "host", -1, "/path", NULL, NULL }
  },
662
  { "http://\thost/path", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
663 664 665 666
    { "http", NULL, "host", -1, "/path", NULL, NULL }
  },

  /* Bug 594405; 0-length is different from not-present */
667
  { "http://host/path?", G_URI_FLAGS_NONE, TRUE, 0,
668 669
    { "http", NULL, "host", -1, "/path", "", NULL }
  },
670
  { "http://host/path#", G_URI_FLAGS_NONE, TRUE, 0,
671 672 673 674
    { "http", NULL, "host", -1, "/path", NULL, "" },
  },

  /* Bug 590524; ignore bad %-encoding */
675
  { "http://host/path%", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
676 677
    { "http", NULL, "host", -1, "/path%", NULL, NULL }
  },
678
  { "http://h%ost/path", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
679 680
    { "http", NULL, "h%ost", -1, "/path", NULL, NULL }
  },
681
  { "http://host/path%%", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
682 683
    { "http", NULL, "host", -1, "/path%%", NULL, NULL }
  },
684
  { "http://host/path%%%", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
685 686
    { "http", NULL, "host", -1, "/path%%%", NULL, NULL }
  },
687
  { "http://host/path%/x/", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
688 689
    { "http", NULL, "host", -1, "/path%/x/", NULL, NULL }
  },
690
  { "http://host/path%0x/", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
691 692
    { "http", NULL, "host", -1, "/path%0x/", NULL, NULL }
  },
693
  { "http://host/path%ax", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
694 695 696 697
    { "http", NULL, "host", -1, "/path%ax", NULL, NULL }
  },

  /* GUri doesn't %-encode non-ASCII characters */
698
  { "http://host/p\xc3\xa4th/", G_URI_FLAGS_NONE, TRUE, 0,
699 700 701
    { "http", NULL, "host", -1, "/p\xc3\xa4th/", NULL, NULL }
  },

702
  { "HTTP:////////////////", G_URI_FLAGS_NONE, TRUE, 0,
703 704 705
    { "http", NULL, "", -1, "//////////////", NULL, NULL }
  },

706
  { "http://@host", G_URI_FLAGS_NONE, TRUE, 0,
707 708
    { "http", "", "host", -1, "", NULL, NULL }
  },
709
  { "http://:@host", G_URI_FLAGS_NONE, TRUE, 0,
710 711
    { "http", ":", "host", -1, "", NULL, NULL }
  },
712
  { "scheme://foo%3Abar._webdav._tcp.local", G_URI_FLAGS_NONE, TRUE, 0,
713 714
    { "scheme", NULL, "foo:bar._webdav._tcp.local", -1, "", NULL, NULL}
  },
715 716

  /* ".." past top */
717
  { "http://example.com/..", G_URI_FLAGS_NONE, TRUE, 0,
718 719 720 721
    { "http", NULL, "example.com", -1, "/..", NULL, NULL }
  },

  /* scheme parsing */
722
  { "foo0://host/path", G_URI_FLAGS_NONE, TRUE, 0,
723
    { "foo0", NULL, "host", -1, "/path", NULL, NULL } },
724
  { "f0.o://host/path", G_URI_FLAGS_NONE, TRUE, 0,
725
    { "f0.o", NULL, "host", -1, "/path", NULL, NULL } },
726
  { "http++://host/path", G_URI_FLAGS_NONE, TRUE, 0,
727
    { "http++", NULL, "host", -1, "/path", NULL, NULL } },
728
  { "http-ish://host/path", G_URI_FLAGS_NONE, TRUE, 0,
729 730 731
    { "http-ish", NULL, "host", -1, "/path", NULL, NULL } },

  /* IPv6 scope ID parsing (both correct and incorrect) */
732 733
  { "http://[fe80::dead:beef%]/", G_URI_FLAGS_PARSE_RELAXED, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
734
  { "http://[fe80::dead:beef%em1]/", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
735
    { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL } },
736 737
  { "http://[fe80::dead:beef%em1]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
738
  { "http://[fe80::dead:beef%25em1]/", G_URI_FLAGS_NONE, TRUE, 0,
739
    { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL } },
740 741 742 743
  { "http://[fe80::dead:beef%25em1%20]/", G_URI_FLAGS_NONE, TRUE, 0,
    { "http", NULL, "fe80::dead:beef%em1 ", -1, "/", NULL, NULL } },
  { "http://[fe80::dead:beef%25em%31]/", G_URI_FLAGS_NONE, TRUE, 0,
    { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL } },
744
  { "http://[fe80::dead:beef%10]/", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
745
    { "http", NULL, "fe80::dead:beef%10", -1, "/", NULL, NULL } },
746 747
  { "http://[fe80::dead:beef%10]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
748
  { "http://[fe80::dead:beef%25]/", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
749
    { "http", NULL, "fe80::dead:beef%25", -1, "/", NULL, NULL } },
750 751 752 753
  { "http://[fe80::dead:beef%25]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
  { "http://[192.168.0.1%25em1]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
754 755 756 757 758 759 760 761
  { "http://[fe80::dead:beef%2em1]/", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
    { "http", NULL, "fe80::dead:beef%2em1", -1, "/", NULL, NULL } },
  { "http://[fe80::dead:beef%2em1]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
  { "http://[fe80::dead:beef%25em1%00]/", G_URI_FLAGS_PARSE_RELAXED, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
  { "http://[fe80::dead:beef%25em1%00]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
762 763 764 765 766
};

static void
test_uri_parsing_absolute (void)
{
767
  gsize i;
768

769
  for (i = 0; i < G_N_ELEMENTS (absolute_tests); i++)
770 771 772 773 774
    {
      const UriAbsoluteTest *test = &absolute_tests[i];
      GError *error = NULL;
      GUri *uri;

775
      g_test_message ("Test %" G_GSIZE_FORMAT ": %s", i, test->orig);
776 777

      uri = g_uri_parse (test->orig, test->flags, &error);
778 779 780
      if (test->expected_success)
        {
          g_assert_no_error (error);
781

782 783 784 785 786 787 788 789 790 791 792 793 794
          g_assert_cmpstr (g_uri_get_scheme (uri),   ==, test->expected_parts.scheme);
          g_assert_cmpstr (g_uri_get_userinfo (uri), ==, test->expected_parts.userinfo);
          g_assert_cmpstr (g_uri_get_host (uri),     ==, test->expected_parts.host);
          g_assert_cmpint (g_uri_get_port (uri),     ==, test->expected_parts.port);
          g_assert_cmpstr (g_uri_get_path (uri),     ==, test->expected_parts.path);
          g_assert_cmpstr (g_uri_get_query (uri),    ==, test->expected_parts.query);
          g_assert_cmpstr (g_uri_get_fragment (uri), ==, test->expected_parts.fragment);
        }
      else
        {
          g_assert_error (error, G_URI_ERROR, test->expected_error_code);
          g_assert_null (uri);
        }
795

796 797
      g_clear_pointer (&uri, g_uri_unref);
      g_clear_error (&error);

    }
}

/* One relative-reference case: the reference as written, the full URI it
 * is expected to resolve to, and that URI's individual components.
 */
typedef struct
{
  const gchar *orig;
  const gchar *resolved;
  UriParts parts;
} UriRelativeTest;

/* This all comes from RFC 3986.
 *
 * NOTE(review): relative_test_base is presumably the base URI that the
 * relative_tests entries are resolved against (their expected results
 * share its "http://a/" prefix); the code that consumes it is not visible
 * from here -- confirm.
 */
static const char *relative_test_base = "http://a/b/c/d;p?q";
static const UriRelativeTest relative_tests[] = {
  { "g:h", "g:h",
    { "g", NULL, NULL, -1, "h", NULL, NULL } },
  { "g", "http://a/b/c/g",
    { "http", NULL, "a", -1, "/b/c/g", NULL, NULL } },
  { "./g", "http://a/b/c/g",
    { "http", NULL, "a", -1, "/b/c/g", NULL, NULL } },
  { "g/", "http://a/b/c/g/",
    { "http", NULL, "a", -1, "/b/c/g/", NULL, NULL } },
  { "/g", "http://a/g",
    { "http", NULL, "a", -1, "/g", NULL, NULL } },
  { "//g", "http://g",
    { "http", NULL, "g", -1, "", NULL, NULL } },
  { "?y", "http://a/b/c/d;p?y",
    { "http", NULL, "a", -1, "/b/c/d;p", "y", NULL } },
  { "g?y", "http://a/b/c/g?y",
    { "http", NULL, "a", -1, "/b/c/g", "y", NULL } },
  { "#s", "http://a/b/c/d;p?q#s",
    { "http", NULL, "a", -1, "/b/c/d;p", "q", "s" } },
  { "g#s", "http://a/b/c/g#s",
    { "http", NULL, "a", -1, "/b/c/g", NULL, "s" } },
  { "g?y#s", "http://a/b/c/g?y#s",
    { "http", NULL, "a", -1, "/b/c/g", "y", "s" } },
  { ";x", "http://a/b/c/;x",
    { "http", NULL, "a", -1, "/b/c/;x", NULL, NULL } },
  { "g;x", "http://a/b/c/g;x",
    { "http", NULL, "a", -1, "/b/c/g;x", NULL, NULL } },
  { "g;x?y#s", "http://a/b/c/g;x?y#s",
    { "http", NULL, "a", -1, "/b/c/g;x", "y", "s" } },
  { ".", "http://a/b/c/",
    { "http", NULL, "a", -1, "/b/c/", NULL, NULL } },
  { "./", "http://a/b/c/",
    { "http", NULL, "a", -1, "/b/c/", NULL, NULL } },
  { "..", "http://a/b/",
    { "http", NULL, "a", -1, "/b/", NULL, NULL } },
  { "../", "http://a/b/",
    { "http", NULL, "a", -1, "/b/", NULL, NULL } },
  { "../g", "http://a/b/g",
    { "http", NULL, "a", -1, "/b/g", NULL, NULL } },
  { "../..", "http://a/",
    { "http", NULL, "a", -1, "/", NULL, NULL } },
  { "../../", "http://a/",
    { "http", NULL, "a", -1, "/", NULL, NULL } },
  { "../../g", "http://a/g",
    { "http", NULL, "a", -1, "/g", NULL, NULL } },
  { "", "http://a/b/c/d;p?q",
    { "http", NULL, "a", -1, "/b/c/d;p", "q", NULL } },
  { "../../../g", "http://a/g",
    { "http", NULL, "a", -1, "/g", NULL, NULL } },
  { "../../../../g", "http://a/g",
    { "http", NULL, "a", -1, "/g", NULL, NULL } },
  { "/./g", "http://a/g",
    { "http", NULL, "a", -1, "/g", NULL, NULL } },
  { "/../g", "http://a/g",
    { "http", NULL, "a", -1, "/g", NULL, NULL } },
  { "g.", "http://a/b/c/g.",
    { "http", NULL, "a", -1, "/b/c/g.", NULL, NULL } },
  { ".g", "http://a/b/c/.g",
    { "http", NULL, "a", -1, "/b/c/.g", NULL, NULL } },
  { "g..", "http://a/b/c/g..",
    { "http", NULL, "a", -1, "/b/c/g..", NULL, NULL } },
  { "..g", "http://a/b/c/..g",
    { "http", NULL, "a", -1, "/b/c/..g", NULL, NULL } },
  { "./../g", "http://a/b/g",
    { "http", NULL, "a", -1, "/b/g", NULL, NULL } },
  { "./g/.", "http://a/b/c/g/",
    { "http", NULL, "a", -1, "/b/c/g/", NULL, NULL } },
  { "g/./h", "http://a/b/c/g/h",
    { "http", NULL, "a", -1, "/b/c/g/h", NULL, NULL } },
  { "g/../h", "http://a/b/c/h",
    { "http", NULL, "a", -1, "/b/c/h", NULL, NULL } },
  { "g;x=1/./y", "http://a/b/c/g;x=1/y",
    { "http", NULL, "a", -1, "/b/c/g;x=1/y", NULL, NULL } },
  { "g;x=1/../y", "http://a/b/c/y",
    { "http", NULL, "a", -1, "/b/c/y", NULL, NULL } },
  { "g?y/./x", "http://a/b/c/g?y/./x",
    { "http", NULL, "a", -1, "/b/c/g", "y/./x", NULL } },
  { "g?y/../x", "http://a/b/c/g?y/../x",
    { "http", NULL, "a", -1, "/b/c/g", "y/../x", NULL } },
  { "g#s/./x", "http://a/b/c/g#s/./x",
    { "http", NULL, "a", -1, "/b/c/g", NULL, "s/./x" } },
  { "g#s/../x", "http://a/b/c/g#s/../x",
    { "http", NULL, "a", -1, "/b/c/g", NULL, "s/../x" } },
  { "http:g", "http:g",
    { "http", NULL, NULL, -1, "g", NULL, NULL } },
  { "http://a/../..", "http://a/",
    { "http", NULL, "a", -1, "/", NULL, NULL } }
};
static int num_relative_tests = G_N_ELEMENTS (relative_tests);

static void
test_uri_parsing_relative (void)
{
  int i;
  GUri *base, *uri;
  GError *error = NULL;
  gchar *resolved;

  base = g_uri_parse (relative_test_base, G_URI_FLAGS_NONE, &error);
  g_assert_no_error (error);

  for (i = 0; i < num_relative_tests; i++)
    {
      const UriRelativeTest *test = &relative_tests[i];
      gchar *tostring;

      uri = g_uri_parse_relative (base