uri.c 62.2 KB
Newer Older
1 2 3 4 5 6
/* GLIB - Library of useful routines for C programming
 * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
 * file for a list of people on the GLib Team.  See the ChangeLog
 * files for a list of changes.  These files are distributed with
 * GLib at ftp://ftp.gtk.org/pub/gtk/.
 */

#include <glib.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

30 31
#include "gstrfuncsprivate.h"

32 33 34 35 36 37
/* One row of the filename -> URI conversion table.
 *
 * The string members are only ever initialised from string literals and
 * read by the tests, so they are declared const.
 */
typedef struct
{
  const char *filename;         /* path passed to g_filename_to_uri() */
  const char *hostname;         /* hostname passed alongside it; NULL in some rows */
  const char *expected_result;  /* URI compared against a non-NULL result */
  GConvertError expected_error; /* If failed */
} FileToUriTest;
39

40 41
FileToUriTest
file_to_uri_tests[] = {
42 43
  { "/etc", NULL, "file:///etc"},
  { "/etc", "", "file:///etc"},
44
  { "/etc", "otherhost", "file://otherhost/etc"},
45
#ifdef G_OS_WIN32
46 47 48 49 50 51
  { "/etc", "localhost", "file:///etc"},
  { "c:\\windows", NULL, "file:///c:/windows"},
  { "c:\\windows", "localhost", "file:///c:/windows"},
  { "c:\\windows", "otherhost", "file://otherhost/c:/windows"},
  { "\\\\server\\share\\dir", NULL, "file:////server/share/dir"},
  { "\\\\server\\share\\dir", "localhost", "file:////server/share/dir"},
52
#else
53
  { "/etc", "localhost", "file://localhost/etc"},
54 55 56
  { "c:\\windows", NULL, NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH}, /* it's important to get this error on Unix */
  { "c:\\windows", "localhost", NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
  { "c:\\windows", "otherhost", NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
57 58
#endif
  { "etc", "localhost", NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
59
#ifndef G_PLATFORM_WIN32
60
  { "/etc/\xE5\xE4\xF6", NULL, "file:///etc/%E5%E4%F6" },
61
  { "/etc/\xC3\xB6\xC3\xA4\xC3\xA5", NULL, "file:///etc/%C3%B6%C3%A4%C3%A5"},
62
#endif
63
  { "/etc", "\xC3\xB6\xC3\xA4\xC3\xA5", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
64
  { "/etc", "\xE5\xE4\xF6", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
65
  { "/etc/file with #%", NULL, "file:///etc/file%20with%20%23%25"},
66 67 68 69 70 71 72 73
  { "", NULL, NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
  { "", "", NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
  { "", "localhost", NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
  { "", "otherhost", NULL, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH},
  { "/0123456789", NULL, "file:///0123456789"},
  { "/ABCDEFGHIJKLMNOPQRSTUVWXYZ", NULL, "file:///ABCDEFGHIJKLMNOPQRSTUVWXYZ"},
  { "/abcdefghijklmnopqrstuvwxyz", NULL, "file:///abcdefghijklmnopqrstuvwxyz"},
  { "/-_.!~*'()", NULL, "file:///-_.!~*'()"},
74 75 76 77 78
#ifdef G_OS_WIN32
  /* As '\\' is a path separator on Win32, it gets turned into '/' in the URI */
  { "/\"#%<>[\\]^`{|}\x7F", NULL, "file:///%22%23%25%3C%3E%5B/%5D%5E%60%7B%7C%7D%7F"},
#else
  /* On Unix, '\\' is a normal character in the file name */
79
  { "/\"#%<>[\\]^`{|}\x7F", NULL, "file:///%22%23%25%3C%3E%5B%5C%5D%5E%60%7B%7C%7D%7F"},
80
#endif
81
  { "/;@+$,", NULL, "file:///%3B@+$,"},
82 83 84 85
  /* This and some of the following are of course as such illegal file names on Windows,
   * and would not occur in real life.
   */
  { "/:", NULL, "file:///:"},
86
  { "/?&=", NULL, "file:///%3F&="},
87
  { "/", "0123456789-", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
88 89
  { "/", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "file://ABCDEFGHIJKLMNOPQRSTUVWXYZ/"},
  { "/", "abcdefghijklmnopqrstuvwxyz", "file://abcdefghijklmnopqrstuvwxyz/"},
90 91 92 93 94
  { "/", "_.!~*'()", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
  { "/", "\"#%<>[\\]^`{|}\x7F", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
  { "/", ";?&=+$,", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
  { "/", "/", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
  { "/", "@:", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
95
  { "/", "\x80\xFF", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
96
  { "/", "\xC3\x80\xC3\xBF", NULL, G_CONVERT_ERROR_ILLEGAL_SEQUENCE},
97 98 99 100 101 102 103 104 105
};


/* One row of the URI -> filename conversion table. */
typedef struct
{
  char *uri;                    /* URI passed to g_filename_from_uri() */
  char *expected_filename;      /* filename compared against a non-NULL result */
  char *expected_hostname;      /* hostname compared against the returned one */
  GConvertError expected_error; /* If failed */
} FileFromUriTest;
107

108 109
FileFromUriTest
file_from_uri_tests[] = {
110 111
  { "file:///etc", "/etc"},
  { "file:/etc", "/etc"},
112 113 114 115 116 117
#ifdef G_OS_WIN32
  /* On Win32 we don't return "localhost" hostames, just in case
   * it isn't recognized anyway.
   */
  { "file://localhost/etc", "/etc", NULL},
  { "file://localhost/etc/%23%25%20file", "/etc/#% file", NULL},
118 119
  { "file://localhost/\xE5\xE4\xF6", "/\xe5\xe4\xf6", NULL},
  { "file://localhost/%E5%E4%F6", "/\xe5\xe4\xf6", NULL},
120
#else
121 122
  { "file://localhost/etc", "/etc", "localhost"},
  { "file://localhost/etc/%23%25%20file", "/etc/#% file", "localhost"},
123 124
  { "file://localhost/\xE5\xE4\xF6", "/\xe5\xe4\xf6", "localhost"},
  { "file://localhost/%E5%E4%F6", "/\xe5\xe4\xf6", "localhost"},
125
#endif
126 127
  { "file://otherhost/etc", "/etc", "otherhost"},
  { "file://otherhost/etc/%23%25%20file", "/etc/#% file", "otherhost"},
128
  { "file://%C3%B6%C3%A4%C3%A5/etc", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
129
  { "file:////etc/%C3%B6%C3%C3%C3%A5", "//etc/\xc3\xb6\xc3\xc3\xc3\xa5", NULL},
130 131
  { "file://\xE5\xE4\xF6/etc", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file://%E5%E4%F6/etc", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
132 133 134 135 136
  { "file:///some/file#bad", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file://some", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file:test", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "http://www.yahoo.com/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
  { "file:////etc", "//etc"},
  { "file://///etc", "///etc"},
#ifdef G_OS_WIN32
  /* URIs with backslashes come from some nonstandard application, but accept them anyhow */
  { "file:///c:\\foo", "c:\\foo"},
  { "file:///c:/foo\\bar", "c:\\foo\\bar"},
  /* Accept also the old Netscape drive-letter-and-vertical bar convention */
  { "file:///c|/foo", "c:\\foo"},
  { "file:////server/share/dir", "\\\\server\\share\\dir"},
  { "file://localhost//server/share/foo", "\\\\server\\share\\foo"},
  { "file://otherhost//server/share/foo", "\\\\server\\share\\foo", "otherhost"},
#else
  { "file:///c:\\foo", "/c:\\foo"},
  { "file:///c:/foo", "/c:/foo"},
  { "file:////c:/foo", "//c:/foo"},
#endif
153
  { "file://0123456789/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
154 155
  { "file://ABCDEFGHIJKLMNOPQRSTUVWXYZ/", "/", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"},
  { "file://abcdefghijklmnopqrstuvwxyz/", "/", "abcdefghijklmnopqrstuvwxyz"},
156 157 158
  { "file://-_.!~*'()/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file://\"<>[\\]^`{|}\x7F/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file://;?&=+$,/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
159
  { "file://%C3%80%C3%BF/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
160 161
  { "file://@/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file://:/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
162
  { "file://#/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
163 164
  { "file://%23/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
  { "file://%2F/", NULL, NULL, G_CONVERT_ERROR_BAD_URI},
165 166 167
};

static void
168
run_file_to_uri_tests (void)
169 170 171 172
{
  int i;
  gchar *res;
  GError *error;
173

174
  for (i = 0; i < G_N_ELEMENTS (file_to_uri_tests); i++)
175 176
    {
      error = NULL;
177 178
      res = g_filename_to_uri (file_to_uri_tests[i].filename,
                               file_to_uri_tests[i].hostname,
179 180 181
                               &error);

      if (res)
182
        g_assert_cmpstr (res, ==, file_to_uri_tests[i].expected_result);
183
      else
184
        g_assert_error (error, G_CONVERT_ERROR, file_to_uri_tests[i].expected_error);
185

186
      g_free (res);
187
      g_clear_error (&error);
188 189 190 191
    }
}

static void
192
run_file_from_uri_tests (void)
193 194 195 196 197
{
  int i;
  gchar *res;
  gchar *hostname;
  GError *error;
198

199
  for (i = 0; i < G_N_ELEMENTS (file_from_uri_tests); i++)
200 201
    {
      error = NULL;
202
      res = g_filename_from_uri (file_from_uri_tests[i].uri,
203 204
                                 &hostname,
                                 &error);
205

206
#ifdef G_OS_WIN32
207
      if (file_from_uri_tests[i].expected_filename)
208 209
        {
          gchar *p, *slash;
210 211
          p = file_from_uri_tests[i].expected_filename =
            g_strdup (file_from_uri_tests[i].expected_filename);
212 213 214 215 216 217
          while ((slash = strchr (p, '/')) != NULL)
            {
              *slash = '\\';
              p = slash + 1;
            }
        }
218
#endif
219
      if (res)
220
        g_assert_cmpstr (res, ==, file_from_uri_tests[i].expected_filename);
221
      else
222 223
        g_assert_error (error, G_CONVERT_ERROR, file_from_uri_tests[i].expected_error);
      g_assert_cmpstr (hostname, ==, file_from_uri_tests[i].expected_hostname);
224 225 226 227

      g_free (res);
      g_free (hostname);
      g_clear_error (&error);
228 229 230
    }
}

231 232 233
/* Compares two filenames, either of which may be NULL.
 *
 * Outside Windows this is just g_strcmp0().  On Windows, two characters
 * that are both directory separators are treated as equal; the return
 * value is the difference of the first pair of characters that differ
 * (or of the characters where the shorter string ends).
 */
static gint
safe_strcmp_filename (const gchar *a, const gchar *b)
{
#ifdef G_OS_WIN32
  const gchar *lhs = a;
  const gchar *rhs = b;

  if (a == NULL || b == NULL)
    return g_strcmp0 (a, b);

  for (; *lhs != '\0' && *rhs != '\0'; lhs++, rhs++)
    {
      gboolean both_separators =
        G_IS_DIR_SEPARATOR (*lhs) && G_IS_DIR_SEPARATOR (*rhs);

      /* Stop at the first position that really differs. */
      if (!both_separators && *lhs != *rhs)
        break;
    }

  return (*lhs - *rhs);
#else
  return g_strcmp0 (a, b);
#endif
}

Tor Lillqvist's avatar
Tor Lillqvist committed
254 255 256
/* Compares two hostnames with strcmp(), treating NULL as "".
 *
 * On Windows, an @a of "localhost" additionally compares equal to an
 * empty (or NULL) @b.
 */
static gint
safe_strcmp_hostname (const gchar *a, const gchar *b)
{
  const gchar *lhs = (a != NULL) ? a : "";
  const gchar *rhs = (b != NULL) ? b : "";

#ifdef G_OS_WIN32
  if (*rhs == '\0' && strcmp (lhs, "localhost") == 0)
    return 0;
#endif

  return strcmp (lhs, rhs);
}

271
static void
272
run_file_roundtrip_tests (void)
273 274 275 276
{
  int i;
  gchar *uri, *hostname, *res;
  GError *error;
277

278
  for (i = 0; i < G_N_ELEMENTS (file_to_uri_tests); i++)
279
    {
280
      if (file_to_uri_tests[i].expected_error != 0)
281
        continue;
282 283

      error = NULL;
284 285
      uri = g_filename_to_uri (file_to_uri_tests[i].filename,
                               file_to_uri_tests[i].hostname,
286 287
                               &error);
      g_assert_no_error (error);
288

289 290 291
      hostname = NULL;
      res = g_filename_from_uri (uri, &hostname, &error);
      g_assert_no_error (error);
292

293 294
      g_assert_cmpint (safe_strcmp_filename (file_to_uri_tests[i].filename, res), ==, 0);
      g_assert_cmpint (safe_strcmp_hostname (file_to_uri_tests[i].hostname, hostname), ==, 0);
295 296 297
      g_free (res);
      g_free (uri);
      g_free (hostname);
298 299 300
    }
}

Matthias Clasen's avatar
Matthias Clasen committed
301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
static void
run_uri_list_tests (void)
{
  /* straight from the RFC */
  gchar *list =
    "# urn:isbn:0-201-08372-8\r\n"
    "http://www.huh.org/books/foo.html\r\n"
    "http://www.huh.org/books/foo.pdf   \r\n"
    "   ftp://ftp.foo.org/books/foo.txt\r\n";
  gchar *expected_uris[] = {
    "http://www.huh.org/books/foo.html",
    "http://www.huh.org/books/foo.pdf",
    "ftp://ftp.foo.org/books/foo.txt"
  };

  gchar **uris;
  gint j;

  uris = g_uri_list_extract_uris (list);
320 321
  g_assert_cmpint (g_strv_length (uris), ==, 3);

Matthias Clasen's avatar
Matthias Clasen committed
322
  for (j = 0; j < 3; j++)
323
    g_assert_cmpstr (uris[j], ==, expected_uris[j]);
Matthias Clasen's avatar
Matthias Clasen committed
324 325 326 327

  g_strfreev (uris);

  uris = g_uri_list_extract_uris ("# just hot air\r\n# more hot air");
328
  g_assert_cmpint (g_strv_length (uris), ==, 0);
329
  g_strfreev (uris);
Matthias Clasen's avatar
Matthias Clasen committed
330 331
}

Matthias Clasen's avatar
Matthias Clasen committed
332
static void
333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
test_uri_unescape_string (void)
{
  const struct
    {
      /* Inputs */
      const gchar *escaped;  /* (nullable) */
      const gchar *illegal_characters;  /* (nullable) */
      /* Outputs */
      const gchar *expected_unescaped;  /* (nullable) */
    }
  tests[] =
    {
      { "%2Babc %4F", NULL, "+abc O" },
      { "%2Babc %4F", "+", NULL },
      { "%00abc %4F", "+/", NULL },
348 349
      { "/cursors/none.png", "/", "/cursors/none.png" },
      { "/cursors%2fbad-subdir/none.png", "/", NULL },
350 351 352
      { "%0", NULL, NULL },
      { "%ra", NULL, NULL },
      { "%2r", NULL, NULL },
353
      { "Timm B\344der", NULL, "Timm B\344der" },
354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
      { NULL, NULL, NULL },  /* actually a valid test, not a delimiter */
    };
  gsize i;

  for (i = 0; i < G_N_ELEMENTS (tests); i++)
    {
      gchar *s = NULL;

      g_test_message ("Test %" G_GSIZE_FORMAT ": %s", i, tests[i].escaped);

      s = g_uri_unescape_string (tests[i].escaped, tests[i].illegal_characters);
      g_assert_cmpstr (s, ==, tests[i].expected_unescaped);
      g_free (s);
    }
}

static void
371
test_uri_unescape_bytes (gconstpointer test_data)
372
{
373
  GError *error = NULL;
374
  gboolean use_nul_terminated = GPOINTER_TO_INT (test_data);
375 376 377 378
  const struct
    {
      /* Inputs */
      const gchar *escaped;  /* (nullable) */
379
      const gchar *illegal;
380 381 382 383 384 385
      /* Outputs */
      gssize expected_unescaped_len;  /* -1 => error expected */
      const guint8 *expected_unescaped;  /* (nullable) */
    }
  tests[] =
    {
386
      { "%00%00", NULL, 2, (const guint8 *) "\x00\x00" },
387
      { "/cursors/none.png", "/", 17, (const guint8 *) "/cursors/none.png" },
388 389 390
      { "/cursors%2fbad-subdir/none.png", "/", -1, NULL },
      { "%%", NULL, -1, NULL },
      { "%", NULL, -1, NULL },
391 392 393 394 395
    };
  gsize i;

  for (i = 0; i < G_N_ELEMENTS (tests); i++)
    {
396 397
      gssize escaped_len = 0;
      gchar *escaped = NULL;
398 399 400 401
      GBytes *bytes = NULL;

      g_test_message ("Test %" G_GSIZE_FORMAT ": %s", i, tests[i].escaped);

402 403 404 405 406 407 408 409 410 411 412 413
      /* The tests get run twice: once with the length unspecified, using a
       * nul-terminated string; and once with the length specified and a copy of
       * the string with the trailing nul explicitly removed (to help catch
       * buffer overflows). */
      if (use_nul_terminated)
        {
          escaped_len = -1;
          escaped = g_strdup (tests[i].escaped);
        }
      else
        {
          escaped_len = strlen (tests[i].escaped);  /* no trailing nul */
414
          escaped = g_memdup2 (tests[i].escaped, escaped_len);
415 416
        }

417
      bytes = g_uri_unescape_bytes (escaped, escaped_len, tests[i].illegal, &error);
418 419 420 421

      if (tests[i].expected_unescaped_len < 0)
        {
          g_assert_null (bytes);
422
          g_assert_error (error, G_URI_ERROR, G_URI_ERROR_FAILED);
423
          g_clear_error (&error);
424 425 426
        }
      else
        {
427
          g_assert_no_error (error);
428 429 430 431 432 433 434
          g_assert_cmpmem (g_bytes_get_data (bytes, NULL),
                           g_bytes_get_size (bytes),
                           tests[i].expected_unescaped,
                           tests[i].expected_unescaped_len);
        }

      g_clear_pointer (&bytes, g_bytes_unref);
435
      g_free (escaped);
436 437 438 439 440
    }
}

static void
test_uri_unescape_segment (void)
Matthias Clasen's avatar
Matthias Clasen committed
441
{
442
  const gchar *escaped_segment = "%2Babc %4F---";
443
  gchar *s = NULL;
444

445 446 447
  s = g_uri_unescape_segment (escaped_segment, escaped_segment + 10, NULL);
  g_assert_cmpstr (s, ==, "+abc O");
  g_free (s);
448 449 450

  s = g_uri_unescape_segment ("%2Babc%00cde", NULL, NULL);
  g_assert_null (s);
Matthias Clasen's avatar
Matthias Clasen committed
451 452
}

453
static void
454
test_uri_escape_string (void)
455
{
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470
  const struct
    {
      /* Inputs */
      const gchar *unescaped;
      const gchar *reserved_chars_allowed;
      gboolean allow_utf8;
      /* Outputs */
      const gchar *expected_escaped;
    }
  tests[] =
    {
      { "abcdefgABCDEFG._~", NULL, FALSE, "abcdefgABCDEFG._~" },
      { ":+ \\?#", NULL, FALSE, "%3A%2B%20%5C%3F%23" },
      { "a+b:c", "+", FALSE, "a+b%3Ac" },
      { "a+b:c\303\234", "+", TRUE, "a+b%3Ac\303\234" },
471 472 473 474
      /* Incomplete UTF-8 sequence: */
      { "\xfc\x3b\xd2", NULL, TRUE, "%FC%3B%D2" },
      /* Invalid sequence: */
      { "\xc3\xb1\xc3\x28", NULL, TRUE, "ñ%C3%28" },
475 476
    };
  gsize i;
477

478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
  for (i = 0; i < G_N_ELEMENTS (tests); i++)
    {
      gchar *s = NULL;

      g_test_message ("Test %" G_GSIZE_FORMAT ": %s", i, tests[i].unescaped);

      s = g_uri_escape_string (tests[i].unescaped,
                               tests[i].reserved_chars_allowed,
                               tests[i].allow_utf8);
      g_assert_cmpstr (s, ==, tests[i].expected_escaped);
      g_free (s);
    }
}

static void
test_uri_escape_bytes (void)
{
  gchar *s = NULL;
496 497 498 499

  s = g_uri_escape_bytes ((guchar*)"\0\0", 2, NULL);
  g_assert_cmpstr (s, ==, "%00%00");
  g_free (s);
500 501 502 503 504
}

static void
test_uri_scheme (void)
{
505
  const gchar *s1, *s2;
506 507 508 509 510
  gchar *s;

  s = g_uri_parse_scheme ("ftp://ftp.gtk.org");
  g_assert_cmpstr (s, ==, "ftp");
  g_free (s);
511

512 513 514 515
  s = g_uri_parse_scheme ("good-scheme.but+weird:gtk.org");
  g_assert_cmpstr (s, ==, "good-scheme.but+weird");
  g_free (s);

516
  s = g_uri_parse_scheme ("1bad:");
517
  g_assert_null (s);
518
  s = g_uri_parse_scheme ("bad");
519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548
  g_assert_null (s);
  s = g_uri_parse_scheme ("99http://host/path");
  g_assert_null (s);
  s = g_uri_parse_scheme (".http://host/path");
  g_assert_null (s);
  s = g_uri_parse_scheme ("+http://host/path");
  g_assert_null (s);

  s1 = g_uri_peek_scheme ("ftp://ftp.gtk.org");
  g_assert_cmpstr (s1, ==, "ftp");
  s2 = g_uri_peek_scheme ("FTP://ftp.gtk.org");
  g_assert_cmpstr (s2, ==, "ftp");
  g_assert_true (s1 == s2);
  s1 = g_uri_peek_scheme ("1bad:");
  g_assert_null (s1);
  s1 = g_uri_peek_scheme ("bad");
  g_assert_null (s1);
}

/* The individual components a parsed URI is expected to expose. */
typedef struct
{
  const gchar *scheme;
  const gchar *userinfo;
  const gchar *host;
  gint port;
  const gchar *path;
  const gchar *query;
  const gchar *fragment;
} UriParts;

typedef struct {
549
  /* Inputs */
550
  const gchar *orig;
551
  GUriFlags flags;
552 553 554 555
  /* Outputs */
  gboolean expected_success;
  GUriError expected_error_code;  /* unused if @expected_success is true */
  const UriParts expected_parts;  /* unused if @expected_success is false */
556 557 558
} UriAbsoluteTest;

static const UriAbsoluteTest absolute_tests[] = {
559
  { "foo:", G_URI_FLAGS_NONE, TRUE, 0,
560 561
    { "foo", NULL, NULL, -1, "", NULL, NULL }
  },
562
  { "file:/dev/null", G_URI_FLAGS_NONE, TRUE, 0,
563 564
    { "file", NULL, NULL, -1, "/dev/null", NULL, NULL }
  },
565
  { "file:///dev/null", G_URI_FLAGS_NONE, TRUE, 0,
566 567
    { "file", NULL, "", -1, "/dev/null", NULL, NULL }
  },
568
  { "ftp://user@host/path", G_URI_FLAGS_NONE, TRUE, 0,
569 570
    { "ftp", "user", "host", -1, "/path", NULL, NULL }
  },
571
  { "ftp://user@host:9999/path", G_URI_FLAGS_NONE, TRUE, 0,
572 573
    { "ftp", "user", "host", 9999, "/path", NULL, NULL }
  },
574
  { "ftp://user:password@host/path", G_URI_FLAGS_NONE, TRUE, 0,
575 576
    { "ftp", "user:password", "host", -1, "/path", NULL, NULL }
  },
577
  { "ftp://user:password@host:9999/path", G_URI_FLAGS_NONE, TRUE, 0,
578 579
    { "ftp", "user:password", "host", 9999, "/path", NULL, NULL }
  },
580
  { "ftp://user:password@host", G_URI_FLAGS_NONE, TRUE, 0,
581 582
    { "ftp", "user:password", "host", -1, "", NULL, NULL }
  },
583
  { "http://us%65r@host", G_URI_FLAGS_NONE, TRUE, 0,
584 585
    { "http", "user", "host", -1, "", NULL, NULL }
  },
586
  { "http://us%40r@host", G_URI_FLAGS_NONE, TRUE, 0,
587 588
    { "http", "us@r", "host", -1, "", NULL, NULL }
  },
589
  { "http://us%3ar@host", G_URI_FLAGS_NONE, TRUE, 0,
590 591
    { "http", "us:r", "host", -1, "", NULL, NULL }
  },
592
  { "http://us%2fr@host", G_URI_FLAGS_NONE, TRUE, 0,
593 594
    { "http", "us/r", "host", -1, "", NULL, NULL }
  },
595
  { "http://us%3fr@host", G_URI_FLAGS_NONE, TRUE, 0,
596 597
    { "http", "us?r", "host", -1, "", NULL, NULL }
  },
598
  { "http://host?query", G_URI_FLAGS_NONE, TRUE, 0,
599 600
    { "http", NULL, "host", -1, "", "query", NULL }
  },
601
  { "http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fchildparam%3Dchildvalue&param=value", G_URI_FLAGS_NONE, TRUE, 0,
602 603
    { "http", NULL, "host", -1, "/path", "query=http://host/path?childparam=childvalue&param=value", NULL }
  },
604
  { "http://control-chars/%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F", G_URI_FLAGS_NONE, TRUE, 0,
605 606
    { "http", NULL, "control-chars", -1, "/\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x7F", NULL, NULL }
  },
607
  { "http://space/%20", G_URI_FLAGS_NONE, TRUE, 0,
608 609
    { "http", NULL, "space", -1, "/ ", NULL, NULL }
  },
610
  { "http://delims/%3C%3E%23%25%22", G_URI_FLAGS_NONE, TRUE, 0,
611 612
    { "http", NULL, "delims", -1, "/<>#%\"", NULL, NULL }
  },
613
  { "http://unwise-chars/%7B%7D%7C%5C%5E%5B%5D%60", G_URI_FLAGS_NONE, TRUE, 0,
614 615 616 617
    { "http", NULL, "unwise-chars", -1, "/{}|\\^[]`", NULL, NULL }
  },

  /* From RFC 2732 */
618
  { "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html", G_URI_FLAGS_NONE, TRUE, 0,
619 620
    { "http", NULL, "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210", 80, "/index.html", NULL, NULL }
  },
621
  { "http://[1080:0:0:0:8:800:200C:417A]/index.html", G_URI_FLAGS_NONE, TRUE, 0,
622 623
    { "http", NULL, "1080:0:0:0:8:800:200C:417A", -1, "/index.html", NULL, NULL }
  },
624
  { "http://[3ffe:2a00:100:7031::1]", G_URI_FLAGS_NONE, TRUE, 0,
625 626
    { "http", NULL, "3ffe:2a00:100:7031::1", -1, "", NULL, NULL }
  },
627
  { "http://[1080::8:800:200C:417A]/foo", G_URI_FLAGS_NONE, TRUE, 0,
628 629
    { "http", NULL, "1080::8:800:200C:417A", -1, "/foo", NULL, NULL }
  },
630
  { "http://[::192.9.5.5]/ipng", G_URI_FLAGS_NONE, TRUE, 0,
631 632
    { "http", NULL, "::192.9.5.5", -1, "/ipng", NULL, NULL }
  },
633
  { "http://[::FFFF:129.144.52.38]:80/index.html", G_URI_FLAGS_NONE, TRUE, 0,
634 635
    { "http", NULL, "::FFFF:129.144.52.38", 80, "/index.html", NULL, NULL }
  },
636
  { "http://[2010:836B:4179::836B:4179]", G_URI_FLAGS_NONE, TRUE, 0,
637 638 639 640
    { "http", NULL, "2010:836B:4179::836B:4179", -1, "", NULL, NULL }
  },

  /* some problematic URIs that are handled differently in libsoup */
641
  { "http://host/path with spaces", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
642 643
    { "http", NULL, "host", -1, "/path with spaces", NULL, NULL }
  },
644
  { "  http://host/path", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
645 646
    { "http", NULL, "host", -1, "/path", NULL, NULL }
  },
647
  { "http://host/path  ", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
648 649
    { "http", NULL, "host", -1, "/path", NULL, NULL }
  },
650
  { "http://host  ", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
651 652
    { "http", NULL, "host", -1, "", NULL, NULL }
  },
653
  { "http://host:999  ", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
654 655
    { "http", NULL, "host", 999, "", NULL, NULL }
  },
656
  { "http://host/pa\nth", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
657 658
    { "http", NULL, "host", -1, "/path", NULL, NULL }
  },
659
  { "http:\r\n//host/path", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
660 661
    { "http", NULL, "host", -1, "/path", NULL, NULL }
  },
662
  { "http://\thost/path", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
663 664 665 666
    { "http", NULL, "host", -1, "/path", NULL, NULL }
  },

  /* Bug 594405; 0-length is different from not-present */
667
  { "http://host/path?", G_URI_FLAGS_NONE, TRUE, 0,
668 669
    { "http", NULL, "host", -1, "/path", "", NULL }
  },
670
  { "http://host/path#", G_URI_FLAGS_NONE, TRUE, 0,
671 672 673 674
    { "http", NULL, "host", -1, "/path", NULL, "" },
  },

  /* Bug 590524; ignore bad %-encoding */
675
  { "http://host/path%", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
676 677
    { "http", NULL, "host", -1, "/path%", NULL, NULL }
  },
678
  { "http://h%ost/path", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
679 680
    { "http", NULL, "h%ost", -1, "/path", NULL, NULL }
  },
681
  { "http://host/path%%", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
682 683
    { "http", NULL, "host", -1, "/path%%", NULL, NULL }
  },
684
  { "http://host/path%%%", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
685 686
    { "http", NULL, "host", -1, "/path%%%", NULL, NULL }
  },
687
  { "http://host/path%/x/", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
688 689
    { "http", NULL, "host", -1, "/path%/x/", NULL, NULL }
  },
690
  { "http://host/path%0x/", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
691 692
    { "http", NULL, "host", -1, "/path%0x/", NULL, NULL }
  },
693
  { "http://host/path%ax", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
694 695 696 697
    { "http", NULL, "host", -1, "/path%ax", NULL, NULL }
  },

  /* GUri doesn't %-encode non-ASCII characters */
698
  { "http://host/p\xc3\xa4th/", G_URI_FLAGS_NONE, TRUE, 0,
699 700 701
    { "http", NULL, "host", -1, "/p\xc3\xa4th/", NULL, NULL }
  },

702
  { "HTTP:////////////////", G_URI_FLAGS_NONE, TRUE, 0,
703 704 705
    { "http", NULL, "", -1, "//////////////", NULL, NULL }
  },

706
  { "http://@host", G_URI_FLAGS_NONE, TRUE, 0,
707 708
    { "http", "", "host", -1, "", NULL, NULL }
  },
709
  { "http://:@host", G_URI_FLAGS_NONE, TRUE, 0,
710 711
    { "http", ":", "host", -1, "", NULL, NULL }
  },
712
  { "scheme://foo%3Abar._webdav._tcp.local", G_URI_FLAGS_NONE, TRUE, 0,
713 714
    { "scheme", NULL, "foo:bar._webdav._tcp.local", -1, "", NULL, NULL}
  },
715 716

  /* ".." past top */
717
  { "http://example.com/..", G_URI_FLAGS_NONE, TRUE, 0,
718 719 720 721
    { "http", NULL, "example.com", -1, "/..", NULL, NULL }
  },

  /* scheme parsing */
722
  { "foo0://host/path", G_URI_FLAGS_NONE, TRUE, 0,
723
    { "foo0", NULL, "host", -1, "/path", NULL, NULL } },
724
  { "f0.o://host/path", G_URI_FLAGS_NONE, TRUE, 0,
725
    { "f0.o", NULL, "host", -1, "/path", NULL, NULL } },
726
  { "http++://host/path", G_URI_FLAGS_NONE, TRUE, 0,
727
    { "http++", NULL, "host", -1, "/path", NULL, NULL } },
728
  { "http-ish://host/path", G_URI_FLAGS_NONE, TRUE, 0,
729 730 731
    { "http-ish", NULL, "host", -1, "/path", NULL, NULL } },

  /* IPv6 scope ID parsing (both correct and incorrect) */
732 733
  { "http://[fe80::dead:beef%]/", G_URI_FLAGS_PARSE_RELAXED, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
734
  { "http://[fe80::dead:beef%em1]/", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
735
    { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL } },
736 737
  { "http://[fe80::dead:beef%em1]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
738
  { "http://[fe80::dead:beef%25em1]/", G_URI_FLAGS_NONE, TRUE, 0,
739
    { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL } },
740 741 742 743
  { "http://[fe80::dead:beef%25em1%20]/", G_URI_FLAGS_NONE, TRUE, 0,
    { "http", NULL, "fe80::dead:beef%em1 ", -1, "/", NULL, NULL } },
  { "http://[fe80::dead:beef%25em%31]/", G_URI_FLAGS_NONE, TRUE, 0,
    { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL } },
744
  { "http://[fe80::dead:beef%10]/", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
745
    { "http", NULL, "fe80::dead:beef%10", -1, "/", NULL, NULL } },
746 747
  { "http://[fe80::dead:beef%10]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
748
  { "http://[fe80::dead:beef%25]/", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
749
    { "http", NULL, "fe80::dead:beef%25", -1, "/", NULL, NULL } },
750 751 752 753
  { "http://[fe80::dead:beef%25]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
  { "http://[192.168.0.1%25em1]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
754 755 756 757 758 759 760 761
  { "http://[fe80::dead:beef%2em1]/", G_URI_FLAGS_PARSE_RELAXED, TRUE, 0,
    { "http", NULL, "fe80::dead:beef%2em1", -1, "/", NULL, NULL } },
  { "http://[fe80::dead:beef%2em1]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
  { "http://[fe80::dead:beef%25em1%00]/", G_URI_FLAGS_PARSE_RELAXED, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
  { "http://[fe80::dead:beef%25em1%00]/", G_URI_FLAGS_NONE, FALSE, G_URI_ERROR_BAD_HOST,
    { NULL, NULL, NULL, -1, NULL, NULL, NULL } },
762 763 764 765 766
};

static void
test_uri_parsing_absolute (void)
{
767
  gsize i;
768

769
  for (i = 0; i < G_N_ELEMENTS (absolute_tests); i++)
770 771 772 773 774
    {
      const UriAbsoluteTest *test = &absolute_tests[i];
      GError *error = NULL;
      GUri *uri;

775
      g_test_message ("Test %" G_GSIZE_FORMAT ": %s", i, test->orig);
776 777

      uri = g_uri_parse (test->orig, test->flags, &error);
778 779 780
      if (test->expected_success)
        {
          g_assert_no_error (error);
781

782 783 784 785 786 787 788 789 790 791 792 793 794
          g_assert_cmpstr (g_uri_get_scheme (uri),   ==, test->expected_parts.scheme);
          g_assert_cmpstr (g_uri_get_userinfo (uri), ==, test->expected_parts.userinfo);
          g_assert_cmpstr (g_uri_get_host (uri),     ==, test->expected_parts.host);
          g_assert_cmpint (g_uri_get_port (uri),     ==, test->expected_parts.port);
          g_assert_cmpstr (g_uri_get_path (uri),     ==, test->expected_parts.path);
          g_assert_cmpstr (g_uri_get_query (uri),    ==, test->expected_parts.query);
          g_assert_cmpstr (g_uri_get_fragment (uri), ==, test->expected_parts.fragment);
        }
      else
        {
          g_assert_error (error, G_URI_ERROR, test->expected_error_code);
          g_assert_null (uri);
        }
795

796 797
      g_clear_pointer (&uri, g_uri_unref);
      g_clear_error (&error);
798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935
    }
}

/* One row of the relative-reference table: a reference string, the
 * string it is expected to resolve to, and the expected components.
 */
typedef struct
{
  const gchar *orig;
  const gchar *resolved;
  UriParts parts;
} UriRelativeTest;

/* This all comes from RFC 3986 */
static const char *relative_test_base = "http://a/b/c/d;p?q";
static const UriRelativeTest relative_tests[] = {
  { "g:h", "g:h",
    { "g", NULL, NULL, -1, "h", NULL, NULL } },
  { "g", "http://a/b/c/g",
    { "http", NULL, "a", -1, "/b/c/g", NULL, NULL } },
  { "./g", "http://a/b/c/g",
    { "http", NULL, "a", -1, "/b/c/g", NULL, NULL } },
  { "g/", "http://a/b/c/g/",
    { "http", NULL, "a", -1, "/b/c/g/", NULL, NULL } },
  { "/g", "http://a/g",
    { "http", NULL, "a", -1, "/g", NULL, NULL } },
  { "//g", "http://g",
    { "http", NULL, "g", -1, "", NULL, NULL } },
  { "?y", "http://a/b/c/d;p?y",
    { "http", NULL, "a", -1, "/b/c/d;p", "y", NULL } },
  { "g?y", "http://a/b/c/g?y",
    { "http", NULL, "a", -1, "/b/c/g", "y", NULL } },
  { "#s", "http://a/b/c/d;p?q#s",
    { "http", NULL, "a", -1, "/b/c/d;p", "q", "s" } },
  { "g#s", "http://a/b/c/g#s",
    { "http", NULL, "a", -1, "/b/c/g", NULL, "s" } },
  { "g?y#s", "http://a/b/c/g?y#s",
    { "http", NULL, "a", -1, "/b/c/g", "y", "s" } },
  { ";x", "http://a/b/c/;x",
    { "http", NULL, "a", -1, "/b/c/;x", NULL, NULL } },
  { "g;x", "http://a/b/c/g;x",
    { "http", NULL, "a", -1, "/b/c/g;x", NULL, NULL } },
  { "g;x?y#s", "http://a/b/c/g;x?y#s",
    { "http", NULL, "a", -1, "/b/c/g;x", "y", "s" } },
  { ".", "http://a/b/c/",
    { "http", NULL, "a", -1, "/b/c/", NULL, NULL } },
  { "./", "http://a/b/c/",
    { "http", NULL, "a", -1, "/b/c/", NULL, NULL } },
  { "..", "http://a/b/",
    { "http", NULL, "a", -1, "/b/", NULL, NULL } },
  { "../", "http://a/b/",
    { "http", NULL, "a", -1, "/b/", NULL, NULL } },
  { "../g", "http://a/b/g",
    { "http", NULL, "a", -1, "/b/g", NULL, NULL } },
  { "../..", "http://a/",
    { "http", NULL, "a", -1, "/", NULL, NULL } },
  { "../../", "http://a/",
    { "http", NULL, "a", -1, "/", NULL, NULL } },
  { "../../g", "http://a/g",
    { "http", NULL, "a", -1, "/g", NULL, NULL } },
  { "", "http://a/b/c/d;p?q",
    { "http", NULL, "a", -1, "/b/c/d;p", "q", NULL } },
  { "../../../g", "http://a/g",
    { "http", NULL, "a", -1, "/g", NULL, NULL } },
  { "../../../../g", "http://a/g",
    { "http", NULL, "a", -1, "/g", NULL, NULL } },
  { "/./g", "http://a/g",
    { "http", NULL, "a", -1, "/g", NULL, NULL } },
  { "/../g", "http://a/g",
    { "http", NULL, "a", -1, "/g", NULL, NULL } },
  { "g.", "http://a/b/c/g.",
    { "http", NULL, "a", -1, "/b/c/g.", NULL, NULL } },
  { ".g", "http://a/b/c/.g",
    { "http", NULL, "a", -1, "/b/c/.g", NULL, NULL } },
  { "g..", "http://a/b/c/g..",
    { "http", NULL, "a", -1, "/b/c/g..", NULL, NULL } },
  { "..g", "http://a/b/c/..g",
    { "http", NULL, "a", -1, "/b/c/..g", NULL, NULL } },
  { "./../g", "http://a/b/g",
    { "http", NULL, "a", -1, "/b/g", NULL, NULL } },
  { "./g/.", "http://a/b/c/g/",
    { "http", NULL, "a", -1, "/b/c/g/", NULL, NULL } },
  { "g/./h", "http://a/b/c/g/h",
    { "http", NULL, "a", -1, "/b/c/g/h", NULL, NULL } },
  { "g/../h", "http://a/b/c/h",
    { "http", NULL, "a", -1, "/b/c/h", NULL, NULL } },
  { "g;x=1/./y", "http://a/b/c/g;x=1/y",
    { "http", NULL, "a", -1, "/b/c/g;x=1/y", NULL, NULL } },
  { "g;x=1/../y", "http://a/b/c/y",
    { "http", NULL, "a", -1, "/b/c/y", NULL, NULL } },
  { "g?y/./x", "http://a/b/c/g?y/./x",
    { "http", NULL, "a", -1, "/b/c/g", "y/./x", NULL } },
  { "g?y/../x", "http://a/b/c/g?y/../x",
    { "http", NULL, "a", -1, "/b/c/g", "y/../x", NULL } },
  { "g#s/./x", "http://a/b/c/g#s/./x",
    { "http", NULL, "a", -1, "/b/c/g", NULL, "s/./x" } },
  { "g#s/../x", "http://a/b/c/g#s/../x",
    { "http", NULL, "a", -1, "/b/c/g", NULL, "s/../x" } },
  { "http:g", "http:g",
    { "http", NULL, NULL, -1, "g", NULL, NULL } },
  { "http://a/../..", "http://a/",
    { "http", NULL, "a", -1, "/", NULL, NULL } }
};
static int num_relative_tests = G_N_ELEMENTS (relative_tests);

static void
test_uri_parsing_relative (void)
{
  int i;
  GUri *base, *uri;
  GError *error = NULL;
  gchar *resolved;

  base = g_uri_parse (relative_test_base, G_URI_FLAGS_NONE, &error);
  g_assert_no_error (error);

  for (i = 0; i < num_relative_tests; i++)
    {
      const UriRelativeTest *test = &relative_tests[i];
      gchar *tostring;

      uri = g_uri_parse_relative (base