From 709373586d3eba56997ad72b83b804e1b45291ac Mon Sep 17 00:00:00 2001 From: Patrick Griffis Date: Thu, 27 Mar 2025 14:43:26 -0500 Subject: [PATCH 1/6] cookie: Always normalize domain value In order for libpsl to give accurate results the domain must be lowercased. To make it easiest we normalize it at construction time of the cookie. --- libsoup/cookies/soup-cookie-jar.c | 11 ++++++++--- libsoup/cookies/soup-cookie.c | 22 +++++++++++++++++++--- libsoup/soup-tld.c | 11 ++++++++++- libsoup/soup-uri-utils-private.h | 2 ++ libsoup/soup-uri-utils.c | 18 ++++++++++++++++++ tests/cookies-test.c | 19 +++++++++++++++++++ 6 files changed, 76 insertions(+), 7 deletions(-) diff --git a/libsoup/cookies/soup-cookie-jar.c b/libsoup/cookies/soup-cookie-jar.c index f79cc3f9..bfe18af9 100644 --- a/libsoup/cookies/soup-cookie-jar.c +++ b/libsoup/cookies/soup-cookie-jar.c @@ -519,6 +519,7 @@ incoming_cookie_is_third_party (SoupCookieJar *jar, { SoupCookieJarPrivate *priv; const char *normalized_cookie_domain; + char *normalized_first_party_host; const char *cookie_base_domain; const char *first_party_base_domain; const char *first_party_host; @@ -540,12 +541,16 @@ incoming_cookie_is_third_party (SoupCookieJar *jar, if (cookie_base_domain == NULL) cookie_base_domain = soup_cookie_get_domain (cookie); - first_party_base_domain = soup_tld_get_base_domain (first_party_host, NULL); + normalized_first_party_host = soup_uri_normalize_domain (first_party_host); + first_party_base_domain = soup_tld_get_base_domain (normalized_first_party_host, NULL); if (first_party_base_domain == NULL) - first_party_base_domain = first_party_host; + first_party_base_domain = normalized_first_party_host; - if (soup_host_matches_host (cookie_base_domain, first_party_base_domain)) + if (soup_host_matches_host (cookie_base_domain, first_party_base_domain)) { + g_free (normalized_first_party_host); return FALSE; + } + g_free (normalized_first_party_host); if (policy == SOUP_COOKIE_JAR_ACCEPT_NO_THIRD_PARTY) 
return TRUE; diff --git a/libsoup/cookies/soup-cookie.c b/libsoup/cookies/soup-cookie.c index ba949239..6132a8d0 100644 --- a/libsoup/cookies/soup-cookie.c +++ b/libsoup/cookies/soup-cookie.c @@ -168,6 +168,16 @@ parse_date (const char **val_p) return date; } +static gboolean +is_lowercase_ascii_string (const char *str) +{ + for (; *str; str++) { + if (!g_ascii_islower (*str)) + return FALSE; + } + return TRUE; +} + #define MAX_AGE_CAP_IN_SECONDS 31536000 // 1 year #define MAX_ATTRIBUTE_SIZE 1024 @@ -311,6 +321,12 @@ parse_one_cookie (const char *header, GUri *origin) g_free (cookie->domain); cookie->domain = tmp; } + + if (!is_lowercase_ascii_string (cookie->domain)) { + char *tmp = soup_uri_normalize_domain (cookie->domain); + g_free (cookie->domain); + cookie->domain = tmp; + } } if (origin) { @@ -321,7 +337,7 @@ parse_one_cookie (const char *header, GUri *origin) return NULL; } } else - cookie->domain = g_strdup (g_uri_get_host (origin)); + cookie->domain = soup_uri_normalize_domain (g_uri_get_host (origin)); /* The original cookie spec didn't say that pages * could only set cookies for paths they were under. 
@@ -364,7 +380,7 @@ cookie_new_internal (const char *name, const char *value, cookie = g_slice_new0 (SoupCookie); cookie->name = g_strdup (name); cookie->value = g_strdup (value); - cookie->domain = g_strdup (domain); + cookie->domain = soup_uri_normalize_domain (domain); cookie->path = g_strdup (path); soup_cookie_set_max_age (cookie, max_age); cookie->same_site_policy = SOUP_SAME_SITE_POLICY_LAX; @@ -537,7 +553,7 @@ void soup_cookie_set_domain (SoupCookie *cookie, const char *domain) { g_free (cookie->domain); - cookie->domain = g_strdup (domain); + cookie->domain = soup_uri_normalize_domain (domain); } /** diff --git a/libsoup/soup-tld.c b/libsoup/soup-tld.c index 02e54059..398d94e9 100644 --- a/libsoup/soup-tld.c +++ b/libsoup/soup-tld.c @@ -15,6 +15,7 @@ #include #include "soup-tld.h" +#include "soup-uri-utils-private.h" #include "soup.h" static const char *soup_tld_get_base_domain_internal (const char *hostname, @@ -41,6 +42,8 @@ static const char *soup_tld_get_base_domain_internal (const char *hostname, * UTF-8 or ASCII format (and the return value will be in the same * format). * + * For accurate results @hostname must be lowercase. + * * Returns: a pointer to the start of the base domain in @hostname. If * an error occurs, %NULL will be returned and @error set. 
**/ @@ -80,6 +83,8 @@ gboolean soup_tld_domain_is_public_suffix (const char *domain) { const psl_ctx_t* psl = soup_psl_context (); + char *normalized; + gboolean is_public_suffix; g_return_val_if_fail (domain, FALSE); @@ -88,7 +93,11 @@ soup_tld_domain_is_public_suffix (const char *domain) return FALSE; } - return psl_is_public_suffix2 (psl, domain, PSL_TYPE_ANY | PSL_TYPE_NO_STAR_RULE); + normalized = soup_uri_normalize_domain (domain); + is_public_suffix = psl_is_public_suffix2 (psl, domain, PSL_TYPE_ANY | PSL_TYPE_NO_STAR_RULE); + g_free (normalized); + + return is_public_suffix; } /** diff --git a/libsoup/soup-uri-utils-private.h b/libsoup/soup-uri-utils-private.h index 0119f081..c2f984f8 100644 --- a/libsoup/soup-uri-utils-private.h +++ b/libsoup/soup-uri-utils-private.h @@ -28,6 +28,8 @@ GUri *soup_uri_copy_with_normalized_flags (GUri *uri); char *soup_uri_get_host_for_headers (GUri *uri); +char *soup_uri_normalize_domain (const char *domain); + #define SOUP_URI_IS_VALID(x) ((x) && g_uri_get_host(x) && g_uri_get_host(x)[0]) G_END_DECLS diff --git a/libsoup/soup-uri-utils.c b/libsoup/soup-uri-utils.c index 0963a114..1f65faed 100644 --- a/libsoup/soup-uri-utils.c +++ b/libsoup/soup-uri-utils.c @@ -506,3 +506,21 @@ soup_uri_get_host_for_headers (GUri *uri) return g_strdup (host); } + +char * +soup_uri_normalize_domain (const char *domain) +{ + char *lower; + char *normalized; + + g_assert (domain); + + if (g_str_is_ascii (domain)) + return g_ascii_strdown (domain, -1); + + lower = g_utf8_casefold (domain, -1); + normalized = g_utf8_normalize (lower, -1, G_NORMALIZE_NFKC); + g_free (lower); + + return normalized; +} diff --git a/tests/cookies-test.c b/tests/cookies-test.c index 1d2d4563..7007aaf5 100644 --- a/tests/cookies-test.c +++ b/tests/cookies-test.c @@ -695,6 +695,24 @@ do_cookies_threads_test (void) soup_test_session_abort_unref (session); } +static void +do_cookies_public_suffix_test (void) +{ + SoupCookieJar *jar = soup_cookie_jar_new (); + GUri *uri = 
g_uri_parse ("http://example.CO.uk", SOUP_HTTP_URI_FLAGS, NULL); + GSList *cookies; + + soup_cookie_jar_set_cookie (jar, uri, "value=1; domain=.co.uk"); + soup_cookie_jar_set_cookie (jar, uri, "value=1; domain=.CO.uk"); + soup_cookie_jar_set_cookie (jar, uri, "value=1; domain=.CO.UK"); + + cookies = soup_cookie_jar_all_cookies (jar); + g_assert_cmpint (g_slist_length (cookies), ==, 0); + + g_uri_unref (uri); + g_object_unref (jar); +} + int main (int argc, char **argv) { @@ -726,6 +744,7 @@ main (int argc, char **argv) g_test_add_func ("/cookies/secure-cookies", do_cookies_strict_secure_test); g_test_add_func ("/cookies/prefix", do_cookies_prefix_test); g_test_add_func ("/cookies/threads", do_cookies_threads_test); + g_test_add_func ("/cookies/public-suffix", do_cookies_public_suffix_test); ret = g_test_run (); -- GitLab From aec2261f9f9e1b2e74ca4384f621d182a4f93583 Mon Sep 17 00:00:00 2001 From: Patrick Griffis Date: Sun, 20 Apr 2025 15:50:51 -0500 Subject: [PATCH 2/6] 3.7.0 --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index b31a8791..6bc73710 100644 --- a/meson.build +++ b/meson.build @@ -1,5 +1,5 @@ project('libsoup', 'c', - version: '3.6.4', + version: '3.7.0', meson_version : '>= 0.54', license : 'LGPL-2.0-or-later', default_options : [ -- GitLab From 5115bf1b03d9b76e13eb9e5a3e1c6db0c627ef85 Mon Sep 17 00:00:00 2001 From: Patrick Griffis Date: Sun, 20 Apr 2025 15:52:47 -0500 Subject: [PATCH 3/6] Add soup_tld_get_base_domain_normalized() Resolves: #443 --- libsoup/soup-tld.c | 35 +++++++++++++++++++++++++++++++++-- libsoup/soup-tld.h | 4 ++++ tests/tld-test.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 2 deletions(-) diff --git a/libsoup/soup-tld.c b/libsoup/soup-tld.c index 398d94e9..adbb3911 100644 --- a/libsoup/soup-tld.c +++ b/libsoup/soup-tld.c @@ -42,7 +42,8 @@ static const char *soup_tld_get_base_domain_internal (const char *hostname, * UTF-8 or ASCII format 
(and the return value will be in the same * format). * - * For accurate results @hostname must be lowercase. + * For accurate results @hostname must be lowercase. Otherwise use + * [func@Soup.tld_get_base_domain_normalized]. * * Returns: a pointer to the start of the base domain in @hostname. If * an error occurs, %NULL will be returned and @error set. @@ -55,6 +56,36 @@ soup_tld_get_base_domain (const char *hostname, GError **error) return soup_tld_get_base_domain_internal (hostname, error); } +/** + * soup_tld_get_base_domain_normalized: + * @hostname: a hostname + * @error: return location for a #GError, or %NULL to ignore + * errors. See #SoupTLDError for the available error codes + * + * Finds the base domain for a given @hostname. See [func@tld_get_base_domain] + * for specific details. + * + * The difference between that function and this is that @hostname will + * first be normalized and this returns a copy of the base domain. + * + * Since: 3.8 + * Returns: the base domain in @hostname. If + * an error occurs, %NULL will be returned and @error set. 
+ **/ +char * +soup_tld_get_base_domain_normalized (const char *hostname, GError **error) +{ + g_return_val_if_fail (hostname, NULL); + g_return_val_if_fail (error == NULL || *error == NULL, NULL); + + char *normalized = soup_uri_normalize_domain (hostname); + const char *base = soup_tld_get_base_domain (normalized, error); + char *ret = g_strdup (base); + g_free (normalized); + + return ret; +} + static psl_ctx_t * soup_psl_context (void) { @@ -94,7 +125,7 @@ soup_tld_domain_is_public_suffix (const char *domain) } normalized = soup_uri_normalize_domain (domain); - is_public_suffix = psl_is_public_suffix2 (psl, domain, PSL_TYPE_ANY | PSL_TYPE_NO_STAR_RULE); + is_public_suffix = psl_is_public_suffix2 (psl, normalized, PSL_TYPE_ANY | PSL_TYPE_NO_STAR_RULE); g_free (normalized); return is_public_suffix; diff --git a/libsoup/soup-tld.h b/libsoup/soup-tld.h index 4825cd34..53756b5d 100644 --- a/libsoup/soup-tld.h +++ b/libsoup/soup-tld.h @@ -13,6 +13,10 @@ SOUP_AVAILABLE_IN_ALL const char *soup_tld_get_base_domain (const char *hostname, GError **error); +SOUP_AVAILABLE_IN_3_8 +char *soup_tld_get_base_domain_normalized (const char *hostname, + GError **error); + SOUP_AVAILABLE_IN_ALL gboolean soup_tld_domain_is_public_suffix (const char *domain); diff --git a/tests/tld-test.c b/tests/tld-test.c index ce6edfc0..cb553361 100644 --- a/tests/tld-test.c +++ b/tests/tld-test.c @@ -177,6 +177,39 @@ do_non_inet_tests (void) } } +static struct { + const char *hostname; + const char *expected_base; +} normalized_tests[] = { + { "bar.foo.CO.uk", "foo.co.uk" }, +}; + +static void +do_normalized_tests (void) +{ + for (guint i = 0; i < G_N_ELEMENTS (normalized_tests); i++) { + char *base = soup_tld_get_base_domain_normalized (normalized_tests[i].hostname, NULL); + g_assert_cmpstr (base, ==, normalized_tests[i].expected_base); + g_free (base); + } +} + +static struct { + const char *suffix; + gboolean is_suffix; +} normalized_public_suffix_tests[] = { + { "CO.uk", TRUE }, +}; + +static 
void +do_normalized_suffix_tests (void) +{ + for (guint i = 0; i < G_N_ELEMENTS (normalized_public_suffix_tests); i++) { + gboolean is_suffix = soup_tld_domain_is_public_suffix (normalized_public_suffix_tests[i].suffix); + g_assert_cmpint(is_suffix, ==, normalized_public_suffix_tests[i].is_suffix); + } +} + int main (int argc, char **argv) { @@ -186,6 +219,9 @@ main (int argc, char **argv) g_test_add_func ("/tld/inet", do_inet_tests); g_test_add_func ("/tld/non-inet", do_non_inet_tests); + g_test_add_func ("/tld/normalized-base", do_normalized_tests); + g_test_add_func ("/tld/normalized-suffix", do_normalized_suffix_tests); + ret = g_test_run (); -- GitLab From 1f3f76496167ccd53fb07e25230937664c493e4d Mon Sep 17 00:00:00 2001 From: Patrick Griffis Date: Wed, 4 Jun 2025 10:00:45 +0200 Subject: [PATCH 4/6] soup_cookie_new: Return on NULL domain This has been required, and documented as such, for many years. --- libsoup/cookies/soup-cookie.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/libsoup/cookies/soup-cookie.c b/libsoup/cookies/soup-cookie.c index 6132a8d0..8bb4c443 100644 --- a/libsoup/cookies/soup-cookie.c +++ b/libsoup/cookies/soup-cookie.c @@ -427,15 +427,7 @@ soup_cookie_new (const char *name, const char *value, { g_return_val_if_fail (name != NULL, NULL); g_return_val_if_fail (value != NULL, NULL); - - /* We ought to return if domain is NULL too, but this used to - * do be incorrectly documented as legal, and it wouldn't - * break anything as long as you called - * soup_cookie_set_domain() immediately after. So we warn but - * don't return, to discourage that behavior but not actually - * break anyone doing it.
- */ - g_warn_if_fail (domain != NULL); + g_return_val_if_fail (domain != NULL, NULL); return cookie_new_internal (name, value, domain, path, max_age); } -- GitLab From 005b46116cee0bb53bf4ec714607da1e9a4fc4ed Mon Sep 17 00:00:00 2001 From: Patrick Griffis Date: Wed, 4 Jun 2025 10:12:10 +0200 Subject: [PATCH 5/6] Deprecate soup_cookies_from_request() This method creates an invalid SoupCookie as it does not have fields such as domain set. It should be replaced by a function that takes a domain value. According to Debian codesearch this function is not used so for now deprecating it should be fine. --- libsoup/cookies/soup-cookie.c | 12 ++++++++---- libsoup/cookies/soup-cookie.h | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/libsoup/cookies/soup-cookie.c b/libsoup/cookies/soup-cookie.c index 8bb4c443..32589943 100644 --- a/libsoup/cookies/soup-cookie.c +++ b/libsoup/cookies/soup-cookie.c @@ -380,7 +380,10 @@ cookie_new_internal (const char *name, const char *value, cookie = g_slice_new0 (SoupCookie); cookie->name = g_strdup (name); cookie->value = g_strdup (value); - cookie->domain = soup_uri_normalize_domain (domain); + // This is only supported because of soup_cookies_from_request(), + // If unset this is not a valid cookie without calling soup_cookie_set_domain(). + if (domain) + cookie->domain = soup_uri_normalize_domain (domain); cookie->path = g_strdup (path); soup_cookie_set_max_age (cookie, max_age); cookie->same_site_policy = SOUP_SAME_SITE_POLICY_LAX; @@ -940,12 +943,13 @@ soup_cookies_from_response (SoupMessage *msg) * `SoupCookie`s. * * As the "Cookie" header, unlike "Set-Cookie", only contains cookie names and - * values, none of the other [struct@Cookie] fields will be filled in. (Thus, you - * can't generally pass a cookie returned from this method directly to - * [func@cookies_to_response].) + * values, none of the other [struct@Cookie] fields will be filled in. 
To be valid + * you must set the domain manually, otherwise you can't pass a cookie returned from + * this method directly to [func@cookies_to_response]. * * Returns: (element-type SoupCookie) (transfer full): a #GSList of * `SoupCookie`s, which can be freed with [method@Cookie.free]. + * Deprecated: 3.8 **/ GSList * soup_cookies_from_request (SoupMessage *msg) diff --git a/libsoup/cookies/soup-cookie.h b/libsoup/cookies/soup-cookie.h index 5d0fe9b5..32d2654a 100644 --- a/libsoup/cookies/soup-cookie.h +++ b/libsoup/cookies/soup-cookie.h @@ -108,7 +108,7 @@ void soup_cookie_free (SoupCookie *cookie); SOUP_AVAILABLE_IN_ALL GSList *soup_cookies_from_response (SoupMessage *msg); -SOUP_AVAILABLE_IN_ALL +SOUP_DEPRECATED_IN_3_8 GSList *soup_cookies_from_request (SoupMessage *msg); SOUP_AVAILABLE_IN_ALL -- GitLab From 2af64704dddef3d96b386aed9c43876af3050530 Mon Sep 17 00:00:00 2001 From: Patrick Griffis Date: Thu, 5 Jun 2025 08:30:40 +0200 Subject: [PATCH 6/6] soup_tld_get_base_domain: Warn if passed an uppercase string --- libsoup/cookies/soup-cookie.c | 12 +----------- libsoup/soup-misc.c | 11 +++++++++++ libsoup/soup-misc.h | 2 ++ libsoup/soup-tld.c | 4 ++++ 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/libsoup/cookies/soup-cookie.c b/libsoup/cookies/soup-cookie.c index 32589943..f54a81d0 100644 --- a/libsoup/cookies/soup-cookie.c +++ b/libsoup/cookies/soup-cookie.c @@ -168,16 +168,6 @@ parse_date (const char **val_p) return date; } -static gboolean -is_lowercase_ascii_string (const char *str) -{ - for (; *str; str++) { - if (!g_ascii_islower (*str)) - return FALSE; - } - return TRUE; -} - #define MAX_AGE_CAP_IN_SECONDS 31536000 // 1 year #define MAX_ATTRIBUTE_SIZE 1024 @@ -322,7 +312,7 @@ parse_one_cookie (const char *header, GUri *origin) cookie->domain = tmp; } - if (!is_lowercase_ascii_string (cookie->domain)) { + if (!soup_string_is_lowercase_ascii (cookie->domain)) { char *tmp = soup_uri_normalize_domain (cookie->domain); g_free 
(cookie->domain); cookie->domain = tmp; diff --git a/libsoup/soup-misc.c b/libsoup/soup-misc.c index dd00abe8..3a139f26 100644 --- a/libsoup/soup-misc.c +++ b/libsoup/soup-misc.c @@ -307,3 +307,14 @@ soup_http_version_to_string (SoupHTTPVersion version) g_assert_not_reached (); return NULL; } + +gboolean +soup_string_is_lowercase_ascii (const char *str) +{ + for (; *str; str++) { + if (g_ascii_isupper (*str) || (guchar) *str >= 0x80) /* only uppercase ASCII or non-ASCII bytes need normalizing; '.', '-' and digits are fine */ + return FALSE; + } + return TRUE; +} + diff --git a/libsoup/soup-misc.h b/libsoup/soup-misc.h index f31b87d4..db8d49ea 100644 --- a/libsoup/soup-misc.h +++ b/libsoup/soup-misc.h @@ -77,6 +77,8 @@ gchar *soup_get_accept_languages_from_system (void); const char *soup_http_version_to_string (SoupHTTPVersion version); +gboolean soup_string_is_lowercase_ascii (const char *str); + G_END_DECLS #endif /* __SOUP_MISC_H__ */ diff --git a/libsoup/soup-tld.c b/libsoup/soup-tld.c index adbb3911..b858fb2f 100644 --- a/libsoup/soup-tld.c +++ b/libsoup/soup-tld.c @@ -15,6 +15,7 @@ #include #include "soup-tld.h" +#include "soup-misc.h" #include "soup-uri-utils-private.h" #include "soup.h" @@ -53,6 +54,9 @@ soup_tld_get_base_domain (const char *hostname, GError **error) { g_return_val_if_fail (hostname, NULL); + if (!soup_string_is_lowercase_ascii (hostname)) + g_warning ("Passing a non-lowercase string (\"%s\") to soup_tld_get_base_domain() may give incorrect results", hostname); + return soup_tld_get_base_domain_internal (hostname, error); } -- GitLab