Commit 18ef5ed9 authored by Carlos Garcia Campos's avatar Carlos Garcia Campos
Browse files

headers: stop interning all header names

To keep header handling fast we now follow an approach similar to
WebKit's. We use gperf to generate a fast conversion from commonly used
header names to an enum value. Headers are now stored in two arrays: one
for common headers, where the enum value is used as the name, and another
for uncommon headers, where a duplicate of the name string is stored and
g_ascii_strcasecmp() is used for comparisons. Both arrays are created on
demand with preallocated space for 6 headers. This slightly changes the
order in which headers are iterated, because we iterate the common
headers first and then the uncommon ones.

Fixes #111
parent 309ff12a
......@@ -44,6 +44,8 @@ ignore_headers = [
'soup-client-message-io-http2.h',
'soup-body-input-stream-http2.h',
'soup-tls-interaction.h',
'soup-header-names.h',
'soup-message-headers-private.h',
]
mkdb_args = [
......
#!/usr/bin/env python3
"""Generate soup-header-names.c and soup-header-names.h.

Reads the header names listed in soup-header-names.in (one per line, blank
lines and lines starting with '#' are skipped), builds a gperf input that
maps each name to a SOUP_HEADER_* enumerator, runs gperf on it and writes
the resulting C source plus a matching header declaring the SoupHeaderName
enum.  All paths are relative to the current working directory.
"""
import subprocess
import sys

# name -> enumerator identifier, e.g. 'Content-Type' -> 'SOUP_HEADER_CONTENT_TYPE'
http_header_name_to_id = {}
http_header_names = []

with open('soup-header-names.in') as names_file:
    for line in names_file:
        name = line.strip()
        if not name or name[0] == '#':
            continue
        http_header_name_to_id[name] = 'SOUP_HEADER_' + name.upper().replace('-', '_')
        http_header_names.append(name)

# The sorted order is used for BOTH the string table and the enum below, so
# that an enumerator's integer value is the index of its name in the table.
http_header_names.sort()

# --- gperf input: declarations section (string table indexed by enum value)
gperf_file = '''%{
/* This file has been generated with generate-header-names.py script, do not edit */
#include "soup-header-names.h"
#include <string.h>
static const char * const soup_headr_name_strings[] = {
'''
for name in http_header_names:
    gperf_file += ' "%s",\n' % name

# --- gperf input: options and the keyword struct
gperf_file += '''};
%}
%language=ANSI-C
%struct-type
struct SoupHeaderHashEntry {
int name;
SoupHeaderName header_name;
};
%define hash-function-name soup_header_name_hash_function
%define lookup-function-name soup_header_name_find
%readonly-tables
%global-table
%compare-strncmp
%ignore-case
%pic
%%
'''

# --- gperf input: keyword list, one "<name>, <enumerator>" entry per header
for name in http_header_names:
    gperf_file += '%s, %s\n' % (name, http_header_name_to_id[name])

# --- gperf input: trailing C code with the two public conversion functions
gperf_file += '''%%
SoupHeaderName soup_header_name_from_string (const char *str)
{
const struct SoupHeaderHashEntry *entry;
entry = soup_header_name_find (str, strlen (str));
return entry ? entry->header_name : SOUP_HEADER_UNKNOWN;
}
const char *soup_header_name_to_string (SoupHeaderName name)
{
if (name == SOUP_HEADER_UNKNOWN)
return NULL;
return soup_headr_name_strings[name];
}
'''

# gperf reads the description from stdin and writes the C code to stdout.
# See the gperf manual for the meaning of the -k/-D/-n/-s options.
command = ['gperf', '-k', '*', '-D', '-n', '-s', '2']
result = subprocess.run(command, input=gperf_file,
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                        text=True)
if result.returncode != 0:
    # Diagnostics belong on stderr so they are not mistaken for output.
    print(result.stderr, file=sys.stderr)
    sys.exit(result.returncode)

with open('soup-header-names.c', 'w') as source_out:
    # Prefix the first 'const struct SoupHeaderHashEntry *' in gperf's output
    # with 'static' (presumably the lookup function, to keep it file-local).
    source_out.write(result.stdout.replace('const struct SoupHeaderHashEntry *',
                                           'static const struct SoupHeaderHashEntry *',
                                           1))

# --- public header: the enum (same order as the string table) + prototypes
header_file = '''/* This file has been generated with generate-header-names.py script, do not edit */
#pragma once
typedef enum {
'''
for name in http_header_names:
    header_file += ' %s,\n' % http_header_name_to_id[name]
header_file += '''
SOUP_HEADER_UNKNOWN
} SoupHeaderName;
SoupHeaderName soup_header_name_from_string (const char *str);
const char *soup_header_name_to_string (SoupHeaderName name);
'''

with open('soup-header-names.h', 'w') as header_out:
    header_out.write(header_file)
......@@ -65,6 +65,7 @@ soup_sources = [
'soup-filter-input-stream.c',
'soup-form.c',
'soup-headers.c',
'soup-header-names.c',
'soup-init.c',
'soup-io-stream.c',
'soup-logger.c',
......
This diff is collapsed.
/* This file has been generated with generate-header-names.py script, do not edit */
#pragma once
/*
 * Identifiers for the commonly used HTTP header names.
 *
 * The enumerators are emitted by generate-header-names.py in the same
 * (sorted) order as the generated name string table, so their numeric
 * values must not be reordered by hand.  SOUP_HEADER_UNKNOWN is always the
 * last enumerator.
 */
typedef enum {
        SOUP_HEADER_ACCEPT,
        SOUP_HEADER_ACCEPT_CHARSET,
        SOUP_HEADER_ACCEPT_ENCODING,
        SOUP_HEADER_ACCEPT_LANGUAGE,
        SOUP_HEADER_ACCEPT_RANGES,
        SOUP_HEADER_ACCESS_CONTROL_ALLOW_CREDENTIALS,
        SOUP_HEADER_ACCESS_CONTROL_ALLOW_HEADERS,
        SOUP_HEADER_ACCESS_CONTROL_ALLOW_METHODS,
        SOUP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN,
        SOUP_HEADER_ACCESS_CONTROL_EXPOSE_HEADERS,
        SOUP_HEADER_ACCESS_CONTROL_MAX_AGE,
        SOUP_HEADER_ACCESS_CONTROL_REQUEST_HEADERS,
        SOUP_HEADER_ACCESS_CONTROL_REQUEST_METHOD,
        SOUP_HEADER_AGE,
        SOUP_HEADER_AUTHENTICATION_INFO,
        SOUP_HEADER_AUTHORIZATION,
        SOUP_HEADER_CACHE_CONTROL,
        SOUP_HEADER_CONNECTION,
        SOUP_HEADER_CONTENT_DISPOSITION,
        SOUP_HEADER_CONTENT_ENCODING,
        SOUP_HEADER_CONTENT_LANGUAGE,
        SOUP_HEADER_CONTENT_LENGTH,
        SOUP_HEADER_CONTENT_LOCATION,
        SOUP_HEADER_CONTENT_RANGE,
        SOUP_HEADER_CONTENT_SECURITY_POLICY,
        SOUP_HEADER_CONTENT_SECURITY_POLICY_REPORT_ONLY,
        SOUP_HEADER_CONTENT_TYPE,
        SOUP_HEADER_COOKIE,
        SOUP_HEADER_COOKIE2,
        SOUP_HEADER_CROSS_ORIGIN_RESOURCE_POLICY,
        SOUP_HEADER_DNT,
        SOUP_HEADER_DATE,
        SOUP_HEADER_DEFAULT_STYLE,
        SOUP_HEADER_ETAG,
        SOUP_HEADER_EXPECT,
        SOUP_HEADER_EXPIRES,
        SOUP_HEADER_HOST,
        SOUP_HEADER_IF_MATCH,
        SOUP_HEADER_IF_MODIFIED_SINCE,
        SOUP_HEADER_IF_NONE_MATCH,
        SOUP_HEADER_IF_RANGE,
        SOUP_HEADER_IF_UNMODIFIED_SINCE,
        SOUP_HEADER_KEEP_ALIVE,
        SOUP_HEADER_LAST_EVENT_ID,
        SOUP_HEADER_LAST_MODIFIED,
        SOUP_HEADER_LINK,
        SOUP_HEADER_LOCATION,
        SOUP_HEADER_ORIGIN,
        SOUP_HEADER_PING_FROM,
        SOUP_HEADER_PING_TO,
        SOUP_HEADER_PRAGMA,
        SOUP_HEADER_PROXY_AUTHENTICATE,
        SOUP_HEADER_PROXY_AUTHENTICATION_INFO,
        SOUP_HEADER_PROXY_AUTHORIZATION,
        SOUP_HEADER_PURPOSE,
        SOUP_HEADER_RANGE,
        SOUP_HEADER_REFERER,
        SOUP_HEADER_REFERRER_POLICY,
        SOUP_HEADER_REFRESH,
        SOUP_HEADER_SEC_WEBSOCKET_ACCEPT,
        SOUP_HEADER_SEC_WEBSOCKET_EXTENSIONS,
        SOUP_HEADER_SEC_WEBSOCKET_KEY,
        SOUP_HEADER_SEC_WEBSOCKET_PROTOCOL,
        SOUP_HEADER_SEC_WEBSOCKET_VERSION,
        SOUP_HEADER_SERVER,
        SOUP_HEADER_SERVER_TIMING,
        SOUP_HEADER_SERVICE_WORKER,
        SOUP_HEADER_SERVICE_WORKER_ALLOWED,
        SOUP_HEADER_SET_COOKIE,
        SOUP_HEADER_SET_COOKIE2,
        SOUP_HEADER_SOURCEMAP,
        SOUP_HEADER_TE,
        SOUP_HEADER_TIMING_ALLOW_ORIGIN,
        SOUP_HEADER_TRAILER,
        SOUP_HEADER_TRANSFER_ENCODING,
        SOUP_HEADER_UPGRADE,
        SOUP_HEADER_UPGRADE_INSECURE_REQUESTS,
        SOUP_HEADER_USER_AGENT,
        SOUP_HEADER_VARY,
        SOUP_HEADER_VIA,
        SOUP_HEADER_WWW_AUTHENTICATE,
        SOUP_HEADER_X_CONTENT_TYPE_OPTIONS,
        SOUP_HEADER_X_DNS_PREFETCH_CONTROL,
        SOUP_HEADER_X_FRAME_OPTIONS,
        SOUP_HEADER_X_SOURCEMAP,
        SOUP_HEADER_X_TEMP_TABLET,
        SOUP_HEADER_X_XSS_PROTECTION,

        /* Sentinel: not one of the names listed above. */
        SOUP_HEADER_UNKNOWN
} SoupHeaderName;

/* Look up @str in the generated table; SOUP_HEADER_UNKNOWN when no entry matches. */
SoupHeaderName  soup_header_name_from_string (const char     *str);

/* Name string for @name, or NULL when @name is SOUP_HEADER_UNKNOWN. */
const char     *soup_header_name_to_string   (SoupHeaderName  name);
# This file is the input of script generate-header-names.py to generate
# soup-header-names.c and soup-header-names.h. Run the script after any
# modification in this file to update the generated code.
Accept
Accept-Charset
Accept-Language
Accept-Encoding
Accept-Ranges
Access-Control-Allow-Credentials
Access-Control-Allow-Headers
Access-Control-Allow-Methods
Access-Control-Allow-Origin
Access-Control-Expose-Headers
Access-Control-Max-Age
Access-Control-Request-Headers
Access-Control-Request-Method
Age
Authentication-Info
Authorization
Cache-Control
Connection
Content-Disposition
Content-Encoding
Content-Language
Content-Length
Content-Location
Content-Security-Policy
Content-Security-Policy-Report-Only
Content-Type
Content-Range
Cookie
Cookie2
Cross-Origin-Resource-Policy
Date
DNT
Default-Style
ETag
Expect
Expires
Host
If-Match
If-Modified-Since
If-None-Match
If-Range
If-Unmodified-Since
Keep-Alive
Last-Event-ID
Last-Modified
Link
Location
Origin
Ping-From
Ping-To
Purpose
Pragma
Proxy-Authorization
Proxy-Authenticate
Proxy-Authentication-Info
Range
Referer
Referrer-Policy
Refresh
Sec-WebSocket-Accept
Sec-WebSocket-Extensions
Sec-WebSocket-Key
Sec-WebSocket-Protocol
Sec-WebSocket-Version
Server
Server-Timing
Service-Worker
Service-Worker-Allowed
Set-Cookie
Set-Cookie2
SourceMap
TE
Timing-Allow-Origin
Trailer
Transfer-Encoding
Upgrade
Upgrade-Insecure-Requests
User-Agent
Vary
Via
WWW-Authenticate
X-Content-Type-Options
X-DNS-Prefetch-Control
X-Frame-Options
X-SourceMap
X-XSS-Protection
X-Temp-Tablet
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 8 -*- */
/*
* Copyright (C) 2021 Igalia S.L.
*/
#pragma once
#include "soup-message-headers.h"
#include "soup-header-names.h"
G_BEGIN_DECLS

/*
 * Private SoupMessageHeaders entry points that identify the header by its
 * SoupHeaderName enumerator rather than by a name string.
 *
 * NOTE(review): the implementations are not visible from this header;
 * each function presumably mirrors the string-keyed function of the same
 * name (without the _common suffix) in soup-message-headers.h -- confirm.
 */

void        soup_message_headers_append_common          (SoupMessageHeaders *hdrs,
                                                         SoupHeaderName      name,
                                                         const char         *value);

const char *soup_message_headers_get_one_common         (SoupMessageHeaders *hdrs,
                                                         SoupHeaderName      name);

const char *soup_message_headers_get_list_common        (SoupMessageHeaders *hdrs,
                                                         SoupHeaderName      name);

void        soup_message_headers_remove_common          (SoupMessageHeaders *hdrs,
                                                         SoupHeaderName      name);

void        soup_message_headers_replace_common         (SoupMessageHeaders *hdrs,
                                                         SoupHeaderName      name,
                                                         const char         *value);

gboolean    soup_message_headers_header_contains_common (SoupMessageHeaders *hdrs,
                                                         SoupHeaderName      name,
                                                         const char         *token);

gboolean    soup_message_headers_header_equals_common   (SoupMessageHeaders *hdrs,
                                                         SoupHeaderName      name,
                                                         const char         *value);

G_END_DECLS
This diff is collapsed.
......@@ -124,8 +124,8 @@ static struct RequestTest {
"GET / HTTP/1.1\r\nFoo: bar\r\n baz\r\nConnection: close\r\nBlah: blah\r\n", -1,
SOUP_STATUS_OK,
"GET", "/", SOUP_HTTP_1_1,
{ { "Foo", "bar baz" },
{ "Connection", "close" },
{ { "Connection", "close" },
{ "Foo", "bar baz" },
{ "Blah", "blah" },
{ NULL }
}
......@@ -166,8 +166,8 @@ static struct RequestTest {
"GET / HTTP/1.0\r\nFoo: bar\r\nConnection: Bar, Quux\r\nBar: baz\r\nQuux: foo\r\n", -1,
SOUP_STATUS_OK,
"GET", "/", SOUP_HTTP_1_0,
{ { "Foo", "bar" },
{ "Connection", "Bar, Quux" },
{ { "Connection", "Bar, Quux" },
{ "Foo", "bar" },
{ NULL }
}
},
......@@ -321,8 +321,8 @@ static struct RequestTest {
"GET / HTTP/1.1\r\na: b\r\nHost: example\rcom\r\np: \rq\r\ns: t\r\r\nc: d\r\n", -1,
SOUP_STATUS_OK,
"GET", "/", SOUP_HTTP_1_1,
{ { "a", "b" },
{ "Host", "example com" }, /* CR in the middle turns to space */
{ { "Host", "example com" }, /* CR in the middle turns to space */
{ "a", "b" },
{ "p", "q" }, /* CR at beginning is ignored */
{ "s", "t" }, /* CR at end is ignored */
{ "c", "d" },
......@@ -528,8 +528,8 @@ static struct ResponseTest {
{ "Connection header on HTTP/1.0 message", NULL,
"HTTP/1.0 200 ok\r\nFoo: bar\r\nConnection: Bar\r\nBar: quux\r\n", -1,
SOUP_HTTP_1_0, SOUP_STATUS_OK, "ok",
{ { "Foo", "bar" },
{ "Connection", "Bar" },
{ { "Connection", "Bar" },
{ "Foo", "bar" },
{ NULL }
}
},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment