Use glibc strlen to speed up xmlStrlen
10% speedup with a patch.
Before the patch
$ cd ~/code/libxml2
$ git checkout origin/master
$ CFLAGS="-O2 -pipe -g" ../configure --host=x86_64-pc-linux-gnu --enable-static --disable-shared --with-iconv=yes --without-python --without-readline --with-c14n --with-debug --with-threads && make clean && make -j4
$ sudo make install
$ cd ~/code/benchmark && make && ./slow_parsing_benchmark ./pc_game_big_shopping.html "//*[contains(concat(' ',@class,' '), ' sh-dlr__list-result ')]//*[contains(concat(' ', @class, ' '), ' hBUZL ')]"
# Some parsing errors like
./pc_game_big_shopping.html:62: HTML parser error : Tag path invalid
4h2c0-1.1.9-2 2-2s2 .9 2 2c0 2-3 1.75-3 5h2c0-2.25 3-2.5 3-5 0-2.21-1.79-4-4-4z"
searching './pc_game_big_shopping.html' with '//*[contains(concat(' ',@class,' '), ' sh-dlr__list-result ')]//*[contains(concat(' ', @class, ' '), ' hBUZL ')]' 1000 times
NODESET with 260 results
67855 ms
After the patch
$ cd ~/code/libxml2
$ git checkout use-glibc-strlen-in-xmlStrlen-212
$ CFLAGS="-O2 -pipe -g" ../configure --host=x86_64-pc-linux-gnu --enable-static --disable-shared --with-iconv=yes --without-python --without-readline --with-c14n --with-debug --with-threads && make clean && make -j4
$ sudo make install
$ cd ~/code/benchmark && make && ./slow_parsing_benchmark ./pc_game_big_shopping.html "//*[contains(concat(' ',@class,' '), ' sh-dlr__list-result ')]//*[contains(concat(' ', @class, ' '), ' hBUZL ')]"
searching './pc_game_big_shopping.html' with '//*[contains(concat(' ',@class,' '), ' sh-dlr__list-result ')]//*[contains(concat(' ', @class, ' '), ' hBUZL ')]' 1000 times
NODESET with 260 results
59767 ms
Benchmark code
# Makefile
CFLAGS=$(shell xml2-config --cflags) -O2
LDFLAGS=$(shell xml2-config --libs)
all: clean slow_parsing_benchmark
slow_parsing_benchmark: slow_parsing_benchmark.o
$(CC) -o slow_parsing_benchmark slow_parsing_benchmark.o $(LDFLAGS)
slow_parsing_benchmark.o: slow_parsing_benchmark.c
$(CC) $(CFLAGS) -c -o slow_parsing_benchmark.o slow_parsing_benchmark.c
clean:
rm -f *.o slow_parsing_benchmark
PHONY: clean
#include <string.h>
#include <stdio.h>
#include <time.h>
#include <sys/time.h>
#include "libxml/parserInternals.h"
#include "libxml/xpath.h"
#include "libxml/xpathInternals.h"
long time_in_ms()
{
struct timeval tv;
gettimeofday(&tv, NULL);
return (tv.tv_sec) * 1000 + (tv.tv_usec) / 1000.0 ;
}
long elapsed_ms_since(long start)
{
return time_in_ms() - start ;
}
int main(int argc, char *argv[])
{
long start_time;
int loop = 1000;
if (argc != 3) {
fprintf(stderr, "USAGE: %s <html_file> <class_name>\n", argv[0]);
exit(2);
}
char* filename = argv[1];
char* query = argv[2];
htmlDocPtr doc = htmlReadFile(filename, NULL, 0);
if (doc == NULL) {
exit(1);
}
xmlXPathInit();
xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
start_time = time_in_ms();
fprintf(stdout, "searching '%s' with '%s' %d times\n", filename, query, loop);
for (int count = 0; count < loop; ++count) {
xmlXPathObjectPtr xpath = xmlXPathEvalExpression(query, ctx);
if (xpath == NULL) {
fprintf(stderr, "xpath null for query '%s'\n", query);
exit(1);
}
if (count == 0 && xpath->type == XPATH_NODESET) {
fprintf(stdout, "NODESET with %d results\n", xpath->nodesetval->nodeNr);
}
}
fprintf(stdout, "%ld ms\n", elapsed_ms_since(start_time));
}
I'm not sure why it runs faster with the patch. The flame graph of the sample program from #212 (comment 991383) looks the same for nokogiri on master
and on the branch of this PR. xmlXPathCompOpEval
is dominating before and after the patch.
xmlStrlen
vs strlen
Timing results of direct comparison xmlStrlen (entire HTML file): 926171.936981 μs
glibc_xmlStrlen (entire HTML file): 36905.903992 μs
delta (xmlStrlen ÷ glibc_xmlStrlen): 25.094584 times
xmlStrlen (average string): 57479.204010 μs
glibc_xmlStrlen (average string): 5802.069000 μs
delta (xmlStrlen ÷ glibc_xmlStrlen): 9.905937 times
xmlStrlen (bigger string): 388056.315979 μs
glibc_xmlStrlen (bigger string): 12797.856995 μs
delta (xmlStrlen ÷ glibc_xmlStrlen): 30.318382 times
xmlStrlen (smallest string): 15870.046021 μs
glibc_xmlStrlen (smallest string): 6282.208984 μs
delta (xmlStrlen ÷ glibc_xmlStrlen): 2.527903 times
Resolves #212 (closed). See #212 (comment 992091) for reference.