Skip to content

Use glibc strlen to speed up xmlStrlen

10% speedup with a patch.

Before the patch

$ cd ~/code/libxml2
$ git checkout origin/master
$ CFLAGS="-O2 -pipe -g" ../configure --host=x86_64-pc-linux-gnu --enable-static --disable-shared --with-iconv=yes --without-python --without-readline --with-c14n --with-debug --with-threads && make clean && make -j4
$ sudo make install
$ cd ~/code/benchmark && make && ./slow_parsing_benchmark ./pc_game_big_shopping.html "//*[contains(concat(' ',@class,' '), ' sh-dlr__list-result ')]//*[contains(concat(' ', @class, ' '), ' hBUZL ')]"
# Some parsing errors like
./pc_game_big_shopping.html:62: HTML parser error : Tag path invalid            
4h2c0-1.1.9-2 2-2s2 .9 2 2c0 2-3 1.75-3 5h2c0-2.25 3-2.5 3-5 0-2.21-1.79-4-4-4z"

searching './pc_game_big_shopping.html' with '//*[contains(concat(' ',@class,' '), ' sh-dlr__list-result ')]//*[contains(concat(' ', @class, ' '), ' hBUZL ')]' 1000 times
NODESET with 260 results
67855 ms

After the patch

$ cd ~/code/libxml2
$ git checkout use-glibc-strlen-in-xmlStrlen-212
$ CFLAGS="-O2 -pipe -g" ../configure --host=x86_64-pc-linux-gnu --enable-static --disable-shared --with-iconv=yes --without-python --without-readline --with-c14n --with-debug --with-threads && make clean && make -j4
$ sudo make install
$ cd ~/code/benchmark && make && ./slow_parsing_benchmark ./pc_game_big_shopping.html "//*[contains(concat(' ',@class,' '), ' sh-dlr__list-result ')]//*[contains(concat(' ', @class, ' '), ' hBUZL ')]"
searching './pc_game_big_shopping.html' with '//*[contains(concat(' ',@class,' '), ' sh-dlr__list-result ')]//*[contains(concat(' ', @class, ' '), ' hBUZL ')]' 1000 times
NODESET with 260 results
59767 ms
Benchmark code
# Makefile

CFLAGS=$(shell xml2-config --cflags) -O2
LDFLAGS=$(shell xml2-config --libs)

all: clean slow_parsing_benchmark

slow_parsing_benchmark: slow_parsing_benchmark.o
	$(CC) -o slow_parsing_benchmark slow_parsing_benchmark.o $(LDFLAGS)

slow_parsing_benchmark.o: slow_parsing_benchmark.c
	$(CC) $(CFLAGS) -c -o slow_parsing_benchmark.o slow_parsing_benchmark.c

clean:
	rm -f *.o slow_parsing_benchmark

PHONY: clean
#include <string.h>
#include <stdio.h>
#include <time.h>
#include <sys/time.h>

#include "libxml/parserInternals.h"
#include "libxml/xpath.h"
#include "libxml/xpathInternals.h"


long time_in_ms()
{
  struct timeval  tv;
  gettimeofday(&tv, NULL);

  return (tv.tv_sec) * 1000 + (tv.tv_usec) / 1000.0 ;
}

long elapsed_ms_since(long start)
{
  return time_in_ms() - start ;
}

int main(int argc, char *argv[])
{
  long start_time;
  int loop = 1000;

  if (argc != 3) {
    fprintf(stderr, "USAGE: %s <html_file> <class_name>\n", argv[0]);
    exit(2);
  }

  char* filename = argv[1];
  char* query = argv[2];

  htmlDocPtr doc = htmlReadFile(filename, NULL, 0);
  if (doc == NULL) {
    exit(1);
  }

  xmlXPathInit();
  xmlXPathContextPtr ctx = xmlXPathNewContext(doc);

  start_time = time_in_ms();

  fprintf(stdout, "searching '%s' with '%s' %d times\n", filename, query, loop);

  for (int count = 0; count < loop; ++count) {
    xmlXPathObjectPtr xpath = xmlXPathEvalExpression(query, ctx);

    if (xpath == NULL) {
      fprintf(stderr, "xpath null for query '%s'\n", query);
      exit(1);
    }

    if (count == 0 && xpath->type == XPATH_NODESET) {
      fprintf(stdout, "NODESET with %d results\n", xpath->nodesetval->nodeNr);
    }
  }

  fprintf(stdout, "%ld ms\n", elapsed_ms_since(start_time));
}

I'm not sure why it runs faster with the patch. The flame graph of the sample program from #212 (comment 991383) looks the same for nokogiri on master and on the branch of this PR. xmlXPathCompOpEval is dominating before and after the patch.

flamegraph

Timing results of direct comparison xmlStrlen vs strlen
xmlStrlen (entire HTML file): 926171.936981 μs
glibc_xmlStrlen (entire HTML file): 36905.903992 μs
delta (xmlStrlen ÷ glibc_xmlStrlen): 25.094584 times

xmlStrlen (average string): 57479.204010 μs
glibc_xmlStrlen (average string): 5802.069000 μs
delta (xmlStrlen ÷ glibc_xmlStrlen): 9.905937 times

xmlStrlen (bigger string): 388056.315979 μs
glibc_xmlStrlen (bigger string): 12797.856995 μs
delta (xmlStrlen ÷ glibc_xmlStrlen): 30.318382 times

xmlStrlen (smallest string): 15870.046021 μs
glibc_xmlStrlen (smallest string): 6282.208984 μs
delta (xmlStrlen ÷ glibc_xmlStrlen): 2.527903 times

Resolves #212 (closed). See #212 (comment 992091) for reference.

Edited by Ilya Zub

Merge request reports