xmlStrlen is 2 - 30 times slower than glibc strlen

Given the simplest benchmark, xmlStrlen is two times slower than strlen from glibc on small strings, ten times slower on average strings, and thirty times slower on big strings and an entire HTML file.

The performance of xmlStrlen resulted in the 0.7 - 1.5 seconds to parse and search through 2 MB HTML document by using Nokogiri. I described this issue in the Nokogiri reposirory which led to 2x speedup on the Nokogiri side, but xmlStrlen still could be faster.

libxml2 version

$ xml2-config --version
2.9.10

Benchmark results

$ ./slow_parsing_benchmark 
xmlStrlen (entire HTML file): 926171.936981 μs
glibc_xmlStrlen (entire HTML file): 36905.903992 μs
delta (xmlStrlen ÷ glibc_xmlStrlen): 25.094584 times

xmlStrlen (average string): 57479.204010 μs
glibc_xmlStrlen (average string): 5802.069000 μs
delta (xmlStrlen ÷ glibc_xmlStrlen): 9.905937 times

xmlStrlen (bigger string): 388056.315979 μs
glibc_xmlStrlen (bigger string): 12797.856995 μs
delta (xmlStrlen ÷ glibc_xmlStrlen): 30.318382 times

xmlStrlen (smallest string): 15870.046021 μs
glibc_xmlStrlen (smallest string): 6282.208984 μs
delta (xmlStrlen ÷ glibc_xmlStrlen): 2.527903 times

Benchmark code

big_html.zip I've used in the code below.

#include <string.h>
#include <stdio.h>
#include <time.h>

#include "libxml/HTMLtree.h"
#include "libxml/xpath.h"

double time_in_microsec()
{
  struct timespec tv;
  clock_gettime(CLOCK_MONOTONIC_RAW, &tv);

  return (double) (tv.tv_sec * 1000000 + tv.tv_nsec / 1000.0);
}

double elapsed_ms_since(double start)
{
  return time_in_microsec() - start;
}

int glibc_xmlStrlen(const xmlChar *str) {
    if (str == NULL) return(0);

    return strlen((const char*)str);
}

int main(int argc, char *argv[])
{
  FILE *fp = fopen ("big_html.html", "rb");
  char *buffer = NULL;
  size_t len;
  ssize_t bytes_read = getdelim(&buffer, &len, '\0', fp);

  const int N = 1000000;

  double start_time, prev_start_time;

  prev_start_time = start_time;
  start_time = time_in_microsec();

  for (int j = 0; j < 1000; j++) {
    xmlStrlen(buffer);
  }

  prev_start_time = start_time;

  start_time = time_in_microsec();

  for (int j = 0; j < 1000; j++) {
    glibc_xmlStrlen(buffer);
  }

  fprintf(stderr, "xmlStrlen (entire HTML file): %f μs\n", elapsed_ms_since(prev_start_time));
  fprintf(stderr, "glibc_xmlStrlen (entire HTML file): %f μs\n", elapsed_ms_since(start_time));
  fprintf(stderr, "delta (xmlStrlen ÷ glibc_xmlStrlen): %f times\n\n", elapsed_ms_since(prev_start_time) / elapsed_ms_since(start_time));

  xmlChar *str = ".//*[contains(concat(' ', normalize-space(@class), ' '), ' p7n7Ze ') and not(.//a)]";

  prev_start_time = start_time;
  start_time = time_in_microsec();

  for (int j = 0; j < N; j++) {
    xmlStrlen(str);
  }

  prev_start_time = start_time;

  start_time = time_in_microsec();

  for (int j = 0; j < N; j++) {
    glibc_xmlStrlen(str);
  }

  fprintf(stderr, "xmlStrlen (average string): %f μs\n", elapsed_ms_since(prev_start_time));
  fprintf(stderr, "glibc_xmlStrlen (average string): %f μs\n", elapsed_ms_since(start_time));
  fprintf(stderr, "delta (xmlStrlen ÷ glibc_xmlStrlen): %f times\n\n", elapsed_ms_since(prev_start_time) / elapsed_ms_since(start_time));

  str = ".//*[contains(concat(' ', normalize-space(@class), ' '), ' na4ICd ') and not(contains(concat(' ', normalize-space(@class), ' '), ' rOwove ')) and not(contains(concat(' ', normalize-space(@class), ' '), ' TxCHDf '))][position() = 1] | .//*[contains(concat(' ', normalize-space(@class), ' '), ' hBUZL ') and not(contains(concat(' ', normalize-space(@class), ' '), ' Rv2Cae '))][position() = 2] | .//*[contains(concat(' ', normalize-space(@class), ' '), ' hBUZL ') and not(contains(concat(' ', normalize-space(@class), ' '), ' Rv2Cae ')) and not(contains(concat(' ', normalize-space(@class), ' '), ' Fxxvzc ')) and not(.//span) and not(.//div)] | .//*[contains(concat(' ', normalize-space(@class), ' '), ' dWRflb ')] | .//*[contains(concat(' ', normalize-space(@class), ' '), ' p7n7Ze ') and not(.//a)]";

  prev_start_time = start_time;
  start_time = time_in_microsec();

  for (int j = 0; j < N; j++) {
    xmlStrlen(str);
  }

  prev_start_time = start_time;
  start_time = time_in_microsec();

  for (int j = 0; j < N; j++) {
    glibc_xmlStrlen(str);
  }

  fprintf(stderr, "xmlStrlen (bigger string): %f μs\n", elapsed_ms_since(prev_start_time));
  fprintf(stderr, "glibc_xmlStrlen (bigger string): %f μs\n", elapsed_ms_since(start_time));
  fprintf(stderr, "delta (xmlStrlen ÷ glibc_xmlStrlen): %f times\n\n", elapsed_ms_since(prev_start_time) / elapsed_ms_since(start_time));

  str = ".//*[not(.//a)]";

  prev_start_time = start_time;
  start_time = time_in_microsec();

  for (int j = 0; j < N; j++) {
    xmlStrlen(str);
  }

  prev_start_time = start_time;
  start_time = time_in_microsec();

  for (int j = 0; j < N; j++) {
    glibc_xmlStrlen(str);
  }

  fprintf(stderr, "xmlStrlen (smallest string): %f μs\n", elapsed_ms_since(prev_start_time));
  fprintf(stderr, "glibc_xmlStrlen (smallest string): %f μs\n", elapsed_ms_since(start_time));
  fprintf(stderr, "delta (xmlStrlen ÷ glibc_xmlStrlen): %f times\n\n", elapsed_ms_since(prev_start_time) / elapsed_ms_since(start_time));
}

Makefile

CFLAGS=$(shell xml2-config --cflags)
LDFLAGS=$(shell xml2-config --libs)

slow_parsing_benchmark: slow_parsing_benchmark.o
	$(CC) -O2 -o slow_parsing_benchmark slow_parsing_benchmark.o $(LDFLAGS)

slow_parsing_benchmark.o: slow_parsing_benchmark.c
	$(CC) $(CFLAGS) -c -o slow_parsing_benchmark.o slow_parsing_benchmark.c

clean:
	rm -f slow_parsing_benchmark.o slow_parsing_benchmark slow_parsing_benchmark_v2

The naive approach to simply reuse strlen in xmlStrlen speeds up our document parsing and searching from 1.4 seconds to about 700 ms.

diff --git a/xmlstring.c b/xmlstring.c
index e8a1e45d..df247dff 100644
--- a/xmlstring.c
+++ b/xmlstring.c
@@ -423,14 +423,9 @@ xmlStrsub(const xmlChar *str, int start, int len) {
 
 int
 xmlStrlen(const xmlChar *str) {
-    int len = 0;
-
     if (str == NULL) return(0);
-    while (*str != 0) { /* non input consuming */
-        str++;
-        len++;
-    }
-    return(len);
+
+    return strlen((const char*)str);
 }
 
 /**

I'd be happy to open a PR. What do you think about speeding up xmlStrlen?

Edited Dec 21, 2020 by Ilya Zub