Commit 24f6a071 authored by Daniel Veillard's avatar Daniel Veillard
Browse files

Some preliminary stats for queries extraction, Daniel

parent d99224d7
......@@ -7,7 +7,7 @@
extension-element-prefixes="exsl"
exclude-result-prefixes="exsl">
<!-- Import the resto of the site stylesheets -->
<!-- Import the rest of the site stylesheets -->
<xsl:import href="site.xsl"/>
<!-- Generate XHTML-1.0 transitional -->
......
......@@ -99,6 +99,12 @@ TABLES={
Count int(11) NOT NULL,
UNIQUE KEY id (ID,Value(35)),
INDEX (ID))""",
"AllQueries" : """CREATE TABLE AllQueries (
ID int(11) NOT NULL auto_increment,
Value varchar(50) NOT NULL,
Count int(11) NOT NULL,
UNIQUE KEY id (ID,Value(35)),
INDEX (ID))""",
}
#
......@@ -132,14 +138,15 @@ def createTable(db, name):
return -1
return ret
def checkTables(db):
def checkTables(db, verbose = 1):
global TABLES
if db == None:
return -1
c = db.cursor()
nbtables = c.execute("show tables")
print "Found %d tables" % (nbtables)
if verbose:
print "Found %d tables" % (nbtables)
tables = {}
i = 0
while i < nbtables:
......@@ -155,7 +162,8 @@ def checkTables(db):
try:
ret = c.execute("SELECT count(*) from %s" % table);
row = c.fetchone()
print "Table %s contains %d records" % (table, row[0])
if verbose:
print "Table %s contains %d records" % (table, row[0])
except:
print "Troubles with table %s : repairing" % (table)
ret = c.execute("repair table %s" % table);
......@@ -163,7 +171,8 @@ def checkTables(db):
ret = c.execute("SELECT count(*) from %s" % table);
row = c.fetchone()
print "Table %s contains %d records" % (table, row[0])
print "checkTables finished"
if verbose:
print "checkTables finished"
# make sure apache can access the tables read-only
try:
......@@ -173,7 +182,7 @@ def checkTables(db):
pass
return 0
def openMySQL(db="xmlsoft", passwd=None):
def openMySQL(db="xmlsoft", passwd=None, verbose = 1):
global DB
if passwd == None:
......@@ -186,7 +195,7 @@ def openMySQL(db="xmlsoft", passwd=None):
DB = MySQLdb.connect(passwd=passwd, db=db)
if DB == None:
return -1
ret = checkTables(DB)
ret = checkTables(DB, verbose)
return ret
def updateWord(name, symbol, relevance):
......@@ -1121,13 +1130,6 @@ def scanXMLDateArchive(t = None, force = 0):
# Main code: open the DB, the API XML and analyze it #
# #
#########################################################################
try:
openMySQL()
except:
print "Failed to open the database"
print sys.exc_type, sys.exc_value
sys.exit(1)
def analyzeArchives(t = None, force = 0):
global wordsDictArchive
......@@ -1201,6 +1203,13 @@ def usage():
sys.exit(1)
def main():
try:
openMySQL()
except:
print "Failed to open the database"
print sys.exc_type, sys.exc_value
sys.exit(1)
args = sys.argv[1:]
force = 0
if args:
......
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" /><link rel="SHORTCUT ICON" href="/favicon.ico" /><style type="text/css">
TD {font-family: Verdana,Arial,Helvetica}
BODY {font-family: Verdana,Arial,Helvetica; margin-top: 2em; margin-left: 0em; margin-right: 0em}
H1 {font-family: Verdana,Arial,Helvetica}
H2 {font-family: Verdana,Arial,Helvetica}
H3 {font-family: Verdana,Arial,Helvetica}
A:link, A:visited, A:active { text-decoration: underline }
</style><title>Search statistics for 20040408</title></head><body bgcolor="#8b7765" text="#000000" link="#000000" vlink="#000000"><table border="0" width="100%" cellpadding="5" cellspacing="0" align="center"><tr><td width="120"><a href="http://swpat.ffii.org/"><img src="epatents.png" alt="Action against software patents" /></a></td><td width="180"><a href="http://www.gnome.org/"><img src="gnome2.png" alt="Gnome2 Logo" /></a><a href="http://www.w3.org/Status"><img src="w3c.png" alt="W3C Logo" /></a><a href="http://www.redhat.com/"><img src="redhat.gif" alt="Red Hat Logo" /></a><div align="left"><a href="http://xmlsoft.org/"><img src="Libxml2-Logo-180x168.gif" alt="Made with Libxml2 Logo" /></a></div></td><td><table border="0" width="90%" cellpadding="2" cellspacing="0" align="center" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3" bgcolor="#fffacd"><tr><td align="center"><h1></h1><h2>Search statistics for 20040408</h2></td></tr></table></td></tr></table></td></tr></table><table border="0" cellpadding="4" cellspacing="0" width="100%" align="center"><tr><td bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="2" width="100%"><tr><td valign="top" width="200" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3"><tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Main Menu</b></center></td></tr><tr><td bgcolor="#fffacd"><form action="search.php" enctype="application/x-www-form-urlencoded" method="get"><input name="query" type="text" size="20" value="" /><input name="submit" type="submit" value="Search ..." 
/></form><ul><li><a href="index.html">Home</a></li><li><a href="examples/index.html" style="font-weight:bold">Code Examples</a></li><li><a href="html/index.html" style="font-weight:bold">API Menu</a></li><li><a href="guidelines.html">XML Guidelines</a></li></ul></td></tr></table><table width="100%" border="0" cellspacing="1" cellpadding="3"><tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Related links</b></center></td></tr><tr><td bgcolor="#fffacd"><ul><li><a href="http://mail.gnome.org/archives/xml/">Mail archive</a></li><li><a href="http://xmlsoft.org/XSLT/">XSLT libxslt</a></li><li><a href="http://phd.cs.unibo.it/gdome2/">DOM gdome2</a></li><li><a href="http://www.aleksey.com/xmlsec/">XML-DSig xmlsec</a></li><li><a href="ftp://xmlsoft.org/">FTP</a></li><li><a href="http://www.zlatkovic.com/projects/libxml/">Windows binaries</a></li><li><a href="http://garypennington.net/libxml2/">Solaris binaries</a></li><li><a href="http://www.zveno.com/open_source/libxml2xslt.html">MacOsX binaries</a></li><li><a href="http://sourceforge.net/projects/libxml2-pas/">Pascal bindings</a></li><li><a href="http://bugzilla.gnome.org/buglist.cgi?product=libxml2">Bug Tracker</a></li></ul></td></tr></table></td></tr></table></td><td valign="top" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%"><tr><td><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table border="0" cellpadding="3" cellspacing="1" width="100%"><tr><td bgcolor="#fffacd"><h2> weekly statistics: </h2><p>416435 total words,
9875 uniq words.</p><p> Top 50 queries:</p><p><br /><a href="search.php?query=libxml2">libxml2</a> 11812 times.
<br /><a href="search.php?query=libxml">libxml</a> 10170 times.
<br /><a href="search.php?query=xpath">xpath</a> 6172 times.
<br /><a href="search.php?query=schema">schema</a> 5798 times.
<br /><a href="search.php?query=xmllint">xmllint</a> 5472 times.
<br /><a href="search.php?query=XML">XML</a> 5435 times.
<br /><a href="search.php?query=xmlParseFile">xmlParseFile</a> 4219 times.
<br /><a href="search.php?query=php">php</a> 3935 times.
<br /><a href="search.php?query=DTD">DTD</a> 3270 times.
<br /><a href="search.php?query=encoding">encoding</a> 3101 times.
<br /><a href="search.php?query=xmlGetProp">xmlGetProp</a> 3084 times.
<br /><a href="search.php?query=xsltproc">xsltproc</a> 3074 times.
<br /><a href="search.php?query=download">download</a> 2971 times.
<br /><a href="search.php?query=xmlNodeListGetString">xmlNodeListGetString</a> 2917 times.
<br /><a href="search.php?query=python">python</a> 2789 times.
<br /><a href="search.php?query=SAX">SAX</a> 2621 times.
<br /><a href="search.php?query=xmlParseMemory">xmlParseMemory</a> 2472 times.
<br /><a href="search.php?query=perl">perl</a> 2385 times.
<br /><a href="search.php?query=iconv">iconv</a> 2318 times.
<br /><a href="search.php?query=error">error</a> 2298 times.
<br /><a href="search.php?query=html">html</a> 2255 times.
<br /><a href="search.php?query=xmlChar">xmlChar</a> 2136 times.
<br /><a href="search.php?query=libxslt">libxslt</a> 2055 times.
<br /><a href="search.php?query=c++">c++</a> 2020 times.
<br /><a href="search.php?query=xmlNodePtr">xmlNodePtr</a> 1928 times.
<br /><a href="search.php?query=windows">windows</a> 1918 times.
<br /><a href="search.php?query=to">to</a> 1891 times.
<br /><a href="search.php?query=node">node</a> 1860 times.
<br /><a href="search.php?query=xmlFree">xmlFree</a> 1854 times.
<br /><a href="search.php?query=example">example</a> 1784 times.
<br /><a href="search.php?query=install">install</a> 1763 times.
<br /><a href="search.php?query=parser">parser</a> 1715 times.
<br /><a href="search.php?query=xmlNewDoc">xmlNewDoc</a> 1695 times.
<br /><a href="search.php?query=namespace">namespace</a> 1693 times.
<br /><a href="search.php?query=xmlStrcmp">xmlStrcmp</a> 1564 times.
<br /><a href="search.php?query=xmlnode">xmlnode</a> 1558 times.
<br /><a href="search.php?query=parse">parse</a> 1517 times.
<br /><a href="search.php?query=memory">memory</a> 1484 times.
<br /><a href="search.php?query=dom">dom</a> 1457 times.
<br /><a href="search.php?query=XInclude">XInclude</a> 1444 times.
<br /><a href="search.php?query=entity">entity</a> 1423 times.
<br /><a href="search.php?query=xmlSaveFormatFile">xmlSaveFormatFile</a> 1390 times.
<br /><a href="search.php?query=xslt">xslt</a> 1361 times.
<br /><a href="search.php?query=attribute">attribute</a> 1360 times.
<br /><a href="search.php?query=xmlDocPtr">xmlDocPtr</a> 1350 times.
<br /><a href="search.php?query=xsd">xsd</a> 1319 times.
<br /><a href="search.php?query=xmlDocGetRootElement">xmlDocGetRootElement</a> 1285 times.
<br /><a href="search.php?query=validate">validate</a> 1270 times.
<br /><a href="search.php?query=validation">validation</a> 1234 times.
<br /><a href="search.php?query=tutorial">tutorial</a> 1140 times.
</p><p><a href="bugs.html">Daniel Veillard</a></p></td></tr></table></td></tr></table></td></tr></table></td></tr></table></td></tr></table></body></html>
<?xml version="1.0"?>
<!-- this stylesheet builds searches.html, the search statistics page,
from an XML document whose root element is <queries>
-->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:exsl="http://exslt.org/common"
extension-element-prefixes="exsl"
exclude-result-prefixes="exsl">
<!-- Import the rest of the site stylesheets -->
<xsl:import href="site.xsl"/>
<!-- Generate XHTML-1.0 transitional -->
<xsl:output method="xml" encoding="ISO-8859-1" indent="yes"
doctype-public="-//W3C//DTD XHTML 1.0 Transitional//EN"
doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"/>
<xsl:variable name="href_base" select="''"/>
<!--
Named template emitting the summary header of the statistics page.
It reads the total, uniq and nr attributes of the current node, so it
must be called with a node carrying those attributes as the context node.
-->
<xsl:template name="statistics">
<h2> weekly statistics: </h2>
<p><xsl:value-of select="@total"/> total words,
<xsl:value-of select="@uniq"/> uniq words.</p>
<p> Top <xsl:value-of select="@nr"/> queries:</p>
</xsl:template>
<!--
Render one <query> element as a line of the statistics page: a line break,
a link to search.php for the query text, then the value of @count.
The query text is percent-encoded before it is placed in the URL. In a
form-urlencoded query string a raw "+" stands for a space, so a query
such as "c++" would otherwise produce a link searching for something else.
The encoding uses the EXSLT str:encode-uri() extension function when the
processor provides it and falls back to the raw text otherwise.
The str prefix is declared on the variable only, so the namespace is not
in scope for, and is not copied onto, the generated br and a elements.
-->
<xsl:template match="query">
<xsl:variable name="encoded" xmlns:str="http://exslt.org/strings">
<xsl:choose>
<xsl:when test="function-available('str:encode-uri')">
<xsl:value-of select="str:encode-uri(string(.), true())"/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="string(.)"/>
</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<br/><a href="search.php?query={$encoded}"><xsl:value-of
select="string(.)"/></a>
<xsl:text> </xsl:text><xsl:value-of select="@count"/> times.
</xsl:template>
<!--
Build the complete statistics page for a <queries> element.
The page is written through xsl:document to searches.html, serialized as
XML with the XHTML 1.0 Transitional doctype in ISO-8859-1. The page title
is "Search statistics for" followed by the value of the date attribute.
The style, titlebox and toc named templates are not defined in this
stylesheet; presumably they come from the imported site.xsl (to confirm).
-->
<xsl:template match="queries">
<xsl:variable name="date" select="@date"/>
<xsl:variable name="title">Search statistics for <xsl:value-of select="$date"/></xsl:variable>
<!-- everything below goes to searches.html rather than the main output -->
<xsl:document href="searches.html" method="xml" encoding="ISO-8859-1"
doctype-public="-//W3C//DTD XHTML 1.0 Transitional//EN"
doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<xsl:call-template name="style"/>
<xsl:element name="title">
<xsl:value-of select="$title"/>
</xsl:element>
</head>
<body bgcolor="#8b7765" text="#000000" link="#000000" vlink="#000000">
<xsl:call-template name="titlebox">
<xsl:with-param name="title" select="$title"/>
</xsl:call-template>
<table border="0" cellpadding="4" cellspacing="0" width="100%" align="center">
<tr>
<td bgcolor="#8b7765">
<table border="0" cellspacing="0" cellpadding="2" width="100%">
<tr>
<td valign="top" width="200" bgcolor="#8b7765">
<xsl:call-template name="toc"/>
</td>
<td valign="top" bgcolor="#8b7765">
<table border="0" cellspacing="0" cellpadding="1" width="100%">
<tr>
<td>
<table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000">
<tr>
<td>
<table border="0" cellpadding="3" cellspacing="1" width="100%">
<tr>
<td bgcolor="#fffacd">
<!-- summary counters first, then one line per <query> child in document order -->
<xsl:call-template name="statistics"/>
<p>
<xsl:apply-templates select="query"/>
</p>
<p><a href="{$href_base}bugs.html">Daniel Veillard</a></p>
</td>
</tr>
</table>
</td>
</tr>
</table>
</td>
</tr>
</table>
</td>
</tr>
</table>
</td>
</tr>
</table>
</body>
</html>
</xsl:document>
</xsl:template>
<!--
Entry point: from the document root, process only the top-level
<queries> element; no other child of the root is selected.
-->
<xsl:template match="/">
<xsl:apply-templates select="child::queries"/>
</xsl:template>
</xsl:stylesheet>
<queries total='411778' uniq='9370' nr='50' date='20040408'>
<query count='11812'>libxml2</query>
<query count='10170'>libxml</query>
<query count='6172'>xpath</query>
<query count='5798'>schema</query>
<query count='5472'>xmllint</query>
<query count='5435'>XML</query>
<query count='4219'>xmlParseFile</query>
<query count='3935'>php</query>
<query count='3270'>DTD</query>
<query count='3101'>encoding</query>
<query count='3084'>xmlGetProp</query>
<query count='3074'>xsltproc</query>
<query count='2971'>download</query>
<query count='2917'>xmlNodeListGetString</query>
<query count='2789'>python</query>
<query count='2621'>SAX</query>
<query count='2472'>xmlParseMemory</query>
<query count='2385'>perl</query>
<query count='2318'>iconv</query>
<query count='2298'>error</query>
<query count='2255'>html</query>
<query count='2136'>xmlChar</query>
<query count='2055'>libxslt</query>
<query count='2020'>c++</query>
<query count='1928'>xmlNodePtr</query>
<query count='1918'>windows</query>
<query count='1891'>to</query>
<query count='1860'>node</query>
<query count='1854'>xmlFree</query>
<query count='1784'>example</query>
<query count='1763'>install</query>
<query count='1715'>parser</query>
<query count='1695'>xmlNewDoc</query>
<query count='1693'>namespace</query>
<query count='1564'>xmlStrcmp</query>
<query count='1558'>xmlnode</query>
<query count='1517'>parse</query>
<query count='1484'>memory</query>
<query count='1457'>dom</query>
<query count='1444'>XInclude</query>
<query count='1423'>entity</query>
<query count='1390'>xmlSaveFormatFile</query>
<query count='1361'>xslt</query>
<query count='1360'>attribute</query>
<query count='1350'>xmlDocPtr</query>
<query count='1319'>xsd</query>
<query count='1285'>xmlDocGetRootElement</query>
<query count='1270'>validate</query>
<query count='1234'>validation</query>
<query count='1140'>tutorial</query>
</queries>
<queries total='416435' uniq='9875' nr='50' date='20040408'>
<query count='11812'>libxml2</query>
<query count='10170'>libxml</query>
<query count='6172'>xpath</query>
<query count='5798'>schema</query>
<query count='5472'>xmllint</query>
<query count='5435'>XML</query>
<query count='4219'>xmlParseFile</query>
<query count='3935'>php</query>
<query count='3270'>DTD</query>
<query count='3101'>encoding</query>
<query count='3084'>xmlGetProp</query>
<query count='3074'>xsltproc</query>
<query count='2971'>download</query>
<query count='2917'>xmlNodeListGetString</query>
<query count='2789'>python</query>
<query count='2621'>SAX</query>
<query count='2472'>xmlParseMemory</query>
<query count='2385'>perl</query>
<query count='2318'>iconv</query>
<query count='2298'>error</query>
<query count='2255'>html</query>
<query count='2136'>xmlChar</query>
<query count='2055'>libxslt</query>
<query count='2020'>c++</query>
<query count='1928'>xmlNodePtr</query>
<query count='1918'>windows</query>
<query count='1891'>to</query>
<query count='1860'>node</query>
<query count='1854'>xmlFree</query>
<query count='1784'>example</query>
<query count='1763'>install</query>
<query count='1715'>parser</query>
<query count='1695'>xmlNewDoc</query>
<query count='1693'>namespace</query>
<query count='1564'>xmlStrcmp</query>
<query count='1558'>xmlnode</query>
<query count='1517'>parse</query>
<query count='1484'>memory</query>
<query count='1457'>dom</query>
<query count='1444'>XInclude</query>
<query count='1423'>entity</query>
<query count='1390'>xmlSaveFormatFile</query>
<query count='1361'>xslt</query>
<query count='1360'>attribute</query>
<query count='1350'>xmlDocPtr</query>
<query count='1319'>xsd</query>
<query count='1285'>xmlDocGetRootElement</query>
<query count='1270'>validate</query>
<query count='1234'>validation</query>
<query count='1140'>tutorial</query>
</queries>
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment