Commit ee7c6689 authored by Carlos Garnacho

tracker-extract: store as many words as the FTS config says.

parent 51a2d15a
......@@ -253,6 +253,8 @@ tracker_extract_SOURCES = \
tracker-dbus.h \
tracker-extract.c \
tracker-extract.h \
tracker-fts-config.c \
tracker-fts.config.h \
tracker-main.c \
tracker-main.h
......
......@@ -250,6 +250,8 @@ extract_msoffice (const gchar *uri,
GsfInput *stream;
gchar *filename, *content;
gboolean rdf_type_added = FALSE;
TrackerFTSConfig *fts_config;
guint n_words;
gsf_init ();
......@@ -327,7 +329,9 @@ extract_msoffice (const gchar *uri,
g_object_unref (stream);
}
content = extract_content (uri, 1000);
fts_config = tracker_main_get_fts_config ();
n_words = tracker_fts_config_get_max_words_to_index (fts_config);
content = extract_content (uri, n_words);
if (content) {
tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
......
......@@ -111,6 +111,8 @@ extract_oasis (const gchar *uri,
gchar *xml;
gchar *filename = g_filename_from_uri (uri, NULL, NULL);
gchar *content;
TrackerFTSConfig *fts_config;
guint n_words;
ODTParseInfo info = {
metadata,
-1,
......@@ -144,7 +146,9 @@ extract_oasis (const gchar *uri,
g_free (xml);
}
content = extract_content (filename, 1000);
fts_config = tracker_main_get_fts_config ();
n_words = tracker_fts_config_get_max_words_to_index (fts_config);
content = extract_content (filename, n_words);
if (content) {
tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
......
......@@ -134,6 +134,8 @@ extract_pdf (const gchar *uri,
gchar *metadata_xml = NULL;
GTime creation_date;
GError *error = NULL;
TrackerFTSConfig *fts_config;
guint n_words;
g_type_init ();
......@@ -347,8 +349,9 @@ extract_pdf (const gchar *uri,
}
}
/* FIXME: Fixed word limit at the moment */
content = extract_content (document, 1000);
fts_config = tracker_main_get_fts_config ();
n_words = tracker_fts_config_get_max_words_to_index (fts_config);
content = extract_content (document, n_words);
if (content) {
tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
......
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
* Copyright (C) 2009, Nokia (urho.konttori@nokia.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "config.h"
#include <string.h>
#include <stdlib.h>
#include <glib.h>
#include <gio/gio.h>
#include <libtracker-common/tracker-keyfile-object.h>
#include "tracker-fts-config.h"
/* Fetches the TrackerFTSConfigPrivate data attached to a TrackerFTSConfig instance */
#define TRACKER_FTS_CONFIG_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE ((obj), TRACKER_TYPE_FTS_CONFIG, TrackerFTSConfigPrivate))
/* GKeyFile defines: the single key file group every setting in this file lives under */
#define GROUP_INDEXING "Indexing"
/* Default values: used as the property defaults and as the fallback return
 * value of the getters when they are handed an invalid object.
 */
#define DEFAULT_MIN_WORD_LENGTH 3 /* 0->30 */
#define DEFAULT_MAX_WORD_LENGTH 30 /* 0->200 */
#define DEFAULT_MAX_WORDS_TO_INDEX 10000 /* 0->G_MAXINT */
/* Per-instance private storage; one field backs each GObject property
 * installed in tracker_fts_config_class_init().
 */
typedef struct {
/* Indexing */
gint min_word_length; /* backs "min-word-length" */
gint max_word_length; /* backs "max-word-length" */
gint max_words_to_index; /* backs "max-words-to-index" */
} TrackerFTSConfigPrivate;
/* One row of the table that ties a GObject property to its key file entry */
typedef struct {
GType type; /* value type; selects the load/save helper (only G_TYPE_INT is handled in this file) */
const gchar *property; /* GObject property name */
const gchar *group; /* key file group the value is stored in */
const gchar *key; /* key name inside that group */
} ObjectToKeyFile;
/* Forward declarations of the file-local helpers, needed because
 * tracker_fts_config_class_init() references them before they are defined.
 */
/* GObject property setter vfunc */
static void config_set_property (GObject *object,
guint param_id,
const GValue *value,
GParamSpec *pspec);
/* GObject property getter vfunc */
static void config_get_property (GObject *object,
guint param_id,
GValue *value,
GParamSpec *pspec);
/* GObject finalize vfunc */
static void config_finalize (GObject *object);
/* GObject constructed vfunc; triggers config_load() */
static void config_constructed (GObject *object);
/* Writes default values (and their blurbs as comments) into a GKeyFile */
static void config_create_with_defaults (TrackerFTSConfig *config,
GKeyFile *key_file,
gboolean overwrite);
/* Reads the key file into the object's properties */
static void config_load (TrackerFTSConfig *config);
/* GObject property IDs. PROP_0 is never installed as a property; the
 * remaining IDs are the ones dispatched on in config_set_property() and
 * config_get_property().
 */
enum {
PROP_0,
/* Indexing */
PROP_MIN_WORD_LENGTH,
PROP_MAX_WORD_LENGTH,
/* Cap on indexed words; persisted under GROUP_INDEXING like the two above */
PROP_MAX_WORDS_TO_INDEX,
};
/* Property <-> key file mapping, walked by config_create_with_defaults(),
 * config_load() and config_save(). The table is only ever read, so it is
 * declared const to keep it immutable.
 */
static const ObjectToKeyFile conversions[] = {
	{ G_TYPE_INT, "min-word-length",    GROUP_INDEXING, "MinWordLength"   },
	{ G_TYPE_INT, "max-word-length",    GROUP_INDEXING, "MaxWordLength"   },
	{ G_TYPE_INT, "max-words-to-index", GROUP_INDEXING, "MaxWordsToIndex" },
};
G_DEFINE_TYPE (TrackerFTSConfig, tracker_fts_config, TRACKER_TYPE_CONFIG_FILE);
/* Class initializer: hooks up the GObject vfuncs, installs the three
 * integer properties and reserves room for the private instance data.
 */
static void
tracker_fts_config_class_init (TrackerFTSConfigClass *klass)
{
	GObjectClass *gobject_class;
	GParamSpec *spec;

	gobject_class = G_OBJECT_CLASS (klass);

	gobject_class->set_property = config_set_property;
	gobject_class->get_property = config_get_property;
	gobject_class->finalize = config_finalize;
	gobject_class->constructed = config_constructed;

	/* Indexing: shortest word that gets indexed */
	spec = g_param_spec_int ("min-word-length",
	                         "Minimum word length",
	                         " Set the minimum length of words to index (0->30, default=3)",
	                         0,
	                         30,
	                         DEFAULT_MIN_WORD_LENGTH,
	                         G_PARAM_READWRITE | G_PARAM_CONSTRUCT);
	g_object_class_install_property (gobject_class, PROP_MIN_WORD_LENGTH, spec);

	/* Indexing: longest word that gets indexed */
	spec = g_param_spec_int ("max-word-length",
	                         "Maximum word length",
	                         " Set the maximum length of words to index (0->200, default=30)",
	                         0,
	                         200, /* Is this a reasonable limit? */
	                         DEFAULT_MAX_WORD_LENGTH,
	                         G_PARAM_READWRITE | G_PARAM_CONSTRUCT);
	g_object_class_install_property (gobject_class, PROP_MAX_WORD_LENGTH, spec);

	/* Indexing: upper bound on words taken from one file */
	spec = g_param_spec_int ("max-words-to-index",
	                         "Maximum words to index",
	                         " Maximum unique words to index from a file's content (default=10000)",
	                         0,
	                         G_MAXINT,
	                         DEFAULT_MAX_WORDS_TO_INDEX,
	                         G_PARAM_READWRITE | G_PARAM_CONSTRUCT);
	g_object_class_install_property (gobject_class, PROP_MAX_WORDS_TO_INDEX, spec);

	g_type_class_add_private (gobject_class, sizeof (TrackerFTSConfigPrivate));
}
/* Instance initializer. Intentionally empty: nothing is set up here; the
 * values are read from the key file by config_load(), which is called
 * from config_constructed().
 */
static void
tracker_fts_config_init (TrackerFTSConfig *object)
{
}
/* GObject set_property vfunc: forwards each known property ID to its
 * public setter, which performs validation and emits the notification.
 * Unknown IDs are reported through G_OBJECT_WARN_INVALID_PROPERTY_ID.
 */
static void
config_set_property (GObject      *object,
                     guint         param_id,
                     const GValue *value,
                     GParamSpec   *pspec)
{
	TrackerFTSConfig *config = TRACKER_FTS_CONFIG (object);

	switch (param_id) {
	/* Indexing */
	case PROP_MIN_WORD_LENGTH:
		tracker_fts_config_set_min_word_length (config, g_value_get_int (value));
		break;

	case PROP_MAX_WORD_LENGTH:
		tracker_fts_config_set_max_word_length (config, g_value_get_int (value));
		break;

	case PROP_MAX_WORDS_TO_INDEX:
		tracker_fts_config_set_max_words_to_index (config, g_value_get_int (value));
		break;

	default:
		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
		break;
	}
}
/* GObject get_property vfunc: copies the requested field out of the
 * private data into @value. Unknown IDs are reported through
 * G_OBJECT_WARN_INVALID_PROPERTY_ID.
 */
static void
config_get_property (GObject    *object,
                     guint       param_id,
                     GValue     *value,
                     GParamSpec *pspec)
{
	const TrackerFTSConfigPrivate *data = TRACKER_FTS_CONFIG_GET_PRIVATE (object);

	switch (param_id) {
	/* Indexing */
	case PROP_MIN_WORD_LENGTH:
		g_value_set_int (value, data->min_word_length);
		break;

	case PROP_MAX_WORD_LENGTH:
		g_value_set_int (value, data->max_word_length);
		break;

	case PROP_MAX_WORDS_TO_INDEX:
		g_value_set_int (value, data->max_words_to_index);
		break;

	default:
		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
		break;
	}
}
/* GObject finalize vfunc. This class owns nothing that needs releasing;
 * the override is kept as a hook for later and only chains up.
 */
static void
config_finalize (GObject *object)
{
	GObjectClass *parent = G_OBJECT_CLASS (tracker_fts_config_parent_class);

	parent->finalize (object);
}
/* GObject constructed vfunc: lets the parent class finish its own
 * construction first, then pulls the settings in from the key file.
 */
static void
config_constructed (GObject *object)
{
	GObjectClass *parent = G_OBJECT_CLASS (tracker_fts_config_parent_class);

	parent->constructed (object);

	config_load (TRACKER_FTS_CONFIG (object));
}
/* Writes the default value of every mapped property into @key_file and
 * attaches the property's blurb as the key's comment.
 *
 * @config:    object whose property defaults and blurbs are looked up
 * @key_file:  key file to fill in
 * @overwrite: when FALSE, keys already present in @key_file are left
 *             untouched; when TRUE every key is reset to its default
 */
static void
config_create_with_defaults (TrackerFTSConfig *config,
                             GKeyFile         *key_file,
                             gboolean          overwrite)
{
	guint idx;

	g_message ("Loading defaults into GKeyFile...");

	for (idx = 0; idx < G_N_ELEMENTS (conversions); idx++) {
		const ObjectToKeyFile *entry = &conversions[idx];
		gboolean already_present;

		already_present = g_key_file_has_key (key_file,
		                                      entry->group,
		                                      entry->key,
		                                      NULL);

		/* Existing user values win unless a reset was requested */
		if (already_present && !overwrite) {
			continue;
		}

		switch (entry->type) {
		case G_TYPE_INT: {
			gint fallback;

			fallback = tracker_keyfile_object_default_int (config,
			                                               entry->property);
			g_key_file_set_integer (key_file,
			                        entry->group,
			                        entry->key,
			                        fallback);
			break;
		}

		default:
			g_assert_not_reached ();
			break;
		}

		g_key_file_set_comment (key_file,
		                        entry->group,
		                        entry->key,
		                        tracker_keyfile_object_blurb (config,
		                                                      entry->property),
		                        NULL);
	}
}
/* Populates the object's properties from its key file.
 *
 * Keys missing from the key file are first filled in with their defaults
 * (existing keys are kept), and the file is saved if it did not exist yet.
 * Each mapped key is then loaded into the matching property.
 *
 * @config: the configuration object to load
 */
static void
config_load (TrackerFTSConfig *config)
{
	TrackerConfigFile *file;
	guint i; /* unsigned to match G_N_ELEMENTS() */

	file = TRACKER_CONFIG_FILE (config);

	/* overwrite == FALSE: only add the keys that are not there yet */
	config_create_with_defaults (config, file->key_file, FALSE);

	if (!file->file_exists) {
		tracker_config_file_save (file);
	}

	for (i = 0; i < G_N_ELEMENTS (conversions); i++) {
		/* No per-key presence check is needed here: the result was
		 * never used, and the defaults were written just above.
		 */
		switch (conversions[i].type) {
		case G_TYPE_INT:
			tracker_keyfile_object_load_int (G_OBJECT (file),
			                                 conversions[i].property,
			                                 file->key_file,
			                                 conversions[i].group,
			                                 conversions[i].key);
			break;

		default:
			g_assert_not_reached ();
			break;
		}
	}
}
/* Copies every mapped property into the key file and writes it out.
 *
 * Returns FALSE (after logging a critical) when no key file is attached,
 * otherwise the result of tracker_config_file_save().
 */
static gboolean
config_save (TrackerFTSConfig *config)
{
	TrackerConfigFile *file = TRACKER_CONFIG_FILE (config);
	guint idx;

	if (file->key_file == NULL) {
		g_critical ("Could not save config, GKeyFile was NULL, has the config been loaded?");
		return FALSE;
	}

	g_message ("Setting details to GKeyFile object...");

	for (idx = 0; idx < G_N_ELEMENTS (conversions); idx++) {
		const ObjectToKeyFile *entry = &conversions[idx];

		switch (entry->type) {
		case G_TYPE_INT:
			tracker_keyfile_object_save_int (file,
			                                 entry->property,
			                                 file->key_file,
			                                 entry->group,
			                                 entry->key);
			break;

		default:
			g_assert_not_reached ();
			break;
		}
	}

	return tracker_config_file_save (file);
}
/* Creates a new TrackerFTSConfig with its "domain" property set to
 * "tracker-fts". Loading from the key file happens during construction
 * (see config_constructed()).
 */
TrackerFTSConfig *
tracker_fts_config_new (void)
{
	TrackerFTSConfig *config;

	config = g_object_new (TRACKER_TYPE_FTS_CONFIG,
	                       "domain", "tracker-fts",
	                       NULL);

	return config;
}
/* Public entry point for persisting the configuration.
 *
 * Returns FALSE when @config is not a TrackerFTSConfig, otherwise
 * whatever config_save() reports.
 */
gboolean
tracker_fts_config_save (TrackerFTSConfig *config)
{
	gboolean saved;

	g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), FALSE);

	saved = config_save (config);

	return saved;
}
/* Returns the configured minimum word length, or
 * DEFAULT_MIN_WORD_LENGTH when @config is not a TrackerFTSConfig.
 */
gint
tracker_fts_config_get_min_word_length (TrackerFTSConfig *config)
{
	g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), DEFAULT_MIN_WORD_LENGTH);

	return TRACKER_FTS_CONFIG_GET_PRIVATE (config)->min_word_length;
}
/* Returns the configured maximum word length, or
 * DEFAULT_MAX_WORD_LENGTH when @config is not a TrackerFTSConfig.
 */
gint
tracker_fts_config_get_max_word_length (TrackerFTSConfig *config)
{
	g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), DEFAULT_MAX_WORD_LENGTH);

	return TRACKER_FTS_CONFIG_GET_PRIVATE (config)->max_word_length;
}
/* Returns the configured cap on words to index, or
 * DEFAULT_MAX_WORDS_TO_INDEX when @config is not a TrackerFTSConfig.
 */
gint
tracker_fts_config_get_max_words_to_index (TrackerFTSConfig *config)
{
	g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), DEFAULT_MAX_WORDS_TO_INDEX);

	return TRACKER_FTS_CONFIG_GET_PRIVATE (config)->max_words_to_index;
}
/* Stores @value as the minimum word length and emits a notification for
 * "min-word-length". Nothing is changed or notified when @config is not a
 * TrackerFTSConfig or when @value fails property validation.
 */
void
tracker_fts_config_set_min_word_length (TrackerFTSConfig *config,
                                        gint              value)
{
	g_return_if_fail (TRACKER_IS_FTS_CONFIG (config));

	if (tracker_keyfile_object_validate_int (config, "min-word-length", value)) {
		TrackerFTSConfigPrivate *data = TRACKER_FTS_CONFIG_GET_PRIVATE (config);

		data->min_word_length = value;
		g_object_notify (G_OBJECT (config), "min-word-length");
	}
}
/* Stores @value as the maximum word length and emits a notification for
 * "max-word-length". Nothing is changed or notified when @config is not a
 * TrackerFTSConfig or when @value fails property validation.
 */
void
tracker_fts_config_set_max_word_length (TrackerFTSConfig *config,
                                        gint              value)
{
	g_return_if_fail (TRACKER_IS_FTS_CONFIG (config));

	if (tracker_keyfile_object_validate_int (config, "max-word-length", value)) {
		TrackerFTSConfigPrivate *data = TRACKER_FTS_CONFIG_GET_PRIVATE (config);

		data->max_word_length = value;
		g_object_notify (G_OBJECT (config), "max-word-length");
	}
}
/* Stores @value as the cap on words to index and emits a notification for
 * "max-words-to-index". Nothing is changed or notified when @config is not
 * a TrackerFTSConfig or when @value fails property validation.
 */
void
tracker_fts_config_set_max_words_to_index (TrackerFTSConfig *config,
                                           gint              value)
{
	g_return_if_fail (TRACKER_IS_FTS_CONFIG (config));

	if (tracker_keyfile_object_validate_int (config, "max-words-to-index", value)) {
		TrackerFTSConfigPrivate *data = TRACKER_FTS_CONFIG_GET_PRIVATE (config);

		data->max_words_to_index = value;
		g_object_notify (G_OBJECT (config), "max-words-to-index");
	}
}
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
* Copyright (C) 2009, Nokia (urho.konttori@nokia.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __TRACKER_FTS_CONFIG_H__
#define __TRACKER_FTS_CONFIG_H__
#include <glib-object.h>
#include <libtracker-common/tracker-config-file.h>
G_BEGIN_DECLS
/* GObject type macros for TrackerFTSConfig: the type ID, the instance and
 * class casts, the instance and class type checks, and the class accessor.
 */
#define TRACKER_TYPE_FTS_CONFIG (tracker_fts_config_get_type ())
#define TRACKER_FTS_CONFIG(o) (G_TYPE_CHECK_INSTANCE_CAST ((o), TRACKER_TYPE_FTS_CONFIG, TrackerFTSConfig))
#define TRACKER_FTS_CONFIG_CLASS(k) (G_TYPE_CHECK_CLASS_CAST ((k), TRACKER_TYPE_FTS_CONFIG, TrackerFTSConfigClass))
#define TRACKER_IS_FTS_CONFIG(o) (G_TYPE_CHECK_INSTANCE_TYPE ((o), TRACKER_TYPE_FTS_CONFIG))
#define TRACKER_IS_FTS_CONFIG_CLASS(k) (G_TYPE_CHECK_CLASS_TYPE ((k), TRACKER_TYPE_FTS_CONFIG))
#define TRACKER_FTS_CONFIG_GET_CLASS(o) (G_TYPE_INSTANCE_GET_CLASS ((o), TRACKER_TYPE_FTS_CONFIG, TrackerFTSConfigClass))
typedef struct TrackerFTSConfig TrackerFTSConfig;
typedef struct TrackerFTSConfigClass TrackerFTSConfigClass;
/* Instance structure: adds no public fields to its TrackerConfigFile parent */
struct TrackerFTSConfig {
TrackerConfigFile parent;
};
/* Class structure: adds no virtual methods to TrackerConfigFileClass */
struct TrackerFTSConfigClass {
TrackerConfigFileClass parent_class;
};
/* Returns the GType registered for TrackerFTSConfig */
GType tracker_fts_config_get_type (void) G_GNUC_CONST;
/* Creates a configuration object for the "tracker-fts" domain */
TrackerFTSConfig *tracker_fts_config_new (void);
/* Writes the current property values to the key file; FALSE if config is
 * invalid or no key file is attached
 */
gboolean tracker_fts_config_save (TrackerFTSConfig *config);
/* Getters: return the stored value, or the compiled-in default when
 * config is not a TrackerFTSConfig
 */
gint tracker_fts_config_get_min_word_length (TrackerFTSConfig *config);
gint tracker_fts_config_get_max_word_length (TrackerFTSConfig *config);
gint tracker_fts_config_get_max_words_to_index (TrackerFTSConfig *config);
/* Setters: store value and notify the matching property; a value that
 * fails validation is ignored
 */
void tracker_fts_config_set_min_word_length (TrackerFTSConfig *config,
gint value);
void tracker_fts_config_set_max_word_length (TrackerFTSConfig *config,
gint value);
void tracker_fts_config_set_max_words_to_index (TrackerFTSConfig *config,
gint value);
G_END_DECLS
#endif /* __TRACKER_FTS_CONFIG_H__ */
......@@ -74,6 +74,7 @@ static gboolean disable_shutdown;
static gint verbosity = -1;
static gchar *filename;
static gchar *mime_type;
static TrackerFTSConfig *fts_config;
static GOptionEntry entries[] = {
{ "version", 'V', 0,
......@@ -254,6 +255,16 @@ log_handler (const gchar *domain,
}
}
TrackerFTSConfig *
tracker_main_get_fts_config (void)
{
if (G_UNLIKELY (!fts_config)) {
fts_config = tracker_fts_config_new ();
}
return fts_config;
}
int
main (int argc, char *argv[])
{
......
......@@ -26,6 +26,7 @@
#include <libtracker-common/tracker-storage.h>
#include <libtracker-common/tracker-statement-list.h>
#include "tracker-fts-config.h"
G_BEGIN_DECLS
......@@ -48,6 +49,8 @@ TrackerExtractData *tracker_get_extract_data (void);
*/
void tracker_main_quit_timeout_reset (void);
TrackerFTSConfig *tracker_main_get_fts_config (void);
G_END_DECLS
#endif /* __TRACKER_MAIN_H__ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment