Commit 10ff7b93, authored and committed by Carlos Garnacho

Add option for modules to discard a directory based on its contents.

2009-02-11  Carlos Garnacho  <carlos@imendio.com>

        Add option for modules to discard a directory based on its contents.

        * src/libtracker-common/tracker-module-config.[ch]
        (tracker_module_config_get_ignored_directories_with_content): Added
        function for new module configuration option.

        * src/trackerd/tracker-crawler.c: Modified to obey the new
        configuration option, now it must retrieve all contents for a
        directory before processing them in order to know whether it's
        filtered or not.

        * data/modules/*.module: Add default values for this option.


svn path=/trunk/; revision=2920
parent 014bd4b2
2009-02-11 Carlos Garnacho <carlos@imendio.com>
Add option for modules to discard a directory based on its contents.
* src/libtracker-common/tracker-module-config.[ch]
(tracker_module_config_get_ignored_directories_with_content): Added
function for new module configuration option.
* src/trackerd/tracker-crawler.c: Modified to obey the new
configuration option, now it must retrieve all contents for a
directory before processing them in order to know whether it's
filtered or not.
* data/modules/*.module: Add default values for this option.
2009-02-11 Carlos Garnacho <carlos@imendio.com> 2009-02-11 Carlos Garnacho <carlos@imendio.com>
* src/tracker-indexer/tracker-indexer.c (item_move): And add back that * src/tracker-indexer/tracker-indexer.c (item_move): And add back that
......
...@@ -9,6 +9,7 @@ RecurseDirectories=/usr/share/applications; ...@@ -9,6 +9,7 @@ RecurseDirectories=/usr/share/applications;
[Ignored] [Ignored]
Directories= Directories=
Files= Files=
DirectoriesWithContent=
[Index] [Index]
Service=Applications Service=Applications
......
...@@ -9,6 +9,7 @@ RecurseDirectories=$HOME/.evolution/mail/local/;$HOME/.evolution/mail/imap/ ...@@ -9,6 +9,7 @@ RecurseDirectories=$HOME/.evolution/mail/local/;$HOME/.evolution/mail/imap/
[Ignored] [Ignored]
Directories= Directories=
Files= Files=
DirectoriesWithContent=
[Index] [Index]
Service=EvolutionEmails Service=EvolutionEmails
......
...@@ -9,6 +9,7 @@ RecurseDirectories= ...@@ -9,6 +9,7 @@ RecurseDirectories=
[Ignored] [Ignored]
Directories=po;CVS;.svn;.git;core-dumps; Directories=po;CVS;.svn;.git;core-dumps;
Files=*~;*.o;*.la;*.lo;*.loT;*.in;*.csproj;*.m4;*.rej;*.gmo;*.orig;*.pc;*.omf;*.aux;*.tmp;*.po;*.vmdk;*.vm*;*.nvram;*.part;*.rcore.lzo;autom4te;conftest;confstat;Makefile;SCCS;litmain.sh;libtool;config.status;confdefs.h; Files=*~;*.o;*.la;*.lo;*.loT;*.in;*.csproj;*.m4;*.rej;*.gmo;*.orig;*.pc;*.omf;*.aux;*.tmp;*.po;*.vmdk;*.vm*;*.nvram;*.part;*.rcore.lzo;autom4te;conftest;confstat;Makefile;SCCS;litmain.sh;libtool;config.status;confdefs.h;
DirectoriesWithContent=backup.metadata;
[Index] [Index]
Service=Files Service=Files
......
...@@ -9,6 +9,7 @@ RecurseDirectories=$HOME/.gaim/logs;$HOME/.purple/logs; ...@@ -9,6 +9,7 @@ RecurseDirectories=$HOME/.gaim/logs;$HOME/.purple/logs;
[Ignored] [Ignored]
Directories= Directories=
Files= Files=
DirectoriesWithContent=
[Index] [Index]
Service=GaimConversations Service=GaimConversations
......
...@@ -48,6 +48,7 @@ typedef struct { ...@@ -48,6 +48,7 @@ typedef struct {
/* Ignored */ /* Ignored */
GHashTable *ignored_directories; GHashTable *ignored_directories;
GHashTable *ignored_files; GHashTable *ignored_files;
GHashTable *ignored_directories_with_content;
GList *ignored_directory_patterns; GList *ignored_directory_patterns;
GList *ignored_file_patterns; GList *ignored_file_patterns;
...@@ -96,6 +97,7 @@ module_destroy_notify (gpointer data) ...@@ -96,6 +97,7 @@ module_destroy_notify (gpointer data)
g_hash_table_unref (mc->ignored_files); g_hash_table_unref (mc->ignored_files);
g_hash_table_unref (mc->ignored_directories); g_hash_table_unref (mc->ignored_directories);
g_hash_table_unref (mc->ignored_directories_with_content);
g_hash_table_unref (mc->monitor_recurse_directories); g_hash_table_unref (mc->monitor_recurse_directories);
g_hash_table_unref (mc->monitor_directories); g_hash_table_unref (mc->monitor_directories);
...@@ -461,6 +463,10 @@ load_file (const gchar *filename) ...@@ -461,6 +463,10 @@ load_file (const gchar *filename)
"Files", "Files",
FALSE, FALSE,
FALSE); FALSE);
mc->ignored_directories_with_content = load_string_list (key_file,
GROUP_IGNORED,
"DirectoriesWithContent",
FALSE, FALSE);
/* Index */ /* Index */
mc->index_service = load_string (key_file, mc->index_service = load_string (key_file,
...@@ -769,6 +775,19 @@ tracker_module_config_get_ignored_files (const gchar *name) ...@@ -769,6 +775,19 @@ tracker_module_config_get_ignored_files (const gchar *name)
return g_hash_table_get_keys (mc->ignored_files); return g_hash_table_get_keys (mc->ignored_files);
} }
/* Returns the "DirectoriesWithContent" entries of the [Ignored] group
 * for the module called @name, i.e. the child names whose presence
 * makes the crawler discard the directory containing them.
 *
 * The result comes from g_hash_table_get_keys(): the list itself is
 * newly allocated and must be released with g_list_free(), while the
 * strings it points to stay owned by the module configuration and
 * must not be freed or modified.
 *
 * Returns NULL (after a GLib critical warning) when @name is NULL or
 * when no module with that name is registered.
 */
GList *
tracker_module_config_get_ignored_directories_with_content (const gchar *name)
{
	ModuleConfig *mc;
	GList        *names;

	g_return_val_if_fail (name != NULL, NULL);

	/* Modules are registered by name in the file-level table */
	mc = g_hash_table_lookup (modules, name);
	g_return_val_if_fail (mc, NULL);

	names = g_hash_table_get_keys (mc->ignored_directories_with_content);

	return names;
}
const gchar * const gchar *
tracker_module_config_get_index_service (const gchar *name) tracker_module_config_get_index_service (const gchar *name)
{ {
......
...@@ -42,6 +42,7 @@ GList * tracker_module_config_get_monitor_recurse_directories (const gchar ...@@ -42,6 +42,7 @@ GList * tracker_module_config_get_monitor_recurse_directories (const gchar
GList * tracker_module_config_get_ignored_directories (const gchar *name); GList * tracker_module_config_get_ignored_directories (const gchar *name);
GList * tracker_module_config_get_ignored_files (const gchar *name); GList * tracker_module_config_get_ignored_files (const gchar *name);
GList * tracker_module_config_get_ignored_directories_with_content (const gchar *name);
const gchar *tracker_module_config_get_index_service (const gchar *name); const gchar *tracker_module_config_get_index_service (const gchar *name);
GList * tracker_module_config_get_index_mime_types (const gchar *name); GList * tracker_module_config_get_index_mime_types (const gchar *name);
......
...@@ -88,6 +88,7 @@ struct _TrackerCrawlerPrivate { ...@@ -88,6 +88,7 @@ struct _TrackerCrawlerPrivate {
GList *ignored_directory_patterns; GList *ignored_directory_patterns;
GList *ignored_file_patterns; GList *ignored_file_patterns;
GList *index_file_patterns; GList *index_file_patterns;
GList *ignored_directories_with_content;
/* Legacy NoWatchDirectoryRoots */ /* Legacy NoWatchDirectoryRoots */
GSList *no_watch_directory_roots; GSList *no_watch_directory_roots;
...@@ -115,9 +116,15 @@ enum { ...@@ -115,9 +116,15 @@ enum {
LAST_SIGNAL LAST_SIGNAL
}; };
/* One entry of a directory listing collected during enumeration.
 * Entries are stored as values in EnumeratorData's children table,
 * keyed by the child's file name, so the whole listing can be
 * inspected before any child is queued for processing.
 */
typedef struct {
/* The child itself; a reference is taken in
 * enumerator_child_data_new() and dropped in
 * enumerator_child_data_free().
 */
GFile *child;
/* TRUE when the enumerated file type was G_FILE_TYPE_DIRECTORY */
gboolean is_dir;
} EnumeratorChildData;
typedef struct { typedef struct {
TrackerCrawler *crawler; TrackerCrawler *crawler;
GFile *parent; GFile *parent;
GHashTable *children;
} EnumeratorData; } EnumeratorData;
static void tracker_crawler_finalize (GObject *object); static void tracker_crawler_finalize (GObject *object);
...@@ -224,6 +231,10 @@ tracker_crawler_finalize (GObject *object) ...@@ -224,6 +231,10 @@ tracker_crawler_finalize (GObject *object)
g_list_free (priv->ignored_file_patterns); g_list_free (priv->ignored_file_patterns);
} }
if (priv->ignored_directories_with_content) {
g_list_free (priv->ignored_directories_with_content);
}
/* Don't free the 'current_' variant of these, they are just /* Don't free the 'current_' variant of these, they are just
* place holders so we know our status. * place holders so we know our status.
*/ */
...@@ -275,6 +286,8 @@ tracker_crawler_new (TrackerConfig *config, ...@@ -275,6 +286,8 @@ tracker_crawler_new (TrackerConfig *config,
tracker_module_config_get_ignored_file_patterns (module_name); tracker_module_config_get_ignored_file_patterns (module_name);
crawler->private->index_file_patterns = crawler->private->index_file_patterns =
tracker_module_config_get_index_file_patterns (module_name); tracker_module_config_get_index_file_patterns (module_name);
crawler->private->ignored_directories_with_content =
tracker_module_config_get_ignored_directories_with_content (module_name);
/* Should we use module config paths? If true, when we /* Should we use module config paths? If true, when we
* _start() the module config paths are used to import paths * _start() the module config paths are used to import paths
...@@ -444,8 +457,6 @@ add_directory (TrackerCrawler *crawler, ...@@ -444,8 +457,6 @@ add_directory (TrackerCrawler *crawler,
path, path,
crawler->private->enumerations); crawler->private->enumerations);
} else { } else {
crawler->private->directories_found++;
g_debug ("Found :'%s' (%d)", g_debug ("Found :'%s' (%d)",
path, path,
crawler->private->enumerations); crawler->private->enumerations);
...@@ -458,19 +469,16 @@ add_directory (TrackerCrawler *crawler, ...@@ -458,19 +469,16 @@ add_directory (TrackerCrawler *crawler,
static void static void
process_file (TrackerCrawler *crawler, process_file (TrackerCrawler *crawler,
const gchar *module_name,
GFile *file) GFile *file)
{ {
g_signal_emit (crawler, signals[PROCESSING_FILE], 0, module_name, file); g_signal_emit (crawler, signals[PROCESSING_FILE], 0,
crawler->private->module_name, file);
} }
static void static void
process_directory (TrackerCrawler *crawler, process_directory (TrackerCrawler *crawler,
const gchar *module_name,
GFile *file) GFile *file)
{ {
g_signal_emit (crawler, signals[PROCESSING_DIRECTORY], 0, module_name, file);
file_enumerate_children (crawler, file); file_enumerate_children (crawler, file);
} }
...@@ -500,7 +508,7 @@ process_func (gpointer data) ...@@ -500,7 +508,7 @@ process_func (gpointer data)
file = g_queue_pop_head (priv->files); file = g_queue_pop_head (priv->files);
if (file) { if (file) {
process_file (crawler, priv->module_name, file); process_file (crawler, file);
g_object_unref (file); g_object_unref (file);
return TRUE; return TRUE;
...@@ -510,7 +518,7 @@ process_func (gpointer data) ...@@ -510,7 +518,7 @@ process_func (gpointer data)
file = g_queue_pop_head (priv->directories); file = g_queue_pop_head (priv->directories);
if (file) { if (file) {
process_directory (crawler, priv->module_name, file); process_directory (crawler, file);
g_object_unref (file); g_object_unref (file);
return TRUE; return TRUE;
...@@ -612,6 +620,27 @@ process_func (gpointer data) ...@@ -612,6 +620,27 @@ process_func (gpointer data)
return FALSE; return FALSE;
} }
/* Allocates a listing entry for @child from the slice allocator.
 *
 * The entry holds its own reference on @child, so the caller keeps
 * (and remains responsible for) the reference it passed in. Release
 * the entry with enumerator_child_data_free().
 */
static EnumeratorChildData *
enumerator_child_data_new (GFile    *child,
			   gboolean  is_dir)
{
	EnumeratorChildData *entry = g_slice_new (EnumeratorChildData);

	/* g_slice_new() does not zero the memory: set every field */
	entry->is_dir = is_dir;
	entry->child = g_object_ref (child);

	return entry;
}
/* Releases a listing entry made by enumerator_child_data_new():
 * drops the reference held on the child GFile, then returns the
 * struct to the slice allocator. @cd must not be NULL.
 */
static void
enumerator_child_data_free (EnumeratorChildData *cd)
{
	GFile *file = cd->child;

	g_object_unref (file);
	g_slice_free (EnumeratorChildData, cd);
}
static EnumeratorData * static EnumeratorData *
enumerator_data_new (TrackerCrawler *crawler, enumerator_data_new (TrackerCrawler *crawler,
GFile *parent) GFile *parent)
...@@ -619,17 +648,80 @@ enumerator_data_new (TrackerCrawler *crawler, ...@@ -619,17 +648,80 @@ enumerator_data_new (TrackerCrawler *crawler,
EnumeratorData *ed; EnumeratorData *ed;
ed = g_slice_new0 (EnumeratorData); ed = g_slice_new0 (EnumeratorData);
ed->crawler = g_object_ref (crawler); ed->crawler = g_object_ref (crawler);
ed->parent = g_object_ref (parent); ed->parent = g_object_ref (parent);
ed->children = g_hash_table_new_full (g_str_hash,
g_str_equal,
(GDestroyNotify) g_free,
(GDestroyNotify) enumerator_child_data_free);
return ed; return ed;
} }
/* Records one enumerated child of ed->parent in ed->children.
 *
 * @name is duplicated to serve as the table key and @file gets an
 * extra reference through the new entry, so the caller keeps
 * ownership of both arguments. The table takes ownership of the
 * duplicated key and of the entry.
 */
static void
enumerator_data_add_child (EnumeratorData *ed,
			   const gchar    *name,
			   GFile          *file,
			   gboolean        is_dir)
{
	gchar               *key = g_strdup (name);
	EnumeratorChildData *entry = enumerator_child_data_new (file, is_dir);

	g_hash_table_insert (ed->children, key, entry);
}
/* Handles a directory once its complete listing has been gathered in
 * ed->children.
 *
 * If any name configured in ignored_directories_with_content is
 * present among the children, the whole directory is dropped: the
 * ignored counter is bumped and nothing is emitted or queued. The
 * lookup is by name only, so a matching child of any type (file or
 * directory) triggers it.
 *
 * Otherwise the directory is counted as found, PROCESSING_DIRECTORY
 * is emitted for it, and each child is queued: files always,
 * sub-directories only while recursing.
 */
static void
enumerator_data_process (EnumeratorData *ed)
{
	TrackerCrawler *crawler = ed->crawler;
	GHashTableIter  iter;
	gpointer        value;
	GList          *item;

	/* Ignore directory if its contents match something we should ignore */
	for (item = crawler->private->ignored_directories_with_content;
	     item != NULL;
	     item = item->next) {
		gchar *path;

		if (g_hash_table_lookup (ed->children, item->data) == NULL) {
			continue;
		}

		path = g_file_get_path (ed->parent);
		crawler->private->directories_ignored++;
		g_debug ("Ignoring directory '%s' since it contains a file named '%s'", path, (gchar *) item->data);
		g_free (path);

		return;
	}

	/* Nothing in the listing vetoes this directory: announce it */
	crawler->private->directories_found++;
	g_signal_emit (crawler,
		       signals[PROCESSING_DIRECTORY],
		       0,
		       crawler->private->module_name,
		       ed->parent);

	g_hash_table_iter_init (&iter, ed->children);

	while (g_hash_table_iter_next (&iter, NULL, &value)) {
		EnumeratorChildData *entry = value;

		if (!entry->is_dir) {
			add_file (crawler, entry->child);
		} else if (crawler->private->paths_are_done) {
			/* This is a bit of a hack: once the non-recursive
			 * paths have all been traversed, any directory
			 * still being enumerated is assumed to belong to
			 * a recursive lookup, so its sub-directories are
			 * queued as well.
			 */
			add_directory (crawler, entry->child);
		}
	}
}
static void static void
enumerator_data_free (EnumeratorData *ed) enumerator_data_free (EnumeratorData *ed)
{ {
g_object_unref (ed->parent); g_object_unref (ed->parent);
g_object_unref (ed->crawler); g_object_unref (ed->crawler);
g_hash_table_destroy (ed->children);
g_slice_free (EnumeratorData, ed); g_slice_free (EnumeratorData, ed);
} }
...@@ -681,6 +773,7 @@ file_enumerate_next_cb (GObject *object, ...@@ -681,6 +773,7 @@ file_enumerate_next_cb (GObject *object,
g_list_free (files); g_list_free (files);
} }
enumerator_data_process (ed);
enumerator_data_free (ed); enumerator_data_free (ed);
g_file_enumerator_close_async (enumerator, g_file_enumerator_close_async (enumerator,
G_PRIORITY_DEFAULT, G_PRIORITY_DEFAULT,
...@@ -693,21 +786,16 @@ file_enumerate_next_cb (GObject *object, ...@@ -693,21 +786,16 @@ file_enumerate_next_cb (GObject *object,
} }
for (l = files; l; l = l->next) { for (l = files; l; l = l->next) {
const gchar *child_name;
gboolean is_dir;
info = l->data; info = l->data;
child = g_file_get_child (parent, g_file_info_get_name (info));
if (g_file_info_get_file_type (info) == G_FILE_TYPE_DIRECTORY) { child_name = g_file_info_get_name (info);
/* This is a bit of a hack, but we assume this is a child = g_file_get_child (parent, child_name);
* recursive lookup because the current non-recursive is_dir = (g_file_info_get_file_type (info) == G_FILE_TYPE_DIRECTORY);
* path is NULL, meaning they have all been traversed
* already. enumerator_data_add_child (ed, child_name, child, is_dir);
*/
if (crawler->private->paths_are_done) {
add_directory (crawler, child);
}
} else {
add_file (crawler, child);
}
g_object_unref (child); g_object_unref (child);
g_object_unref (info); g_object_unref (info);
...@@ -742,7 +830,8 @@ file_enumerate_children_cb (GObject *file, ...@@ -742,7 +830,8 @@ file_enumerate_children_cb (GObject *file,
GFile *parent; GFile *parent;
parent = G_FILE (file); parent = G_FILE (file);
crawler = TRACKER_CRAWLER (user_data); ed = (EnumeratorData *) user_data;
crawler = ed->crawler;
enumerator = g_file_enumerate_children_finish (parent, result, NULL); enumerator = g_file_enumerate_children_finish (parent, result, NULL);
if (!enumerator) { if (!enumerator) {
...@@ -750,8 +839,6 @@ file_enumerate_children_cb (GObject *file, ...@@ -750,8 +839,6 @@ file_enumerate_children_cb (GObject *file,
return; return;
} }
ed = enumerator_data_new (crawler, parent);
/* Start traversing the directory's files */ /* Start traversing the directory's files */
file_enumerate_next (enumerator, ed); file_enumerate_next (enumerator, ed);
} }
...@@ -760,15 +847,19 @@ static void ...@@ -760,15 +847,19 @@ static void
file_enumerate_children (TrackerCrawler *crawler, file_enumerate_children (TrackerCrawler *crawler,
GFile *file) GFile *file)
{ {
EnumeratorData *ed;
crawler->private->enumerations++; crawler->private->enumerations++;
ed = enumerator_data_new (crawler, file);
g_file_enumerate_children_async (file, g_file_enumerate_children_async (file,
FILE_ATTRIBUTES, FILE_ATTRIBUTES,
G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS, G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
G_PRIORITY_DEFAULT, G_PRIORITY_DEFAULT,
NULL, NULL,
file_enumerate_children_cb, file_enumerate_children_cb,
crawler); ed);
} }
static GSList * static GSList *
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment