/*
 * Copyright (C) 2009, Nokia <ivan.frade@nokia.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 */

#include "config.h"

22
#include <libtracker-common/tracker-common.h>
23

24
#include "tracker-crawler.h"
25
#include "tracker-miner-fs.h"
26 27
#include "tracker-media-art.h"
#include "tracker-monitor.h"
28
#include "tracker-utils.h"
29
#include "tracker-thumbnailer.h"
30
#include "tracker-priority-queue.h"
31
#include "tracker-task-pool.h"
32
#include "tracker-sparql-buffer.h"
33
#include "tracker-file-notifier.h"
34

35
/* If defined will print the tree from GNode while running.
 * The #warning makes it obvious at build time that the debugging
 * traces were compiled in.
 */
#ifdef CRAWLED_TREE_ENABLE_TRACE
#warning Tree debugging traces enabled
#endif /* CRAWLED_TREE_ENABLE_TRACE */
39

40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
/* If defined will print push/pop actions on queues.
 *
 * With EVENT_QUEUE_ENABLE_TRACE defined, the trace_eq*() macros log
 * through g_debug(); otherwise they expand to nothing, so their
 * arguments are not evaluated at all.
 */
#ifdef EVENT_QUEUE_ENABLE_TRACE
#warning Event Queue traces enabled
#define EVENT_QUEUE_LOG_PREFIX "[Event Queues] "
#define EVENT_QUEUE_STATUS_TIMEOUT_SECS 30
#define trace_eq(message, ...) g_debug (EVENT_QUEUE_LOG_PREFIX message, ##__VA_ARGS__)
/* Logs a single push/pop action.
 *
 * Every argument is captured once in a prefixed local so that callers
 * may pass expressions with side effects, and so that the temporaries
 * cannot clash with identifiers used in the argument expressions.
 * @gfile2 and @reason may be NULL.
 */
#define trace_eq_action(pushed, queue_name, position, gfile1, gfile2, reason) \
	do { \
		GFile *_eq_file1 = (gfile1); \
		GFile *_eq_file2 = (gfile2); \
		const gchar *_eq_reason = (reason); \
		gboolean _eq_pushed = (pushed); \
		gchar *_eq_uri1 = g_file_get_uri (_eq_file1); \
		gchar *_eq_uri2 = _eq_file2 ? g_file_get_uri (_eq_file2) : NULL; \
		g_debug ("%s%s '%s%s%s' %s %s of queue '%s'%s%s", \
		         EVENT_QUEUE_LOG_PREFIX, \
		         _eq_pushed ? "Pushed" : "Popped", \
		         _eq_uri1, \
		         _eq_uri2 ? "->" : "", \
		         _eq_uri2 ? _eq_uri2 : "", \
		         _eq_pushed ? "to" : "from", \
		         (position), \
		         (queue_name), \
		         _eq_reason ? ": " : "", \
		         _eq_reason ? _eq_reason : ""); \
		g_free (_eq_uri1); \
		g_free (_eq_uri2); \
	} while (0)
#define trace_eq_push_tail(queue_name, gfile, reason)	  \
	trace_eq_action (TRUE, queue_name, "tail", gfile, NULL, reason)
#define trace_eq_push_head(queue_name, gfile, reason)	  \
	trace_eq_action (TRUE, queue_name, "head", gfile, NULL, reason)
#define trace_eq_push_tail_2(queue_name, gfile1, gfile2, reason)	  \
	trace_eq_action (TRUE, queue_name, "tail", gfile1, gfile2, reason)
#define trace_eq_push_head_2(queue_name, gfile1, gfile2, reason)	  \
	trace_eq_action (TRUE, queue_name, "head", gfile1, gfile2, reason)
#define trace_eq_pop_head(queue_name, gfile)	  \
	trace_eq_action (FALSE, queue_name, "head", gfile, NULL, NULL)
#define trace_eq_pop_head_2(queue_name, gfile1, gfile2)	  \
	trace_eq_action (FALSE, queue_name, "head", gfile1, gfile2, NULL)
static gboolean miner_fs_queues_status_trace_timeout_cb (gpointer data);
#else
#define trace_eq(...)
#define trace_eq_push_tail(...)
#define trace_eq_push_head(...)
#define trace_eq_push_tail_2(...)
#define trace_eq_push_head_2(...)
#define trace_eq_pop_head(...)
#define trace_eq_pop_head_2(...)
#endif /* EVENT_QUEUE_ENABLE_TRACE */

87 88 89 90 91
/* Number of times a GFile can be re-queued before it's dropped for
 * whatever reason to avoid infinite loops.
 */
#define REENTRY_MAX 2

92 93
/* Default processing pool limits to be set. They are used as the
 * default values of the "processing-pool-wait-limit" and
 * "processing-pool-ready-limit" properties.
 */
#define DEFAULT_WAIT_POOL_LIMIT 1
#define DEFAULT_READY_POOL_LIMIT 1
95

96 97 98 99 100 101
/* Put tasks processing at a lower priority so other events
 * (timeouts, monitor events, etc...) are guaranteed to be
 * dispatched promptly.
 *
 * The replacement list is parenthesized so the macro keeps its value
 * when used inside a larger expression.
 */
#define TRACKER_TASK_PRIORITY (G_PRIORITY_DEFAULT_IDLE + 10)

102 103 104
/**
 * SECTION:tracker-miner-fs
 * @short_description: Abstract base class for filesystem miners
 * @include: libtracker-miner/tracker-miner.h
 *
 * #TrackerMinerFS is an abstract base class for miners that collect data
 * from a filesystem where parent/child relationships need to be
 * inserted into the database correctly with queue management.
 *
 * All the filesystem crawling and monitoring is abstracted away,
 * leaving to implementations the decisions of which directories/files
 * they should process, and the actual data extraction.
 *
 * Example creating a TrackerMinerFS with our own file system root and
 * data provider.
 *
 * First create our class and base it on TrackerMinerFS:
 * |[
 * G_DEFINE_TYPE_WITH_CODE (MyMinerFiles, my_miner_files, TRACKER_TYPE_MINER_FS,
 *                          G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE,
 *                                                 my_miner_files_initable_iface_init))
 * ]|
 *
 * Later in our class creation function, we are supplying the
 * arguments we want. In this case, the 'root' is a #GFile pointing to
 * a root URI location (for example 'file:///') and 'data_provider' is a
 * #TrackerDataProvider used to enumerate 'root' and return children it
 * finds. If 'data_provider' is %NULL (the default), then a
 * #TrackerFileDataProvider is created automatically.
 * |[
 * // Note that only 'name' is mandatory
 * miner = g_initable_new (MY_TYPE_MINER_FILES,
 *                         NULL,
 *                         error,
 *                         "name", "MyMinerFiles",
 *                         "root", root,
 *                         "data-provider", data_provider,
 *                         "processing-pool-wait-limit", 10,
 *                         "processing-pool-ready-limit", 100,
 *                         NULL);
 * ]|
 **/

145
/* Fetches the TrackerMinerFSPrivate instance data of object @o. */
#define TRACKER_MINER_FS_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), TRACKER_TYPE_MINER_FS, TrackerMinerFSPrivate))
146

147
typedef struct {
148 149
	GFile *file;
	GFile *source_file;
150
} ItemMovedData;
151

152
typedef struct {
153 154
	GFile     *file;
	GPtrArray *results;
155
	GStrv      rdf_types;
156 157
	GCancellable *cancellable;
	guint notified : 1;
158 159
} ItemWritebackData;

160
typedef struct {
161
	GFile *file;
162 163
	gchar *urn;
	gchar *parent_urn;
164
	gint priority;
165
	GCancellable *cancellable;
166
	TrackerSparqlBuilder *builder;
167
	TrackerMiner *miner;
168
} UpdateProcessingTaskContext;
169

170 171
typedef struct {
	GMainLoop *main_loop;
172
	TrackerMiner *miner;
173
} ThumbnailMoveData;
174

175
struct _TrackerMinerFSPrivate {
176
	/* File queues for indexer */
177 178 179 180
	TrackerPriorityQueue *items_created;
	TrackerPriorityQueue *items_updated;
	TrackerPriorityQueue *items_deleted;
	TrackerPriorityQueue *items_moved;
181
	TrackerPriorityQueue *items_writeback;
182 183 184 185 186

	guint item_queues_handler_id;
	GFile *item_queue_blocker;
	GHashTable *items_ignore_next_update;

187
#ifdef EVENT_QUEUE_ENABLE_TRACE
188
	guint queue_status_timeout_id;
189 190
#endif /* EVENT_QUEUE_ENABLE_TRACE */

191 192 193
	/* Root / tree / index */
	GFile *root;
	TrackerIndexingTree *indexing_tree;
194
	TrackerFileNotifier *file_notifier;
195
	TrackerDataProvider *data_provider;
196

197
	/* Sparql insertion tasks */
198
	TrackerTaskPool *task_pool;
199 200
	TrackerSparqlBuffer *sparql_buffer;
	guint sparql_buffer_limit;
201

202 203
	/* File properties */
	GQuark quark_ignore_file;
204
	GQuark quark_recursive_removal;
205 206 207 208 209 210 211 212 213 214 215 216 217
	GQuark quark_attribute_updated;
	GQuark quark_directory_found_crawling;
	GQuark quark_reentry_counter;

	/* Properties */
	gdouble throttle;
	guint mtime_checking : 1;   /* TRUE if mtime checks should be done
	                             * during initial crawling. */
	guint initial_crawling : 1; /* TRUE if initial crawling should be
	                             * done */

	/* Writeback tasks */
	TrackerTaskPool *writeback_pool;
218

219 220
	TrackerThumbnailer *thumbnailer;

221
	/* Status */
222 223 224 225 226 227 228 229 230 231 232 233
	GTimer *timer;
	GTimer *extraction_timer;

	guint been_started : 1;     /* TRUE if miner has been started */
	guint been_crawled : 1;     /* TRUE if initial crawling has been
	                             * done */
	guint shown_totals : 1;     /* TRUE if totals have been shown */
	guint is_paused : 1;        /* TRUE if miner is paused */

	guint timer_stopped : 1;    /* TRUE if main timer is stopped */
	guint extraction_timer_stopped : 1; /* TRUE if the extraction
	                                     * timer is stopped */
234

235 236 237 238 239 240 241 242 243
	GHashTable *roots_to_notify;        /* Used to signal indexing
	                                     * trees finished */

	/*
	 * Statistics
	 */

	/* How many we found during crawling and how many were black
	 * listed (ignored). Reset to 0 when processing stops. */
244 245 246 247 248
	guint total_directories_found;
	guint total_directories_ignored;
	guint total_files_found;
	guint total_files_ignored;

249
	/* How many we indexed and how many had errors indexing. */
250 251 252
	guint total_files_processed;
	guint total_files_notified;
	guint total_files_notified_error;
253 254
};

255
/* Identifies an item queue / queue-related state. The
 * CREATED/UPDATED/DELETED/MOVED/WRITEBACK values have matching
 * items_* queues in the private structure.
 */
typedef enum {
	QUEUE_NONE,
	QUEUE_CREATED,
	QUEUE_UPDATED,
	QUEUE_DELETED,
	QUEUE_MOVED,
	QUEUE_IGNORE_NEXT_UPDATE,
	QUEUE_WAIT,
	QUEUE_WRITEBACK
} QueueState;
265

266
/* Indices into the signals[] array; LAST_SIGNAL is the array size. */
enum {
	PROCESS_FILE,
	PROCESS_FILE_ATTRIBUTES,
	IGNORE_NEXT_UPDATE_FILE,
	FINISHED,
	WRITEBACK_FILE,
	FINISHED_ROOT,
	REMOVE_FILE,
	LAST_SIGNAL
};

277 278
/* GObject property identifiers; PROP_0 is the unused zero id. */
enum {
	PROP_0,
	PROP_THROTTLE,
	PROP_ROOT,
	PROP_WAIT_POOL_LIMIT,
	PROP_READY_POOL_LIMIT,
	PROP_DATA_PROVIDER,
	PROP_MTIME_CHECKING,
	PROP_INITIAL_CRAWLING
};

288 289
static void           miner_fs_initable_iface_init        (GInitableIface       *iface);

290 291 292 293
static gboolean       miner_fs_remove_file                (TrackerMinerFS       *fs,
                                                           GFile                *file,
                                                           gboolean              children_only,
                                                           TrackerSparqlBuilder *builder);
294
static void           fs_finalize                         (GObject              *object);
295
static void           fs_constructed                      (GObject              *object);
296 297 298 299 300 301 302 303
static void           fs_set_property                     (GObject              *object,
                                                           guint                 prop_id,
                                                           const GValue         *value,
                                                           GParamSpec           *pspec);
static void           fs_get_property                     (GObject              *object,
                                                           guint                 prop_id,
                                                           GValue               *value,
                                                           GParamSpec           *pspec);
304

305 306 307 308 309 310 311 312 313
static void           miner_started                       (TrackerMiner         *miner);
static void           miner_stopped                       (TrackerMiner         *miner);
static void           miner_paused                        (TrackerMiner         *miner);
static void           miner_resumed                       (TrackerMiner         *miner);
static void           miner_ignore_next_update            (TrackerMiner         *miner,
                                                           const GStrv           subjects);
static ItemMovedData *item_moved_data_new                 (GFile                *file,
                                                           GFile                *source_file);
static void           item_moved_data_free                (ItemMovedData        *data);
314
static void           item_writeback_data_free            (ItemWritebackData    *data);
315

316
static void           indexing_tree_directory_removed     (TrackerIndexingTree  *indexing_tree,
317 318
                                                           GFile                *directory,
                                                           gpointer              user_data);
319
static void           file_notifier_file_created          (TrackerFileNotifier  *notifier,
320 321
                                                           GFile                *file,
                                                           gpointer              user_data);
322
static void           file_notifier_file_deleted          (TrackerFileNotifier  *notifier,
323 324
                                                           GFile                *file,
                                                           gpointer              user_data);
325
static void           file_notifier_file_updated          (TrackerFileNotifier  *notifier,
326
                                                           GFile                *file,
327
                                                           gboolean              attributes_only,
328
                                                           gpointer              user_data);
329 330 331 332
static void           file_notifier_file_moved            (TrackerFileNotifier  *notifier,
                                                           GFile                *source,
                                                           GFile                *dest,
                                                           gpointer              user_data);
333 334 335
static void           file_notifier_directory_started     (TrackerFileNotifier *notifier,
                                                           GFile               *directory,
                                                           gpointer             user_data);
336 337 338 339 340 341 342 343 344
static void           file_notifier_directory_finished    (TrackerFileNotifier *notifier,
                                                           GFile               *directory,
                                                           guint                directories_found,
                                                           guint                directories_ignored,
                                                           guint                files_found,
                                                           guint                files_ignored,
                                                           gpointer             user_data);
static void           file_notifier_finished              (TrackerFileNotifier *notifier,
                                                           gpointer             user_data);
345

346
static void           item_queue_handlers_set_up          (TrackerMinerFS       *fs);
347

348
static void           task_pool_cancel_foreach                (gpointer        data,
349
                                                               gpointer        user_data);
350 351 352
static void           task_pool_limit_reached_notify_cb       (GObject        *object,
                                                               GParamSpec     *pspec,
                                                               gpointer        user_data);
353 354
static void           writeback_pool_cancel_foreach           (gpointer        data,
                                                               gpointer        user_data);
355

356
static GQuark quark_file_iri = 0;
357
static GInitableIface* miner_fs_initable_parent_iface;
358 359
static guint signals[LAST_SIGNAL] = { 0, };

360 361 362 363 364 365 366 367 368 369
/**
 * tracker_miner_fs_error_quark:
 *
 * Gives the caller the #GQuark used to identify #TrackerMinerFS errors
 * in #GError structures. The #GQuark is used as the domain for the error.
 *
 * Returns: the #GQuark used for the domain of a #GError.
 *
 * Since: 1.2.
 **/
370 371
G_DEFINE_QUARK (TrackerMinerFSError, tracker_miner_fs_error)

372 373 374
/* Registers TrackerMinerFS as an abstract TrackerMiner subclass that
 * implements GInitable. No trailing semicolon: the macro expands to
 * function definitions, so a ';' here would be a stray empty
 * declaration at file scope.
 */
G_DEFINE_ABSTRACT_TYPE_WITH_CODE (TrackerMinerFS, tracker_miner_fs, TRACKER_TYPE_MINER,
                                  G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE,
                                                         miner_fs_initable_iface_init))
375 376

static void
377
tracker_miner_fs_class_init (TrackerMinerFSClass *klass)
378 379
{
	GObjectClass *object_class = G_OBJECT_CLASS (klass);
Martyn Russell's avatar
Martyn Russell committed
380
	TrackerMinerClass *miner_class = TRACKER_MINER_CLASS (klass);
381

382 383
	klass->remove_file = miner_fs_remove_file;

384
	object_class->finalize = fs_finalize;
385
	object_class->constructed = fs_constructed;
386 387
	object_class->set_property = fs_set_property;
	object_class->get_property = fs_get_property;
388

Martyn Russell's avatar
Martyn Russell committed
389 390
	miner_class->started = miner_started;
	miner_class->stopped = miner_stopped;
391 392
	miner_class->paused  = miner_paused;
	miner_class->resumed = miner_resumed;
393
	miner_class->ignore_next_update = miner_ignore_next_update;
394

395
	g_object_class_install_property (object_class,
Martyn Russell's avatar
Martyn Russell committed
396 397 398 399 400 401
	                                 PROP_THROTTLE,
	                                 g_param_spec_double ("throttle",
	                                                      "Throttle",
	                                                      "Modifier for the indexing speed, 0 is max speed",
	                                                      0, 1, 0,
	                                                      G_PARAM_READWRITE));
402 403 404 405 406 407 408
	g_object_class_install_property (object_class,
	                                 PROP_ROOT,
	                                 g_param_spec_object ("root",
	                                                      "Root",
	                                                      "Top level URI for our indexing tree and file notify clases",
	                                                      G_TYPE_FILE,
	                                                      G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY));
409
	g_object_class_install_property (object_class,
410
	                                 PROP_WAIT_POOL_LIMIT,
411
	                                 g_param_spec_uint ("processing-pool-wait-limit",
412 413 414 415 416 417
	                                                    "Processing pool limit for WAIT tasks",
	                                                    "Maximum number of files that can be concurrently "
	                                                    "processed by the upper layer",
	                                                    1, G_MAXUINT, DEFAULT_WAIT_POOL_LIMIT,
	                                                    G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
	g_object_class_install_property (object_class,
418 419 420
	                                 PROP_READY_POOL_LIMIT,
	                                 g_param_spec_uint ("processing-pool-ready-limit",
	                                                    "Processing pool limit for READY tasks",
421 422
	                                                    "Maximum number of SPARQL updates that can be merged "
	                                                    "in a single connection to the store",
423
	                                                    1, G_MAXUINT, DEFAULT_READY_POOL_LIMIT,
Martyn Russell's avatar
Martyn Russell committed
424
	                                                    G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
425
	g_object_class_install_property (object_class,
426 427 428 429 430
	                                 PROP_DATA_PROVIDER,
	                                 g_param_spec_object ("data-provider",
	                                                      "Data provider",
	                                                      "Data provider populating data, e.g. like GFileEnumerator",
	                                                      TRACKER_TYPE_DATA_PROVIDER,
431
	                                                      G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY));
432
	g_object_class_install_property (object_class,
433
	                                 PROP_MTIME_CHECKING,
434 435 436 437 438 439 440 441 442 443 444 445 446
	                                 g_param_spec_boolean ("mtime-checking",
	                                                       "Mtime checking",
	                                                       "Whether to perform mtime checks during initial crawling or not",
	                                                       TRUE,
	                                                       G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
	g_object_class_install_property (object_class,
	                                 PROP_INITIAL_CRAWLING,
	                                 g_param_spec_boolean ("initial-crawling",
	                                                       "Initial crawling",
	                                                       "Whether to perform initial crawling or not",
	                                                       TRUE,
	                                                       G_PARAM_READWRITE));

447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462
	/**
	 * TrackerMinerFS::process-file:
	 * @miner_fs: the #TrackerMinerFS
	 * @file: a #GFile
	 * @builder: a #TrackerSparqlBuilder
	 * @cancellable: a #GCancellable
	 *
	 * The ::process-file signal is emitted whenever a file should
	 * be processed, and it's metadata extracted.
	 *
	 * @builder is the #TrackerSparqlBuilder where all sparql updates
	 * to be performed for @file will be appended.
	 *
	 * This signal allows both synchronous and asynchronous extraction,
	 * in the synchronous case @cancellable can be safely ignored. In
	 * either case, on successful metadata extraction, implementations
Carlos Garnacho's avatar
Carlos Garnacho committed
463
	 * must call tracker_miner_fs_file_notify() to indicate that
464 465 466 467 468
	 * processing has finished on @file, so the miner can execute
	 * the SPARQL updates and continue processing other files.
	 *
	 * Returns: %TRUE if the file is accepted for processing,
	 *          %FALSE if the file should be ignored.
469 470
	 *
	 * Since: 0.8
471 472 473
	 **/
	signals[PROCESS_FILE] =
		g_signal_new ("process-file",
Martyn Russell's avatar
Martyn Russell committed
474 475 476 477
		              G_OBJECT_CLASS_TYPE (object_class),
		              G_SIGNAL_RUN_LAST,
		              G_STRUCT_OFFSET (TrackerMinerFSClass, process_file),
		              NULL, NULL,
Xavier Claessens's avatar
Xavier Claessens committed
478
		              NULL,
Martyn Russell's avatar
Martyn Russell committed
479
		              G_TYPE_BOOLEAN,
480
		              3, G_TYPE_FILE, TRACKER_SPARQL_TYPE_BUILDER, G_TYPE_CANCELLABLE);
481

482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504
	/**
	 * TrackerMinerFS::process-file-attributes:
	 * @miner_fs: the #TrackerMinerFS
	 * @file: a #GFile
	 * @builder: a #TrackerSparqlBuilder
	 * @cancellable: a #GCancellable
	 *
	 * The ::process-file-attributes signal is emitted whenever a file should
	 * be processed, but only the attribute-related metadata extracted.
	 *
	 * @builder is the #TrackerSparqlBuilder where all sparql updates
	 * to be performed for @file will be appended. For the properties being
	 * updated, the DELETE statements should be included as well.
	 *
	 * This signal allows both synchronous and asynchronous extraction,
	 * in the synchronous case @cancellable can be safely ignored. In
	 * either case, on successful metadata extraction, implementations
	 * must call tracker_miner_fs_file_notify() to indicate that
	 * processing has finished on @file, so the miner can execute
	 * the SPARQL updates and continue processing other files.
	 *
	 * Returns: %TRUE if the file is accepted for processing,
	 *          %FALSE if the file should be ignored.
505 506
	 *
	 * Since: 0.10
507 508 509 510 511 512 513
	 **/
	signals[PROCESS_FILE_ATTRIBUTES] =
		g_signal_new ("process-file-attributes",
		              G_OBJECT_CLASS_TYPE (object_class),
		              G_SIGNAL_RUN_LAST,
		              G_STRUCT_OFFSET (TrackerMinerFSClass, process_file_attributes),
		              NULL, NULL,
Xavier Claessens's avatar
Xavier Claessens committed
514
		              NULL,
515 516 517
		              G_TYPE_BOOLEAN,
		              3, G_TYPE_FILE, TRACKER_SPARQL_TYPE_BUILDER, G_TYPE_CANCELLABLE);

518 519 520 521 522 523 524 525 526 527 528 529 530 531 532
	/**
	 * TrackerMinerFS::ignore-next-update-file:
	 * @miner_fs: the #TrackerMinerFS
	 * @file: a #GFile
	 * @builder: a #TrackerSparqlBuilder
	 * @cancellable: a #GCancellable
	 *
	 * The ::ignore-next-update-file signal is emitted whenever a file should
	 * be marked as to ignore on next update, and it's metadata prepared for that.
	 *
	 * @builder is the #TrackerSparqlBuilder where all sparql updates
	 * to be performed for @file will be appended.
	 *
	 * Returns: %TRUE on success
	 *          %FALSE on failure
533 534
	 *
	 * Since: 0.8
535
	 *
536
	 * Deprecated: 0.12
537 538
	 **/
	signals[IGNORE_NEXT_UPDATE_FILE] =
539
		g_signal_new ("ignore-next-update-file",
540 541 542 543
		              G_OBJECT_CLASS_TYPE (object_class),
		              G_SIGNAL_RUN_LAST,
		              G_STRUCT_OFFSET (TrackerMinerFSClass, ignore_next_update_file),
		              NULL, NULL,
Xavier Claessens's avatar
Xavier Claessens committed
544
		              NULL,
545 546
		              G_TYPE_BOOLEAN,
		              3, G_TYPE_FILE, TRACKER_SPARQL_TYPE_BUILDER, G_TYPE_CANCELLABLE);
547 548 549 550 551 552 553 554 555 556 557 558

	/**
	 * TrackerMinerFS::finished:
	 * @miner_fs: the #TrackerMinerFS
	 * @elapsed: elapsed time since mining was started
	 * @directories_found: number of directories found
	 * @directories_ignored: number of ignored directories
	 * @files_found: number of files found
	 * @files_ignored: number of ignored files
	 *
	 * The ::finished signal is emitted when @miner_fs has finished
	 * all pending processing.
559 560
	 *
	 * Since: 0.8
561 562
	 **/
	signals[FINISHED] =
563
		g_signal_new ("finished",
Martyn Russell's avatar
Martyn Russell committed
564 565 566 567
		              G_TYPE_FROM_CLASS (object_class),
		              G_SIGNAL_RUN_LAST,
		              G_STRUCT_OFFSET (TrackerMinerFSClass, finished),
		              NULL, NULL,
Xavier Claessens's avatar
Xavier Claessens committed
568
		              NULL,
Martyn Russell's avatar
Martyn Russell committed
569 570 571 572 573 574 575
		              G_TYPE_NONE,
		              5,
		              G_TYPE_DOUBLE,
		              G_TYPE_UINT,
		              G_TYPE_UINT,
		              G_TYPE_UINT,
		              G_TYPE_UINT);
576

577 578 579 580
	/**
	 * TrackerMinerFS::writeback-file:
	 * @miner_fs: the #TrackerMinerFS
	 * @file: a #GFile
581
	 * @rdf_types: the set of RDF types
582
	 * @results: (element-type GStrv): a set of results prepared by the preparation query
583
	 * @cancellable: a #GCancellable
584 585 586 587 588 589 590 591 592 593 594 595 596 597 598
	 *
	 * The ::writeback-file signal is emitted whenever a file must be written
	 * back
	 *
	 * Returns: %TRUE on success, %FALSE otherwise
	 *
	 * Since: 0.10.20
	 **/
	signals[WRITEBACK_FILE] =
		g_signal_new ("writeback-file",
		              G_OBJECT_CLASS_TYPE (object_class),
		              G_SIGNAL_RUN_LAST,
		              G_STRUCT_OFFSET (TrackerMinerFSClass, writeback_file),
		              NULL,
		              NULL,
Xavier Claessens's avatar
Xavier Claessens committed
599
		              NULL,
600
		              G_TYPE_BOOLEAN,
601
		              4,
602
		              G_TYPE_FILE,
603
		              G_TYPE_STRV,
604 605
		              G_TYPE_PTR_ARRAY,
		              G_TYPE_CANCELLABLE);
606

607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631
	/**
	 * TrackerMinerFS::finished-root:
	 * @miner_fs: the #TrackerMinerFS
	 * @file: a #GFile
	 *
	 * The ::finished-crawl signal is emitted when @miner_fs has
	 * finished finding all resources that need to be indexed
	 * with the root location of @file. At this point, it's likely
	 * many are still in the queue to be added to the database,
	 * but this gives some indication that a location is
	 * processed.
	 *
	 * Since: 1.2
	 **/
	signals[FINISHED_ROOT] =
		g_signal_new ("finished-root",
		              G_TYPE_FROM_CLASS (object_class),
		              G_SIGNAL_RUN_LAST,
		              G_STRUCT_OFFSET (TrackerMinerFSClass, finished_root),
		              NULL, NULL,
		              NULL,
		              G_TYPE_NONE,
		              1,
		              G_TYPE_FILE);

632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674
	/**
	 * TrackerMinerFS::remove-file:
	 * @miner_fs: the #TrackerMinerFS
	 * @file: a #GFile
	 * @children_only: #TRUE if only the children of @file are to be deleted
	 * @builder: a #TrackerSparqlBuilder
	 *
	 * The ::remove-file signal will be emitted on files that need removal
	 * according to the miner configuration (either the files themselves are
	 * deleted, or the directory/contents no longer need inspection according
	 * to miner configuration and their location.
	 *
	 * This operation is always assumed to be recursive, the @children_only
	 * argument will be %TRUE if for any reason the topmost directory needs
	 * to stay (e.g. moved from a recursively indexed directory tree to a
	 * non-recursively indexed location).
	 *
	 * The @builder argument can be used to provide additional SPARQL
	 * deletes and updates necessary around the deletion of those items. If
	 * the return value of this signal is %TRUE, @builder is expected to
	 * contain all relevant deletes for this operation.
	 *
	 * If the return value of this signal is %FALSE, the miner will apply
	 * its default behavior, which is deleting all triples that correspond
	 * to the affected URIs.
	 *
	 * Returns: %TRUE if @builder contains all the necessary operations to
	 *          delete the affected resources, %FALSE to let the miner
	 *          implicitly handle the deletion.
	 *
	 * Since: 1.8
	 **/
	signals[REMOVE_FILE] =
		g_signal_new ("remove-file",
		              G_TYPE_FROM_CLASS (object_class),
		              G_SIGNAL_RUN_LAST,
		              G_STRUCT_OFFSET (TrackerMinerFSClass, remove_file),
		              NULL, NULL, NULL,
		              G_TYPE_BOOLEAN,
		              3,
		              G_TYPE_FILE, G_TYPE_BOOLEAN,
		              TRACKER_SPARQL_TYPE_BUILDER);

675
	g_type_class_add_private (object_class, sizeof (TrackerMinerFSPrivate));
676 677

	quark_file_iri = g_quark_from_static_string ("tracker-miner-file-iri");
678 679 680
}

static void
681
tracker_miner_fs_init (TrackerMinerFS *object)
682
{
683
	TrackerMinerFSPrivate *priv;
684

685
	object->priv = TRACKER_MINER_FS_GET_PRIVATE (object);
686

687
	priv = object->priv;
688

689 690 691
	priv->timer = g_timer_new ();
	priv->extraction_timer = g_timer_new ();

692 693 694
	g_timer_stop (priv->timer);
	g_timer_stop (priv->extraction_timer);

695 696 697
	priv->timer_stopped = TRUE;
	priv->extraction_timer_stopped = TRUE;

698 699 700 701
	priv->items_created = tracker_priority_queue_new ();
	priv->items_updated = tracker_priority_queue_new ();
	priv->items_deleted = tracker_priority_queue_new ();
	priv->items_moved = tracker_priority_queue_new ();
702
	priv->items_writeback = tracker_priority_queue_new ();
703 704 705 706 707 708 709

#ifdef EVENT_QUEUE_ENABLE_TRACE
	priv->queue_status_timeout_id = g_timeout_add_seconds (EVENT_QUEUE_STATUS_TIMEOUT_SECS,
	                                                       miner_fs_queues_status_trace_timeout_cb,
	                                                       object);
#endif /* PROCESSING_POOL_ENABLE_TRACE */

710 711 712
	priv->items_ignore_next_update = g_hash_table_new_full (g_str_hash, g_str_equal,
	                                                        (GDestroyNotify) g_free,
	                                                        (GDestroyNotify) NULL);
713

714 715
	/* Create processing pools */
	priv->task_pool = tracker_task_pool_new (DEFAULT_WAIT_POOL_LIMIT);
716 717 718
	g_signal_connect (priv->task_pool, "notify::limit-reached",
	                  G_CALLBACK (task_pool_limit_reached_notify_cb), object);

719
	priv->writeback_pool = tracker_task_pool_new (DEFAULT_WAIT_POOL_LIMIT);
720 721
	g_signal_connect (priv->writeback_pool, "notify::limit-reached",
	                  G_CALLBACK (task_pool_limit_reached_notify_cb), object);
722

723
	priv->quark_ignore_file = g_quark_from_static_string ("tracker-ignore-file");
724
	priv->quark_recursive_removal = g_quark_from_static_string ("tracker-recursive-removal");
725
	priv->quark_directory_found_crawling = g_quark_from_static_string ("tracker-directory-found-crawling");
726
	priv->quark_attribute_updated = g_quark_from_static_string ("tracker-attribute-updated");
727
	priv->quark_reentry_counter = g_quark_from_static_string ("tracker-reentry-counter");
728

729 730
	priv->mtime_checking = TRUE;
	priv->initial_crawling = TRUE;
731 732 733 734 735

	priv->roots_to_notify = g_hash_table_new_full (g_file_hash,
	                                               (GEqualFunc) g_file_equal,
	                                               g_object_unref,
	                                               NULL);
736 737
}

738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754
/* GInitable::init implementation.
 *
 * Chains up to the parent interface, then creates the SPARQL buffer, the
 * file notifier and the thumbnailer. The indexing tree is expected to
 * exist already (it is created in fs_constructed()).
 *
 * Returns: %TRUE on success. On failure returns %FALSE; @error is set
 *          here for failures detected in this function, while a failing
 *          parent init is simply propagated.
 */
static gboolean
miner_fs_initable_init (GInitable     *initable,
                        GCancellable  *cancellable,
                        GError       **error)
{
	/* File notifier signals and their handlers, in connection order */
	static const struct {
		const gchar *name;
		GCallback    callback;
	} notifier_signals[] = {
		{ "file-created",       G_CALLBACK (file_notifier_file_created) },
		{ "file-updated",       G_CALLBACK (file_notifier_file_updated) },
		{ "file-deleted",       G_CALLBACK (file_notifier_file_deleted) },
		{ "file-moved",         G_CALLBACK (file_notifier_file_moved) },
		{ "directory-started",  G_CALLBACK (file_notifier_directory_started) },
		{ "directory-finished", G_CALLBACK (file_notifier_directory_finished) },
		{ "finished",           G_CALLBACK (file_notifier_finished) }
	};
	TrackerMinerFSPrivate *priv;
	guint ready_limit;
	guint i;

	/* The parent interface goes first */
	if (!miner_fs_initable_parent_iface->init (initable, cancellable, error)) {
		return FALSE;
	}

	priv = TRACKER_MINER_FS_GET_PRIVATE (initable);

	/* The SPARQL buffer is sized from the "processing-pool-ready-limit"
	 * property and uses the miner's connection.
	 */
	g_object_get (initable,
	              "processing-pool-ready-limit", &ready_limit,
	              NULL);
	priv->sparql_buffer =
		tracker_sparql_buffer_new (tracker_miner_get_connection (TRACKER_MINER (initable)),
		                           ready_limit);

	if (priv->sparql_buffer == NULL) {
		g_set_error (error,
		             tracker_miner_fs_error_quark (),
		             TRACKER_MINER_FS_ERROR_INIT,
		             "Could not create TrackerSparqlBuffer needed to process resources");
		return FALSE;
	}

	g_signal_connect (priv->sparql_buffer, "notify::limit-reached",
	                  G_CALLBACK (task_pool_limit_reached_notify_cb),
	                  initable);

	/* Not created here; we only verify that it exists */
	if (priv->indexing_tree == NULL) {
		g_set_error (error,
		             tracker_miner_fs_error_quark (),
		             TRACKER_MINER_FS_ERROR_INIT,
		             "Could not create TrackerIndexingTree needed to manage content indexed");
		return FALSE;
	}

	g_signal_connect (priv->indexing_tree, "directory-removed",
	                  G_CALLBACK (indexing_tree_directory_removed),
	                  initable);

	/* Create the file notifier */
	priv->file_notifier = tracker_file_notifier_new (priv->indexing_tree,
	                                                 priv->data_provider);

	if (priv->file_notifier == NULL) {
		g_set_error (error,
		             tracker_miner_fs_error_quark (),
		             TRACKER_MINER_FS_ERROR_INIT,
		             "Could not create TrackerFileNotifier needed to signal new resources to be indexed");
		return FALSE;
	}

	for (i = 0; i < G_N_ELEMENTS (notifier_signals); i++) {
		g_signal_connect (priv->file_notifier,
		                  notifier_signals[i].name,
		                  notifier_signals[i].callback,
		                  initable);
	}

	priv->thumbnailer = tracker_thumbnailer_new ();

	return TRUE;
}

/* GInitable interface setup: installs our init() implementation and
 * remembers the parent interface so that miner_fs_initable_init() can
 * chain up to it.
 */
static void
miner_fs_initable_iface_init (GInitableIface *iface)
{
	iface->init = miner_fs_initable_init;
	miner_fs_initable_parent_iface = g_type_interface_peek_parent (iface);
}

826 827 828 829 830 831 832 833 834
/* Handler for ::remove-file that contributes nothing to @builder.
 *
 * Always returns %FALSE, which per the signal documentation lets the
 * miner apply its implicit deletion of the affected resources.
 * NOTE(review): presumably installed as the class default handler;
 * confirm in the class initializer.
 */
static gboolean
miner_fs_remove_file (TrackerMinerFS       *fs,
                      GFile                *file,
                      gboolean              children_only,
                      TrackerSparqlBuilder *builder)
{
	/* None of the arguments are inspected */
	return FALSE;
}

835
static void
836
fs_finalize (GObject *object)
837
{
838
	TrackerMinerFSPrivate *priv;
839

840
	priv = TRACKER_MINER_FS_GET_PRIVATE (object);
841

842 843
	g_timer_destroy (priv->timer);
	g_timer_destroy (priv->extraction_timer);
844

845 846 847
	if (priv->item_queues_handler_id) {
		g_source_remove (priv->item_queues_handler_id);
		priv->item_queues_handler_id = 0;
848
	}
849

850 851
	if (priv->item_queue_blocker) {
		g_object_unref (priv->item_queue_blocker);
852 853
	}

854 855 856
	if (priv->file_notifier) {
		tracker_file_notifier_stop (priv->file_notifier);
	}
857

858
	/* Cancel every pending task */
859 860 861 862 863
	tracker_task_pool_foreach (priv->task_pool,
	                           task_pool_cancel_foreach,
	                           NULL);
	g_object_unref (priv->task_pool);

864 865
	g_object_unref (priv->writeback_pool);

866 867 868
	if (priv->sparql_buffer) {
		g_object_unref (priv->sparql_buffer);
	}
869

870 871 872 873
	tracker_priority_queue_foreach (priv->items_moved,
	                                (GFunc) item_moved_data_free,
	                                NULL);
	tracker_priority_queue_unref (priv->items_moved);
874

875 876 877 878
	tracker_priority_queue_foreach (priv->items_deleted,
	                                (GFunc) g_object_unref,
	                                NULL);
	tracker_priority_queue_unref (priv->items_deleted);
879

880 881 882 883
	tracker_priority_queue_foreach (priv->items_updated,
	                                (GFunc) g_object_unref,
	                                NULL);
	tracker_priority_queue_unref (priv->items_updated);
884

885 886 887 888
	tracker_priority_queue_foreach (priv->items_created,
	                                (GFunc) g_object_unref,
	                                NULL);
	tracker_priority_queue_unref (priv->items_created);
889

890
	tracker_priority_queue_foreach (priv->items_writeback,
891 892
	                                (GFunc) item_writeback_data_free,
	                                NULL);
893 894
	tracker_priority_queue_unref (priv->items_writeback);

895
	g_hash_table_unref (priv->items_ignore_next_update);
896

897 898 899 900 901 902 903
	if (priv->indexing_tree) {
		g_object_unref (priv->indexing_tree);
	}

	if (priv->file_notifier) {
		g_object_unref (priv->file_notifier);
	}
904

905
	if (priv->thumbnailer) {
906
		g_object_unref (priv->thumbnailer);
907 908 909 910
	}

	if (priv->roots_to_notify) {
		g_hash_table_unref (priv->roots_to_notify);
911 912 913

		/* Just in case we end up using this AFTER finalize, not expected */
		priv->roots_to_notify = NULL;
914
	}
915

916 917 918 919 920
#ifdef EVENT_QUEUE_ENABLE_TRACE
	if (priv->queue_status_timeout_id)
		g_source_remove (priv->queue_status_timeout_id);
#endif /* PROCESSING_POOL_ENABLE_TRACE */

921
	G_OBJECT_CLASS (tracker_miner_fs_parent_class)->finalize (object);
922 923
}

924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949
/* GObject::constructed implementation: makes sure a root exists and
 * builds the indexing tree on top of it.
 */
static void
fs_constructed (GObject *object)
{
	TrackerMinerFSPrivate *priv;

	/* NOTE: The ordering matters. Initables run _AFTER_ constructed,
	 * and subclasses that are not initables can only chain up
	 * constructed, so the root and the indexing tree must exist by
	 * the time this function returns.
	 *
	 * If priv->indexing_tree is still NULL afterwards, the initable
	 * functions will fail and the object will not be created anyway.
	 */
	G_OBJECT_CLASS (tracker_miner_fs_parent_class)->constructed (object);

	priv = TRACKER_MINER_FS_GET_PRIVATE (object);

	/* No root was given at construction: fall back to file:/// */
	if (!priv->root) {
		priv->root = g_file_new_for_uri ("file:///");
	}

	priv->indexing_tree = tracker_indexing_tree_new_with_root (priv->root);
}

953 954
static void
fs_set_property (GObject      *object,
Martyn Russell's avatar
Martyn Russell committed
955 956 957
                 guint         prop_id,
                 const GValue *value,
                 GParamSpec   *pspec)
958
{
959 960
	TrackerMinerFS *fs = TRACKER_MINER_FS (object);

961 962 963
	switch (prop_id) {
	case PROP_THROTTLE:
		tracker_miner_fs_set_throttle (TRACKER_MINER_FS (object),
Martyn Russell's avatar
Martyn Russell committed
964
		                               g_value_get_double (value));
965
		break;
966 967 968 969
	case PROP_ROOT:
		/* We expect this to only occur once, on object construct */
		fs->priv->root = g_value_dup_object (value);
		break;
970
	case PROP_WAIT_POOL_LIMIT:
971 972
		tracker_task_pool_set_limit (fs->priv->task_pool,
		                             g_value_get_uint (value));
973
		break;
974
	case PROP_READY_POOL_LIMIT:
975 976 977 978 979 980
		fs->priv->sparql_buffer_limit = g_value_get_uint (value);

		if (fs->priv->sparql_buffer) {
			tracker_task_pool_set_limit (TRACKER_TASK_POOL (fs->priv->sparql_buffer),
			                             fs->priv->sparql_buffer_limit);
		}
981
		break;
982 983
	case PROP_DATA_PROVIDER:
		fs->priv->data_provider = g_value_dup_object (value);
984
		break;
985
	case PROP_MTIME_CHECKING:
986
		fs->priv->mtime_checking = g_value_get_boolean (value);
987
		break;
988
	case PROP_INITIAL_CRAWLING:
989
		fs->priv->initial_crawling = g_value_get_boolean (value);
990
		break;
991 992 993 994 995 996 997 998
	default:
		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
		break;
	}
}

static void
fs_get_property (GObject    *object,
Martyn Russell's avatar
Martyn Russell committed
999 1000 1001
                 guint       prop_id,
                 GValue     *value,
                 GParamSpec *pspec)
1002 1003 1004 1005 1006 1007 1008
{
	TrackerMinerFS *fs;

	fs = TRACKER_MINER_FS (object);

	switch (prop_id) {
	case PROP_THROTTLE:
1009
		g_value_set_double (value, fs->priv->throttle);
1010
		break;
1011 1012 1013
	case PROP_ROOT:
		g_value_set_object (value, fs->priv->root);
		break;
1014
	case PROP_WAIT_POOL_LIMIT:
1015
		g_value_set_uint (value, tracker_task_pool_get_limit (fs->priv->task_pool));
1016
		break;
1017
	case PROP_READY_POOL_LIMIT:
1018
		g_value_set_uint (value, fs->priv->sparql_buffer_limit);
1019
		break;
1020
	case PROP_MTIME_CHECKING:
1021
		g_value_set_boolean (value, fs->priv->mtime_checking);
1022
		break;
1023 1024
	case PROP_DATA_PROVIDER:
		g_value_set_object (value, fs->priv->data_provider);
1025
		break;
1026
	case PROP_INITIAL_CRAWLING:
1027
		g_value_set_boolean (value, fs->priv->initial_crawling);
1028
		break;
1029 1030 1031 1032 1033 1034
	default:
		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
		break;
	}
}

1035 1036 1037 1038 1039 1040 1041 1042 1043 1044
/* "notify::limit-reached" handler shared by the task pools and the
 * SPARQL buffer. @user_data is the TrackerMinerFS that owns the pool.
 * The queue handlers are set up again only when the pool reports that
 * its limit is not reached.
 */
static void
task_pool_limit_reached_notify_cb (GObject    *object,
				   GParamSpec *pspec,
				   gpointer    user_data)
{
	TrackerTaskPool *pool = TRACKER_TASK_POOL (object);

	/* Pool is at its limit: nothing to do */
	if (tracker_task_pool_limit_reached (pool)) {
		return;
	}

	item_queue_handlers_set_up (TRACKER_MINER_FS (user_data));
}

1045 1046 1047
static void
miner_started (TrackerMiner *miner)
{
1048
	TrackerMinerFS *fs;
1049

1050
	fs = TRACKER_MINER_FS (miner);
1051

1052
	fs->priv->been_started = TRUE;
1053

1054
	g_info ("Initializing");
1055

1056
	g_object_set (miner,
Martyn Russell's avatar
Martyn Russell committed
1057
	              "progress", 0.0,
1058
	              "status", "Initializing",
1059
	              "remaining-time", 0,
Martyn Russell's avatar
Martyn Russell committed
1060
	              NULL);
1061

1062