Commit c5d5e291 authored by Allison Karlitskaya

inotify: implement "boredom" algorithm

Use the "interesting" value from g_file_monitor_source_handle_event() to
decide if we're currently being flooded by a stream of boring events.

The main case here is when one or more files is being written to and the
change events are all being rate-limited in the GFileMonitor frontends.

In that case, we become "bored" with the event stream and add a backoff
timeout.  In the case that it is exactly one large file being written
(which is the common case) then leaving the event in the queue also lets
the kernel perform merging on it, so when we wake up, we will only see
the one event.  Even in the case that the kernel is unable to perform
merging, the context switch overhead will be vastly reduced.

In testing, this cuts down the number of wake-ups during a large file
copy by a couple of orders of magnitude (i.e., to less than 1% of the
original number of wake-ups).
parent 9adf948a
......@@ -39,9 +39,9 @@
static gboolean ih_debug_enabled = FALSE;
#define IH_W if (ih_debug_enabled) g_warning
static void ih_event_callback (ik_event_t *event,
inotify_sub *sub,
gboolean file_event);
static gboolean ih_event_callback (ik_event_t *event,
inotify_sub *sub,
gboolean file_event);
static void ih_not_missing_callback (inotify_sub *sub);
/* We share this lock with inotify-kernel.c and inotify-missing.c
......@@ -151,11 +151,13 @@ _ih_fullpath_from_event (ik_event_t *event,
return fullpath;
}
static void
static gboolean
ih_event_callback (ik_event_t *event,
inotify_sub *sub,
gboolean file_event)
{
gboolean interesting;
g_assert (!file_event); /* XXX hardlink support */
if (event->mask & IN_MOVE)
......@@ -166,8 +168,8 @@ ih_event_callback (ik_event_t *event,
if (event->pair && event->pair->wd == event->wd)
{
/* this is a rename */
g_file_monitor_source_handle_event (sub->user_data, G_FILE_MONITOR_EVENT_RENAMED,
event->name, event->pair->name, NULL, event->timestamp);
interesting = g_file_monitor_source_handle_event (sub->user_data, G_FILE_MONITOR_EVENT_RENAMED,
event->name, event->pair->name, NULL, event->timestamp);
}
else
{
......@@ -187,8 +189,8 @@ ih_event_callback (ik_event_t *event,
other = NULL;
/* this is either an incoming or outgoing move */
g_file_monitor_source_handle_event (sub->user_data, ih_mask_to_EventFlags (event->mask),
event->name, NULL, other, event->timestamp);
interesting = g_file_monitor_source_handle_event (sub->user_data, ih_mask_to_EventFlags (event->mask),
event->name, NULL, other, event->timestamp);
if (other)
g_object_unref (other);
......@@ -196,8 +198,8 @@ ih_event_callback (ik_event_t *event,
}
else
/* unpaired event -- no 'other' field */
g_file_monitor_source_handle_event (sub->user_data, ih_mask_to_EventFlags (event->mask),
event->name, NULL, NULL, event->timestamp);
interesting = g_file_monitor_source_handle_event (sub->user_data, ih_mask_to_EventFlags (event->mask),
event->name, NULL, NULL, event->timestamp);
if (event->mask & IN_CREATE)
{
......@@ -230,6 +232,8 @@ ih_event_callback (ik_event_t *event,
g_file_monitor_source_handle_event (sub->user_data, G_FILE_MONITOR_EVENT_CHANGES_DONE_HINT,
event->name, NULL, NULL, event->timestamp);
}
return interesting;
}
static void
......
......@@ -35,6 +35,12 @@
#include "glib-private.h"
/* From inotify(7) */
#define MAX_EVENT_SIZE (sizeof(struct inotify_event) + NAME_MAX + 1)
/* Amount of time to sleep on receipt of uninteresting events */
#define BOREDOM_SLEEP_TIME (100 * G_TIME_SPAN_MILLISECOND)
/* Define limits on the maximum amount of time and maximum amount of
* interceding events between FROM/TO that can be merged.
*/
......@@ -83,17 +89,20 @@ _ik_event_free (ik_event_t *event)
typedef struct
{
GSource source;
GSource source;
GQueue queue;
gpointer fd_tag;
gint fd;
gpointer fd_tag;
GQueue queue;
gint fd;
GHashTable *unmatched_moves;
gboolean is_bored;
} InotifyKernelSource;
static InotifyKernelSource *inotify_source;
static gint64
ik_source_get_ready_time (InotifyKernelSource *iks)
ik_source_get_dispatch_time (InotifyKernelSource *iks)
{
ik_event_t *head;
......@@ -119,42 +128,84 @@ static gboolean
ik_source_can_dispatch_now (InotifyKernelSource *iks,
gint64 now)
{
gint64 ready_time;
gint64 dispatch_time;
ready_time = ik_source_get_ready_time (iks);
dispatch_time = ik_source_get_dispatch_time (iks);
return 0 <= ready_time && ready_time <= now;
return 0 <= dispatch_time && dispatch_time <= now;
}
static void
ik_source_try_to_pair_head (InotifyKernelSource *iks)
static gsize
ik_source_read_some_events (InotifyKernelSource *iks,
gchar *buffer,
gsize buffer_len)
{
ik_event_t *head;
GList *node;
gssize result;
again:
result = read (iks->fd, buffer, buffer_len);
if (result < 0)
{
if (errno == EINTR)
goto again;
if (errno == EAGAIN)
return 0;
node = g_queue_peek_head_link (&iks->queue);
g_error ("inotify read(): %s", g_strerror (errno));
}
else if (result == 0)
g_error ("inotify unexpectedly hit eof");
if (!node)
return;
return result;
}
head = node->data;
static gchar *
ik_source_read_all_the_events (InotifyKernelSource *iks,
gchar *buffer,
gsize buffer_len,
gsize *length_out)
{
gsize n_read;
/* we should only be here if... */
g_assert (head->mask & IN_MOVED_FROM && !head->pair);
n_read = ik_source_read_some_events (iks, buffer, buffer_len);
while ((node = node->next))
/* Check if we might have gotten another event if we had passed in a
* bigger buffer...
*/
if (n_read + MAX_EVENT_SIZE > buffer_len)
{
ik_event_t *candidate = node->data;
gchar *new_buffer;
guint n_readable;
gint result;
/* figure out how many more bytes there are to read */
result = ioctl (iks->fd, FIONREAD, &n_readable);
if (result != 0)
g_error ("inotify ioctl(FIONREAD): %s", g_strerror (errno));
if (candidate->cookie == head->cookie && candidate->mask & IN_MOVED_TO)
if (n_readable != 0)
{
g_queue_remove (&iks->queue, candidate);
candidate->is_second_in_pair = TRUE;
head->pair = candidate;
candidate->pair = head;
return;
/* there is in fact more data. allocate a new buffer, copy
* the existing data, and then append the remaining.
*/
new_buffer = g_malloc (n_read + n_readable);
memcpy (new_buffer, buffer, n_read);
n_read += ik_source_read_some_events (iks, new_buffer + n_read, n_readable);
buffer = new_buffer;
/* There may be new events in the buffer that were added after
* the FIONREAD was performed, but we can't risk getting into
* a loop. We'll get them next time.
*/
}
}
*length_out = n_read;
return buffer;
}
static gboolean
......@@ -163,60 +214,155 @@ ik_source_dispatch (GSource *source,
gpointer user_data)
{
InotifyKernelSource *iks = (InotifyKernelSource *) source;
void (*user_callback) (ik_event_t *event) = (void *) func;
gint64 now = g_source_get_time (source);
gboolean (*user_callback) (ik_event_t *event) = (void *) func;
gboolean interesting = FALSE;
gint64 now;
now = g_source_get_time (source);
/* Only try to fill the queue if we don't already have work to do. */
if (!ik_source_can_dispatch_now (iks, now) &&
g_source_query_unix_fd (source, iks->fd_tag))
if (iks->is_bored || g_source_query_unix_fd (source, iks->fd_tag))
{
gchar buffer[sizeof(struct inotify_event) + NAME_MAX + 1];
gssize result;
gssize offset;
result = read (iks->fd, buffer, sizeof buffer);
if (result < 0)
g_error ("inotify error: %s\n", g_strerror (errno));
else if (result == 0)
g_error ("inotify unexpectedly hit eof");
gchar stack_buffer[4096];
gsize buffer_len;
gchar *buffer;
gsize offset;
/* We want to read all of the available events.
*
* We need to do it in a finite number of steps so that we don't
* get caught in a loop of read() with another process
* continuously adding events each time we drain them.
*
* In the normal case we will have only a few events in the queue,
* so start out by reading into a small stack-allocated buffer.
* Even though we're on a fresh stack frame, there is no need to
* pointlessly blow up with the size of the worker thread stack
* with a huge buffer here.
*
* If the result is large enough to cause us to suspect that
* another event may be pending then we allocate a buffer on the
* heap that can hold all of the events and read (once!) into that
* buffer.
*/
buffer = ik_source_read_all_the_events (iks, stack_buffer, sizeof stack_buffer, &buffer_len);
offset = 0;
while (offset < result)
while (offset < buffer_len)
{
struct inotify_event *event = (struct inotify_event *) (buffer + offset);
struct inotify_event *kevent = (struct inotify_event *) (buffer + offset);
ik_event_t *event;
g_queue_push_tail (&iks->queue, ik_event_new (event, now));
event = ik_event_new (kevent, now);
offset += sizeof (struct inotify_event) + event->len;
if (event->mask & IN_MOVED_TO)
{
ik_event_t *pair;
pair = g_hash_table_lookup (iks->unmatched_moves, GUINT_TO_POINTER (event->cookie));
if (pair != NULL)
{
g_assert (!pair->pair);
g_hash_table_remove (iks->unmatched_moves, GUINT_TO_POINTER (event->cookie));
event->is_second_in_pair = TRUE;
event->pair = pair;
pair->pair = event;
continue;
}
interesting = TRUE;
}
else if (event->mask & IN_MOVED_FROM)
{
gboolean new;
new = g_hash_table_insert (iks->unmatched_moves, GUINT_TO_POINTER (event->cookie), event);
if G_UNLIKELY (!new)
g_warning ("inotify: got IN_MOVED_FROM event with already-pending cookie %#x", event->cookie);
interesting = TRUE;
}
g_queue_push_tail (&iks->queue, event);
}
if (buffer_len == 0)
{
/* We can end up reading nothing if we arrived here due to a
* boredom timer but the stream of events stopped meanwhile.
*
* In that case, we need to switch back to polling the file
* descriptor in the usual way.
*/
g_assert (iks->is_bored);
interesting = TRUE;
}
}
if (!ik_source_can_dispatch_now (iks, now))
ik_source_try_to_pair_head (iks);
if (buffer != stack_buffer)
g_free (buffer);
}
if (ik_source_can_dispatch_now (iks, now))
while (ik_source_can_dispatch_now (iks, now))
{
ik_event_t *event;
/* callback will free the event */
event = g_queue_pop_head (&iks->queue);
if (event->mask & IN_MOVED_TO && !event->pair)
g_hash_table_remove (iks->unmatched_moves, GUINT_TO_POINTER (event->cookie));
G_LOCK (inotify_lock);
(* user_callback) (event);
interesting |= (* user_callback) (event);
G_UNLOCK (inotify_lock);
}
g_source_set_ready_time (source, ik_source_get_ready_time (iks));
/* The queue gets blocked iff we have unmatched moves */
g_assert ((iks->queue.length > 0) == (g_hash_table_size (iks->unmatched_moves) > 0));
/* Here's where we decide what will wake us up next.
*
* If the last event was interesting then we will wake up on the fd or
* when the timeout is reached on an unpaired move (if any).
*
* If the last event was uninteresting then we will wake up after the
* shorter of the boredom sleep or any timeout for a unpaired move.
*/
if (interesting)
{
if (iks->is_bored)
{
g_source_modify_unix_fd (source, iks->fd_tag, G_IO_IN);
iks->is_bored = FALSE;
}
g_source_set_ready_time (source, ik_source_get_dispatch_time (iks));
}
else
{
guint64 dispatch_time = ik_source_get_dispatch_time (iks);
guint64 boredom_time = now + BOREDOM_SLEEP_TIME;
if (!iks->is_bored)
{
g_source_modify_unix_fd (source, iks->fd_tag, 0);
iks->is_bored = TRUE;
}
g_source_set_ready_time (source, MIN (dispatch_time, boredom_time));
}
return TRUE;
}
static InotifyKernelSource *
ik_source_new (void (* callback) (ik_event_t *event))
ik_source_new (gboolean (* callback) (ik_event_t *event))
{
static GSourceFuncs source_funcs = {
NULL, NULL,
......@@ -231,13 +377,21 @@ ik_source_new (void (* callback) (ik_event_t *event))
g_source_set_name (source, "inotify kernel source");
iks->unmatched_moves = g_hash_table_new (NULL, NULL);
iks->fd = inotify_init1 (IN_CLOEXEC);
if (iks->fd < 0)
iks->fd = inotify_init ();
if (iks->fd >= 0)
iks->fd_tag = g_source_add_unix_fd (source, iks->fd, G_IO_IN);
{
GError *error = NULL;
g_unix_set_fd_nonblocking (iks->fd, TRUE, &error);
g_assert_no_error (error);
iks->fd_tag = g_source_add_unix_fd (source, iks->fd, G_IO_IN);
}
g_source_set_callback (source, (GSourceFunc) callback, NULL, NULL);
......@@ -247,7 +401,7 @@ ik_source_new (void (* callback) (ik_event_t *event))
}
gboolean
_ik_startup (void (*cb)(ik_event_t *event))
_ik_startup (gboolean (*cb)(ik_event_t *event))
{
if (g_once_init_enter (&inotify_source))
g_once_init_leave (&inotify_source, ik_source_new (cb));
......
......@@ -40,7 +40,7 @@ typedef struct ik_event_s {
gint64 timestamp; /* monotonic time that this was created */
} ik_event_t;
gboolean _ik_startup (void (*cb) (ik_event_t *event));
gboolean _ik_startup (gboolean (*cb) (ik_event_t *event));
ik_event_t *_ik_event_new_dummy (const char *name,
gint32 wd,
......
......@@ -100,13 +100,13 @@ static GHashTable * wd_file_hash = NULL;
static ip_watched_dir_t *ip_watched_dir_new (const char *path,
int wd);
static void ip_watched_dir_free (ip_watched_dir_t *dir);
static void ip_event_callback (ik_event_t *event);
static gboolean ip_event_callback (ik_event_t *event);
static void (*event_callback)(ik_event_t *event, inotify_sub *sub, gboolean file_event);
static gboolean (*event_callback)(ik_event_t *event, inotify_sub *sub, gboolean file_event);
gboolean
_ip_startup (void (*cb)(ik_event_t *event, inotify_sub *sub, gboolean file_event))
_ip_startup (gboolean (*cb)(ik_event_t *event, inotify_sub *sub, gboolean file_event))
{
static gboolean initialized = FALSE;
static gboolean result = FALSE;
......@@ -436,15 +436,17 @@ ip_wd_delete (gpointer data,
ip_watched_dir_free (dir);
}
static void
static gboolean
ip_event_dispatch (GList *dir_list,
GList *file_list,
ik_event_t *event)
{
gboolean interesting = FALSE;
GList *l;
if (!event)
return;
return FALSE;
for (l = dir_list; l; l = l->next)
{
......@@ -487,7 +489,7 @@ ip_event_dispatch (GList *dir_list,
* the filename doesn't match
*/
event_callback (event, sub, FALSE);
interesting |= event_callback (event, sub, FALSE);
if (sub->hardlinks)
{
......@@ -516,14 +518,17 @@ ip_event_dispatch (GList *dir_list,
{
inotify_sub *sub = subl->data;
event_callback (event, sub, TRUE);
interesting |= event_callback (event, sub, TRUE);
}
}
return interesting;
}
static void
static gboolean
ip_event_callback (ik_event_t *event)
{
gboolean interesting = FALSE;
GList* dir_list = NULL;
GList *file_list = NULL;
......@@ -531,14 +536,14 @@ ip_event_callback (ik_event_t *event)
if (event->mask & IN_IGNORED)
{
_ik_event_free (event);
return;
return TRUE;
}
dir_list = g_hash_table_lookup (wd_dir_hash, GINT_TO_POINTER (event->wd));
file_list = g_hash_table_lookup (wd_file_hash, GINT_TO_POINTER (event->wd));
if (event->mask & IP_INOTIFY_DIR_MASK)
ip_event_dispatch (dir_list, file_list, event);
interesting |= ip_event_dispatch (dir_list, file_list, event);
/* Only deliver paired events if the wds are separate */
if (event->pair && event->pair->wd != event->wd)
......@@ -547,7 +552,7 @@ ip_event_callback (ik_event_t *event)
file_list = g_hash_table_lookup (wd_file_hash, GINT_TO_POINTER (event->pair->wd));
if (event->pair->mask & IP_INOTIFY_DIR_MASK)
ip_event_dispatch (dir_list, file_list, event->pair);
interesting |= ip_event_dispatch (dir_list, file_list, event->pair);
}
/* We have to manage the missing list
......@@ -565,6 +570,8 @@ ip_event_callback (ik_event_t *event)
}
_ik_event_free (event);
return interesting;
}
const char *
......
......@@ -25,7 +25,7 @@
#include "inotify-kernel.h"
#include "inotify-sub.h"
gboolean _ip_startup (void (*event_cb)(ik_event_t *event, inotify_sub *sub, gboolean file_event));
gboolean _ip_startup (gboolean (*event_cb)(ik_event_t *event, inotify_sub *sub, gboolean file_event));
gboolean _ip_start_watching (inotify_sub *sub);
gboolean _ip_stop_watching (inotify_sub *sub);
const char * _ip_get_path_for_wd (gint32 wd);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment