Commit 1929fd51 authored by Martyn Russell

tracker-extract: Some code clean ups from review

parent 31442763
......@@ -29,7 +29,9 @@ tracker_coalesce
tracker_coalesce_strip
tracker_merge
tracker_merge_const
tracker_getline
tracker_text_normalize
tracker_text_validate_utf8
tracker_date_format_to_iso8601
tracker_date_guess
</SECTION>
......
......@@ -360,14 +360,14 @@ tracker_text_normalize (const gchar *text,
/**
* tracker_text_validate_utf8:
* @text: the text to validate
* @text_len: length of @text, or -1 if NIL-terminated
* @text_len: length of @text, or -1 if NUL-terminated
* @str: the string where to place the validated UTF-8 characters, or %NULL if
* not needed.
* @p_utf8_len: Output number of valid UTF-8 bytes found, or %NULL if not needed
* @valid_len: Output number of valid UTF-8 bytes found, or %NULL if not needed
*
* This function iterates through @text checking for UTF-8 validity
* using g_utf8_validate(), appends the first chunk of valid characters
* to @str, and gives the number of valid UTF-8 bytes in @p_utf8_len.
* to @str, and gives the number of valid UTF-8 bytes in @valid_len.
*
* Returns: %TRUE if some bytes were found to be valid, %FALSE otherwise.
*
......@@ -377,7 +377,7 @@ gboolean
tracker_text_validate_utf8 (const gchar *text,
gsize text_len,
GString **str,
gsize *p_utf8_len)
gsize *valid_len)
{
gsize len_to_validate;
......@@ -401,8 +401,8 @@ tracker_text_validate_utf8 (const gchar *text,
}
/* If utf8 len output required... */
if (p_utf8_len) {
*p_utf8_len = end - text;
if (valid_len) {
*valid_len = end - text;
}
return TRUE;
......@@ -834,22 +834,8 @@ tracker_date_guess (const gchar *date_string)
return g_strdup (date_string);
}
/**
* tracker_getline:
* @linebuf: Buffer to write into
* @n: Max bytes of linebuf
* @stream: Filestream to read from
*
* Reads an entire line from stream, storing the address of the buffer
* containing the text into *lineptr. The buffer is null-terminated
* and includes the newline character, if one was found.
*
* Read GNU getline()'s manpage for more information
*
* Since: 0.9
**/
#ifndef HAVE_GETLINE
static gint
my_igetdelim (gchar **linebuf,
guint *linebufsz,
......@@ -904,15 +890,54 @@ my_igetdelim (gchar **linebuf,
return idx;
}
/**
* tracker_getline:
* @lineptr: Buffer to write into
* @n: Pointer to the size, in bytes, of the buffer at *lineptr
* @stream: Filestream to read from
*
* Reads an entire line from stream, storing the address of the buffer
* containing the text into *lineptr. The buffer is null-terminated
* and includes the newline character, if one was found.
*
* Read GNU getline()'s manpage for more information
*
* Returns: the number of characters read, including the delimiter
* character, but not including the terminating NUL byte. This value
* can be used to handle embedded NUL bytes in the line read. Upon
* failure, -1 is returned.
*
* Since: 0.9
**/
gssize
tracker_getline (gchar **lineptr,
gsize *n,
FILE *stream)
FILE *stream)
{
return my_igetdelim (lineptr, n, '\n', stream);
}
#else
/**
* tracker_getline:
* @lineptr: Buffer to write into
* @n: Max bytes of linebuf
* @stream: Filestream to read from
*
* Reads an entire line from stream, storing the address of the buffer
* containing the text into *lineptr. The buffer is null-terminated
* and includes the newline character, if one was found.
*
* Read GNU getline()'s manpage for more information
*
* Returns: the number of characters read, including the delimiter
* character, but not including the terminating NUL byte. This value
* can be used to handle embedded NUL bytes in the line read. Upon
* failure, -1 is returned.
*
* Since: 0.9
**/
gssize
tracker_getline (gchar **lineptr,
gsize *n,
......@@ -920,4 +945,5 @@ tracker_getline (gchar **lineptr,
{
return getline (lineptr, n, stream);
}
#endif /* HAVE_GETLINE */
......@@ -42,7 +42,7 @@ gchar* tracker_text_normalize (const gchar *text,
gboolean tracker_text_validate_utf8 (const gchar *text,
gsize text_len,
GString **str,
gsize *p_utf8_len);
gsize *valid_len);
gchar* tracker_date_guess (const gchar *date_string);
gchar* tracker_date_format_to_iso8601 (const gchar *date_string,
const gchar *format);
......
......@@ -71,7 +71,7 @@ enum {
static ObjectToKeyFile conversions[] = {
{ G_TYPE_INT, "verbosity", GROUP_GENERAL, "Verbosity" },
{ G_TYPE_INT, "max_bytes", GROUP_GENERAL, "Max_Bytes" },
{ G_TYPE_INT, "max-bytes", GROUP_GENERAL, "MaxBytes" },
};
G_DEFINE_TYPE (TrackerConfig, tracker_config, TRACKER_TYPE_CONFIG_FILE);
......@@ -99,9 +99,9 @@ tracker_config_class_init (TrackerConfigClass *klass)
g_object_class_install_property (object_class,
PROP_MAX_BYTES,
g_param_spec_int ("max_bytes",
g_param_spec_int ("max-bytes",
"Max Bytes",
" Maximum number of UTF-8 bytes to extract [0,G_MAXINT]",
" Maximum number of UTF-8 bytes to extract per file [0->G_MAXINT]",
0,
G_MAXINT,
DEFAULT_MAX_BYTES,
......@@ -362,12 +362,12 @@ tracker_config_set_max_bytes (TrackerConfig *config,
g_return_if_fail (TRACKER_IS_CONFIG (config));
if (!tracker_keyfile_object_validate_int (config, "max_bytes", value)) {
if (!tracker_keyfile_object_validate_int (config, "max-bytes", value)) {
return;
}
priv = TRACKER_CONFIG_GET_PRIVATE (config);
priv->max_bytes = value;
g_object_notify (G_OBJECT (config), "max_bytes");
g_object_notify (G_OBJECT (config), "max-bytes");
}
......@@ -50,10 +50,10 @@ TrackerConfig *tracker_config_new (void);
gboolean tracker_config_save (TrackerConfig *config);
gint tracker_config_get_verbosity (TrackerConfig *config);
gint tracker_config_get_max_bytes (TrackerConfig *config);
void tracker_config_set_verbosity (TrackerConfig *config,
gint value);
gint tracker_config_get_max_bytes (TrackerConfig *config);
void tracker_config_set_max_bytes (TrackerConfig *config,
gint value);
......
......@@ -394,7 +394,7 @@ read_32bit (const guint8 *buffer)
* @param chunk_size Number of valid bytes in the input buffer
* @param is_ansi If %TRUE, input text should be encoded in CP1252, and
* in UTF-16 otherwise.
* @param p_words_remaining Pointer to #gsize specifying how many bytes
* @param bytes_remaining Pointer to #gsize specifying how many bytes
* should still be considered.
* @param content Pointer to a #GString where the output normalized words
* will be appended.
......@@ -403,8 +403,8 @@ static void
msoffice_convert_and_normalize_chunk (guint8 *buffer,
gsize chunk_size,
gboolean is_ansi,
gsize *p_bytes_remaining,
GString **p_content)
gsize *bytes_remaining,
GString **content)
{
gsize n_bytes_utf8;
gchar *converted_text;
......@@ -412,13 +412,15 @@ msoffice_convert_and_normalize_chunk (guint8 *buffer,
g_return_if_fail (buffer != NULL);
g_return_if_fail (chunk_size > 0);
g_return_if_fail (p_bytes_remaining != NULL);
g_return_if_fail (p_content != NULL);
g_return_if_fail (bytes_remaining != NULL);
g_return_if_fail (content != NULL);
/* chunks can have different encoding
* TODO: Using g_iconv, this extra heap allocation could be
* avoided, re-using over and over again the same output buffer
* for the UTF-8 encoded string */
*
* TODO: Using g_iconv, this extra heap allocation could be
* avoided, re-using over and over again the same output buffer
* for the UTF-8 encoded string
*/
converted_text = g_convert (buffer,
chunk_size,
"UTF-8",
......@@ -430,18 +432,18 @@ msoffice_convert_and_normalize_chunk (guint8 *buffer,
if (converted_text) {
gsize len_to_validate;
len_to_validate = MIN (*p_bytes_remaining, n_bytes_utf8);
len_to_validate = MIN (*bytes_remaining, n_bytes_utf8);
if (tracker_text_validate_utf8 (converted_text,
len_to_validate,
p_content,
content,
NULL)) {
/* A whitespace is added to separate next strings appended */
g_string_append_c (*p_content, ' ');
g_string_append_c (*content, ' ');
}
/* Update accumulated UTF-8 bytes read */
*p_bytes_remaining -= len_to_validate;
*bytes_remaining -= len_to_validate;
} else {
g_warning ("Couldn't convert %" G_GSIZE_FORMAT " bytes from %s to UTF-8: %s",
......
......@@ -74,11 +74,11 @@ static gchar *
extract_oasis_content (const gchar *uri,
gsize n_bytes)
{
const gchar *argv[4];
gchar *text = NULL;
gchar *path;
GIOChannel *channel;
GPid pid;
const gchar *argv[4];
gchar *text = NULL;
gchar *path;
GIOChannel *channel;
GPid pid;
/* Newly allocated string with the file path */
path = g_filename_from_uri (uri, NULL, NULL);
......
......@@ -38,9 +38,9 @@ static GString *
get_string_in_locale (GString *s)
{
GError *error = NULL;
gchar *str;
gsize bytes_read;
gsize bytes_written;
gchar *str;
gsize bytes_read;
gsize bytes_written;
str = g_locale_to_utf8 (s->str,
s->len,
......@@ -48,7 +48,7 @@ get_string_in_locale (GString *s)
&bytes_written,
&error);
if (error) {
g_debug (" Conversion to UTF-8 read %d bytes, wrote %d bytes",
g_debug (" Conversion to UTF-8 read %" G_GSIZE_FORMAT " bytes, wrote %" G_GSIZE_FORMAT " bytes",
bytes_read,
bytes_written);
g_message ("Could not convert string from locale to UTF-8, %s",
......@@ -83,8 +83,8 @@ tracker_iochannel_read_text (GIOChannel *channel,
gboolean close_channel)
{
GString *s = NULL;
gsize n_bytes_remaining = max_bytes;
guint n_retries = MAX_RETRIES;
gsize n_bytes_remaining = max_bytes;
guint n_retries = MAX_RETRIES;
g_return_val_if_fail (channel, NULL);
g_return_val_if_fail (max_bytes > 0, NULL);
......@@ -104,10 +104,10 @@ tracker_iochannel_read_text (GIOChannel *channel,
*/
while (n_bytes_remaining > 0 &&
n_retries > 0) {
gchar buf[BUFFER_SIZE];
GError *error = NULL;
gssize bytes_read;
GIOStatus status;
gchar buf[BUFFER_SIZE];
GError *error = NULL;
gssize bytes_read;
GIOStatus status;
/* Try to read from channel */
status = g_io_channel_read_chars (channel,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment