Commit 2bfddf16 authored by Alexander Larsson's avatar Alexander Larsson

Remove GUtf8InputStream for now

It turns out that the way this worked did not work out for the current
main usecase (gedit) due to issues with how this is best integrated
with GtkTextView. So, in order to not have to support an unused non-ideal
API forever we remove this before it has been in a stable release.

The basic feature seems to have some utility though, so we hope for it
to eventually return in a better form.
parent c47e1949
......@@ -272,7 +272,6 @@ libgio_2_0_la_SOURCES = \
gthreadedresolver.h \
gunionvolumemonitor.c \
gunionvolumemonitor.h \
gutf8inputstream.c \
gvfs.c \
gvolume.c \
gvolumemonitor.c \
......@@ -407,7 +406,6 @@ gio_headers = \
gtcpconnection.h \
gthreadedsocketservice.h\
gthemedicon.h \
gutf8inputstream.h \
gvfs.h \
gvolume.h \
gvolumemonitor.h \
......
......@@ -90,7 +90,6 @@
#include <gio/gthreadedsocketservice.h>
#include <gio/gsrvtarget.h>
#include <gio/gthemedicon.h>
#include <gio/gutf8inputstream.h>
#include <gio/gvfs.h>
#include <gio/gvolume.h>
#include <gio/gvolumemonitor.h>
......
......@@ -1319,13 +1319,6 @@ g_unix_fd_list_steal_fds
#endif
#endif
#if IN_HEADER(__G_UTF8_INPUT_STREAM_H__)
#if IN_FILE(__G_UTF8_INPUT_STREAM_C__)
g_utf8_input_stream_get_type G_GNUC_CONST
g_utf8_input_stream_new
#endif
#endif
#if IN_HEADER(__G_ZLIB_COMPRESSOR_H__)
#if IN_FILE(__G_ZLIB_COMPRESSOR_C__)
g_zlib_compressor_get_type G_GNUC_CONST
......
/* GIO - GLib Input, Output and Streaming Library
*
* Copyright (C) 2009 Paolo Borelli
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General
* Public License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307, USA.
*
* Author: Paolo Borelli <pborelli@gnome.org>
*/
#include "config.h"
#include "gutf8inputstream.h"
#include "ginputstream.h"
#include "gcancellable.h"
#include "gioerror.h"
#include "glibintl.h"
#include "gioalias.h"
/**
* SECTION:gutf8inputstream
* @short_description: Input Stream performing UTF8 validation
* @include: gio/gio.h
* @see_also: #GFilterInputStream, #GInputStream
*
* utf8 input stream implements #GFilterInputStream and provides
* UTF8 validation of the data read from the stream.
* If the supplied buffer is long enough (see below), the returned
* data is guaranteed to end at utf8 character boundaries.
* <note>
* <para>
* Extra care must be taken when performing "small" reads:
* unless you have control of the data being read, you need
* to always supply a buffer at least 6 bytes long, otherwise
* the returned content may be an incomplete utf8 byte sequence.
* </para>
* </note>
*
* To create a utf8 input stream, use g_utf8_input_stream_new().
*
**/
/* Size, in bytes, of each of the two partial-character buffers kept in
 * the private data below. */
#define MAX_UNICHAR_LEN 6
/* Per-instance state carried from one read call to the next. */
struct _GUtf8InputStreamPrivate {
/* buffer containing trailing partial character not yet returned */
char buffer[MAX_UNICHAR_LEN];
/* number of bytes currently held in buffer */
gsize len;
/* buffer containing partial character returned in a "small read"
* but not yet validated */
char small_read_buffer[MAX_UNICHAR_LEN];
/* number of bytes currently held in small_read_buffer */
gsize small_read_len;
};
/* Forward declaration: the read implementation is defined further down
 * in this file but is referenced earlier, from the class initializer. */
static gssize g_utf8_input_stream_read (GInputStream *stream,
void *buffer,
gsize count,
GCancellable *cancellable,
GError **error);
/* Defines the GUtf8InputStream type with G_TYPE_FILTER_INPUT_STREAM as
 * its parent type. */
G_DEFINE_TYPE (GUtf8InputStream,
g_utf8_input_stream,
G_TYPE_FILTER_INPUT_STREAM)
/* Class initializer: reserves room for the private instance data and
 * points the GInputStream read vfunc at the validating implementation. */
static void
g_utf8_input_stream_class_init (GUtf8InputStreamClass *klass)
{
  g_type_class_add_private (klass, sizeof (GUtf8InputStreamPrivate));

  G_INPUT_STREAM_CLASS (klass)->read_fn = g_utf8_input_stream_read;
}
/* Instance initializer: looks up the private data area for @stream and
 * caches the pointer in stream->priv. */
static void
g_utf8_input_stream_init (GUtf8InputStream *stream)
{
  GUtf8InputStreamPrivate *priv;

  priv = G_TYPE_INSTANCE_GET_PRIVATE (stream,
                                      G_TYPE_UTF8_INPUT_STREAM,
                                      GUtf8InputStreamPrivate);
  stream->priv = priv;
}
/**
 * g_utf8_input_stream_new:
 * @base_stream: a #GInputStream.
 *
 * Creates a new #GUtf8InputStream that wraps @base_stream; the new
 * object is constructed with its "base-stream" property set to
 * @base_stream.
 *
 * Returns: a #GInputStream for the given @base_stream, or %NULL if
 *          @base_stream is not a #GInputStream.
 *
 * Since: 2.24
 **/
GInputStream *
g_utf8_input_stream_new (GInputStream *base_stream)
{
  g_return_val_if_fail (G_IS_INPUT_STREAM (base_stream), NULL);

  return g_object_new (G_TYPE_UTF8_INPUT_STREAM,
                       "base-stream", base_stream,
                       NULL);
}
/* Stashes the trailing bytes of an incomplete character in the private
 * remainder buffer, from which get_remainder() later hands them back.
 *
 * @stream:    stream whose private remainder buffer is written
 * @remainder: first byte of the incomplete sequence
 * @len:       number of bytes to keep; must be below MAX_UNICHAR_LEN
 */
static void
store_remainder (GUtf8InputStream *stream,
                 const char       *remainder,
                 gsize             len)
{
  GUtf8InputStreamPrivate *priv = stream->priv;
  gsize i;

  /* we store a remainder only after having
   * consumed the previous */
  g_assert (priv->len == 0);

  /* priv->buffer holds MAX_UNICHAR_LEN bytes and get_remainder()
   * asserts that the stored length is strictly below that, so refuse
   * anything larger here instead of writing past the array */
  g_assert (len < MAX_UNICHAR_LEN);

  for (i = 0; i < len; ++i)
    priv->buffer[i] = remainder[i];

  priv->len = len;
}
/* Moves up to @count bytes of the stored remainder into @buffer.
 *
 * Bytes that do not fit stay in the private buffer, shifted down to its
 * start, and the stored length is reduced accordingly.
 *
 * Returns: the number of bytes copied into @buffer.
 */
static gssize
get_remainder (GUtf8InputStream *stream,
               char             *buffer,
               gsize             count)
{
  GUtf8InputStreamPrivate *priv = stream->priv;
  gsize taken, left, k;

  g_assert (priv->len < MAX_UNICHAR_LEN);

  taken = MIN (count, priv->len);
  left = priv->len - taken;

  for (k = 0; k < taken; k++)
    buffer[k] = priv->buffer[k];

  /* keep whatever did not fit, moved to the front of the buffer */
  for (k = 0; k < left; k++)
    priv->buffer[k] = priv->buffer[taken + k];

  priv->len = left;

  return (gssize) taken;
}
/* Appends @len bytes from @buffer to the pending "small read" bytes,
 * i.e. bytes already returned to the caller but not yet validated. */
static void
store_small_read (GUtf8InputStream *stream,
                  const char       *buffer,
                  gsize             len)
{
  GUtf8InputStreamPrivate *priv = stream->priv;
  char *dest;
  gsize k;

  /* a sequence that reaches MAX_UNICHAR_LEN is either valid or
   * invalid, so it should already have been dropped from the buffer */
  g_assert (priv->small_read_len + len < MAX_UNICHAR_LEN);

  dest = priv->small_read_buffer + priv->small_read_len;
  for (k = 0; k < len; k++)
    dest[k] = buffer[k];

  priv->small_read_len += len;
}
/* Tries to complete the character left pending by earlier "small reads"
 * using the first bytes of @buffer.
 *
 * The new bytes are copied after the pending ones (as many as fit in
 * MAX_UNICHAR_LEN) and the combined sequence is decoded.
 *
 * returns:
 * the number of bytes of @buffer that complete the pending character
 * (always > 0); the pending state is then cleared.
 *
 * -1 if the combined bytes are invalid; the pending state is cleared.
 *
 * 0 if nothing is pending, or if the combined bytes are still an
 * incomplete character (small_read_len is left unchanged in that case).
 */
static gssize
validate_small_read (GUtf8InputStream *stream,
                     const char       *buffer,
                     gsize             len)
{
  GUtf8InputStreamPrivate *priv = stream->priv;
  gsize pending, ncopy, k;
  gunichar ch;
  char *next;
  gssize consumed;

  pending = priv->small_read_len;
  if (pending == 0)
    return 0;

  ncopy = MIN (len, MAX_UNICHAR_LEN - pending);
  for (k = 0; k < ncopy; k++)
    priv->small_read_buffer[pending + k] = buffer[k];

  ch = g_utf8_get_char_validated (priv->small_read_buffer, pending + ncopy);

  if (ch == (gunichar) -1)
    {
      priv->small_read_len = 0;
      return -1;
    }

  if (ch == (gunichar) -2)
    return 0;

  /* bytes of the decoded character that came from @buffer */
  next = g_utf8_next_char (priv->small_read_buffer);
  consumed = next - (priv->small_read_buffer + pending);
  g_assert (consumed > 0);

  /* reset the buffer */
  priv->small_read_len = 0;

  return consumed;
}
/* read_fn implementation: hands out bytes from the base stream after
 * checking that they are valid UTF-8.
 *
 * Steps, in order:
 *  - bytes left over from the previous call (get_remainder) are placed
 *    at the start of @buffer; if they alone fill @count they are
 *    recorded as a "small read" and returned without reading from the
 *    base stream;
 *  - the rest of @buffer is filled from the base stream; a negative
 *    result from that read is returned as -1 with no further work;
 *  - pending small-read bytes are checked together with the new data
 *    (validate_small_read), then the data after them is validated;
 *  - if validation stops on a trailing sequence that is shorter than
 *    MAX_UNICHAR_LEN, is reported as incomplete, and the base stream
 *    returned new bytes, the valid prefix is returned and the tail is
 *    kept for the next call; when there is no valid prefix the whole
 *    chunk is returned as a small read to be validated later.
 *
 * Returns: the number of bytes placed in @buffer; -1 if the base stream
 * read failed; or -1 with @error set to G_IO_ERROR_INVALID_DATA when
 * the data is not valid UTF-8.
 */
static gssize
g_utf8_input_stream_read (GInputStream *stream,
void *buffer,
gsize count,
GCancellable *cancellable,
GError **error)
{
GUtf8InputStream *ustream;
GUtf8InputStreamPrivate *priv;
GInputStream *base_stream;
gsize nvalid, remainder;
gssize oldread, nread, offset;
gboolean valid, eof;
const gchar *end;
ustream = G_UTF8_INPUT_STREAM (stream);
/* NOTE(review): priv is assigned here but not read again in this
 * function; the helpers fetch stream->priv themselves */
priv = ustream->priv;
/* if we had previous incomplete data put it at the start of the buffer */
oldread = get_remainder (ustream, buffer, count);
/* if we have already reached count, it is "small read":
* store it to validate later */
if (oldread == count)
{
store_small_read (ustream, buffer, oldread);
return oldread;
}
/* fill the space after the remainder bytes from the wrapped stream */
base_stream = g_filter_input_stream_get_base_stream (G_FILTER_INPUT_STREAM (stream));
nread = g_input_stream_read (base_stream,
(char *)buffer + oldread,
count - oldread,
cancellable,
error);
if (nread < 0)
return -1;
/* eof means the base stream returned no new bytes on this call */
eof = (nread == 0);
/* take into account bytes we put in the buffer */
nread += oldread;
/* validate previous small reads; offset is the number of leading bytes
 * of buffer that completed a pending character (0 if none) */
offset = validate_small_read (ustream, buffer, nread);
if (offset < 0)
goto error;
/* validate everything after those leading bytes */
valid = g_utf8_validate ((char *)buffer + offset, nread - offset, &end);
/* nvalid is measured from the start of buffer, so it includes offset */
nvalid = end - (char *)buffer;
if (valid)
return nread;
remainder = nread - nvalid;
/* if validation failed in the last bytes and the byte
* sequence is an incomplete character and EOF is not reached,
* try to read further to see if we stopped in the middle
* of a character */
if ((remainder < MAX_UNICHAR_LEN) &&
(!eof) &&
(g_utf8_get_char_validated ((char *)buffer + nvalid, remainder) == (gunichar)-2))
{
if (nvalid == 0)
{
/* A "small" read: store it to validate later */
store_small_read (ustream, buffer, nread);
return nread;
}
/* keep the incomplete tail for the next call and return only the
 * validated prefix */
store_remainder (ustream, (char *)buffer + nvalid, remainder);
return nvalid;
}
/* reached by goto from the small-read check, or by falling through when
 * the invalid tail is not a recoverable incomplete character */
error:
g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
_("Invalid UTF-8 sequence in input"));
return -1;
}
#define __G_UTF8_INPUT_STREAM_C__
#include "gioaliasdef.c"
/* GIO - GLib Input, Output and Streaming Library
*
* Copyright (C) 2009 Paolo Borelli
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General
* Public License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307, USA.
*
* Author: Paolo Borelli <pborelli@gnome.org>
*/
#if !defined (__GIO_GIO_H_INSIDE__) && !defined (GIO_COMPILATION)
#error "Only <gio/gio.h> can be included directly."
#endif
#ifndef __G_UTF8_INPUT_STREAM_H__
#define __G_UTF8_INPUT_STREAM_H__
#include <gio/ginputstream.h>
#include <gio/gfilterinputstream.h>
G_BEGIN_DECLS
/* Type identifier plus the instance/class cast and type-check macros
 * for GUtf8InputStream. */
#define G_TYPE_UTF8_INPUT_STREAM (g_utf8_input_stream_get_type ())
#define G_UTF8_INPUT_STREAM(o) (G_TYPE_CHECK_INSTANCE_CAST ((o), G_TYPE_UTF8_INPUT_STREAM, GUtf8InputStream))
#define G_UTF8_INPUT_STREAM_CLASS(k) (G_TYPE_CHECK_CLASS_CAST((k), G_TYPE_UTF8_INPUT_STREAM, GUtf8InputStreamClass))
#define G_IS_UTF8_INPUT_STREAM(o) (G_TYPE_CHECK_INSTANCE_TYPE ((o), G_TYPE_UTF8_INPUT_STREAM))
#define G_IS_UTF8_INPUT_STREAM_CLASS(k) (G_TYPE_CHECK_CLASS_TYPE ((k), G_TYPE_UTF8_INPUT_STREAM))
#define G_UTF8_INPUT_STREAM_GET_CLASS(o) (G_TYPE_INSTANCE_GET_CLASS ((o), G_TYPE_UTF8_INPUT_STREAM, GUtf8InputStreamClass))
/**
* GUtf8InputStream:
* @parent_instance: a #GFilterInputStream.
*
* An implementation of #GFilterInputStream that performs UTF8 validation.
*
* Since: 2.24
**/
/* NOTE(review): only the class and private typedefs are declared here;
 * the GUtf8InputStream typedef is presumably provided by a shared types
 * header -- verify. */
typedef struct _GUtf8InputStreamClass GUtf8InputStreamClass;
typedef struct _GUtf8InputStreamPrivate GUtf8InputStreamPrivate;
struct _GUtf8InputStream
{
GFilterInputStream parent_instance;
/*< private >*/
/* pointer to the per-instance private data */
GUtf8InputStreamPrivate *priv;
};
/* Class structure for GUtf8InputStream: the parent class structure
 * followed by five reserved function-pointer slots. */
struct _GUtf8InputStreamClass
{
GFilterInputStreamClass parent_class;
/*< private >*/
/* Padding for future expansion */
void (*_g_reserved1) (void);
void (*_g_reserved2) (void);
void (*_g_reserved3) (void);
void (*_g_reserved4) (void);
void (*_g_reserved5) (void);
};
/* Type accessor used by the G_TYPE_UTF8_INPUT_STREAM macro. */
GType g_utf8_input_stream_get_type (void) G_GNUC_CONST;
/* Constructor: wraps base_stream in a new UTF-8 validating stream. */
GInputStream *g_utf8_input_stream_new (GInputStream *base_stream);
G_END_DECLS
#endif /* __G_UTF8_INPUT_STREAM_H__ */
......@@ -26,7 +26,6 @@ TEST_PROGS += \
converter-stream \
data-input-stream \
data-output-stream \
utf8-input-stream \
g-icon \
buffered-input-stream \
sleepy-stream \
......@@ -72,9 +71,6 @@ data_input_stream_LDADD = $(progs_ldadd)
data_output_stream_SOURCES = data-output-stream.c
data_output_stream_LDADD = $(progs_ldadd)
utf8_input_stream_SOURCES = utf8-input-stream.c
utf8_input_stream_LDADD = $(progs_ldadd)
filter_cat_SOURCES = filter-cat.c
filter_cat_LDADD = $(progs_ldadd)
......
/* GIO - GLib Input, Output and Streaming Library
*
* Copyright (C) 2009 Paolo Borelli
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General
* Public License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307, USA.
*
* Author: Paolo Borelli <pborelli@gnome.org>
*/
#include <glib/glib.h>
#include <gio/gio.h>
#include <string.h>
/* Reads all of @str through a GUtf8InputStream in a single call and
 * checks the outcome.
 *
 * @str:            NUL-terminated input fed to the stream
 * @expected_nread: expected return value of the read; a negative value
 *                  means the read must fail with G_IO_ERROR_INVALID_DATA
 * @expected_nchar: expected number of UTF-8 characters read back (only
 *                  checked when @expected_nread is not negative)
 */
static void
do_test_read (const char *str, gssize expected_nread, glong expected_nchar)
{
  GInputStream *base;
  GInputStream *in;
  gssize len, n;
  char *buf;
  GError *err = NULL;

  len = strlen (str);

  base = g_memory_input_stream_new_from_data (str, -1, NULL);
  in = g_utf8_input_stream_new (base);
  g_object_unref (base);

  /* one extra zeroed byte keeps buf NUL-terminated even after a read
   * that fills all len bytes; g_assert_cmpstr() and
   * g_utf8_strlen (buf, -1) below both require a terminated string */
  buf = g_new0 (char, len + 1);

  n = g_input_stream_read (in, buf, len, NULL, &err);
  g_assert_cmpint (n, ==, expected_nread);

  if (expected_nread < 0)
    {
      g_assert_error (err, G_IO_ERROR, G_IO_ERROR_INVALID_DATA);
      /* the error belongs to us; release it */
      g_clear_error (&err);
    }
  else
    {
      g_assert_cmpstr (str, ==, buf);
      g_assert_cmpint (g_utf8_strlen (buf, -1), ==, expected_nchar);
      g_assert (err == NULL);
    }

  g_free (buf);
  g_object_unref (in);
}
/* Reads @str through a GUtf8InputStream in two calls and checks both
 * results.
 *
 * @str:             NUL-terminated input fed to the stream
 * @chunk_len:       number of bytes requested by the first read
 * @expected_nread1: expected return value of the first read (which must
 *                   not report an error)
 * @expected_nread2: expected return value of the second read, which
 *                   asks for everything that is left; a negative value
 *                   means it must fail with G_IO_ERROR_INVALID_DATA
 * @expected_nchar:  expected number of UTF-8 characters read back (only
 *                   checked when @expected_nread2 is not negative)
 */
static void
do_test_read_partial (const char *str,
                      gssize      chunk_len,
                      gssize      expected_nread1,
                      gssize      expected_nread2,
                      glong       expected_nchar)
{
  GInputStream *base;
  GInputStream *in;
  gssize len, n1, n2;
  char *buf;
  GError *err = NULL;

  len = strlen (str);

  base = g_memory_input_stream_new_from_data (str, -1, NULL);
  in = g_utf8_input_stream_new (base);
  g_object_unref (base);

  /* one extra zeroed byte keeps buf NUL-terminated once both reads have
   * filled all len bytes; g_assert_cmpstr() and g_utf8_strlen (buf, -1)
   * below both require a terminated string */
  buf = g_new0 (char, len + 1);

  n1 = g_input_stream_read (in, buf, chunk_len, NULL, &err);
  g_assert_cmpint (n1, ==, expected_nread1);
  g_assert (err == NULL);

  n2 = g_input_stream_read (in, buf + n1, len - n1, NULL, &err);
  g_assert_cmpint (n2, ==, expected_nread2);

  if (expected_nread2 < 0)
    {
      g_assert_error (err, G_IO_ERROR, G_IO_ERROR_INVALID_DATA);
      /* the error belongs to us; release it */
      g_clear_error (&err);
    }
  else
    {
      g_assert_cmpstr (str, ==, buf);
      g_assert_cmpint (g_utf8_strlen (buf, -1), ==, expected_nchar);
      g_assert (err == NULL);
    }

  g_free (buf);
  g_object_unref (in);
}
/* Plain ASCII input: six bytes requested, six bytes and six characters
 * expected back. */
static void
test_read_ascii (void)
{
  const char *input = "foobar";

  do_test_read (input, 6, 6);
}
/* Input mixing ASCII with three two-byte characters: 6 + 3*2 + 6 = 18
 * bytes, 6 + 3 + 6 = 15 characters. */
static void
test_read_utf8 (void)
{
  const char *input = "foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz";

  do_test_read (input, 18, 15);
}
/* Same input as the full UTF-8 test, but the first read asks for 7
 * bytes, which ends after the first byte of a two-byte character. The
 * test expects that read to return only the 6 ASCII bytes and the
 * second read to return the remaining 12. */
static void
test_read_utf8_partial (void)
{
  const char *input = "foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz";

  do_test_read_partial (input, 7, 6, 12, 15);
}
/* Input starting with the bytes EF BF BE, which this test expects the
 * stream to reject: the read must fail. */
static void
test_read_invalid_start (void)
{
  const char *input = "\xef\xbf\xbezzzzzz";

  do_test_read (input, -1, -1);
}
/* The rejected bytes EF BF BE sit between two ASCII runs: the read must
 * fail. */
static void
test_read_invalid_middle (void)
{
  const char *input = "foobar\xef\xbf\xbezzzzzz";

  do_test_read (input, -1, -1);
}
/* The rejected bytes EF BF BE are the last bytes of the input: the read
 * must fail. */
static void
test_read_invalid_end (void)
{
  const char *input = "foobar\xef\xbf\xbe";

  do_test_read (input, -1, -1);
}
/* Two-step read over input with the rejected bytes EF BF BE in the
 * middle: the first read asks for 7 bytes and is expected to return the
 * 6 ASCII bytes, the second read is expected to fail. */
static void
test_read_invalid_partial (void)
{
  const char *input = "foobar\xef\xbf\xbezzzzzz";

  do_test_read_partial (input, 7, 6, -1, -1);
}
/* Reads the first byte of a two-byte character on its own (a "small
 * read"), then reads the rest of the input, and checks that both reads
 * succeed and that the data arrives intact. */
static void
test_read_small_valid (void)
{
  GInputStream *base;
  GInputStream *in;
  gssize len, n;
  char *buf;
  GError *err = NULL;

  base = g_memory_input_stream_new_from_data ("\xc3\xa8\xc3\xa8", -1, NULL);
  in = g_utf8_input_stream_new (base);
  g_object_unref (base);

  len = strlen ("\xc3\xa8\xc3\xa8");

  /* one extra zeroed byte keeps buf NUL-terminated once all len bytes
   * have been read, as the final g_assert_cmpstr() requires */
  buf = g_new0 (char, len + 1);

  /* read a single byte */
  n = g_input_stream_read (in, buf, 1, NULL, &err);
  g_assert_cmpint (n, ==, 1);
  g_assert_cmpstr ("\xc3", ==, buf);
  g_assert (err == NULL);

  /* read the rest */
  n = g_input_stream_read (in, buf + n, len - n, NULL, &err);
  g_assert_cmpint (n, ==, len - 1);
  g_assert_cmpstr ("\xc3\xa8\xc3\xa8", ==, buf);
  g_assert (err == NULL);

  g_free (buf);
  g_object_unref (in);
}
/* Reads a single byte from input that starts with 0xBF and checks that
 * the read fails with G_IO_ERROR_INVALID_DATA. */
static void
test_read_small_invalid (void)
{
  GInputStream *base;
  GInputStream *in;
  gssize n;
  char *buf;
  GError *err = NULL;

  base = g_memory_input_stream_new_from_data ("\xbf\xbe", -1, NULL);
  in = g_utf8_input_stream_new (base);
  g_object_unref (base);

  buf = g_new0 (char, 2);

  n = g_input_stream_read (in, buf, 1, NULL, &err);
  g_assert_cmpint (n, ==, -1);
  g_assert_error (err, G_IO_ERROR, G_IO_ERROR_INVALID_DATA);

  /* release what this test owns: the reported error and the buffer */
  g_clear_error (&err);
  g_free (buf);
  g_object_unref (in);
}
/* Reads two two-byte characters one byte at a time and checks that
 * every single-byte read succeeds and that the data arrives intact. */
static void
test_read_small_consecutive (void)
{
  GInputStream *base;
  GInputStream *in;
  gssize len, n;
  char *buf;
  GError *err = NULL;

  base = g_memory_input_stream_new_from_data ("\xc3\xa8\xc3\xa8", -1, NULL);
  in = g_utf8_input_stream_new (base);
  g_object_unref (base);

  len = strlen ("\xc3\xa8\xc3\xa8");

  /* one extra zeroed byte keeps buf NUL-terminated once all len bytes
   * have been read, as the final g_assert_cmpstr() requires */
  buf = g_new0 (char, len + 1);

  n = 0;

  /* read a single byte at a time */
  while (n < len)
    {
      gssize r;

      r = g_input_stream_read (in, buf + n, 1, NULL, &err);
      g_assert_cmpint (r, ==, 1);
      g_assert (err == NULL);

      n += r;
    }

  g_assert_cmpstr ("\xc3\xa8\xc3\xa8", ==, buf);

  g_free (buf);
  g_object_unref (in);
}
int
main (int argc,
char *argv[])
{
g_type_init ();
g_test_init (&argc, &argv, NULL);
g_test_add_func ("/utf8-input-stream/read-ascii", test_read_ascii);
g_test_add_func ("/utf8-input-stream/read-utf8", test_read_utf8);
g_test_add_func ("/utf8-input-stream/read-utf8-partial", test_read_utf8_partial);
g_test_add_func ("/utf8-input-stream/read-invalid-start", test_read_invalid_start);
g_test_add_func ("/utf8-input-stream/read-invalid-middle", test_read_invalid_middle);
g_test_add_func ("/utf8-input-stream/read-invalid-end", test_read_invalid_end);
g_test_add_func ("/utf8-input-stream/read-invalid-partial", test_read_invalid_partial);
g_test_add_func ("/utf8-input-stream/read-small-valid", test_read_small_valid);
g_test_add_func ("/utf8-input-stream/read-small-invalid", test_read_small_invalid);
g_test_add_func ("/utf8-input-stream/read-small-consecutive", test_read_small_consecutive);
return g_test_run();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment