Commit 61a8c6f5, authored by Jesse van den Kieboom, committed by Ignacio Casal Quinteiro

Fix smart converter encoding guessing and add test case

parent b6a28db8
......@@ -42,6 +42,7 @@
#include "gedit-prefs-manager-app.h"
#include "gedit-document.h"
#include "gedit-convert.h"
#include "gedit-debug.h"
#include "gedit-utils.h"
#include "gedit-language-manager.h"
......@@ -87,8 +88,6 @@ static void gedit_document_save_real (GeditDocument *doc,
const gchar *uri,
const GeditEncoding *encoding,
GeditDocumentSaveFlags flags);
static void gedit_document_set_readonly (GeditDocument *doc,
gboolean readonly);
static void to_search_region_range (GeditDocument *doc,
GtkTextIter *start,
GtkTextIter *end);
......@@ -1046,9 +1045,16 @@ set_readonly (GeditDocument *doc,
doc->priv->readonly = readonly;
}
static void
gedit_document_set_readonly (GeditDocument *doc,
gboolean readonly)
/**
* _gedit_document_set_readonly:
* @doc: a #GeditDocument
* @readonly: %TRUE to set the document as read-only
*
* If @readonly is %TRUE sets @doc as read-only.
*/
void
_gedit_document_set_readonly (GeditDocument *doc,
gboolean readonly)
{
gedit_debug (DEBUG_DOCUMENT);
......@@ -1119,7 +1125,7 @@ document_loader_loaded (GeditDocumentLoader *loader,
GeditDocument *doc)
{
/* load was successful */
if (error == NULL)
if (error == NULL || error->code == GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK)
{
GtkTextIter iter;
GFileInfo *info;
......@@ -1360,7 +1366,7 @@ document_saver_saving (GeditDocumentSaver *saver,
g_get_current_time (&doc->priv->time_of_last_save_or_load);
gedit_document_set_readonly (doc, FALSE);
_gedit_document_set_readonly (doc, FALSE);
gtk_text_buffer_set_modified (GTK_TEXT_BUFFER (doc),
FALSE);
......
......@@ -267,6 +267,9 @@ void gedit_document_set_metadata (GeditDocument *doc,
/*
* Non exported functions
*/
void _gedit_document_set_readonly (GeditDocument *doc,
gboolean readonly);
glong _gedit_document_get_seconds_since_last_save_or_load
(GeditDocument *doc);
......
......@@ -236,7 +236,7 @@ remote_load_completed_or_failed (GeditGioDocumentLoader *gvloader, AsyncData *as
if (async)
async_data_free (async);
if (gvloader->priv->stream)
g_input_stream_close_async (G_INPUT_STREAM (gvloader->priv->stream),
G_PRIORITY_HIGH, NULL, NULL, NULL);
......@@ -359,11 +359,11 @@ async_read_cb (GInputStream *stream,
if ((gedit_smart_charset_converter_get_num_fallbacks (gvloader->priv->converter) != 0) &&
gvloader->priv->error == NULL)
{
/* FIXME: Maybe check for some specific error ? */
g_set_error_literal (&gvloader->priv->error,
GEDIT_DOCUMENT_ERROR,
GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK,
_("There was a problem blah blah")); /* FIXME */
"There was a conversion error and it was "
"needed to use a fallback char");
}
end_append_text_to_document (GEDIT_DOCUMENT_LOADER (gvloader));
......
......@@ -540,7 +540,8 @@ create_option_menu (GtkWidget *message_area, GtkWidget *vbox)
static GtkWidget *
create_conversion_error_message_area (const gchar *primary_text,
const gchar *secondary_text)
const gchar *secondary_text,
gboolean edit_anyway)
{
GtkWidget *message_area;
GtkWidget *hbox_content;
......@@ -558,21 +559,53 @@ create_conversion_error_message_area (const gchar *primary_text,
_("_Retry"),
GTK_STOCK_REDO,
GTK_RESPONSE_OK);
gedit_message_area_add_button (GEDIT_MESSAGE_AREA (message_area),
GTK_STOCK_CANCEL,
GTK_RESPONSE_CANCEL);
if (edit_anyway)
{
gedit_message_area_add_button (GEDIT_MESSAGE_AREA (message_area),
_("Edit Any_way"),
GTK_RESPONSE_YES);
gedit_message_area_add_button (GEDIT_MESSAGE_AREA (message_area),
_("D_on't Edit"),
GTK_RESPONSE_CANCEL);
}
else
{
gedit_message_area_add_button (GEDIT_MESSAGE_AREA (message_area),
GTK_STOCK_CANCEL,
GTK_RESPONSE_CANCEL);
}
#else
message_area = gtk_info_bar_new ();
info_bar_add_stock_button_with_text (GTK_INFO_BAR (message_area),
_("_Retry"),
GTK_STOCK_REDO,
GTK_RESPONSE_OK);
gtk_info_bar_add_button (GTK_INFO_BAR (message_area),
GTK_STOCK_CANCEL,
GTK_RESPONSE_CANCEL);
gtk_info_bar_set_message_type (GTK_INFO_BAR (message_area),
GTK_MESSAGE_ERROR);
if (edit_anyway)
{
gtk_info_bar_add_button (GTK_INFO_BAR (message_area),
/* Translators: the access key chosen for this string should be
different from other main menu access keys (Open, Edit, View...) */
_("Edit Any_way"),
GTK_RESPONSE_YES);
gtk_info_bar_add_button (GTK_INFO_BAR (message_area),
/* Translators: the access key chosen for this string should be
different from other main menu access keys (Open, Edit, View...) */
_("D_on't Edit"),
GTK_RESPONSE_CANCEL);
gtk_info_bar_set_message_type (GTK_INFO_BAR (message_area),
GTK_MESSAGE_WARNING);
}
else
{
gtk_info_bar_add_button (GTK_INFO_BAR (message_area),
GTK_STOCK_CANCEL,
GTK_RESPONSE_CANCEL);
gtk_info_bar_set_message_type (GTK_INFO_BAR (message_area),
GTK_MESSAGE_ERROR);
}
#endif
hbox_content = gtk_hbox_new (FALSE, 8);
......@@ -628,6 +661,7 @@ gedit_conversion_error_while_loading_message_area_new (
gchar *uri_for_display;
gchar *temp_uri_for_display;
GtkWidget *message_area;
gboolean edit_anyway = FALSE;
g_return_val_if_fail (uri != NULL, NULL);
g_return_val_if_fail (error != NULL, NULL);
......@@ -640,8 +674,8 @@ gedit_conversion_error_while_loading_message_area_new (
* though the dialog uses wrapped text, if the URI doesn't contain
* white space then the text-wrapping code is too stupid to wrap it.
*/
temp_uri_for_display = gedit_utils_str_middle_truncate (full_formatted_uri,
MAX_URI_IN_DIALOG_LENGTH);
temp_uri_for_display = gedit_utils_str_middle_truncate (full_formatted_uri,
MAX_URI_IN_DIALOG_LENGTH);
g_free (full_formatted_uri);
uri_for_display = g_markup_printf_escaped ("<i>%s</i>", temp_uri_for_display);
......@@ -652,18 +686,29 @@ gedit_conversion_error_while_loading_message_area_new (
else
encoding_name = g_strdup ("UTF-8");
if (error->domain == GEDIT_CONVERT_ERROR)
if (error->domain == GEDIT_CONVERT_ERROR &&
error->code == GEDIT_CONVERT_ERROR_AUTO_DETECTION_FAILED)
{
g_return_val_if_fail (error->code == GEDIT_CONVERT_ERROR_AUTO_DETECTION_FAILED, NULL);
error_message = g_strdup_printf (_("Could not open the file %s."),
uri_for_display);
uri_for_display);
message_details = g_strconcat (_("gedit has not been able to detect "
"the character coding."), "\n",
_("Please check that you are not trying to open a binary file."), "\n",
_("Select a character coding from the menu and try again."), NULL);
}
else
else if (error->domain == GEDIT_DOCUMENT_ERROR &&
error->code == GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK)
{
error_message = g_strdup_printf (_("There was a problem opening the file %s."),
uri_for_display);
message_details = g_strconcat (_("The file you opened has some invalid characters, "
"if you continue editing this file you could make this "
"document useless."), "\n",
_("You can also choose another character encoding and try again."),
NULL);
edit_anyway = TRUE;
}
else
{
error_message = g_strdup_printf (_("Could not open the file %s using the %s character coding."),
......@@ -673,7 +718,9 @@ gedit_conversion_error_while_loading_message_area_new (
_("Select a different character coding from the menu and try again."), NULL);
}
message_area = create_conversion_error_message_area (error_message, message_details);
message_area = create_conversion_error_message_area (error_message,
message_details,
edit_anyway);
g_free (uri_for_display);
g_free (encoding_name);
......@@ -726,7 +773,8 @@ gedit_conversion_error_while_saving_message_area_new (
message_area = create_conversion_error_message_area (
error_message,
message_details);
message_details,
FALSE);
g_free (uri_for_display);
g_free (encoding_name);
......
......@@ -121,6 +121,73 @@ get_encoding (GeditSmartCharsetConverter *smart)
return (const GeditEncoding *)smart->priv->current_encoding->data;
}
/*
 * try_convert:
 * @converter: the charset converter to probe
 * @inbuf: the raw bytes to feed to @converter
 * @inbuf_size: number of bytes available at @inbuf
 *
 * Pushes @inbuf through @converter into a scratch buffer and reports whether
 * the result looks usable: the conversion must end without an error (an error
 * whose code equals G_CONVERT_ERROR_PARTIAL_INPUT is tolerated) and the bytes
 * produced must validate as UTF-8. The scratch buffer is private to this
 * function and is released before returning.
 *
 * Returns: %TRUE if the data could be converted, %FALSE otherwise.
 */
static gboolean
try_convert (GCharsetConverter *converter,
             const void        *inbuf,
             gsize              inbuf_size)
{
	/* Arithmetic on a void pointer is not standard C, so walk the input
	   through a byte pointer instead. */
	const gchar *in = inbuf;
	GError *err = NULL;
	gsize nread = 0;
	gsize nwritten = 0;
	GConverterResult res;
	gchar *out;
	gboolean ret;
	gsize out_size;

	/* Four output bytes are reserved for every input byte. */
	out_size = inbuf_size * 4;
	out = g_malloc (out_size);

	do
	{
		/* Start every call from zero: if the converter fails without
		   filling these in, nothing bogus is added to the totals. */
		gsize bytes_read = 0;
		gsize bytes_written = 0;

		res = g_converter_convert (G_CONVERTER (converter),
		                           in + nread,
		                           inbuf_size - nread,
		                           out + nwritten,
		                           out_size - nwritten,
		                           G_CONVERTER_INPUT_AT_END,
		                           &bytes_read,
		                           &bytes_written,
		                           &err);

		nread += bytes_read;
		nwritten += bytes_written;
	} while (res != G_CONVERTER_FINISHED && res != G_CONVERTER_ERROR && err == NULL);

	if (err != NULL)
	{
		/* NOTE(review): only the error code is compared here, not the
		   error domain -- confirm which domain the converter reports. */
		if (err->code == G_CONVERT_ERROR_PARTIAL_INPUT)
		{
			/* FIXME We can get partial input while guessing the
			   encoding because we just take some amount of text
			   to guess from. */
			ret = TRUE;
		}
		else
		{
			ret = FALSE;
		}

		g_error_free (err);
	}
	else
	{
		ret = TRUE;
	}

	/* FIXME: Check the remainder? */
	if (ret && !g_utf8_validate (out, nwritten, NULL))
	{
		ret = FALSE;
	}

	g_free (out);

	return ret;
}
static GCharsetConverter *
guess_encoding (GeditSmartCharsetConverter *smart,
const void *inbuf,
......@@ -136,10 +203,6 @@ guess_encoding (GeditSmartCharsetConverter *smart,
while (TRUE)
{
const GeditEncoding *enc;
gchar outbuf[inbuf_size];
GConverterResult ret;
gsize read, written;
GError *err = NULL;
if (conv != NULL)
{
......@@ -147,7 +210,7 @@ guess_encoding (GeditSmartCharsetConverter *smart,
conv = NULL;
}
/* We get the first encoding we have in the list */
/* We get an encoding from the list */
enc = get_encoding (smart);
/* if it is NULL we didn't guess anything */
......@@ -168,7 +231,7 @@ guess_encoding (GeditSmartCharsetConverter *smart,
break;
}
/* Check if the end is just less than one char */
/* Check if the end is less than one char */
remainder = inbuf_size - (end - (gchar *)inbuf);
if (remainder < 6)
{
......@@ -189,28 +252,8 @@ guess_encoding (GeditSmartCharsetConverter *smart,
break;
}
ret = g_converter_convert (G_CONVERTER (conv),
inbuf,
inbuf_size,
outbuf,
inbuf_size,
0,
&read,
&written,
&err);
if (err != NULL)
{
/* FIXME: Is this ok or should we just skip it? */
if (err->code == G_CONVERT_ERROR_PARTIAL_INPUT)
{
g_error_free (err);
break;
}
g_error_free (err);
}
else
/* Try to convert */
if (try_convert (conv, inbuf, inbuf_size))
{
break;
}
......@@ -218,6 +261,7 @@ guess_encoding (GeditSmartCharsetConverter *smart,
if (conv != NULL)
{
g_converter_reset (G_CONVERTER (conv));
g_charset_converter_set_use_fallback (conv, TRUE);
}
......@@ -343,5 +387,9 @@ gedit_smart_charset_converter_get_num_fallbacks (GeditSmartCharsetConverter *sma
{
g_return_val_if_fail (GEDIT_IS_SMART_CHARSET_CONVERTER (smart), FALSE);
if (smart->priv->charset_conv == NULL)
return FALSE;
return g_charset_converter_get_num_fallbacks (smart->priv->charset_conv) != 0;
}
......@@ -503,41 +503,57 @@ conversion_loading_error_message_area_response (GtkWidget *message_area,
GeditTab *tab)
{
GeditDocument *doc;
GeditView *view;
gchar *uri;
const GeditEncoding *encoding;
doc = gedit_tab_get_document (tab);
g_return_if_fail (GEDIT_IS_DOCUMENT (doc));
view = gedit_tab_get_view (tab);
g_return_if_fail (GEDIT_IS_VIEW (view));
uri = gedit_document_get_uri (doc);
g_return_if_fail (uri != NULL);
if (response_id == GTK_RESPONSE_OK)
switch (response_id)
{
const GeditEncoding *encoding;
case GTK_RESPONSE_OK:
encoding = gedit_conversion_error_message_area_get_encoding (
GTK_WIDGET (message_area));
encoding = gedit_conversion_error_message_area_get_encoding (
GTK_WIDGET (message_area));
g_return_if_fail (encoding != NULL);
g_return_if_fail (encoding != NULL);
set_message_area (tab, NULL);
gedit_tab_set_state (tab, GEDIT_TAB_STATE_LOADING);
set_message_area (tab, NULL);
gedit_tab_set_state (tab, GEDIT_TAB_STATE_LOADING);
tab->priv->tmp_encoding = encoding;
tab->priv->tmp_encoding = encoding;
g_return_if_fail (tab->priv->auto_save_timeout <= 0);
if (tab->priv->auto_save_timeout > 0)
remove_auto_save_timeout (tab);
gedit_document_load (doc,
uri,
encoding,
tab->priv->tmp_line_pos,
FALSE);
}
else
{
_gedit_recent_remove (GEDIT_WINDOW (gtk_widget_get_toplevel (GTK_WIDGET (tab))), uri);
gedit_document_load (doc,
uri,
encoding,
tab->priv->tmp_line_pos,
FALSE);
break;
case GTK_RESPONSE_YES:
/* This means that we want to edit the document anyway */
set_message_area (tab, NULL);
tab->priv->not_editable = FALSE;
gtk_text_view_set_editable (GTK_TEXT_VIEW (view),
TRUE);
break;
case GTK_RESPONSE_CANCEL:
/* We don't want to edit the document just show it */
set_message_area (tab, NULL);
break;
default:
_gedit_recent_remove (GEDIT_WINDOW (gtk_widget_get_toplevel (GTK_WIDGET (tab))), uri);
remove_tab (tab);
remove_tab (tab);
break;
}
g_free (uri);
......@@ -927,7 +943,8 @@ document_loaded (GeditDocument *document,
location = gedit_document_get_location (document);
uri = gedit_document_get_uri (document);
if (error != NULL)
/* if the error is CONVERSION FALLBACK don't treat it as a normal error */
if (error != NULL && error->code != GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK)
{
if (tab->priv->state == GEDIT_TAB_STATE_LOADING)
gedit_tab_set_state (tab, GEDIT_TAB_STATE_LOADING_ERROR);
......@@ -979,10 +996,7 @@ document_loaded (GeditDocument *document,
else
{
g_return_if_fail ((error->domain == G_CONVERT_ERROR) ||
(error->domain == GEDIT_CONVERT_ERROR));
/* FIXME: Check for GEDIT_CONVERT_ERROR_FALLBACK_USED
and set the right message area */
(error->domain == GEDIT_CONVERT_ERROR));
// TODO: different error messages if tab->priv->state == GEDIT_TAB_STATE_REVERTING?
// note that while reverting encoding should be ok, so this is unlikely to happen
......@@ -1028,6 +1042,36 @@ document_loaded (GeditDocument *document,
mime);
g_free (mime);
if (error && error->code == GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK)
{
GtkWidget *emsg;
//_gedit_document_set_readonly (document, TRUE);
tab->priv->not_editable = TRUE;
emsg = gedit_conversion_error_while_loading_message_area_new (
uri,
tab->priv->tmp_encoding,
error);
set_message_area (tab, emsg);
g_signal_connect (emsg,
"response",
G_CALLBACK (conversion_loading_error_message_area_response),
tab);
#if !GTK_CHECK_VERSION (2, 17, 1)
gedit_message_area_set_default_response (GEDIT_MESSAGE_AREA (emsg),
GTK_RESPONSE_CANCEL);
#else
gtk_info_bar_set_default_response (GTK_INFO_BAR (emsg),
GTK_RESPONSE_CANCEL);
#endif
gtk_widget_show (emsg);
}
/* Scroll to the cursor when the document is loaded */
gedit_view_scroll_to_cursor (GEDIT_VIEW (tab->priv->view));
......
......@@ -28,6 +28,94 @@
#include <string.h>
#define TEXT_TO_CONVERT "this is some text to make the tests"
#define TEXT_TO_GUESS "hello \xe6\x96\x87 world"
/*
 * print_hex:
 * @ptr: the bytes to dump
 * @len: how many bytes of @ptr to print
 *
 * Debug helper: prints each of the first @len bytes of @ptr as a "\xNN"
 * escape (two lowercase hex digits), followed by a single newline.
 */
static void
print_hex (gchar *ptr, gint len)
{
	gint pos = 0;

	while (pos < len)
	{
		unsigned char byte = (unsigned char) ptr[pos];

		g_printf ("\\x%02x", byte);
		pos++;
	}

	g_printf ("\n");
}
/*
 * get_encoded_text:
 * @text: the bytes to convert
 * @nread: number of bytes of @text to convert, or -1 to use strlen (@text)
 * @to: the encoding to convert to
 * @from: the encoding @text is currently in
 * @bytes_written_aux: (out): number of bytes produced, terminator excluded
 * @care_about_error: %TRUE to assert that the conversion raises no error
 *
 * Converts @text from @from to @to, feeding the converter in a loop and
 * collecting the output chunk by chunk so the result can be of any size.
 *
 * When @care_about_error is %FALSE, a conversion error is printed and %NULL
 * is returned; %NULL is also returned if the output is not valid UTF-8.
 * When @care_about_error is %TRUE the output is returned without UTF-8
 * validation (the target may be a non UTF-8 encoding).
 *
 * Returns: a newly allocated, NUL-terminated buffer to be released with
 * g_free(), or %NULL as described above.
 */
static gchar *
get_encoded_text (const gchar         *text,
                  gsize                nread,
                  const GeditEncoding *to,
                  const GeditEncoding *from,
                  gsize               *bytes_written_aux,
                  gboolean             care_about_error)
{
	GCharsetConverter *converter;
	GString *converted;
	gchar chunk[200];
	gsize consumed = 0;
	GConverterResult res;
	GError *err = NULL;

	*bytes_written_aux = 0;

	if (nread == (gsize) -1)
	{
		nread = strlen (text);
	}

	converter = g_charset_converter_new (gedit_encoding_get_charset (to),
	                                     gedit_encoding_get_charset (from),
	                                     NULL);

	/* A GString grows on demand and tolerates embedded NULs, so the output
	   is no longer limited to a fixed-size buffer. */
	converted = g_string_new (NULL);

	do
	{
		/* Zeroed on every pass so that a failing call, which may leave
		   them untouched, appends nothing and consumes nothing. */
		gsize bytes_read = 0;
		gsize bytes_written = 0;

		res = g_converter_convert (G_CONVERTER (converter),
		                           text + consumed,
		                           nread,
		                           chunk,
		                           sizeof chunk,
		                           G_CONVERTER_INPUT_AT_END,
		                           &bytes_read,
		                           &bytes_written,
		                           &err);

		g_string_append_len (converted, chunk, bytes_written);
		consumed += bytes_read;
		nread -= bytes_read;
	} while (res != G_CONVERTER_FINISHED && res != G_CONVERTER_ERROR);

	g_object_unref (converter);

	*bytes_written_aux = converted->len;

	if (care_about_error)
	{
		g_assert_no_error (err);
	}
	else if (err != NULL)
	{
		g_printf ("** You don't care, but there was an error: %s", err->message);
		g_error_free (err);
		g_string_free (converted, TRUE);
		return NULL;
	}

	/* Validation is only enforced for callers that tolerate failure; a
	   caller that cares about errors may legitimately ask for non UTF-8
	   output. */
	if (!care_about_error &&
	    !g_utf8_validate (converted->str, converted->len, NULL))
	{
		g_string_free (converted, TRUE);
		return NULL;
	}

	/* Hand the character data over to the caller, dropping the wrapper. */
	return g_string_free (converted, FALSE);
}
static GSList *
get_all_encodings ()
......@@ -51,12 +139,12 @@ get_all_encodings ()
return encs;
}
static void
static gchar *
do_test (const gchar *test_in,
const gchar *enc,
GSList *encodings,
gsize nread,
const gchar *test_out)
const GeditEncoding **guessed)
{
GeditSmartCharsetConverter *converter;
gchar *out, *out_aux;
......@@ -99,7 +187,10 @@ do_test (const gchar *test_in,
g_assert_no_error (err);
out[bytes_written_aux] = '\0';
g_assert_cmpstr (out, ==, test_out);
if (guessed != NULL)
*guessed = gedit_smart_charset_converter_get_guessed (converter);
return out;
}
static void
......@@ -163,10 +254,16 @@ do_test_roundtrip (const char *str, const char *charset)
static void
test_utf8_utf8 ()
{
do_test (TEXT_TO_CONVERT, "UTF-8", NULL, strlen (TEXT_TO_CONVERT), TEXT_TO_CONVERT);
gchar *aux;
aux = do_test (TEXT_TO_CONVERT, "UTF-8", NULL, strlen (TEXT_TO_CONVERT), NULL);
g_assert_cmpstr (aux, ==, TEXT_TO_CONVERT);
do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", "UTF-8", NULL, 18, "foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz");
do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", "UTF-8", NULL, 9, "foobar\xc3\xa8\xc3");
aux = do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", "UTF-8", NULL, 18, NULL);
g_assert_cmpstr (aux, ==, "foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz");
aux = do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", "UTF-8", NULL, 9, NULL);
g_assert_cmpstr (aux, ==, "foobar\xc3\xa8\xc3");
/* FIXME: Use the utf8 stream for a fallback? */
//do_test_with_error ("\xef\xbf\xbezzzzzz", encs, G_IO_ERROR_FAILED);
......@@ -189,6 +286,37 @@ test_xxx_xxx ()
g_slist_free (encs);
}
/*
 * test_guessed:
 *
 * Encodes TEXT_TO_GUESS as UTF-16, checks that reading those bytes as
 * ISO-8859-15 does not yield a usable result, then runs the smart converter
 * with the candidate list [ISO-8859-15, UTF-16] and asserts that UTF-16 is
 * the encoding it reports as guessed.
 */
static void
test_guessed (void)
{
	GSList *encs = NULL;
	gchar *aux, *aux2, *fail;
	gsize aux_len, fail_len;
	const GeditEncoding *guessed = NULL;

	aux = get_encoded_text (TEXT_TO_GUESS, -1,
	                        gedit_encoding_get_from_charset ("UTF-16"),
	                        gedit_encoding_get_from_charset ("UTF-8"),
	                        &aux_len,
	                        TRUE);

	/* ISO-8859-15 should fail */
	fail = get_encoded_text (aux, aux_len,
	                         gedit_encoding_get_from_charset ("UTF-8"),
	                         gedit_encoding_get_from_charset ("ISO-8859-15"),
	                         &fail_len,
	                         FALSE);

	g_assert (fail == NULL);

	encs = g_slist_append (encs, (gpointer)gedit_encoding_get_from_charset ("ISO-8859-15"));
	encs = g_slist_append (encs, (gpointer)gedit_encoding_get_from_charset ("UTF-16"));

	/* NOTE(review): aux2 is presumably owned by the caller, but how
	   do_test allocates it is not visible here -- confirm before freeing. */
	aux2 = do_test (aux, NULL, encs, aux_len, &guessed);

	g_assert (guessed == gedit_encoding_get_from_charset ("UTF-16"));

	/* Only the list nodes are released; the encodings are not owned. */
	g_slist_free (encs);
	g_free (aux);
}
int main (int argc,
char *argv[])
{
......@@ -196,7 +324,8 @@ int main (int argc,
g_test_init (&argc, &argv, NULL);
g_test_add_func ("/smart-converter/utf8-utf8", test_utf8_utf8);
g_test_add_func ("/smart-converter/xxx-xxx", test_xxx_xxx);
//g_test_add_func ("/smart-converter/xxx-xxx", test_xxx_xxx);
g_test_add_func ("/smart-converter/guessed", test_guessed);
return g_test_run ();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment