iso2022.cc 5.63 KB
Newer Older
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
1
/*
2
 * Copyright (C) 2002,2003 Red Hat, Inc.
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
3
 *
4 5 6 7
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
8
 *
9 10
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
13
 *
14 15 16
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
17 18
 */

19 20 21 22 23 24
/*
 * This file used to contain a full iso2022 decoder which was removed for
 * version 0.40. Now it only performs input conversion from the given
 * character encoding. TODO: probably this layer could be removed completely.
 */

25
#include <config.h>
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
26
#include <sys/types.h>
27
#include <errno.h>
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
28 29
#include <stdlib.h>
#include <string.h>
30
#include <unistd.h>
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
31 32
#include <glib.h>
#include "debug.h"
33
#include "buffer.h"
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
34
#include "iso2022.h"
35
#include "vteconv.h"
36

37 38 39
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif
40
#include <glib/gi18n-lib.h>
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
41

42
#include <gdk/gdkkeysyms.h>
43

44
/* Codepoint substituted for input bytes that cannot be converted
 * (U+FFFD REPLACEMENT CHARACTER). */
45
#define INVALID_CODEPOINT 0xFFFD
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
46

47 48
struct _vte_iso2022_state {
	const gchar *codeset, *native_codeset, *utf8_codeset, *target_codeset;
49
	VteConv conv;
50
	VteByteArray *buffer;
Nalin Dahyabhai's avatar
Nalin Dahyabhai committed
51 52
};

53
struct _vte_iso2022_state *
54
_vte_iso2022_state_new(const char *native_codeset)
55
{
56
	struct _vte_iso2022_state *state;
57

58
	state = g_slice_new0(struct _vte_iso2022_state);
59
	state->native_codeset = state->codeset = g_intern_string(native_codeset);
60
	if (native_codeset == NULL) {
61 62 63 64 65
                const char *codeset;
		g_get_charset(&codeset);
		state->native_codeset = state->codeset = g_intern_string(codeset);
        }
	state->utf8_codeset = g_intern_string("UTF-8");
66
	state->target_codeset = VTE_CONV_GUNICHAR_TYPE;
67 68
	_vte_debug_print(VTE_DEBUG_SUBSTITUTION,
			"Native codeset \"%s\", currently %s\n",
69
			state->native_codeset, state->codeset);
70
	state->conv = _vte_conv_open(state->target_codeset, state->codeset);
71
	state->buffer = _vte_byte_array_new();
Chris Wilson's avatar
Chris Wilson committed
72
	if (state->conv == VTE_INVALID_CONV) {
73 74
		g_warning(_("Unable to convert characters from %s to %s."),
			  state->codeset, state->target_codeset);
75 76
		_vte_debug_print(VTE_DEBUG_SUBSTITUTION,
				"Using UTF-8 instead.\n");
77 78 79
		state->codeset = state->utf8_codeset;
		state->conv = _vte_conv_open(state->target_codeset,
					     state->codeset);
Chris Wilson's avatar
Chris Wilson committed
80
		if (state->conv == VTE_INVALID_CONV) {
81 82 83 84
			g_error(_("Unable to convert characters from %s to %s."),
				state->codeset, state->target_codeset);
		}
	}
85 86 87
	return state;
}

88 89 90
void
_vte_iso2022_state_free(struct _vte_iso2022_state *state)
{
91
	_vte_byte_array_free(state->buffer);
Chris Wilson's avatar
Chris Wilson committed
92
	if (state->conv != VTE_INVALID_CONV) {
93 94
		_vte_conv_close(state->conv);
	}
95
	g_slice_free(struct _vte_iso2022_state, state);
96 97
}

98 99 100 101
void
_vte_iso2022_state_set_codeset(struct _vte_iso2022_state *state,
			       const char *codeset)
{
102
	VteConv conv;
103 104 105 106 107

	g_return_if_fail(state != NULL);
	g_return_if_fail(codeset != NULL);
	g_return_if_fail(strlen(codeset) > 0);

108
	_vte_debug_print(VTE_DEBUG_SUBSTITUTION, "%s\n", codeset);
109
	conv = _vte_conv_open(state->target_codeset, codeset);
Chris Wilson's avatar
Chris Wilson committed
110
	if (conv == VTE_INVALID_CONV) {
111 112 113 114
		g_warning(_("Unable to convert characters from %s to %s."),
			  codeset, state->target_codeset);
		return;
	}
Chris Wilson's avatar
Chris Wilson committed
115
	if (state->conv != VTE_INVALID_CONV) {
116
		_vte_conv_close(state->conv);
117
	}
118
	state->codeset = g_intern_string (codeset);
119 120 121 122 123 124 125 126 127
	state->conv = conv;
}

/* Return the name of the codeset the state currently decodes from. */
const char *
_vte_iso2022_state_get_codeset(struct _vte_iso2022_state *state)
{
	const char *current = state->codeset;

	return current;
}

128 129 130 131
/*
 * Convert input bytes to gunichars and append them to gunichars.
 *
 * state:     conversion state holding the converter and scratch buffer.
 * cdata:     input bytes in the state's current codeset.
 * length:    number of bytes available at cdata.
 * gunichars: array of gunichar that receives the converted characters;
 *            NUL characters produced by the conversion are dropped.
 *
 * Each byte that starts an invalid sequence (EILSEQ) is skipped and
 * replaced by INVALID_CODEPOINT.  An incomplete trailing sequence (EINVAL)
 * is left unconsumed so the caller can present it again with more data.
 *
 * Returns the number of bytes of cdata that were consumed.
 */
gsize
_vte_iso2022_process(struct _vte_iso2022_state *state,
                     const guchar *cdata, gsize length,
                     GArray *gunichars)
{
	const guchar *inbuf = cdata;
	gsize inbytes = length;
	/* Scratch capacity: two gunichars per input byte. */
	gsize outbytes = sizeof(gunichar) * length * 2;
	gunichar *buf, *outbuf;
	gboolean stop = FALSE;
	gsize processed;
	guint i, j;

	_vte_byte_array_set_minimum_size(state->buffer, outbytes);
	buf = (gunichar *)state->buffer->data;
	outbuf = buf;

	do {
		gsize converted = _vte_conv_cu(state->conv,
					       &inbuf, &inbytes,
					       &outbuf, &outbytes);

		if (converted != (gsize)-1) {
			continue;
		}

		switch (errno) {
		case EILSEQ:
			/* Munge the input: drop one byte and emit a
			 * replacement character in its place. */
			inbuf++;
			inbytes--;
			*outbuf++ = INVALID_CODEPOINT;
			outbytes -= sizeof(gunichar);
			break;
		case EINVAL:
			/* Incomplete. Save for later. */
			stop = TRUE;
			break;
		case E2BIG:
		default:
			/* Should never happen.  If the assertion is compiled
			 * out, stop instead of retrying the same failing
			 * conversion forever. */
			stop = TRUE;
			g_assert_not_reached();
			break;
		}
	} while ((inbytes > 0) && !stop);

	/* Append the converted characters, skipping NUL padding characters.
	 * (Original note: skip blanks -- TODOegmont: why here?) */
	j = gunichars->len;
	g_array_set_size(gunichars, gunichars->len + outbuf-buf);
	for (i = 0; buf + i < outbuf; i++) {
		if (G_UNLIKELY (buf[i] == '\0')) {
			/* Skip the padding character. */
			continue;
		}
		g_array_index(gunichars, gunichar, j++) = buf[i];
	}
	/* Trim the array back to the number of characters actually kept. */
	gunichars->len = j;

	/* Done. */
	processed = length - inbytes;

	_vte_debug_print(VTE_DEBUG_SUBSTITUTION,
                        "Consuming %ld bytes.\n", (long) processed);
	return processed;
}