Commit 8a7eecd7 authored by Matthias Clasen's avatar Matthias Clasen Committed by Matthias Clasen

Introduce the idea of a filename encoding, which is *literally* the

2004-10-27  Matthias Clasen  <mclasen@redhat.com>

	Introduce the idea of a filename encoding, which is
	*literally* the filename encoding on Unix. On windows,
	use the Unicode name converted to UTF-8. (#156325,
	Tor Lillqvist, Owen Taylor)

	* glib/gdir.[hc]:
	* glib/gconvert.[hc]:
	* glib/gfileutils.[hc]:
	* glib/gutils.[hc]:
	* glib/giowin32.c: On Windows, keep old ABI versions
	of GLib pathname api for DLL ABI stability. Use different
	names for the new-style UTF-8 versions. Hide this through
	a #define.

	* glib/gstdio.[hc]: New files containing wrappers for
	POSIX pathname api.

	* glib/glib.symbols: Add new symbols.

	* glib/makegalias.pl: Drop Win32 specific .def syntax,
	include gstdio.h
parent 8e6b2721
2004-10-27 Matthias Clasen <mclasen@redhat.com>
Introduce the idea of a filename encoding, which is
*literally* the filename encoding on Unix. On windows,
use the Unicode name converted to UTF-8. (#156325,
Tor Lillqvist, Owen Taylor)
* glib/gdir.[hc]:
* glib/gconvert.[hc]:
* glib/gfileutils.[hc]:
* glib/gutils.[hc]:
* glib/giowin32.c: On Windows, keep old ABI versions
of GLib pathname api for DLL ABI stability. Use different
names for the new-style UTF-8 versions. Hide this through
a #define.
* glib/gstdio.[hc]: New files containing wrappers for
POSIX pathname api.
* glib/glib.symbols: Add new symbols.
* glib/makegalias.pl: Drop Win32 specific .def syntax,
include gstdio.h
2004-10-27 Matthias Clasen <mclasen@redhat.com>
* glib/gkeyfile.c: Fix includes. (#156500, #156499,
......
2004-10-27 Matthias Clasen <mclasen@redhat.com>
Introduce the idea of a filename encoding, which is
*literally* the filename encoding on Unix. On windows,
use the Unicode name converted to UTF-8. (#156325,
Tor Lillqvist, Owen Taylor)
* glib/gdir.[hc]:
* glib/gconvert.[hc]:
* glib/gfileutils.[hc]:
* glib/gutils.[hc]:
* glib/giowin32.c: On Windows, keep old ABI versions
of GLib pathname api for DLL ABI stability. Use different
names for the new-style UTF-8 versions. Hide this through
a #define.
* glib/gstdio.[hc]: New files containing wrappers for
POSIX pathname api.
* glib/glib.symbols: Add new symbols.
* glib/makegalias.pl: Drop Win32 specific .def syntax,
include gstdio.h
2004-10-27 Matthias Clasen <mclasen@redhat.com>
* glib/gkeyfile.c: Fix includes. (#156500, #156499,
......
2004-10-27 Matthias Clasen <mclasen@redhat.com>
Introduce the idea of a filename encoding, which is
*literally* the filename encoding on Unix. On windows,
use the Unicode name converted to UTF-8. (#156325,
Tor Lillqvist, Owen Taylor)
* glib/gdir.[hc]:
* glib/gconvert.[hc]:
* glib/gfileutils.[hc]:
* glib/gutils.[hc]:
* glib/giowin32.c: On Windows, keep old ABI versions
of GLib pathname api for DLL ABI stability. Use different
names for the new-style UTF-8 versions. Hide this through
a #define.
* glib/gstdio.[hc]: New files containing wrappers for
POSIX pathname api.
* glib/glib.symbols: Add new symbols.
* glib/makegalias.pl: Drop Win32 specific .def syntax,
include gstdio.h
2004-10-27 Matthias Clasen <mclasen@redhat.com>
* glib/gkeyfile.c: Fix includes. (#156500, #156499,
......
2004-10-27 Matthias Clasen <mclasen@redhat.com>
Introduce the idea of a filename encoding, which is
*literally* the filename encoding on Unix. On windows,
use the Unicode name converted to UTF-8. (#156325,
Tor Lillqvist, Owen Taylor)
* glib/gdir.[hc]:
* glib/gconvert.[hc]:
* glib/gfileutils.[hc]:
* glib/gutils.[hc]:
* glib/giowin32.c: On Windows, keep old ABI versions
of GLib pathname api for DLL ABI stability. Use different
names for the new-style UTF-8 versions. Hide this through
a #define.
* glib/gstdio.[hc]: New files containing wrappers for
POSIX pathname api.
* glib/glib.symbols: Add new symbols.
* glib/makegalias.pl: Drop Win32 specific .def syntax,
include gstdio.h
2004-10-27 Matthias Clasen <mclasen@redhat.com>
* glib/gkeyfile.c: Fix includes. (#156500, #156499,
......
2004-10-27 Matthias Clasen <mclasen@redhat.com>
Introduce the idea of a filename encoding, which is
*literally* the filename encoding on Unix. On windows,
use the Unicode name converted to UTF-8. (#156325,
Tor Lillqvist, Owen Taylor)
* glib/gdir.[hc]:
* glib/gconvert.[hc]:
* glib/gfileutils.[hc]:
* glib/gutils.[hc]:
* glib/giowin32.c: On Windows, keep old ABI versions
of GLib pathname api for DLL ABI stability. Use different
names for the new-style UTF-8 versions. Hide this through
a #define.
* glib/gstdio.[hc]: New files containing wrappers for
POSIX pathname api.
* glib/glib.symbols: Add new symbols.
* glib/makegalias.pl: Drop Win32 specific .def syntax,
include gstdio.h
2004-10-27 Matthias Clasen <mclasen@redhat.com>
* glib/gkeyfile.c: Fix includes. (#156500, #156499,
......
2004-10-27 Matthias Clasen <mclasen@redhat.com>
* glib/tmpl/fileutils.sgml: Add some intro.
2004-10-26 Matthias Clasen <mclasen@redhat.com>
* gobject/gobject-docs.sgml: Add an index for 2.6 additions.
......
......@@ -967,6 +967,7 @@ g_option_error_quark
<SECTION>
<TITLE>File Utilities</TITLE>
<FILE>fileutils</FILE>
<INCLUDE>glib.h,glib/gstdio.h</INCLUDE>
GFileError
G_FILE_ERROR
GFileTest
......@@ -984,6 +985,16 @@ g_dir_read_name
g_dir_rewind
g_dir_close
<SUBSECTION>
g_open
g_rename
g_mkdir
g_stat
g_unlink
g_remove
g_fopen
g_freopen
<SUBSECTION Private>
g_file_error_quark
</SECTION>
......
......@@ -6,7 +6,31 @@ various file-related functions.
<!-- ##### SECTION Long_Description ##### -->
<para>
There is a group of functions which wrap the common POSIX functions
dealing with filenames (g_open(), g_rename(), g_mkdir(), g_stat(),
g_unlink(), g_remove(), g_fopen(), g_freopen()). The point of these
wrappers is to make it possible to handle file names with any Unicode
characters in them on Windows without having to use ifdefs and the
wide character API in the application code.
</para>
<para>
The pathname argument should be in the GLib file name encoding. On
POSIX this is the actual on-disk encoding which might correspond to
the locale settings of the process (or the
<envar>G_FILENAME_ENCODING</envar> environment variable), or not.
</para>
<para>
On Windows the GLib file name encoding is UTF-8. Note that the
Microsoft C library does not use UTF-8, but has separate APIs for
current system code page and wide characters (UTF-16). The GLib
wrappers call the wide character API if present (on modern Windows
systems), otherwise convert to/from the system code page.
</para>
<para>
Another group of functions allows you to open and read directories
in the GLib file name encoding. These are g_dir_open(),
g_dir_read_name(), g_dir_rewind(), g_dir_close().
</para>
<!-- ##### SECTION See_Also ##### -->
......@@ -223,3 +247,83 @@ An opaque structure representing an opened directory.
@dir:
<!-- ##### FUNCTION g_open ##### -->
<para>
</para>
@filename:
@flags:
@mode:
@Returns:
<!-- ##### FUNCTION g_rename ##### -->
<para>
</para>
@oldfilename:
@newfilename:
@Returns:
<!-- ##### FUNCTION g_mkdir ##### -->
<para>
</para>
@filename:
@mode:
@Returns:
<!-- ##### FUNCTION g_stat ##### -->
<para>
</para>
@filename:
@buf:
@Returns:
<!-- ##### FUNCTION g_unlink ##### -->
<para>
</para>
@filename:
@Returns:
<!-- ##### FUNCTION g_remove ##### -->
<para>
</para>
@filename:
@Returns:
<!-- ##### FUNCTION g_fopen ##### -->
<para>
</para>
@filename:
@mode:
@Returns:
<!-- ##### FUNCTION g_freopen ##### -->
<para>
</para>
@filename:
@mode:
@stream:
@Returns:
......@@ -260,6 +260,7 @@ attributes (currently only <command>gcc</command>).
Since: 2.6
<!-- ##### MACRO G_GNUC_INTERNAL ##### -->
<para>
Expands to the GNU C <literal>visibility(hidden)</literal> attribute if the
......
......@@ -610,7 +610,7 @@ The type of functions to be called when a child exits.
@pid: the process id of the child process
@status: Status information about the child process,
see waitpid(2) for more information about this field
@data: user data passed to g_child_watch_add()
@data: user data passed to g_child_watch_add()
<!-- ##### FUNCTION g_child_watch_source_new ##### -->
......
......@@ -90,6 +90,7 @@ libglib_2_0_la_SOURCES = \
gscanner.c \
gshell.c \
gslist.c \
gstdio.c \
gstrfuncs.c \
gstring.c \
gthread.c \
......@@ -164,6 +165,7 @@ glibsubinclude_HEADERS = \
gshell.h \
gslist.h \
gspawn.h \
gstdio.h \
gstrfuncs.h \
gstring.h \
gthread.h \
......
......@@ -1014,8 +1014,9 @@ filename_charset_cache_free (gpointer data)
* get_filename_charset:
* @charset: return location for the name of the filename encoding
*
* Determines the character set used for filenames by consulting the
* environment variables G_FILENAME_ENCODING and G_BROKEN_FILENAMES.
* Determines the preferred character set used for filenames by
* consulting the environment variables G_FILENAME_ENCODING and
* G_BROKEN_FILENAMES.
*
* G_FILENAME_ENCODING may be set to a comma-separated list of character
* set names. The special token "@locale" is taken to mean the character set
......@@ -1025,8 +1026,13 @@ filename_charset_cache_free (gpointer data)
* character set of the current locale is taken as the filename encoding.
*
* The returned @charset belongs to GLib and must not be freed.
*
* Return value: %TRUE if the charset used for filename is UTF-8.
*
* Note that on Unix, regardless of the locale character set or
* G_FILENAME_ENCODING value, the actual file names present on a
* system might be in any random encoding or just gibberish.
*
* Return value: %TRUE
* if the charset used for filename is UTF-8.
*/
static gboolean
get_filename_charset (const gchar **filename_charset)
......@@ -1089,12 +1095,33 @@ get_filename_charset (const gchar **filename_charset)
}
#else /* G_PLATFORM_WIN32 */
static gboolean
get_filename_charset (const gchar **filename_charset)
{
#ifdef G_OS_WIN32
/* On Windows GLib pretends that the filename charset is UTF-8 */
if (filename_charset)
*filename_charset = "UTF-8";
return TRUE;
#else
/* Cygwin works like before */
g_get_charset (filename_charset);
return FALSE;
#endif
}
#ifdef G_OS_WIN32
static gboolean
old_get_filename_charset (const gchar **filename_charset)
{
g_get_charset (filename_charset);
return FALSE;
}
#endif
#endif /* G_PLATFORM_WIN32 */
/* This is called from g_thread_init(). It's used to
......@@ -1146,6 +1173,30 @@ g_filename_to_utf8 (const gchar *opsysstring,
"UTF-8", charset, bytes_read, bytes_written, error);
}
#ifdef G_OS_WIN32
#undef g_filename_to_utf8
/* Binary compatibility version. Not for newly compiled code. */
gchar*
g_filename_to_utf8 (const gchar *opsysstring,
gssize len,
gsize *bytes_read,
gsize *bytes_written,
GError **error)
{
const gchar *charset;
if (old_get_filename_charset (&charset))
return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
else
return g_convert (opsysstring, len,
"UTF-8", charset, bytes_read, bytes_written, error);
}
#endif
/**
* g_filename_from_utf8:
* @utf8string: a UTF-8 encoded string.
......@@ -1184,6 +1235,30 @@ g_filename_from_utf8 (const gchar *utf8string,
charset, "UTF-8", bytes_read, bytes_written, error);
}
#ifdef G_OS_WIN32
#undef g_filename_from_utf8
/* Binary compatibility version. Not for newly compiled code. */
gchar*
g_filename_from_utf8 (const gchar *utf8string,
gssize len,
gsize *bytes_read,
gsize *bytes_written,
GError **error)
{
const gchar *charset;
if (old_get_filename_charset (&charset))
return strdup_len (utf8string, len, bytes_read, bytes_written, error);
else
return g_convert (utf8string, len,
charset, "UTF-8", bytes_read, bytes_written, error);
}
#endif
/* Test if haystack has the needle prefix, comparing case
* insensitively. haystack may be UTF-8, but needle must
* contain only ASCII. */
......
......@@ -98,6 +98,11 @@ gchar* g_locale_from_utf8 (const gchar *utf8string,
/* Convert between the operating system (or C runtime)
* representation of file names and UTF-8.
*/
#ifdef G_OS_WIN32
/* On Windows, code compiled against this header is redirected to the
 * new-style UTF-8 functions, which carry an _utf8 suffix. The
 * un-suffixed names are kept only as binary compatibility versions
 * for the DLL ABI.
 */
#define g_filename_to_utf8 g_filename_to_utf8_utf8
#define g_filename_from_utf8 g_filename_from_utf8_utf8
#endif
gchar* g_filename_to_utf8 (const gchar *opsysstring,
gssize len,
gsize *bytes_read,
......
......@@ -4,6 +4,7 @@
* gdir.c: Simplified wrapper around the DIRENT functions.
*
* Copyright 2001 Hans Breuer
* Copyright 2004 Tor Lillqvist
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
......@@ -39,20 +40,28 @@
struct _GDir
{
DIR *dir;
union {
DIR *dirp;
#ifdef G_OS_WIN32
_WDIR *wdirp;
#endif
} u;
#ifdef G_OS_WIN32
gchar utf8_buf[FILENAME_MAX*4];
#endif
};
/**
* g_dir_open:
* @path: the path to the directory you are interested in
* @path: the path to the directory you are interested in. On Unix
* in the on-disk encoding. On Windows in UTF-8
* @flags: Currently must be set to 0. Reserved for future use.
* @error: return location for a #GError, or %NULL.
* If non-%NULL, an error will be set if and only if
* g_dir_open() fails.
*
* Opens a directory for reading. The names of the files
* in the directory can then be retrieved using
* g_dir_read_name().
* Opens a directory for reading. The names of the files in the
* directory can then be retrieved using g_dir_read_name().
*
* Return value: a newly allocated #GDir on success, %NULL on failure.
* If non-%NULL, you must free the result with g_dir_close()
......@@ -64,15 +73,62 @@ g_dir_open (const gchar *path,
GError **error)
{
GDir *dir;
#ifndef G_OS_WIN32
gchar *utf8_path;
#endif
g_return_val_if_fail (path != NULL, NULL);
#ifdef G_OS_WIN32
if (G_WIN32_HAVE_WIDECHAR_API ())
{
wchar_t *wpath = g_utf8_to_utf16 (path, -1, NULL, NULL, error);
if (wpath == NULL)
return NULL;
dir = g_new (GDir, 1);
dir->u.wdirp = _wopendir (wpath);
g_free (wpath);
if (dir->u.wdirp)
return dir;
}
else
{
gchar *cp_path = g_locale_from_utf8 (path, -1, NULL, NULL, error);
if (cp_path == NULL)
return NULL;
dir = g_new (GDir, 1);
dir->u.dirp = opendir (cp_path);
g_free (cp_path);
if (dir->u.dirp)
return dir;
}
/* error case */
g_set_error (error,
G_FILE_ERROR,
g_file_error_from_errno (errno),
_("Error opening directory '%s': %s"),
path, g_strerror (errno));
g_free (dir);
return NULL;
#else
dir = g_new (GDir, 1);
dir->dir = opendir (path);
dir->u.dirp = opendir (path);
if (dir->dir)
if (dir->u.dirp)
return dir;
/* error case */
......@@ -88,18 +144,48 @@ g_dir_open (const gchar *path,
g_free (dir);
return NULL;
#endif
}
#ifdef G_OS_WIN32
/* The above function is actually named g_dir_open_utf8, and that
* is the function that applications compiled with this GLib
* version will use.
*/
#undef g_dir_open
/* Binary compatibility version. Not for newly compiled code. */
GDir *
g_dir_open (const gchar *path,
guint flags,
GError **error)
{
gchar *utf8_path = g_locale_to_utf8 (path, -1, NULL, NULL, error);
GDir *retval;
if (utf8_path == NULL)
return NULL;
retval = g_dir_open_utf8 (utf8_path, flags, error);
g_free (utf8_path);
return retval;
}
#endif
/**
* g_dir_read_name:
* @dir: a #GDir* created by g_dir_open()
*
* Retrieves the name of the next entry in the directory.
* The '.' and '..' entries are omitted. The returned name is in
* the encoding used for filenames. Use g_filename_to_utf8() to
* convert it to UTF-8.
* Retrieves the name of the next entry in the directory. The '.' and
* '..' entries are omitted. On Windows, the returned name is in
* UTF-8. On Unix, it is in the on-disk encoding.
*
* Return value: The entries name or %NULL if there are no
* Return value: The entry's name or %NULL if there are no
* more entries. The return value is owned by GLib and
* must not be modified or freed.
**/
......@@ -110,18 +196,107 @@ g_dir_read_name (GDir *dir)
g_return_val_if_fail (dir != NULL, NULL);
entry = readdir (dir->dir);
#ifdef G_OS_WIN32
if (G_WIN32_HAVE_WIDECHAR_API ())
{
gchar *utf8_name;
struct _wdirent *wentry;
while (1)
{
wentry = _wreaddir (dir->u.wdirp);
while (wentry
&& (0 == wcscmp (wentry->d_name, L".") ||
0 == wcscmp (wentry->d_name, L"..")))
wentry = _wreaddir (dir->u.wdirp);
if (wentry == NULL)
return NULL;
utf8_name = g_utf16_to_utf8 (wentry->d_name, -1, NULL, NULL, NULL);
if (utf8_name == NULL)
continue; /* Huh, impossible? Skip it anyway */
strcpy (dir->utf8_buf, utf8_name);
g_free (utf8_name);
return dir->utf8_buf;
}
}
else
{
while (1)
{
gchar *utf8_name;
entry = readdir (dir->u.dirp);
while (entry
&& (0 == strcmp (entry->d_name, ".") ||
0 == strcmp (entry->d_name, "..")))
entry = readdir (dir->u.dirp);
if (entry == NULL)
return NULL;
utf8_name = g_locale_to_utf8 (entry->d_name, -1, NULL, NULL, NULL);
if (utf8_name != NULL)
{
strcpy (dir->utf8_buf, utf8_name);
g_free (utf8_name);
return dir->utf8_buf;
}
}
}
#else
entry = readdir (dir->u.dirp);
while (entry
&& (0 == strcmp (entry->d_name, ".") ||
0 == strcmp (entry->d_name, "..")))
entry = readdir (dir->dir);
entry = readdir (dir->u.dirp);
if (entry)
return entry->d_name;
else
return NULL;
#endif
}
#ifdef G_OS_WIN32
/* Ditto for g_dir_read_name */
#undef g_dir_read_name
/* Binary compatibility version. Not for newly compiled code. */
G_CONST_RETURN gchar*
g_dir_read_name (GDir *dir)
{
while (1)
{
const gchar *utf8_name = g_dir_read_name_utf8 (dir);
gchar *retval;
if (utf8_name == NULL)
return NULL;
retval = g_locale_from_utf8 (utf8_name, -1, NULL, NULL, NULL);
if (retval != NULL)
{
strcpy (dir->utf8_buf, retval);
g_free (retval);
return dir->utf8_buf;
}
}
}
#endif
/**
* g_dir_rewind:
* @dir: a #GDir* created by g_dir_open()
......@@ -134,7 +309,15 @@ g_dir_rewind (GDir *dir)
{
g_return_if_fail (dir != NULL);
rewinddir (dir->dir);
#ifdef G_OS_WIN32
if (G_WIN32_HAVE_WIDECHAR_API ())
{
_wrewinddir (dir->u.wdirp);
return;
}
#endif
rewinddir (dir->u.dirp);
}
/**
......@@ -148,6 +331,15 @@ g_dir_close (GDir *dir)
{
g_return_if_fail (dir != NULL);
closedir (dir->dir);
#ifdef G_OS_WIN32
if (G_WIN32_HAVE_WIDECHAR_API ())
{
_wclosedir (dir->u