Commit 2adfee82 authored by Daniel García Moreno

Use libgepub_internals

parent f8c26d66
/* GepubArchive
*
* Copyright (C) 2011 Daniel Garcia <danigm@wadobo.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <config.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <archive.h>
#include <archive_entry.h>
#include "gepub-archive.h"
#include "gepub-utils.h"
#define BUFZISE 1024
/* Instance data of a GepubArchive object. */
struct _GepubArchive {
GObject parent;
/* libarchive read handle. Assigned by gepub_archive_open() and reset to
 * NULL by gepub_archive_close(); gepub_archive_new() starts it at NULL. */
struct archive *archive;
/* Copy (g_strdup) of the path given to gepub_archive_new(); released in
 * gepub_archive_finalize(). */
gchar *path;
};
/* Class structure of GepubArchive; it only embeds the parent GObjectClass
 * and declares no virtual functions or class data of its own. */
struct _GepubArchiveClass {
GObjectClass parent_class;
};
G_DEFINE_TYPE (GepubArchive, gepub_archive, G_TYPE_OBJECT)
/*
 * gepub_archive_open:
 * @archive: a #GepubArchive
 *
 * Creates a new libarchive read handle with zip support enabled and opens
 * the file stored in archive->path with it. On success the handle is stored
 * in archive->archive and must later be released with gepub_archive_close().
 * On failure no handle is kept, so nothing leaks and archive->archive is NULL.
 *
 * Returns: TRUE if the file could be opened, FALSE otherwise
 */
static gboolean
gepub_archive_open (GepubArchive *archive)
{
    struct archive *handle;

    /* A handle left behind by an earlier open would otherwise be overwritten
     * and leaked. */
    if (archive->archive != NULL) {
        archive_read_free (archive->archive);
        archive->archive = NULL;
    }

    handle = archive_read_new ();
    if (handle == NULL)
        return FALSE;

    archive_read_support_format_zip (handle);

    /* 10240 is the read block size handed to libarchive. */
    if (archive_read_open_filename (handle, archive->path, 10240) != ARCHIVE_OK) {
        archive_read_free (handle);
        return FALSE;
    }

    archive->archive = handle;
    return TRUE;
}
/*
 * gepub_archive_close:
 * @archive: a #GepubArchive
 *
 * Frees the libarchive read handle, if there is one, and resets the
 * field to NULL. Calling it with no handle set does nothing.
 */
static void
gepub_archive_close (GepubArchive *archive)
{
    if (archive->archive != NULL) {
        archive_read_free (archive->archive);
        archive->archive = NULL;
    }
}
/*
 * GObject finalize handler: releases the stored path and any libarchive
 * handle that is still set, then chains up to the parent class.
 */
static void
gepub_archive_finalize (GObject *object)
{
    GepubArchive *self = GEPUB_ARCHIVE (object);

    g_clear_pointer (&self->path, g_free);
    gepub_archive_close (self);

    G_OBJECT_CLASS (gepub_archive_parent_class)->finalize (object);
}
/* GObject instance initializer. It does no work: gepub_archive_new()
 * assigns the path and archive fields after the object is created. */
static void
gepub_archive_init (GepubArchive *archive)
{
}
/* GObject class initializer: installs gepub_archive_finalize() as the
 * finalize handler of the class. */
static void
gepub_archive_class_init (GepubArchiveClass *klass)
{
    G_OBJECT_CLASS (klass)->finalize = gepub_archive_finalize;
}
/**
 * gepub_archive_new:
 * @path: path of the archive file
 *
 * Creates a #GepubArchive that remembers a copy of @path. The file itself
 * is not opened here; no libarchive handle is created until an operation
 * needs one.
 *
 * Returns: a new #GepubArchive
 */
GepubArchive *
gepub_archive_new (const gchar *path)
{
    GepubArchive *self = GEPUB_ARCHIVE (g_object_new (GEPUB_TYPE_ARCHIVE, NULL));

    self->path = g_strdup (path);
    self->archive = NULL;

    return self;
}
/**
 * gepub_archive_list_files:
 * @archive: a #GepubArchive
 *
 * Opens the archive, walks over every entry header and collects a copy of
 * each entry pathname. Names are prepended, so the resulting list is in the
 * reverse of the order in which the entries were read.
 *
 * Returns: (element-type utf8) (transfer full): list of files in the archive
 */
GList *
gepub_archive_list_files (GepubArchive *archive)
{
    GList *names = NULL;
    struct archive_entry *header;

    if (gepub_archive_open (archive)) {
        for (;;) {
            gchar *name;

            if (archive_read_next_header (archive->archive, &header) != ARCHIVE_OK)
                break;

            name = g_strdup (archive_entry_pathname (header));
            names = g_list_prepend (names, name);

            /* Only the names are wanted; skip over the entry data. */
            archive_read_data_skip (archive->archive);
        }

        gepub_archive_close (archive);
    }

    return names;
}
/**
 * gepub_archive_read_entry:
 * @archive: a #GepubArchive
 * @path: pathname of the entry inside the archive
 *
 * Opens the archive, looks for the first entry whose pathname equals @path
 * (ASCII case-insensitive comparison) and reads its data into memory.
 *
 * Returns: (transfer full) (nullable): the entry data, or %NULL if the
 * archive cannot be opened, no entry matches @path, or reading fails
 */
GBytes *
gepub_archive_read_entry (GepubArchive *archive,
                          const gchar *path)
{
    struct archive_entry *entry = NULL;
    gboolean found = FALSE;
    guchar *buffer;
    gint64 size;
    gsize total = 0;

    if (!gepub_archive_open (archive)) {
        /* Drop any handle a failed open may have left behind; this is a
         * no-op when there is none. */
        gepub_archive_close (archive);
        return NULL;
    }

    while (archive_read_next_header (archive->archive, &entry) == ARCHIVE_OK) {
        const gchar *name = archive_entry_pathname (entry);

        if (name != NULL && g_ascii_strcasecmp (path, name) == 0) {
            found = TRUE;
            break;
        }
        archive_read_data_skip (archive->archive);
    }

    /* Without a match `entry` does not describe the requested file (and is
     * still NULL for an archive with no entries), so it must not be used. */
    if (!found) {
        gepub_archive_close (archive);
        return NULL;
    }

    /* archive_entry_size() is 64-bit; keep the full width and reject values
     * that cannot be used as an allocation size. */
    size = archive_entry_size (entry);
    if (size < 0 || (guint64) size > (guint64) G_MAXSIZE) {
        gepub_archive_close (archive);
        return NULL;
    }

    buffer = g_malloc0 ((gsize) size);

    /* archive_read_data() may hand back fewer bytes than requested, so keep
     * reading until the entry is complete, the data ends, or an error occurs. */
    while (total < (gsize) size) {
        gssize n = archive_read_data (archive->archive,
                                      buffer + total,
                                      (gsize) size - total);

        if (n < 0) {
            g_free (buffer);
            gepub_archive_close (archive);
            return NULL;
        }
        if (n == 0)
            break;

        total += (gsize) n;
    }

    gepub_archive_close (archive);

    /* Report only the bytes that were actually read. */
    return g_bytes_new_take (buffer, total);
}
/**
 * gepub_archive_get_root_file:
 * @archive: a #GepubArchive
 *
 * Reads META-INF/container.xml from the archive, parses it and looks up the
 * "full-path" property of its "rootfile" element.
 *
 * Returns: (nullable): the value produced by gepub_utils_get_prop() for
 * "full-path", or %NULL if container.xml is missing, cannot be parsed, or
 * has no "rootfile" element
 */
gchar *
gepub_archive_get_root_file (GepubArchive *archive)
{
    xmlDoc *doc;
    xmlNode *root_element;
    xmlNode *root_node;
    GBytes *bytes;
    const gchar *buffer;
    gsize bufsize = 0;
    gchar *root_file = NULL;

    /* root file is in META-INF/container.xml */
    bytes = gepub_archive_read_entry (archive, "META-INF/container.xml");
    if (!bytes)
        return NULL;

    buffer = g_bytes_get_data (bytes, &bufsize);

    /* xmlRecoverMemory() takes the length as an int, so refuse anything that
     * would not fit instead of silently truncating it. */
    if (buffer == NULL || bufsize == 0 || bufsize > (gsize) G_MAXINT) {
        g_bytes_unref (bytes);
        return NULL;
    }

    doc = xmlRecoverMemory (buffer, (int) bufsize);
    if (doc != NULL) {
        root_element = xmlDocGetRootElement (doc);
        root_node = NULL;
        if (root_element != NULL)
            root_node = gepub_utils_get_element_by_tag (root_element, "rootfile");
        if (root_node != NULL)
            root_file = gepub_utils_get_prop (root_node, "full-path");

        xmlFreeDoc (doc);
    }

    g_bytes_unref (bytes);

    return root_file;
}
/* GepubArchive
*
* Copyright (C) 2011 Daniel Garcia <danigm@wadobo.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __GEPUB_ARCHIVE_H__
#define __GEPUB_ARCHIVE_H__
#include <glib-object.h>
#include <gio/gio.h>
#include <glib.h>
G_BEGIN_DECLS
#define GEPUB_TYPE_ARCHIVE (gepub_archive_get_type ())
#define GEPUB_ARCHIVE(obj) (G_TYPE_CHECK_INSTANCE_CAST (obj, GEPUB_TYPE_ARCHIVE, GepubArchive))
#define GEPUB_ARCHIVE_CLASS(cls) (G_TYPE_CHECK_CLASS_CAST (cls, GEPUB_TYPE_ARCHIVE, GepubArchiveClass))
#define GEPUB_IS_ARCHIVE(obj) (G_TYPE_CHECK_INSTANCE_TYPE (obj, GEPUB_TYPE_ARCHIVE))
#define GEPUB_IS_ARCHIVE_CLASS(obj) (G_TYPE_CHECK_CLASS_TYPE (obj, GEPUB_TYPE_ARCHIVE))
#define GEPUB_ARCHIVE_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GEPUB_TYPE_ARCHIVE, GepubArchiveClass))
typedef struct _GepubArchive GepubArchive;
typedef struct _GepubArchiveClass GepubArchiveClass;
/* Returns the GType registered for GepubArchive. */
GType gepub_archive_get_type (void) G_GNUC_CONST;
/* Creates a GepubArchive that keeps its own copy of @path. */
GepubArchive *gepub_archive_new (const gchar *path);
/* Returns a list of newly allocated entry pathnames (element-type utf8,
 * transfer full), or NULL if the archive cannot be opened. */
GList *gepub_archive_list_files (GepubArchive *archive);
/* Reads the data of the entry whose pathname matches @path (ASCII
 * case-insensitive). Returns NULL if the archive cannot be opened. */
GBytes *gepub_archive_read_entry (GepubArchive *archive,
const gchar *path);
/* Returns the "full-path" property of the "rootfile" element found in
 * META-INF/container.xml, or NULL if that entry cannot be read. */
gchar *gepub_archive_get_root_file (GepubArchive *archive);
G_END_DECLS
#endif /* __GEPUB_ARCHIVE_H__ */
......@@ -18,14 +18,40 @@
*/
#include <config.h>
#include <gio/gio.h>
#include <libxml/tree.h>
#include <libxml/HTMLparser.h>
#include <string.h>
#include "gepub-utils.h"
#include "gepub-doc.h"
#include "gepub-archive.h"
#include "gepub-text-chunk.h"
// Rust
void *epub_new(char *path);
void epub_destroy(void *doc);
void *epub_get_resource(void *doc, const char *path, int *size);
void *epub_get_resource_by_id(void *doc, const char *id, int *size);
void *epub_get_metadata(void *doc, const char *mdata);
void *epub_get_resource_mime(void *doc, const char *path);
void *epub_get_resource_mime_by_id(void *doc, const char *id);
void *epub_get_current_mime(void *doc);
void *epub_get_current(void *doc, int *size);
void *epub_get_current_with_epub_uris(void *doc, int *size);
void epub_set_page(void *doc, guint page);
guint epub_get_num_pages(void *doc);
guint epub_get_page(void *doc);
gboolean epub_next_page(void *doc);
gboolean epub_prev_page(void *doc);
void *epub_get_cover(void *doc);
void *epub_resource_path(void *doc, const char *id);
void *epub_current_path(void *doc);
void *epub_current_id(void *doc);
void *epub_get_resources(void *doc);
guint epub_resources_get_length(void *er);
gchar *epub_resources_get_id(void *er, gint i);
gchar *epub_resources_get_mime(void *er, gint i);
gchar *epub_resources_get_path(void *er, gint i);
static GQuark
......@@ -49,21 +75,12 @@ typedef enum {
static void gepub_doc_fill_resources (GepubDoc *doc);
static void gepub_doc_fill_spine (GepubDoc *doc);
static void gepub_doc_initable_iface_init (GInitableIface *iface);
struct _GepubDoc {
GObject parent;
GepubArchive *archive;
GBytes *content;
gchar *content_base;
gchar *path;
GHashTable *resources;
GList *spine;
GList *chapter;
void *rust_epub_doc;
};
struct _GepubDocClass {
......@@ -95,24 +112,17 @@ gepub_doc_finalize (GObject *object)
{
GepubDoc *doc = GEPUB_DOC (object);
g_clear_object (&doc->archive);
g_clear_pointer (&doc->content, g_bytes_unref);
epub_destroy (doc->rust_epub_doc);
g_clear_pointer (&doc->path, g_free);
g_clear_pointer (&doc->resources, g_hash_table_destroy);
if (doc->spine) {
g_list_foreach (doc->spine, (GFunc)g_free, NULL);
g_clear_pointer (&doc->spine, g_list_free);
}
G_OBJECT_CLASS (gepub_doc_parent_class)->finalize (object);
}
static void
gepub_doc_set_property (GObject *object,
guint prop_id,
const GValue *value,
GParamSpec *pspec)
guint prop_id,
const GValue *value,
GParamSpec *pspec)
{
GepubDoc *doc = GEPUB_DOC (object);
......@@ -131,9 +141,9 @@ gepub_doc_set_property (GObject *object,
static void
gepub_doc_get_property (GObject *object,
guint prop_id,
GValue *value,
GParamSpec *pspec)
guint prop_id,
GValue *value,
GParamSpec *pspec)
{
GepubDoc *doc = GEPUB_DOC (object);
......@@ -153,13 +163,6 @@ gepub_doc_get_property (GObject *object,
/* GObject instance initializer: creates the (initially empty) resource
 * table of the document. */
static void
gepub_doc_init (GepubDoc *doc)
{
    GHashTable *table;

    /* Resource table layout:
     *   key   -> resource id string, released with g_free
     *   value -> GepubResource (mime, path), released with gepub_resource_free
     */
    table = g_hash_table_new_full (g_str_hash,
                                   g_str_equal,
                                   (GDestroyNotify)g_free,
                                   (GDestroyNotify)gepub_resource_free);
    doc->resources = table;
}
static void
......@@ -196,22 +199,10 @@ gepub_doc_initable_init (GInitable *initable,
GError **error)
{
GepubDoc *doc = GEPUB_DOC (initable);
gchar *file;
gint i = 0, len;
g_assert (doc->path != NULL);
doc->archive = gepub_archive_new (doc->path);
file = gepub_archive_get_root_file (doc->archive);
if (!file) {
if (error != NULL) {
g_set_error (error, gepub_error_quark (), GEPUB_ERROR_INVALID,
"Invalid epub file: %s", doc->path);
}
return FALSE;
}
doc->content = gepub_archive_read_entry (doc->archive, file);
if (!doc->content) {
doc->rust_epub_doc = epub_new (doc->path);
if (!doc->rust_epub_doc) {
if (error != NULL) {
g_set_error (error, gepub_error_quark (), GEPUB_ERROR_INVALID,
"Invalid epub file: %s", doc->path);
......@@ -219,21 +210,6 @@ gepub_doc_initable_init (GInitable *initable,
return FALSE;
}
len = strlen (file);
doc->content_base = g_strdup ("");
for (i=0; i<len; i++) {
if (file[i] == '/') {
g_free (doc->content_base);
doc->content_base = g_strndup (file, i+1);
break;
}
}
gepub_doc_fill_resources (doc);
gepub_doc_fill_spine (doc);
g_free (file);
return TRUE;
}
......@@ -259,95 +235,6 @@ gepub_doc_new (const gchar *path, GError **error)
NULL);
}
/*
 * gepub_doc_fill_resources:
 * @doc: a #GepubDoc whose content has already been loaded
 *
 * Parses doc->content and, for every element child of the "manifest"
 * element, inserts an entry into doc->resources keyed by the item's "id".
 * The stored GepubResource holds the item's "media-type" and its "href"
 * prefixed with doc->content_base.
 *
 * Nothing is added when the content cannot be parsed or has no manifest;
 * items lacking an "id" or "href" are skipped.
 */
static void
gepub_doc_fill_resources (GepubDoc *doc)
{
    xmlDoc *xdoc;
    xmlNode *root_element;
    xmlNode *mnode = NULL;
    xmlNode *item;
    const char *data;
    gsize size;

    data = g_bytes_get_data (doc->content, &size);
    xdoc = xmlRecoverMemory (data, size);
    if (xdoc == NULL)
        return;

    root_element = xmlDocGetRootElement (xdoc);
    if (root_element != NULL)
        mnode = gepub_utils_get_element_by_tag (root_element, "manifest");

    /* A document without a manifest simply contributes no resources. */
    for (item = (mnode != NULL) ? mnode->children : NULL; item != NULL; item = item->next) {
        gchar *id;
        gchar *href;
        GepubResource *res;

        if (item->type != XML_ELEMENT_NODE)
            continue;

        id = gepub_utils_get_prop (item, "id");
        href = gepub_utils_get_prop (item, "href");

        /* The table hashes keys with g_str_hash, which cannot take NULL, and
         * an item without href has no usable path. */
        if (id == NULL || href == NULL) {
            g_free (id);
            g_free (href);
            continue;
        }

        res = g_malloc (sizeof (GepubResource));
        res->mime = gepub_utils_get_prop (item, "media-type");
        res->uri = g_strdup_printf ("%s%s", doc->content_base, href);
        g_free (href);

        /* The table takes ownership of both id and res. */
        g_hash_table_insert (doc->resources, id, res);
    }

    xmlFreeDoc (xdoc);
}
/*
 * gepub_doc_fill_spine:
 * @doc: a #GepubDoc whose content has already been loaded
 *
 * Parses doc->content and stores, in document order, the "idref" of every
 * element child of the "spine" element in doc->spine. doc->chapter is set
 * to the first element of that list.
 *
 * When the content cannot be parsed or has no spine, both doc->spine and
 * doc->chapter end up as empty (NULL) lists.
 */
static void
gepub_doc_fill_spine (GepubDoc *doc)
{
    xmlDoc *xdoc;
    xmlNode *root_element;
    xmlNode *snode = NULL;
    xmlNode *item;
    const char *data;
    gsize size;
    GList *spine = NULL;

    data = g_bytes_get_data (doc->content, &size);
    xdoc = xmlRecoverMemory (data, size);

    if (xdoc != NULL) {
        root_element = xmlDocGetRootElement (xdoc);
        if (root_element != NULL)
            snode = gepub_utils_get_element_by_tag (root_element, "spine");
    }

    /* A missing spine element yields an empty list instead of a NULL
     * dereference. */
    for (item = (snode != NULL) ? snode->children : NULL; item != NULL; item = item->next) {
        if (item->type != XML_ELEMENT_NODE)
            continue;

        spine = g_list_prepend (spine, gepub_utils_get_prop (item, "idref"));
    }

    /* Items were prepended, so reverse to restore document order. */
    doc->spine = g_list_reverse (spine);
    doc->chapter = doc->spine;

    if (xdoc != NULL)
        xmlFreeDoc (xdoc);
}
/**
 * gepub_doc_get_content:
 * @doc: a #GepubDoc
 *
 * Gives access to the content bytes stored in the document. The returned
 * #GBytes still belongs to @doc; no new reference is taken.
 *
 * Returns: (transfer none): the document content
 */
GBytes *
gepub_doc_get_content (GepubDoc *doc)
{
    GBytes *content;

    g_return_val_if_fail (GEPUB_IS_DOC (doc), NULL);

    content = doc->content;
    return content;
}
/**
* gepub_doc_get_metadata:
* @doc: a #GepubDoc
......@@ -358,45 +245,41 @@ gepub_doc_get_content (GepubDoc *doc)
gchar *
gepub_doc_get_metadata (GepubDoc *doc, const gchar *mdata)
{
xmlDoc *xdoc = NULL;
xmlNode *root_element = NULL;
xmlNode *mnode = NULL;
xmlNode *mdata_node = NULL;
gchar *ret;
xmlChar *text;
const char *data;
gsize size;
g_return_val_if_fail (GEPUB_IS_DOC (doc), NULL);
g_return_val_if_fail (mdata != NULL, NULL);
data = g_bytes_get_data (doc->content, &size);
xdoc = xmlRecoverMemory (data, size);
root_element = xmlDocGetRootElement (xdoc);
mnode = gepub_utils_get_element_by_tag (root_element, "metadata");
mdata_node = gepub_utils_get_element_by_tag (mnode, mdata);
text = xmlNodeGetContent (mdata_node);
ret = g_strdup ((const char *) text);
xmlFree (text);
xmlFreeDoc (xdoc);
return ret;
return epub_get_metadata (doc->rust_epub_doc, mdata);
}
/**
* gepub_doc_get_resources:
* @doc: a #GepubDoc
*
* Returns: (element-type utf8 Gepub.Resource) (transfer none): doc resource table
* Returns: (element-type utf8 Gepub.Resource) (transfer full): doc resource table
*/
GHashTable *
gepub_doc_get_resources (GepubDoc *doc)
{
g_return_val_if_fail (GEPUB_IS_DOC (doc), NULL);
return doc->resources;
GHashTable *resources = g_hash_table_new_full (g_str_hash,
g_str_equal,
(GDestroyNotify)g_free,
(GDestroyNotify)gepub_resource_free);
void *res = epub_get_resources (doc->rust_epub_doc);
guint l = epub_resources_get_length (res);
gint i = 0;
for (i=0; i<l; i++) {
gchar *key = epub_resources_get_id (res, i);
GepubResource *r = g_malloc (sizeof (GepubResource));
r->uri = epub_resources_get_path (res, i);
r->mime = epub_resources_get_mime (res, i);
g_hash_table_insert (resources, key, r);
}
return resources;
}
/**
......@@ -409,18 +292,12 @@ gepub_doc_get_resources (GepubDoc *doc)
GBytes *
gepub_doc_get_resource_by_id (GepubDoc *doc, const gchar *id)
{
GepubResource *gres;
g_return_val_if_fail (GEPUB_IS_DOC (doc), NULL);
g_return_val_if_fail (id != NULL, NULL);
gres = g_hash_table_lookup (doc->resources, id);
if (!gres) {
// not found
return NULL;
}
return gepub_archive_read_entry (doc->archive, gres->uri);
int size = 0;
guint8 *data = epub_get_resource_by_id (doc->rust_epub_doc, id, &size);
return g_bytes_new_take (data, size);
}
/**
......@@ -436,7 +313,9 @@ gepub_doc_get_resource (GepubDoc *doc, const gchar *path)
g_return_val_if_fail (GEPUB_IS_DOC (doc), NULL);
g_return_val_if_fail (path != NULL, NULL);
return gepub_archive_read_entry (doc->archive, path);
int size = 0;
guint8 *data = epub_get_resource (doc->rust_epub_doc, path, &size);
return g_bytes_new_take (data, size);
}
/**
......@@ -449,18 +328,10 @@ gepub_doc_get_resource (GepubDoc *doc, const gchar *path)
gchar *
gepub_doc_get_resource_mime_by_id (GepubDoc *doc, const gchar *id)
{
GepubResource *gres;
g_return_val_if_fail (GEPUB_IS_DOC (doc), NULL);
g_return_val_if_fail (id != NULL, NULL);
gres = g_hash_table_lookup (doc->resources, id);
if (!gres) {
// not found
return NULL;
}
return g_strdup (gres->mime);
return epub_get_resource_mime_by_id (doc->rust_epub_doc, id);
}
/**
......@@ -473,25 +344,10 @@ gepub_doc_get_resource_mime_by_id (GepubDoc *doc, const gchar *id)
gchar *
gepub_doc_get_resource_mime (GepubDoc *doc, const gchar *path)
{
GepubResource *gres;
GList *keys;
g_return_val_if_fail (GEPUB_IS_DOC (doc), NULL);
g_return_val_if_fail (path != NULL, NULL);
keys = g_hash_table_get_keys (doc->resources);
while (keys) {
gres = ((GepubResource*)g_hash_table_lookup (doc->resources, keys->data));
if (!strcmp (gres->uri, path))
break;
keys = keys->next;
}
if (keys)
return g_strdup (gres->mime);
else
return NULL;
return epub_get_resource_mime (doc->rust_epub_doc, path);
}
/**
......@@ -504,9 +360,8 @@ gchar *
gepub_doc_get_current_mime (GepubDoc *doc)
{
g_return_val_if_fail (GEPUB_IS_DOC (doc), NULL);
g_return_val_if_fail (doc->chapter != NULL, NULL);
return gepub_doc_get_resource_mime_by_id (doc, doc->chapter->data);
return epub_get_current_mime (doc->rust_epub_doc);
}
/**
......@@ -519,9 +374,10 @@ GBytes *
gepub_doc_get_current (GepubDoc *doc)
{
g_return_val_if_fail (GEPUB_IS_DOC (doc), NULL);
g_return_val_if_fail (doc->chapter != NULL, NULL);
return gepub_doc_get_resource_by_id (doc, doc->chapter->data);
int size = 0;
guint8 *data = epub_get_current (doc->rust_epub_doc, &size);
return g_bytes_new_take (data, size);
}
/**
......@@ -535,106 +391,11 @@ gepub_doc_get_current (GepubDoc *doc)
/*
 * gepub_doc_get_current_with_epub_uris:
 * @doc: a #GepubDoc
 *
 * Fetches the current chapter and passes it, together with the directory
 * part of the chapter's path, to gepub_utils_replace_resources().
 *
 * Returns: the bytes produced by gepub_utils_replace_resources(), or NULL
 * when the current chapter or its path is not available
 */
GBytes *
gepub_doc_get_current_with_epub_uris (GepubDoc *doc)
{
    GBytes *content, *replaced;
    gchar *path, *base;

    g_return_val_if_fail (GEPUB_IS_DOC (doc), NULL);

    content = gepub_doc_get_current (doc);
    if (content == NULL)
        return NULL;

    path = gepub_doc_get_current_path (doc);
    if (path == NULL) {
        g_bytes_unref (content);
        return NULL;
    }

    /* getting the basepath of the current xhtml loaded */
    base = g_path_get_dirname (path);

    replaced = gepub_utils_replace_resources (content, base);

    /* g_path_get_dirname() returns a newly allocated string that has to be
     * freed here. NOTE(review): assumes gepub_utils_replace_resources() does
     * not keep the pointer after returning - confirm. */
    g_free (base);
    g_free (path);
    g_bytes_unref (content);

    return replaced;
}
/**
 * gepub_doc_get_text:
 * @doc: a #GepubDoc
 *
 * Parses the current chapter as HTML (parser warnings and errors are
 * suppressed) and collects its text elements through
 * gepub_utils_get_text_elements().
 *
 * Returns: (element-type Gepub.TextChunk) (transfer full): the list of text in the current chapter.
 */
GList *
gepub_doc_get_text (GepubDoc *doc)
{
    GBytes *chapter;
    GList *chunks;
    xmlDoc *html;
    const gchar *raw;
    gsize raw_len;

    g_return_val_if_fail (GEPUB_IS_DOC (doc), NULL);

    chapter = gepub_doc_get_current (doc);
    if (chapter == NULL)
        return NULL;

    raw = g_bytes_get_data (chapter, &raw_len);
    html = htmlReadMemory (raw, raw_len, "", NULL, HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR);
    chunks = gepub_utils_get_text_elements (xmlDocGetRootElement (html));

    /* The chapter bytes and the parsed tree are only needed while
     * collecting the chunks. */
    g_bytes_unref (chapter);
    xmlFreeDoc (html);

    return chunks;
}
/**
* gepub_doc_get_text_by_id:
* @doc: a #GepubDoc
* @id: the resource id
*
* Returns: (element-type Gepub.TextChunk) (transfer full): the list of text in the current chapter.
*/
GList *
gepub_doc_get_text_by_id (GepubDoc *doc, const gchar *id)
{
xmlDoc *xdoc = NULL;
xmlNode *root_element = NULL;
gsize size;
const gchar *res;
GBytes *contents;
GList *texts = NULL;
g_return_val_if_fail (GEPUB_IS_DOC (doc), NULL);
g_return_val_if_fail (id != NULL, NULL);
contents = gepub_doc_get_resource_by_id (doc, id);
if (!contents) {
return NULL;
}
res = g_bytes_get_data (contents, &size);
xdoc = htmlReadMemory (res, size, "", NULL, HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR);
root_element = xmlDocGetRootElement (xdoc);
texts = gepub_utils_get_text_elements (root_element);
g_bytes_unref (contents);