Commit e6272d96 authored by Murray Cumming's avatar Murray Cumming
Browse files

Backups: Use libarchive instead of calling the tar command-line utility.

* configure.ac: Depend on libarchive.
* glom/libglom/document/document.[h|cc]:
  save_backup_file(): Use libarchive instead of the command-line.
  restore_backup_file(): Rename to extract_backup_file() and redocument it
  to return the contents in memory rather than a path to a temporary file.
  Use libarchive rather than the command-line.
* tests/test_selfhosting_utils.[h|cc]: Add test_create_and_selfhost_from_data(),
  moving some shared code into after_load().
* tests/test_selfhosting_new_then_backup_restore.cc: Adapted.

  The libarchive API is not well documented and the code is rather verbose
  and repetitive, mostly due to the error checking, so it does not feel
  entirely robust. But in general, this should be safer than using shell
  commands, even though we were doing that properly. And this would let us
  load the example data progressively/asynchronously if we make suitable
  changes to the API.
parent c4b71541
......@@ -157,7 +157,7 @@ AS_IF([test "x$glom_enable_postgresql" = xyes],
# Libraries used by libglom:
REQUIRED_LIBGLOM_LIBS='giomm-2.4 >= 2.32.0 libxml++-2.6 >= 2.23.1 libxslt >= 1.1.10 pygobject-3.0 >= 2.29.0 libgdamm-5.0 >= 4.99.6 libgda-5.0 >= 5.2.1 libgda-postgres-5.0 libgda-postgres-5.0 libgda-mysql-5.0'
REQUIRED_LIBGLOM_LIBS='giomm-2.4 >= 2.32.0 libxml++-2.6 >= 2.23.1 libxslt >= 1.1.10 pygobject-3.0 >= 2.29.0 libgdamm-5.0 >= 4.99.6 libgda-5.0 >= 5.2.1 libgda-postgres-5.0 libgda-postgres-5.0 libgda-mysql-5.0 libarchive >= 3.0'
AS_IF([test "x$glom_host_win32" != xyes],
[REQUIRED_LIBGLOM_LIBS="$REQUIRED_LIBGLOM_LIBS libepc-1.0 >= 0.4.0"])
......
......@@ -2489,19 +2489,17 @@ bool AppWindow::do_restore_backup(const Glib::ustring& backup_uri)
return false;
ShowProgressMessage progress_message(_("Restoring backup"));
const Glib::ustring restored_file = Glom::Document::restore_backup_file(
const Glib::ustring backup_file_contents = Glom::Document::extract_backup_file(
backup_uri,
sigc::mem_fun(*this, &AppWindow::on_connection_convert_backup_progress));
if(restored_file.empty())
if(backup_file_contents.empty())
{
ui_warning(_("Restore Backup failed."), _("There was an error while restoring the backup."));
ui_warning(_("Restore Backup failed."), _("There was an error while extracting the backup."));
return false;
}
open_document(restored_file);
return true;
return open_document_from_data((const guchar*)backup_file_contents.c_str(), backup_file_contents.bytes());
}
void AppWindow::on_menu_developer_enable_layout_drag_and_drop()
......
......@@ -43,6 +43,10 @@
#include <libglom/connectionpool.h>
//libarchive:
#include <archive.h>
#include <archive_entry.h>
#include <glibmm/i18n.h>
//#include <libglom/libglom_config.h> //To get GLOM_DTD_INSTALL_DIR - dependent on configure prefix.
#include <algorithm> //For std::find_if().
......@@ -1684,6 +1688,7 @@ void Document::set_table_title(const Glib::ustring& table_name, const Glib::ustr
}
}
//TODO: Avoid doing this all in one go, because that leaves all the data in memory at once.
void Document::set_table_example_data(const Glib::ustring& table_name, const type_example_rows& rows)
{
if(!table_name.empty())
......@@ -1697,6 +1702,7 @@ void Document::set_table_example_data(const Glib::ustring& table_name, const typ
}
}
//TODO: Avoid doing this all in one go, because that leaves all the data in memory at once.
Document::type_example_rows Document::get_table_example_data(const Glib::ustring& table_name) const
{
const sharedptr<const DocumentTableInfo> doctableinfo = get_table_info(table_name);
......@@ -2792,6 +2798,8 @@ bool Document::load_after(int& failure_code)
// Load Example Rows after fields have been loaded, because they
// need the fields to be able to associate a value to a named field.
// TODO: Allow this to be loaded progressively from disk later,
// instead of storing in it all in memory?
const xmlpp::Element* nodeExampleRows = XmlUtils::get_node_child_named(nodeTable, GLOM_NODE_EXAMPLE_ROWS);
if(nodeExampleRows)
{
......@@ -4817,14 +4825,84 @@ bool Document::load(int& failure_code)
return GlomBakery::Document_XML::load(failure_code);
}
namespace { //anonymous namespace
/** Write libarchive's current error message for an archive object to stderr.
 *
 * This is meant to be called right after a libarchive call on @a a has
 * reported a failure, after the caller has written its own context message.
 *
 * @param a The archive object whose error message should be reported.
 * It must still be alive - do not call this after the object has been freed.
 */
static void handle_archive_error(archive* a)
{
  //archive_error_string() returns a null pointer when no error message
  //has been recorded, and streaming a null char* is undefined behaviour,
  //so we substitute a placeholder in that case.
  const char* message = a ? archive_error_string(a) : 0;
  std::cerr << " " << (message ? message : "(no libarchive error message available)") << std::endl;
}
/** Owns a libarchive C object and makes sure that it is always released.
 *
 * The release function (for instance, archive_read_free() or
 * archive_write_free()) is called once, from the destructor,
 * unless the pointer or the release function is null.
 * A failure reported by the release function is logged to stderr.
 *
 * Instances cannot be copied, because a copy would release the object twice.
 */
template <typename T_Object>
class ScopedArchivePtr
{
public:
  /// The signature of a libarchive release function that returns a result code.
  typedef int (*T_ReleaseFunc)(T_Object*);

  /**
   * @param ptr The object to release when this instance is destroyed.
   * @param release_func The function that releases @a ptr.
   */
  ScopedArchivePtr(T_Object* ptr, T_ReleaseFunc release_func)
  : ptr_(ptr),
    release_func_(release_func)
  {}

  ~ScopedArchivePtr()
  {
    if(!release_func_ || !ptr_)
      return;

    const int r = (*release_func_)(ptr_);

    //The object must be considered released now, even if the release
    //function reported a failure, so we must not ask it for its error
    //string. We can only report the result code.
    ptr_ = 0;

    if(r != ARCHIVE_OK)
    {
      std::cerr << G_STRFUNC << ": The release_func failed." << std::endl;
      std::cerr << " result code=" << r << std::endl;
    }
  }

private:
  T_Object* ptr_;
  T_ReleaseFunc release_func_;

  //Prevent copying. These are declared but not defined:
  ScopedArchivePtr(const ScopedArchivePtr<T_Object>&);
  ScopedArchivePtr<T_Object>& operator=(const ScopedArchivePtr<T_Object>&);
};
/** Owns a C object and makes sure that it is always released.
 *
 * This is the same as ScopedArchivePtr but with a different release function
 * signature: The release function (for instance, archive_entry_free())
 * returns nothing, so there is no result to check.
 *
 * The release function is called once, from the destructor, unless the
 * pointer or the release function is null.
 *
 * Instances cannot be copied, because a copy would release the object twice.
 */
template <typename T_Object>
class ScopedArchiveEntryPtr
{
public:
  /// The signature of a release function that returns nothing.
  typedef void (*T_ReleaseFunc)(T_Object*);

  /**
   * @param ptr The object to release when this instance is destroyed.
   * @param release_func The function that releases @a ptr.
   */
  ScopedArchiveEntryPtr(T_Object* ptr, T_ReleaseFunc release_func)
  : ptr_(ptr),
    release_func_(release_func)
  {}

  ~ScopedArchiveEntryPtr()
  {
    if(release_func_ && ptr_)
      (*release_func_)(ptr_);
  }

private:
  T_Object* ptr_;
  T_ReleaseFunc release_func_;

  //Prevent copying. These are declared but not defined.
  //They must take this class's own type, otherwise the compiler would still
  //generate the real copy constructor and copy assignment operator.
  ScopedArchiveEntryPtr(const ScopedArchiveEntryPtr<T_Object>&);
  ScopedArchiveEntryPtr<T_Object>& operator=(const ScopedArchiveEntryPtr<T_Object>&);
};
} ////anonymous namespace
//TODO: Make this async, using File::read_async() and IOStream::read_async().
Glib::ustring Document::save_backup_file(const Glib::ustring& uri, const SlotProgress& slot_progress)
{
//Save a copy of the .glom document,
//with the same name as the directory:
//For instance <path>/chosendirectory/chosendirectory.glom
const std::string path_dir = Glib::filename_from_uri(uri);
const std::string basename = Glib::path_get_basename(path_dir);
const std::string& filepath_document = Glib::build_filename(path_dir, basename + ".glom");
const std::string basename_dir = Glib::path_get_basename(path_dir);
const std::string& filepath_document = Glib::build_filename(path_dir, basename_dir + ".glom");
const Glib::ustring uri_document = Glib::filename_to_uri(filepath_document);
const Glib::ustring fileuri_old = get_file_uri();
......@@ -4855,103 +4933,214 @@ Glib::ustring Document::save_backup_file(const Glib::ustring& uri, const SlotPro
}
//Compress the backup in a .tar.gz, so it is slightly more safe from changes:
const std::string path_tar = Glib::find_program_in_path("tar");
if(path_tar.empty())
const Glib::RefPtr<const Gio::File> gio_file = Gio::File::create_for_path(path_dir);
const std::string dir_basename = gio_file->get_basename();
const Glib::RefPtr<const Gio::File> gio_file_parent = gio_file->get_parent();
const std::string parent_dir = gio_file_parent->get_path();
if(parent_dir.empty() || dir_basename.empty())
{
std::cerr << G_STRFUNC << ": The tar executable could not be found." << std::endl;
std::cerr << G_STRFUNC << "parent_dir or basename are empty." << std::endl;
return Glib::ustring();
}
else
const std::string tarball_path = path_dir + ".tar.gz";
//TODO: Use read_async() when this calling method is async.
Glib::RefPtr<Gio::File> file = Gio::File::create_for_uri(uri_document);
Glib::RefPtr<Gio::FileInputStream> stream;
try
{
Glib::RefPtr<const Gio::File> gio_file = Gio::File::create_for_path(path_dir);
const std::string basename = gio_file->get_basename();
Glib::RefPtr<const Gio::File> gio_file_parent = gio_file->get_parent();
const std::string parent_dir = gio_file_parent->get_path();
if(parent_dir.empty() || basename.empty())
{
std::cerr << G_STRFUNC << "parent_dir or basename are empty." << std::endl;
return Glib::ustring();
}
else
stream = file->read();
// Query size of the file, so that we can show progress:
//TODO: stream->query_info_async(sigc::mem_fun(*this, &DialogImageLoadProgress::on_query_info), G_FILE_ATTRIBUTE_STANDARD_SIZE);
}
catch(const Glib::Error& ex)
{
std::cerr << G_STRFUNC << ": Gio::File::read() failed: " << ex.what() << std::endl;
return Glib::ustring();
}
struct archive* a = archive_write_new();
ScopedArchivePtr<archive> scoped(a, &archive_write_free); //Make sure it is always released.
if(archive_write_add_filter_gzip(a) != ARCHIVE_OK)
{
std::cerr << G_STRFUNC << ": libarchive does not support tar." << std::endl;
handle_archive_error(a);
return Glib::ustring();
}
if(archive_write_set_format_pax_restricted(a) != ARCHIVE_OK)
{
std::cerr << G_STRFUNC << ": libarchive does not support pax_restricted." << std::endl;
handle_archive_error(a);
return Glib::ustring();
}
if(archive_write_set_bytes_per_block(a, 4096) != ARCHIVE_OK)
{
std::cerr << G_STRFUNC << ": libarchive: cannot set bytes per block." << std::endl;
handle_archive_error(a);
return Glib::ustring();
}
if(archive_write_open_filename(a, tarball_path.c_str()) != ARCHIVE_OK)
{
std::cerr << G_STRFUNC << ": Could not open a new archive file for writing." << std::endl;
handle_archive_error(a);
return Glib::ustring();
}
struct stat st;
stat(filepath_document.c_str(), &st);
struct archive_entry* entry = archive_entry_new();
ScopedArchiveEntryPtr<archive_entry> scoped_entry(entry, &archive_entry_free); //Make sure it is always released.
archive_entry_copy_stat(entry, &st); //This has no return value.
const std::string basename = Glib::path_get_basename(filepath_document);
archive_entry_set_pathname(entry, basename.c_str()); //This has no return value.
if(archive_write_header(a, entry) != ARCHIVE_OK)
{
std::cerr << G_STRFUNC << ": Could not write archive header." << std::endl;
handle_archive_error(a);
return Glib::ustring();
}
//TODO: Use read_async() when this calling method is async.
try
{
// Query size of the file, so that we can show progress:
//TODO: stream->query_info_async(sigc::mem_fun(*this, &DialogImageLoadProgress::on_query_info), G_FILE_ATTRIBUTE_STANDARD_SIZE);
const guint BYTES_TO_PROCESS = 256;
guint buffer[BYTES_TO_PROCESS] = {0, }; // For each chunk.
bool bContinue = true;
while(bContinue)
{
const std::string tarball_path = path_dir + ".tar.gz";
//TODO: Find some way to do this without using the command-line,
//which feels fragile:
const std::string command_tar = Glib::shell_quote(path_tar) +
" --force-local --no-wildcards" + //Avoid side-effects of special characters.
" --remove-files" +
" -czf"
" " + Glib::shell_quote(tarball_path) +
" --directory " + Glib::shell_quote(parent_dir) + //This must be right before the mention of the file name:
" " + Glib::shell_quote(basename);
//std::cout << "DEBUG: command_tar=" << command_tar << std::endl;
const bool tarred = Glom::Spawn::execute_command_line_and_wait(command_tar,
slot_progress);
if(!tarred)
const gssize bytes_read = stream->read(buffer, BYTES_TO_PROCESS);
if(bytes_read == 0)
bContinue = false; //stop because we reached the end.
else
{
std::cerr << G_STRFUNC << "tar failed with command:" << command_tar << std::endl;
return Glib::ustring();
// Add the data to the archive:
ssize_t check = archive_write_data(a, buffer, bytes_read);
if(check != bytes_read)
{
std::cerr << G_STRFUNC << ": archive_write_data() wrote an unexpected number of bytes. " << std::endl;
handle_archive_error(a);
return Glib::ustring();
}
}
return Glib::filename_to_uri(tarball_path);
}
}
catch(const Glib::Error& ex)
{
std::cerr << G_STRFUNC << ": stream read() failed: " << ex.what() << std::endl;
return Glib::ustring();
}
//TODO? archive_write_finish_entry(entry);
if(archive_write_close(a))
{
std::cerr << G_STRFUNC << ": Could not close archive." << std::endl;
handle_archive_error(a);
return Glib::ustring();
}
return Glib::filename_to_uri(tarball_path);
}
Glib::ustring Document::restore_backup_file(const Glib::ustring& backup_uri, const SlotProgress& slot_progress)
Glib::ustring Document::extract_backup_file(const Glib::ustring& backup_uri, const SlotProgress& slot_progress)
{
// We cannot use an uri here, because we cannot untar remote files.
const std::string filename_tarball = Glib::filename_from_uri(backup_uri);
const std::string path_tar = Glib::find_program_in_path("tar");
if(path_tar.empty())
struct archive* a = archive_read_new();
ScopedArchivePtr<archive> scoped(a, &archive_read_free); //Make sure it is always released.
if(archive_read_support_filter_gzip(a) != ARCHIVE_OK)
{
std::cerr << G_STRFUNC << ": The tar executable could not be found." << std::endl;
std::cerr << G_STRFUNC << ": libarchive apparently does not support gzip." << std::endl;
handle_archive_error(a);
return Glib::ustring();
}
//Create a temporary directory into which we will untar the tarball:
const std::string path_tmp = Utils::get_temp_directory_path(
Glib::path_get_basename(filename_tarball) + "_extracted");
if(archive_read_support_format_all(a) != ARCHIVE_OK)
{
std::cerr << G_STRFUNC << ": libarchive apparently does not support standard formats." << std::endl;
handle_archive_error(a);
return Glib::ustring();
}
//archive_read_support_compression_all(a);
//Untar into the tmp directory:
//TODO: Find some way to do this without using the command-line,
//which feels fragile:
const std::string command_tar = Glib::shell_quote(path_tar) +
" --force-local --no-wildcards" + //Avoid side-effects of special characters.
" -xzf"
" " + Glib::shell_quote(filename_tarball) +
" --directory " + Glib::shell_quote(path_tmp);
if(archive_read_open_filename(a, filename_tarball.c_str(), 10240) != ARCHIVE_OK) //TODO
{
std::cerr << G_STRFUNC << ": could not read filename from archive." << std::endl;
handle_archive_error(a);
return Glib::ustring();
}
//std::cout << "DEBUG: command_tar=" << command_tar << std::endl;
slot_progress();
const bool untarred = Glom::Spawn::execute_command_line_and_wait(command_tar,
slot_progress);
if(!untarred)
//We expect just one file:
struct archive_entry* entry = 0;
if(archive_read_next_header(a, &entry) != ARCHIVE_OK)
{
std::cerr << G_STRFUNC << ": tar failed with command:" << command_tar << std::endl;
std::cerr << G_STRFUNC << ": Could not read next archive entry." << std::endl;
handle_archive_error(a);
return Glib::ustring();
}
//const char *name = archive_entry_pathname(entry);
//std::cout << "debug: name=" << name << std::endl;
slot_progress();
Glib::ustring contents;
//Open the .glom file that is in the tmp directory:
const Glib::ustring uri_tmp = Glib::filename_to_uri(path_tmp);
const Glib::ustring untarred_uri = Utils::get_directory_child_with_suffix(uri_tmp, ".glom", true /* recurse */);
if(untarred_uri.empty())
//Read the whole file in one go.
//We would have to keep it all in memory anyway, because we concatenate it,
//even if we read it in chunks.
//TODO: Backup files will, of course, often have large amounts of (example) data.
//So we should, elsewhere, make it possible to load that data progressively,
//maybe discarding it during a first read, and adapt this code to that new API.
slot_progress();
const size_t size = archive_entry_size(entry);
const Glib::ScopedPtr<char> buf ((char*) g_malloc(size + 1));
const ssize_t r = archive_read_data(a, buf.get(), size);
if((r == ARCHIVE_FATAL) || (r == ARCHIVE_WARN) ||
(r == ARCHIVE_RETRY)) //0 or a number of bytes read are the signs of success.
{
std::cerr << G_STRFUNC << ": There was an error while restoring the backup. The .glom file could not be found.";
return Glib::ustring();
std::cerr << G_STRFUNC << ": Error while reading data from archive entry. r=" << r << std::endl;
handle_archive_error(a);
return Glib::ustring();
}
//Delete the temporary untarred directory:
//Actually, we just leave this here, where the system will clean it up anyway,
//because open_document() starts a new process,
//so we don't know when we can safely delete the files.
//Utils::delete_directory(uri_tmp);
try
{
//For std::string, the size is the number of bytes. For ustring it would be the number of characters.
contents += std::string(buf.get(), r);
}
catch(const std::exception& ex)
{
std::cerr << G_STRFUNC << ": std::exception error while concatenating archive data: "
<< ex.what() << std::endl;
return Glib::ustring();
}
return untarred_uri;
return contents;
}
......
......@@ -466,9 +466,9 @@ public:
/**
* @param backup_uri: The URI of a .tar.gz backup file.
* @result The URI of the restored .glom file.
* @result The contents of the .glom file from the .tar.gz file.
*/
static Glib::ustring restore_backup_file(const Glib::ustring& backup_uri, const SlotProgress& slot_progress);
static Glib::ustring extract_backup_file(const Glib::ustring& backup_uri, const SlotProgress& slot_progress);
protected:
......@@ -530,6 +530,25 @@ private:
type_list_translatables get_translatable_report_items(const Glib::ustring& table_name, const Glib::ustring& report_name, const Glib::ustring& hint);
type_list_translatables get_translatable_print_layout_items(const Glib::ustring& table_name, const Glib::ustring& print_layout_name, const Glib::ustring& hint);
/* For use when making save_backup_file() async.
class FileReadWriteToArchiveData
{
FileReadWriteToArchiveData()
: a(0)
{}
Glib::RefPtr<Gio::File> file;
Glib::RefPtr<Gio::FileInputStream> stream;
struct archive* a;
private:
//Prevent copying:
FileReadWriteToArchiveData(const FileReadWriteToArchiveData& src);
FileReadWriteToArchiveData operator=(const FileReadWriteToArchiveData& src);
};
*/
AppState m_app_state;
type_signal_userlevel_changed m_signal_userlevel_changed;
......
......@@ -62,13 +62,13 @@ static bool test(Glom::Document::HostingMode hosting_mode)
//Create a new document from the backup:
{
const Glib::ustring recreated_uri =
Glom::Document::restore_backup_file(
const Glib::ustring backup_file_contents =
Glom::Document::extract_backup_file(
backup_uri_tarball,
sigc::ptr_fun(&on_backup_progress));
if(recreated_uri.empty())
if(backup_file_contents.empty())
{
std::cerr << G_STRFUNC << ": Recreation from the example failed." << std::endl;
std::cerr << G_STRFUNC << ": Extraction from the backup file failed." << std::endl;
return false;
}
......@@ -76,7 +76,7 @@ static bool test(Glom::Document::HostingMode hosting_mode)
//std::cout << "debug: recreated_uri=" << recreated_uri << std::endl;
Glom::Document document;
const bool recreated =
test_create_and_selfhost_from_uri(recreated_uri, document, hosting_mode);
test_create_and_selfhost_from_data(backup_file_contents, document, hosting_mode);
if(!recreated)
{
std::cerr << G_STRFUNC << ": Recreation from the backup failed." << std::endl;
......
......@@ -322,6 +322,27 @@ bool test_create_and_selfhost_from_test_example(const std::string& example_filen
return test_create_and_selfhost_from_example_full_path(path, document, hosting_mode);
}
/** Shared final steps for the test_create_and_selfhost_from_*() functions,
 * to be run once the document's contents have been loaded.
 *
 * This checks that the document is an example or a backup, creates and
 * self-hosts a new empty database for it, and then recreates the database
 * from the document. If that recreation fails, test_selfhosting_cleanup()
 * is called.
 *
 * @param document The already-loaded document.
 * @param hosting_mode Passed on to test_create_and_selfhost_new_empty().
 * @param subdirectory_path Passed on to test_create_and_selfhost_new_empty().
 * @result true if the database was recreated from the document.
 */
static bool after_load(Glom::Document& document, Glom::Document::HostingMode hosting_mode, const std::string& subdirectory_path)
{
  //Only examples and backups contain the data needed to recreate a database:
  const bool has_data = document.get_is_example_file() || document.get_is_backup_file();
  if(!has_data)
  {
    std::cerr << G_STRFUNC << ": The document is not an example or a backup." << std::endl;
    return false;
  }

  const bool hosted = test_create_and_selfhost_new_empty(document, hosting_mode, subdirectory_path);
  if(!hosted)
  {
    std::cerr << G_STRFUNC << ": test_create_and_selfhost_new_empty() failed." << std::endl;
    return false;
  }

  if(Glom::DbUtils::recreate_database_from_document(&document, sigc::ptr_fun(&on_recreate_progress)))
    return true;

  //Don't leave the self-hosted database behind after a failure:
  test_selfhosting_cleanup();
  return false;
}
bool test_create_and_selfhost_from_uri(const Glib::ustring& example_file_uri, Glom::Document& document, Glom::Document::HostingMode hosting_mode, const std::string& subdirectory_path)
{
if( (hosting_mode != Glom::Document::HOSTING_MODE_POSTGRES_SELF) &&
......@@ -345,25 +366,32 @@ bool test_create_and_selfhost_from_uri(const Glib::ustring& example_file_uri, Gl
return false;
}
if(!document.get_is_example_file() && !document.get_is_backup_file())
return after_load(document, hosting_mode, subdirectory_path);
}
bool test_create_and_selfhost_from_data(const Glib::ustring& example_file_contents, Glom::Document& document, Glom::Document::HostingMode hosting_mode, const std::string& subdirectory_path)
{
if( (hosting_mode != Glom::Document::HOSTING_MODE_POSTGRES_SELF) &&
(hosting_mode != Glom::Document::HOSTING_MODE_MYSQL_SELF) &&
(hosting_mode != Glom::Document::HOSTING_MODE_SQLITE) )
{
std::cerr << G_STRFUNC << ": The document is not an example or a backup. uri=" << example_file_uri << std::endl;
std::cerr << G_STRFUNC << ": This test function does not support the specified hosting_mode: " << hosting_mode << std::endl;
return false;
}
if(!test_create_and_selfhost_new_empty(document, hosting_mode, subdirectory_path))
document.set_allow_autosave(false); //To simplify things and to not depend implicitly on autosave.
int failure_code = 0;
const bool test = document.load_from_data((const guchar*)example_file_contents.c_str(), example_file_contents.size(), failure_code);
if(!test)
{
std::cerr << G_STRFUNC << ": test_create_and_selfhost_new_empty() failed. uri=" << example_file_uri << std::endl;
std::cerr << G_STRFUNC << ": Document::load_from_data() failed with failure_code=" << failure_code << std::endl;
return false;
}
const bool recreated = Glom::DbUtils::recreate_database_from_document(&document, sigc::ptr_fun(&on_recreate_progress) );
if(!recreated)
test_selfhosting_cleanup();
return recreated;
return after_load(document, hosting_mode, subdirectory_path);
}
bool test_model_expected_size(const Glib::RefPtr<const Gnome::Gda::DataModel>& data_model, guint columns_count, guint rows_count)
{
if(!data_model)
......
......@@ -64,7 +64,15 @@ bool test_create_and_selfhost_from_test_example(const std::string& example_filen
* @param hosting_mode Either HOSTING_MODE_POSTGRES_SELF or HOSTING_MODE_SQLITE
* @param subdirectory_path: An additional directory path to use under the temporary directory that will be used to save the file.
*/
bool test_create_and_selfhost_from_uri(const Glib::ustring& file_uri, Glom::Document& document, Glom::Document::HostingMode hosting_mode, const std::string& subdirectory_path = std::string());
bool test_create_and_selfhost_from_uri(const Glib::ustring& example_file_uri, Glom::Document& document, Glom::Document::HostingMode hosting_mode, const std::string& subdirectory_path = std::string());
/** Create a .glom file from the contents of an existing .glom example file with database data, and start a PostgreSQL server if necessary.
*
* @param example_file_contents The full contents of the example .glom file.
* @param hosting_mode One of HOSTING_MODE_POSTGRES_SELF, HOSTING_MODE_MYSQL_SELF or HOSTING_MODE_SQLITE
* @param subdirectory_path: An additional directory path to use under the temporary directory that will be used to save the file.
*/
bool test_create_and_selfhost_from_data(const Glib::ustring& example_file_contents, Glom::Document& document, Glom::Document::HostingMode hosting_mode, const std::string& subdirectory_path = std::string());
/** Start self-hosting of a .glom document.
* @param document The document must already be saved to a file.
......
Supports Markdown
0% or .