Commit 58b79f53 authored by Jordan Williams, committed by Jordan Petridis
Browse files

Add hash-based image caching

This commit adds another cache validation parameter.
Previously, support for time-based caching was added.
This may not retrieve updated images very quickly for the user.
Thus, the image's URI is checked for changes.
A hash value of the image's URI is updated when it is downloaded.
When feeds are updated, the image URI is checked against this hash.
If the hash has changed, the image is downloaded and the hash is updated.

The hash function uses Rust's standard hasher.
This produces a 64-bit unsigned integer.
The caveat is that SQLite doesn't support unsigned integer values.
To get around this, the hash function converts this to a signed integer.
I should probably double check the corner cases of the cast done here.

Tests still need to be added for this functionality.
The changelog will also need to be updated in a subsequent commit.
parent 7c20c7ac
-- Rebuild the `shows` table without an `image_uri_hash` column.
-- NOTE(review): presumably the rollback counterpart of the migration that
-- adds `image_uri_hash` -- confirm against the migration directory layout.
--
-- The current table is moved aside, `shows` is recreated with the reduced
-- column set, the rows are copied across, and the old table is dropped.
ALTER TABLE shows
RENAME TO old_table;
CREATE TABLE shows
(
`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
`title` TEXT NOT NULL,
`link` TEXT NOT NULL,
`description` TEXT NOT NULL,
`image_uri` TEXT,
`image_cached` TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
`source_id` INTEGER NOT NULL UNIQUE
);
-- Only the columns that exist in the recreated table are copied.
INSERT INTO shows (id, title, link, description, image_uri, image_cached, source_id)
SELECT id, title, link, description, image_uri, image_cached, source_id
FROM old_table;
Drop table old_table;
-- Rebuild the `shows` table with an additional nullable `image_uri_hash`
-- column (BIGINT) placed between `image_uri` and `image_cached`.
--
-- The current table is moved aside, `shows` is recreated with the new
-- column set, the rows are copied across, and the old table is dropped.
ALTER TABLE shows
RENAME TO old_table;
CREATE TABLE shows
(
`id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
`title` TEXT NOT NULL,
`link` TEXT NOT NULL,
`description` TEXT NOT NULL,
`image_uri` TEXT,
`image_uri_hash` BIGINT,
`image_cached` TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
`source_id` INTEGER NOT NULL UNIQUE
);
-- `image_uri_hash` is not part of the copy, so it is NULL for every row
-- carried over from the old table.
INSERT INTO shows (id, title, link, description, image_uri, image_cached, source_id)
SELECT id, title, link, description, image_uri, image_cached, source_id
FROM old_table;
Drop table old_table;
......@@ -184,7 +184,7 @@ pub fn get_podcast_cover_from_id(pid: i32) -> Result<ShowCoverModel, DataError>
let con = db.get()?;
shows
.select((id, title, image_uri, image_cached))
.select((id, title, image_uri, image_uri_hash, image_cached))
.filter(id.eq(pid))
.get_result::<ShowCoverModel>(&con)
.map_err(From::from)
......
......@@ -29,7 +29,7 @@ use crate::schema::shows;
use crate::database::connection;
use crate::dbqueries;
use crate::utils::url_cleaner;
use crate::utils::{calculate_hash, url_cleaner};
use chrono::{NaiveDateTime, Utc};
......@@ -44,6 +44,7 @@ pub(crate) struct NewShow {
link: String,
description: String,
image_uri: Option<String>,
image_uri_hash: Option<i64>,
image_cached: Option<NaiveDateTime>,
source_id: i32,
}
......@@ -147,6 +148,7 @@ impl NewShow {
.title(title)
.description(description)
.link(link)
.image_uri_hash(calculate_hash(&image_uri))
.image_uri(image_uri)
.image_cached(Utc::now().naive_utc())
.source_id(source_id)
......
......@@ -22,6 +22,7 @@ use crate::models::Source;
use crate::schema::shows;
use crate::database::connection;
use crate::utils::calculate_hash;
use chrono::{NaiveDateTime, Utc};
use diesel::query_dsl::filter_dsl::FilterDsl;
use diesel::{ExpressionMethods, RunQueryDsl};
......@@ -38,6 +39,7 @@ pub struct Show {
link: String,
description: String,
image_uri: Option<String>,
image_uri_hash: Option<i64>,
image_cached: NaiveDateTime,
source_id: i32,
}
......@@ -72,7 +74,12 @@ impl Show {
self.image_uri.as_ref().map(|s| s.as_str())
}
/// Get the `image_uri_hash`.
///
/// Returns the stored hash of the image's URI, or `None` when no hash is
/// stored for this show.
pub fn image_uri_hash(&self) -> Option<i64> {
self.image_uri_hash
}
/// Get the `image_cached`.
///
/// Returns a borrow of the stored timestamp.
/// NOTE(review): presumably the time the cover image was last cached --
/// confirm against the code that writes this column.
pub fn image_cached(&self) -> &NaiveDateTime {
&self.image_cached
}
......@@ -82,6 +89,20 @@ impl Show {
self.source_id
}
/// Update the hash of the image's URI.
///
/// Recomputes the hash of this show's current `image_uri` and writes it to
/// the `image_uri_hash` column of this show's row.
///
/// # Errors
///
/// Returns a `DataError` when no database connection can be obtained or when
/// the update statement fails.
pub fn update_image_uri_hash(&self) -> Result<(), DataError> {
    use crate::schema::shows::dsl::*;

    let db = connection();
    let con = db.get()?;

    info!("Updating the hash for image URI for podcast {}", self.title);
    // Select the row by the show's own primary key. `source_id` is a separate
    // column, so comparing it with `shows.id` could touch another show's row
    // or none at all.
    diesel::update(shows.filter(id.eq(self.id())))
        .set(image_uri_hash.eq(calculate_hash(&self.image_uri)))
        .execute(&con)
        .map(|_| ())
        .map_err(From::from)
}
/// Update the timestamp when the image has been cached.
pub fn update_image_cached(&self) -> Result<(), DataError> {
use crate::schema::shows::dsl::*;
......@@ -98,6 +119,22 @@ impl Show {
.map(|_| ())
.map_err(From::from)
}
/// Update the image's timestamp and URI hash value.
///
/// The stored hash is rewritten only when it is missing or differs from the
/// hash of the current `image_uri`. The cached timestamp is refreshed
/// afterwards in either case.
///
/// # Errors
///
/// Returns the first `DataError` reported by the hash update or by the
/// timestamp update.
pub fn update_image_cache_values(&self) -> Result<(), DataError> {
    let current_hash = calculate_hash(&self.image_uri());
    // A missing hash (`None`) never equals `Some(_)`, so it is refreshed too.
    if self.image_uri_hash() != Some(current_hash) {
        self.update_image_uri_hash()?;
    }
    self.update_image_cached()
}
}
#[derive(Queryable, Debug, Clone)]
......@@ -107,6 +144,7 @@ pub struct ShowCoverModel {
id: i32,
title: String,
image_uri: Option<String>,
image_uri_hash: Option<i64>,
image_cached: NaiveDateTime,
}
......@@ -116,6 +154,7 @@ impl From<Show> for ShowCoverModel {
id: p.id(),
title: p.title,
image_uri: p.image_uri,
image_uri_hash: p.image_uri_hash,
image_cached: p.image_cached,
}
}
......@@ -139,7 +178,12 @@ impl ShowCoverModel {
self.image_uri.as_ref().map(|s| s.as_str())
}
/// Get the `image_uri_hash`.
///
/// Returns the stored hash of the image's URI, or `None` when no hash is
/// stored for this cover.
pub fn image_uri_hash(&self) -> Option<i64> {
self.image_uri_hash
}
/// Get the `image_cached`.
///
/// Returns a borrow of the stored timestamp.
/// NOTE(review): presumably the time the cover image was last cached --
/// confirm against the code that writes this column.
pub fn image_cached(&self) -> &NaiveDateTime {
&self.image_cached
}
......
......@@ -23,6 +23,7 @@ table! {
link -> Text,
description -> Text,
image_uri -> Nullable<Text>,
image_uri_hash -> Nullable<BigInt>,
image_cached -> Timestamp,
source_id -> Integer,
}
......
......@@ -32,6 +32,13 @@ use crate::xdg_dirs::DL_DIR;
use std::fs;
use std::path::Path;
/// Hash a given value.
///
/// Feeds `t` into a freshly created [`DefaultHasher`] and returns the 64-bit
/// digest as an `i64`, so that it fits a signed 64-bit database column.
///
/// The wrapper type takes part in the hash: `Some("x")` does not hash to the
/// same value as the bare `"x"`. Callers that compare against a stored hash
/// must therefore hash a value of the same shape as the one that was stored.
///
/// The standard library does not specify the algorithm behind
/// [`DefaultHasher`], so a stored value may stop matching after a toolchain
/// upgrade.
pub fn calculate_hash<T: Hash + ?Sized>(t: &T) -> i64 {
    let mut hasher = DefaultHasher::new();
    t.hash(&mut hasher);
    // `u64 as i64` is a same-width cast: the bit pattern is kept unchanged and
    // values above `i64::MAX` simply read as negative. No information is lost.
    hasher.finish() as i64
}
/// Scan downloaded `episode` entries that might have broken `local_uri`s and
/// set them to `None`.
fn download_checker() -> Result<(), DataError> {
......@@ -154,6 +161,8 @@ pub fn delete_show(pd: &Show) -> Result<(), DataError> {
#[cfg(test)]
use crate::Feed;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
#[cfg(test)]
/// Helper function that open a local file, parse the rss::Channel and gives back a Feed object.
......
......@@ -44,7 +44,7 @@ use podcasts_data::downloader;
use podcasts_data::errors::DownloadError;
use podcasts_data::opml;
use podcasts_data::pipeline::pipeline;
use podcasts_data::utils::checkup;
use podcasts_data::utils::{calculate_hash, checkup};
use podcasts_data::Source;
use std::collections::{HashMap, HashSet};
......@@ -308,15 +308,25 @@ lazy_static! {
static ref THREADPOOL: rayon::ThreadPool = rayon::ThreadPoolBuilder::new().build().unwrap();
}
// Determine whether a cached image is still valid.
// Determine whether a cached image is valid.
//
// A cached image is valid for 4 weeks from the time of its previous download.
// A cached image is valid for a maximum of 4 weeks from the time of its previous download.
// Otherwise, a cached image is only valid so long as the hash of its URI remains unchanged.
fn cached_image_valid(pd: &podcasts_data::ShowCoverModel) -> bool {
let cache_valid_duration = Duration::weeks(4);
Utc::now()
if Utc::now()
.naive_utc()
.signed_duration_since(*pd.image_cached())
<= cache_valid_duration
> cache_valid_duration
{
return false;
}
if let Some(new) = &pd.image_uri() {
if let Some(orig) = pd.image_uri_hash() {
return calculate_hash(new) == orig;
}
}
false
}
// Since gdk_pixbuf::Pixbuf is reference counted and every episode,
......@@ -328,7 +338,6 @@ fn cached_image_valid(pd: &podcasts_data::ShowCoverModel) -> bool {
// TODO: maybe use something that would just scale to requested size?
pub(crate) fn set_image_from_path(image: &gtk::Image, show_id: i32, size: u32) -> Result<()> {
if let Ok(hashmap) = CACHED_PIXBUFS.read() {
// todo Add caching refresh logic here.
if let Ok(pd) = dbqueries::get_podcast_cover_from_id(show_id) {
// If the image is still valid, check if the requested (cover + size) is already in the
// cache and if so do an early return after that.
......@@ -413,9 +422,9 @@ pub(crate) fn set_image_from_path(image: &gtk::Image, show_id: i32, size: u32) -
_ => {}
}
if let Ok(pd) = dbqueries::get_podcast_from_id(show_id) {
if let Err(err) = pd.update_image_cached() {
if let Err(err) = pd.update_image_cache_values() {
error!(
"Failed to update the image cached timestamp for podcast {}: {}",
"Failed to update the image's cache values for podcast {}: {}",
pd.title(),
err
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment