Commit b3a812c1 authored by Jordan Petridis 🌱
Browse files

Merge branch 'image-cache' into 'master'

Refresh cached cover art

Closes #144

See merge request !176
parents 0a080c49 5fb7574c
Pipeline #241171 passed with stages
in 21 minutes and 42 seconds
......@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [Unreleased]
### Added:
- Refresh cached cover art World/podcasts!176
### Changed:
......
......@@ -331,6 +331,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2de9deab977a153492a1468d1b1c0662c1cf39e5ea87d0c060ecd59ef18d8c"
dependencies = [
"byteorder",
"chrono",
"diesel_derives",
"libsqlite3-sys",
"r2d2",
......
......@@ -32,7 +32,7 @@ glob = "0.3"
crossbeam-channel = "0.4"
[dependencies.diesel]
features = ["sqlite", "r2d2"]
features = ["chrono", "sqlite", "r2d2"]
version = "1.4"
[dependencies.diesel_migrations]
......
-- Rebuild `shows` with exactly these columns:
-- id, title, link, description, image_uri, source_id.
-- The current table is moved aside, recreated, refilled and finally dropped.
ALTER TABLE shows RENAME TO old_table;

CREATE TABLE shows (
    `id`          INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    `title`       TEXT    NOT NULL,
    `link`        TEXT    NOT NULL,
    `description` TEXT    NOT NULL,
    `image_uri`   TEXT,
    `source_id`   INTEGER NOT NULL UNIQUE
);

-- Copy every column that exists in the rebuilt table.
INSERT INTO shows (id, title, link, description, image_uri, source_id)
    SELECT id, title, link, description, image_uri, source_id
    FROM old_table;

DROP TABLE old_table;
-- Rebuild `shows` with an additional `image_cached` timestamp column.
-- The current table is moved aside, recreated, refilled and finally dropped.
ALTER TABLE shows RENAME TO old_table;

CREATE TABLE shows (
    `id`           INTEGER   NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    `title`        TEXT      NOT NULL,
    `link`         TEXT      NOT NULL,
    `description`  TEXT      NOT NULL,
    `image_uri`    TEXT,
    `image_cached` TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    `source_id`    INTEGER   NOT NULL UNIQUE
);

-- `image_cached` is not listed here, so copied rows receive the column default
-- (CURRENT_TIMESTAMP at the time of the copy).
INSERT INTO shows (id, title, link, description, image_uri, source_id)
    SELECT id, title, link, description, image_uri, source_id
    FROM old_table;

DROP TABLE old_table;
-- Rebuild `shows` with these columns:
-- id, title, link, description, image_uri, image_cached, source_id.
-- The current table is moved aside, recreated, refilled and finally dropped.
ALTER TABLE shows RENAME TO old_table;

CREATE TABLE shows (
    `id`           INTEGER   NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    `title`        TEXT      NOT NULL,
    `link`         TEXT      NOT NULL,
    `description`  TEXT      NOT NULL,
    `image_uri`    TEXT,
    `image_cached` TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    `source_id`    INTEGER   NOT NULL UNIQUE
);

-- The existing `image_cached` values are carried over unchanged.
INSERT INTO shows (id, title, link, description, image_uri, image_cached, source_id)
    SELECT id, title, link, description, image_uri, image_cached, source_id
    FROM old_table;

DROP TABLE old_table;
-- Rebuild `shows` with an additional `image_uri_hash` BLOB column.
-- The current table is moved aside, recreated, refilled and finally dropped.
ALTER TABLE shows RENAME TO old_table;

CREATE TABLE shows (
    `id`             INTEGER   NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    `title`          TEXT      NOT NULL,
    `link`           TEXT      NOT NULL,
    `description`    TEXT      NOT NULL,
    `image_uri`      TEXT,
    `image_uri_hash` BLOB,
    `image_cached`   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    `source_id`      INTEGER   NOT NULL UNIQUE
);

-- `image_uri_hash` is not listed here and has no default, so it is NULL for
-- every copied row.
INSERT INTO shows (id, title, link, description, image_uri, image_cached, source_id)
    SELECT id, title, link, description, image_uri, image_cached, source_id
    FROM old_table;

DROP TABLE old_table;
......@@ -82,7 +82,6 @@ fn run_migration_on(connection: &SqliteConnection) -> Result<(), DataError> {
/// Reset the database into a clean state.
// Tests share a temp file db.
#[cfg(test)]
pub fn truncate_db() -> Result<(), DataError> {
let db = connection();
let con = db.get()?;
......
......@@ -184,7 +184,7 @@ pub fn get_podcast_cover_from_id(pid: i32) -> Result<ShowCoverModel, DataError>
let con = db.get()?;
shows
.select((id, title, image_uri))
.select((id, title, image_uri, image_uri_hash, image_cached))
.filter(id.eq(pid))
.get_result::<ShowCoverModel>(&con)
.map_err(From::from)
......
......@@ -271,11 +271,9 @@ pub fn cache_image(pd: &ShowCoverModel) -> Result<String, DownloadError> {
#[cfg(test)]
mod tests {
use super::*;
use crate::dbqueries;
use crate::pipeline::pipeline;
use crate::Source;
use crate::{dbqueries, Source};
use anyhow::Result;
use std::fs;
#[test]
......
......@@ -29,11 +29,16 @@ use crate::schema::shows;
use crate::database::connection;
use crate::dbqueries;
use crate::utils::url_cleaner;
use crate::utils::{calculate_hash, u64_to_vec_u8, url_cleaner};
#[cfg(test)]
use crate::utils::vec_u8_to_u64;
use chrono::{NaiveDateTime, Utc};
#[derive(Insertable, AsChangeset)]
#[table_name = "shows"]
#[derive(Debug, Clone, Default, Builder, PartialEq)]
#[derive(Debug, Clone, Default, Builder)]
#[builder(default)]
#[builder(derive(Debug))]
#[builder(setter(into))]
......@@ -42,6 +47,8 @@ pub(crate) struct NewShow {
link: String,
description: String,
image_uri: Option<String>,
image_uri_hash: Option<Vec<u8>>,
image_cached: Option<NaiveDateTime>,
source_id: i32,
}
......@@ -100,6 +107,16 @@ impl Index<()> for NewShow {
}
}
/// Equality between two `NewShow` values.
///
/// Only the link, title, image URI, description and source id take part in the
/// comparison; `image_uri_hash` and `image_cached` are not compared.
impl PartialEq<NewShow> for NewShow {
    fn eq(&self, other: &NewShow) -> bool {
        let lhs = (
            self.link(),
            self.title(),
            self.image_uri(),
            self.description(),
            self.source_id(),
        );
        let rhs = (
            other.link(),
            other.title(),
            other.image_uri(),
            other.description(),
            other.source_id(),
        );
        // Tuple equality compares the members pairwise, in order.
        lhs == rhs
    }
}
impl PartialEq<Show> for NewShow {
fn eq(&self, other: &Show) -> bool {
(self.link() == other.link())
......@@ -130,11 +147,18 @@ impl NewShow {
// If itunes is None, try to get the channel.image from the rss spec
let image_uri = itunes_img.or_else(|| chan.image().map(|s| s.url().trim().to_owned()));
let mut hash: Option<Vec<u8>> = None;
if let Some(i) = &image_uri {
hash = Some(u64_to_vec_u8(calculate_hash(i)));
}
NewShowBuilder::default()
.title(title)
.description(description)
.link(link)
.image_uri(image_uri)
.image_uri_hash(hash)
.image_cached(Utc::now().naive_utc())
.source_id(source_id)
.build()
.unwrap()
......@@ -149,7 +173,6 @@ impl NewShow {
// Ignore the following getters. They are used mainly in unit tests.
impl NewShow {
/// Returns the value of the `source_id` field.
#[allow(dead_code)]
pub(crate) fn source_id(&self) -> i32 {
self.source_id
}
......@@ -169,6 +192,19 @@ impl NewShow {
pub(crate) fn image_uri(&self) -> Option<&str> {
    // Borrow the stored URI, when present, as a string slice.
    self.image_uri.as_deref()
}
/// Returns the stored image URI hash decoded with `vec_u8_to_u64`, or `None`
/// when no hash bytes are stored. Compiled for tests only.
#[cfg(test)]
pub fn image_uri_hash(&self) -> Option<u64> {
    self.image_uri_hash
        .as_ref()
        .map(|bytes| vec_u8_to_u64(bytes.clone()))
}
/// Returns a copy of the `image_cached` field. Compiled for tests only.
#[cfg(test)]
pub(crate) fn image_cached(&self) -> Option<NaiveDateTime> {
self.image_cached
}
}
#[cfg(test)]
......
......@@ -17,9 +17,16 @@
//
// SPDX-License-Identifier: GPL-3.0-or-later
use crate::errors::DataError;
use crate::models::Source;
use crate::schema::shows;
use crate::database::connection;
use crate::utils::{calculate_hash, u64_to_vec_u8, vec_u8_to_u64};
use chrono::{Duration, NaiveDateTime, Utc};
use diesel::query_dsl::filter_dsl::FilterDsl;
use diesel::{ExpressionMethods, RunQueryDsl};
#[derive(Queryable, Identifiable, AsChangeset, Associations, PartialEq)]
#[belongs_to(Source, foreign_key = "source_id")]
#[changeset_options(treat_none_as_null = "true")]
......@@ -32,6 +39,8 @@ pub struct Show {
link: String,
description: String,
image_uri: Option<String>,
image_uri_hash: Option<Vec<u8>>,
image_cached: NaiveDateTime,
source_id: i32,
}
......@@ -65,10 +74,72 @@ impl Show {
self.image_uri.as_ref().map(|s| s.as_str())
}
/// Get the `image_uri_hash`.
///
/// The stored bytes are decoded with `vec_u8_to_u64`; `None` is returned when
/// no hash is stored.
pub fn image_uri_hash(&self) -> Option<u64> {
    self.image_uri_hash
        .as_ref()
        .map(|bytes| vec_u8_to_u64(bytes.clone()))
}
/// Get the `image_cached`.
///
/// Returns a reference to the timestamp held in the `image_cached` field.
pub fn image_cached(&self) -> &NaiveDateTime {
&self.image_cached
}
/// `Source` table foreign key.
///
/// Returns the value of the `source_id` field.
pub fn source_id(&self) -> i32 {
self.source_id
}
/// Update the hash of the image's URI.
///
/// Hashes `image_uri` with `calculate_hash`, encodes the result with
/// `u64_to_vec_u8` and writes it to the `image_uri_hash` column of the row whose
/// `id` equals this show's id. When the show has no image URI, `None` is written
/// instead. Only the database row is touched; `self` is not modified.
///
/// # Errors
///
/// Returns a `DataError` when no connection can be obtained or the update fails.
pub fn update_image_uri_hash(&self) -> Result<(), DataError> {
    use crate::schema::shows::dsl::*;

    let pool = connection();
    let conn = pool.get()?;

    let new_hash: Option<Vec<u8>> = self
        .image_uri
        .as_ref()
        .map(|uri| u64_to_vec_u8(calculate_hash(uri)));

    diesel::update(shows.filter(id.eq(self.id)))
        .set(image_uri_hash.eq(&new_hash))
        .execute(&conn)?;
    Ok(())
}
/// Update the timestamp when the image has been cached.
///
/// Writes the current UTC time to the `image_cached` column of the row whose
/// `id` equals this show's id. Only the database row is touched; `self` is not
/// modified.
///
/// # Errors
///
/// Returns a `DataError` when no connection can be obtained or the update fails.
pub fn update_image_cached(&self) -> Result<(), DataError> {
    use crate::schema::shows::dsl::*;

    let pool = connection();
    let conn = pool.get()?;

    let target = shows.filter(id.eq(self.id));
    diesel::update(target)
        .set(image_cached.eq(Utc::now().naive_utc()))
        .execute(&conn)?;
    Ok(())
}
/// Update the image's timestamp and URI hash value.
///
/// The stored hash is rewritten through `update_image_uri_hash` unless both a
/// stored hash and an image URI are present and the hash of the URI equals the
/// stored hash. Afterwards the `image_cached` timestamp is refreshed through
/// `update_image_cached`.
///
/// # Errors
///
/// Returns the `DataError` of the first update that fails.
pub fn update_image_cache_values(&self) -> Result<(), DataError> {
    // The hash only counts as current when there is something to compare on
    // both sides and the comparison succeeds.
    let hash_is_current = match (self.image_uri_hash(), self.image_uri()) {
        (Some(hash), Some(image_uri)) => calculate_hash(&image_uri) == hash,
        _ => false,
    };
    if !hash_is_current {
        self.update_image_uri_hash()?;
    }
    self.update_image_cached()
}
}
#[derive(Queryable, Debug, Clone)]
......@@ -78,14 +149,18 @@ pub struct ShowCoverModel {
id: i32,
title: String,
image_uri: Option<String>,
image_uri_hash: Option<Vec<u8>>,
image_cached: NaiveDateTime,
}
/// Builds a `ShowCoverModel` from a `Show` by moving over the id, title,
/// image URI, image URI hash and `image_cached` timestamp.
impl From<Show> for ShowCoverModel {
    fn from(p: Show) -> ShowCoverModel {
        ShowCoverModel {
            // `id` must be initialised exactly once; the field is read directly,
            // like the other fields below.
            id: p.id,
            title: p.title,
            image_uri: p.image_uri,
            image_uri_hash: p.image_uri_hash,
            image_cached: p.image_cached,
        }
    }
}
......@@ -107,4 +182,245 @@ impl ShowCoverModel {
pub fn image_uri(&self) -> Option<&str> {
    // Borrow the stored URI, when present, as a string slice.
    self.image_uri.as_deref()
}
/// Get the `image_uri_hash`.
///
/// The stored bytes are decoded with `vec_u8_to_u64`; `None` is returned when
/// no hash is stored.
pub fn image_uri_hash(&self) -> Option<u64> {
    self.image_uri_hash
        .as_ref()
        .map(|bytes| vec_u8_to_u64(bytes.clone()))
}
/// Get the `image_cached`.
///
/// Returns a reference to the timestamp held in the `image_cached` field.
pub fn image_cached(&self) -> &NaiveDateTime {
&self.image_cached
}
/// Determine whether a cached image is valid.
///
/// Returns `false` when more than `valid` has elapsed between `image_cached`
/// and the current UTC time. Within that window the image is valid only when
/// both an image URI and a stored hash exist and the hash of the URI equals the
/// stored hash; a missing URI or a missing hash yields `false`.
pub fn is_cached_image_valid(&self, valid: &Duration) -> bool {
    let age = Utc::now()
        .naive_utc()
        .signed_duration_since(*self.image_cached());
    if age > *valid {
        return false;
    }

    match (self.image_uri(), self.image_uri_hash()) {
        (Some(uri), Some(stored)) => calculate_hash(&uri) == stored,
        _ => false,
    }
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::database::truncate_db;
use crate::dbqueries;
use crate::models::{Insert, NewShow, NewShowBuilder, Update};
use anyhow::Result;
use std::{thread, time};
lazy_static! {
    // Fixture show. The tests below expect its hash bytes to decode to
    // 2965280433145069220.
    static ref EXPECTED_INTERCEPTED: NewShow = {
        let description = "The people behind The Intercept’s fearless reporting and incisive \
                           commentary—Jeremy Scahill, Glenn Greenwald, Betsy Reed and \
                           others—discuss the crucial issues of our time: national security, civil \
                           liberties, foreign policy, and criminal justice. Plus interviews with \
                           artists, thinkers, and newsmakers who challenge our preconceptions about \
                           the world we live in.";
        let cover =
            "http://static.megaphone.fm/podcasts/d5735a50-d904-11e6-8532-73c7de466ea6/image/\
             uploads_2F1484252190700-qhn5krasklbce3dh-a797539282700ea0298a3a26f7e49b0b_\
             2FIntercepted_COVER%2B_281_29.png";

        NewShowBuilder::default()
            .title("Intercepted with Jeremy Scahill")
            .link("https://theintercept.com/podcasts")
            .description(description)
            .image_uri(cover.to_owned())
            .image_uri_hash(Some(vec![164, 62, 7, 221, 215, 202, 38, 41]))
            .image_cached(Utc::now().naive_utc())
            .source_id(42)
            .build()
            .unwrap()
    };

    // Same show with a different image URI, but with the hash bytes and the
    // timestamp of `EXPECTED_INTERCEPTED`, so the hash no longer matches the URI.
    static ref UPDATED_IMAGE_URI_INTERCEPTED: NewShow = {
        let cover = "https://assets.fireside.fm/file/fireside-images/podcasts/images/f/f31a453c-fa15-491f-8618-3f71f1d565e5/cover.jpg?v=3";

        NewShowBuilder::default()
            .title("Intercepted with Jeremy Scahill")
            .link("https://theintercept.com/podcasts")
            .description(EXPECTED_INTERCEPTED.description())
            .image_uri(cover.to_owned())
            .image_uri_hash(Some(vec![164, 62, 7, 221, 215, 202, 38, 41]))
            .image_cached(EXPECTED_INTERCEPTED.image_cached().unwrap())
            .source_id(42)
            .build()
            .unwrap()
    };
}
#[test]
fn should_update_timestamp_when_update_image_cached_is_called_after_the_timestamp_has_expired(
) -> Result<()> {
    // URI of the inserted fixture; it has to survive the timestamp update.
    let expected_uri =
        "http://static.megaphone.fm/podcasts/d5735a50-d904-11e6-8532-73c7de466ea6/image/\
         uploads_2F1484252190700-qhn5krasklbce3dh-a797539282700ea0298a3a26f7e49b0b_\
         2FIntercepted_COVER%2B_281_29.png";

    truncate_db()?;
    EXPECTED_INTERCEPTED.insert()?;
    let before = EXPECTED_INTERCEPTED.to_podcast()?;

    before.update_image_cached().unwrap();

    // Read the row back and make sure the timestamp moved forward.
    let after = dbqueries::get_podcast_from_id(before.id())?;
    assert!(before.image_cached() < after.image_cached());

    // The image's URI and its hash should remain unchanged.
    assert_eq!(after.image_uri().unwrap(), expected_uri);
    assert_eq!(after.image_uri_hash().unwrap(), 2965280433145069220);
    Ok(())
}
#[test]
fn should_update_hash_when_update_image_uri_hash_is_called_when_the_hash_is_invalid(
) -> Result<()> {
    truncate_db()?;
    EXPECTED_INTERCEPTED.insert()?;
    let original = EXPECTED_INTERCEPTED.to_podcast()?;
    let original_hash: u64 = 2965280433145069220;

    // Replace the row with the fixture that carries a new image URI but still
    // the hash bytes of the old one.
    let updated = &*UPDATED_IMAGE_URI_INTERCEPTED;
    updated.update(original.id())?;

    // Check the hash as persisted in the database rather than the value held by
    // the in-memory fixture, so the assertion actually covers the stored row.
    let show = dbqueries::get_podcast_from_id(original.id())?;
    let not_yet_updated_hash = show.image_uri_hash().unwrap();
    assert_eq!(not_yet_updated_hash, original_hash);

    show.update_image_uri_hash().unwrap();

    // After the update the stored hash must match the new URI.
    let show = dbqueries::get_podcast_from_id(original.id())?;
    let updated_hash = show.image_uri_hash().unwrap();
    let expected_updated_hash: u64 = 1748982167920802687;
    assert_eq!(updated_hash, expected_updated_hash);
    assert_eq!(
        show.image_uri().unwrap(),
        "https://assets.fireside.fm/file/fireside-images/podcasts/images/f/f31a453c-fa15-491f-8618-3f71f1d565e5/cover.jpg?v=3"
    );
    Ok(())
}
#[test]
fn should_update_timestamp_only_when_update_image_cached_values_is_called_after_the_timestamp_has_expired(
) -> Result<()> {
    // URI of the inserted fixture; it has to survive the update.
    let expected_uri =
        "http://static.megaphone.fm/podcasts/d5735a50-d904-11e6-8532-73c7de466ea6/image/\
         uploads_2F1484252190700-qhn5krasklbce3dh-a797539282700ea0298a3a26f7e49b0b_\
         2FIntercepted_COVER%2B_281_29.png";

    truncate_db()?;
    EXPECTED_INTERCEPTED.insert()?;
    let before = EXPECTED_INTERCEPTED.to_podcast()?;

    before.update_image_cache_values().unwrap();

    // Read the row back: the timestamp moved forward, URI and hash did not change.
    let after = dbqueries::get_podcast_from_id(before.id())?;
    assert!(before.image_cached() < after.image_cached());
    assert_eq!(after.image_uri().unwrap(), expected_uri);
    assert_eq!(after.image_uri_hash().unwrap(), 2965280433145069220);
    Ok(())
}
#[test]
fn should_update_timestamp_and_hash_when_update_image_cached_values_is_called_when_hash_is_invalid(
) -> Result<()> {
    let stale_hash: u64 = 2965280433145069220;
    let fresh_hash: u64 = 1748982167920802687;

    truncate_db()?;
    EXPECTED_INTERCEPTED.insert()?;
    let original = EXPECTED_INTERCEPTED.to_podcast()?;

    // Replace the row with the fixture that carries a new image URI but still
    // the hash bytes of the old one.
    let replacement = &*UPDATED_IMAGE_URI_INTERCEPTED;
    replacement.update(original.id())?;

    let stale = dbqueries::get_podcast_from_id(original.id())?;
    assert_eq!(stale.image_uri_hash().unwrap(), stale_hash);

    stale.update_image_cache_values().unwrap();

    // Both the timestamp and the hash must have been refreshed.
    let refreshed = dbqueries::get_podcast_from_id(stale.id())?;
    assert!(original.image_cached() < refreshed.image_cached());
    assert_eq!(refreshed.image_uri_hash().unwrap(), fresh_hash);
    assert_eq!(
        refreshed.image_uri().unwrap(),
        "https://assets.fireside.fm/file/fireside-images/podcasts/images/f/f31a453c-fa15-491f-8618-3f71f1d565e5/cover.jpg?v=3"
    );
    Ok(())
}
#[test]
fn cached_image_should_be_valid_when_uri_and_hash_are_unchanged() -> Result<()> {
    // Cover cached just now, with hash bytes that belong to the URI.
    let cover = ShowCoverModel {
        id: 0,
        title: "Linux Unplugged".to_owned(),
        image_uri: Some(
            "http://www.jupiterbroadcasting.com/wp-content/uploads/2018/01/lup-0232-v.jpg"
                .to_owned(),
        ),
        image_uri_hash: Some(vec![191, 166, 24, 137, 178, 75, 5, 227]),
        image_cached: Utc::now().naive_utc(),
    };

    let four_weeks = Duration::weeks(4);
    assert!(cover.is_cached_image_valid(&four_weeks));
    Ok(())
}
#[test]
fn a_different_uri_should_invalidate_cached_image() -> Result<()> {
    // The hash bytes below were taken from the old image URI:
    // http://www.jupiterbroadcasting.com/wp-content/uploads/2018/01/lup-0232-v.jpg
    let cover = ShowCoverModel {
        id: 0,
        title: "Linux Unplugged".to_owned(),
        image_uri: Some(
            "https://assets.fireside.fm/file/fireside-images/podcasts/images/f/f31a453c-fa15-491f-8618-3f71f1d565e5/cover.jpg?v=3"
                .to_owned(),
        ),
        image_uri_hash: Some(vec![191, 166, 24, 137, 178, 75, 5, 227]),
        image_cached: Utc::now().naive_utc(),
    };

    let four_weeks = Duration::weeks(4);
    assert!(!cover.is_cached_image_valid(&four_weeks));
    Ok(())
}
#[test]
fn cached_image_should_be_invalidated_after_valid_duration() -> Result<()> {
    // URI and hash match, so only the elapsed time can invalidate the cover.
    let cover = ShowCoverModel {
        id: 0,
        title: "Linux Unplugged".to_owned(),
        image_uri: Some(
            "http://www.jupiterbroadcasting.com/wp-content/uploads/2018/01/lup-0232-v.jpg"
                .to_owned(),
        ),
        image_uri_hash: Some(vec![191, 166, 24, 137, 178, 75, 5, 227]),
        image_cached: Utc::now().naive_utc(),
    };

    // Wait longer than the one-nanosecond validity window before checking.
    let window = Duration::nanoseconds(1);
    thread::sleep(time::Duration::from_nanos(2));
    assert!(!cover.is_cached_image_valid(&window));
    Ok(())
}
}
......@@ -23,6 +23,8 @@ table! {
link -> Text,
description -> Text,
image_uri -> Nullable<Text>,
image_uri_hash -> Nullable<Binary>,
image_cached -> Timestamp,