Refactored indexing functions of the Diesel models.

parent 6996759d
Pipeline #891 passed with stage
in 31 minutes 55 seconds
......@@ -253,6 +253,15 @@ dependencies = [
[[package]]
name = "diesel"
version = "0.16.0"
source = "git+https://github.com/diesel-rs/diesel.git#07f80c3a0d07daa26efff3166fbf0297dc0f0a7b"
dependencies = [
"byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libsqlite3-sys 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "diesel"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
......@@ -538,7 +547,7 @@ name = "hammond-data"
version = "0.1.0"
dependencies = [
"chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"diesel 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)",
"diesel 0.16.0 (git+https://github.com/diesel-rs/diesel.git)",
"diesel_codegen 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)",
"dotenv 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
"error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
......@@ -557,7 +566,7 @@ dependencies = [
name = "hammond-downloader"
version = "0.1.0"
dependencies = [
"diesel 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)",
"diesel 0.16.0 (git+https://github.com/diesel-rs/diesel.git)",
"error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
"hammond-data 0.1.0",
"hyper 0.11.6 (registry+https://github.com/rust-lang/crates.io-index)",
......@@ -1559,6 +1568,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum derive-error-chain 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3c9ca9ade651388daad7c993f005d0d20c4f6fe78c1cdc93e95f161c6f5ede4a"
"checksum derive_builder 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "03600ae366b6eb2314e54d62adc833d9866da03798acc61c61789654ceaa227a"
"checksum derive_builder_core 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eed37eae64daa5511467b1a55cebdf472deeaef108d22f62f25e8bbcaffd56ac"
"checksum diesel 0.16.0 (git+https://github.com/diesel-rs/diesel.git)" = "<none>"
"checksum diesel 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "304226fa7a3982b0405f6bb95dd9c10c3e2000709f194038a60ec2c277150951"
"checksum diesel_codegen 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18a42ca5c9b660add51d58bc5a50a87123380e1e458069c5504528a851ed7384"
"checksum diesel_infer_schema 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bf1957ff5cd3b04772e43c162c2f69c2aa918080ff9b020276792d236be8be52"
......
......@@ -18,7 +18,7 @@ xdg = "2.1.0"
[dependencies.diesel]
features = ["sqlite"]
version = "0.16.0"
git = "https://github.com/diesel-rs/diesel.git"
[dependencies.diesel_codegen]
features = ["sqlite"]
......
......@@ -15,15 +15,29 @@ use rand::Rng;
use test::Bencher;
use hammond_data::run_migration_on;
use hammond_data::index_feed::{complete_index, insert_return_source};
use hammond_data::index_feed::{complete_index, insert_return_source, Database};
use std::io::BufReader;
// use std::io::BufRead;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use std::fs;
/// Bundle returned by `get_temp_db`: the temporary directory, the database
/// path, and the open SQLite connection.
struct TempDB(tempdir::TempDir, PathBuf, SqliteConnection);
// RSS feed fixtures, embedded into the benchmark binary at compile time by
// `include_bytes!`.
// Big RSS feed.
const PCPER: &[u8] = include_bytes!("feeds/pcpermp3.xml");
const UNPLUGGED: &[u8] = include_bytes!("feeds/linuxunplugged.xml");
const RADIO: &[u8] = include_bytes!("feeds/coderradiomp3.xml");
const SNAP: &[u8] = include_bytes!("feeds/techsnapmp3.xml");
const LAS: &[u8] = include_bytes!("feeds/TheLinuxActionShow.xml");
/// Each embedded feed paired with the URL under which `index_urls` registers
/// it as a `Source`: `(raw feed bytes, feed url)`.
static URLS: &[(&[u8], &str)] = &[
(PCPER, "https://www.pcper.com/rss/podcasts-mp3.rss"),
(UNPLUGGED, "http://feeds.feedburner.com/linuxunplugged"),
(RADIO, "https://feeds.feedburner.com/coderradiomp3"),
(SNAP, "https://feeds.feedburner.com/techsnapmp3"),
(LAS, "https://feeds2.feedburner.com/TheLinuxActionShow"),
];
/// Create and return a Temporary DB.
/// Will be destroyed once the returned variable(s) are dropped.
fn get_temp_db() -> TempDB {
......@@ -40,46 +54,42 @@ fn get_temp_db() -> TempDB {
TempDB(tmp_dir, db_path, db)
}
/// Indexes every feed listed in `URLS` into the database behind `m`.
///
/// The feeds are processed in parallel via `par_iter`. For each one a
/// `Source` row is inserted (or fetched) for its URL, the embedded bytes are
/// parsed as an RSS channel, and the channel is handed to `complete_index`.
///
/// # Panics
///
/// Panics if the mutex is poisoned, or if inserting the source, parsing the
/// feed or indexing the channel fails.
fn index_urls(m: &Database) {
    URLS.par_iter().for_each(|&(feed_bytes, feed_url)| {
        // Register the Source while holding the connection lock, then release
        // the lock explicitly before parsing and indexing.
        let con = m.lock().unwrap();
        let source = insert_return_source(&con, feed_url).unwrap();
        drop(con);

        // Parse the raw bytes into an RSS channel.
        let channel = rss::Channel::read_from(feed_bytes).unwrap();

        // Index the parsed channel under the freshly obtained Source.
        complete_index(m, &channel, &source).unwrap();
    });
}
/// Benchmarks indexing all of the embedded feeds with `index_urls`.
///
/// The temporary database is created once, outside the timed closure, so
/// every iteration runs against the same connection.
#[bench]
fn bench_index_feeds(b: &mut Bencher) {
    // `_tmp_dir` and `_db_path` are bound (not discarded with `_`) so they
    // stay alive until the end of the benchmark.
    let TempDB(_tmp_dir, _db_path, db) = get_temp_db();
    let m = Arc::new(Mutex::new(db));

    // `&m` is already a `&Database`; cloning the `Arc` just to borrow it would
    // only add an atomic increment/decrement to the measured work.
    b.iter(|| index_urls(&m));
}
#[bench]
fn bench_index_test_files(b: &mut Bencher) {
fn bench_index_unchanged_feeds(b: &mut Bencher) {
let TempDB(_tmp_dir, _db_path, db) = get_temp_db();
// complete_index runs in parallel so it requires a mutex as argument.
let m = Arc::new(Mutex::new(db));
// include them in the binary to avoid loading from disk making file open syscalls.
let pcper = include_bytes!("feeds/pcpermp3.xml");
let unplugged = include_bytes!("feeds/linuxunplugged.xml");
let radio = include_bytes!("feeds/coderradiomp3.xml");
let snap = include_bytes!("feeds/techsnapmp3.xml");
let las = include_bytes!("feeds/TheLinuxActionShow.xml");
// vec of (&vec<u8>, url) tuples.
let urls = vec![
(pcper.as_ref(), "https://www.pcper.com/rss/podcasts-mp3.rss"),
(
unplugged.as_ref(),
"http://feeds.feedburner.com/linuxunplugged",
),
(radio.as_ref(), "https://feeds.feedburner.com/coderradiomp3"),
(snap.as_ref(), "https://feeds.feedburner.com/techsnapmp3"),
(
las.as_ref(),
"https://feeds2.feedburner.com/TheLinuxActionShow",
),
];
// Index first so it will only bench the comparison test case.
index_urls(&Arc::clone(&m));
b.iter(|| {
urls.par_iter().for_each(|&(buff, url)| {
// Create and insert a Source into db
let s = {
let temp = m.lock().unwrap();
insert_return_source(&temp, url).unwrap()
};
// parse it into a channel
let chan = rss::Channel::read_from(buff).unwrap();
// Index the channel
complete_index(&m, &chan, &s).unwrap();
});
for _ in 0..10 {
index_urls(&Arc::clone(&m));
}
});
}
......@@ -4,7 +4,6 @@ use rss;
use reqwest;
use rayon::prelude::*;
use schema;
use dbqueries;
use models::*;
use errors::*;
......@@ -17,47 +16,41 @@ pub struct Feed(pub reqwest::Response, pub Source);
pub type Database = Arc<Mutex<SqliteConnection>>;
fn index_source(con: &SqliteConnection, foo: &NewSource) -> QueryResult<usize> {
match dbqueries::get_source_from_uri(con, foo.uri) {
Ok(_) => Ok(1),
Err(_) => diesel::insert(foo).into(schema::source::table).execute(con),
}
fn index_source(con: &SqliteConnection, foo: &NewSource) {
use schema::source::dsl::*;
// Throw away the result like `insert or ignore`
// Diesel does not support `insert or ignore` yet.
let _ = diesel::insert_into(source).values(foo).execute(con);
}
fn index_podcast(con: &SqliteConnection, pd: &NewPodcast) -> Result<()> {
use schema::podcast::dsl::*;
match dbqueries::get_podcast_from_title(con, &pd.title) {
Ok(mut foo) => if foo.link() != pd.link || foo.description() != pd.description {
foo.set_link(&pd.link);
foo.set_description(&pd.description);
foo.set_image_uri(pd.image_uri.as_ref().map(|s| s.as_str()));
foo.save_changes::<Podcast>(con)?;
Ok(foo) => if foo.link() != pd.link || foo.description() != pd.description {
diesel::replace_into(podcast).values(pd).execute(con)?;
},
Err(_) => {
diesel::insert(pd)
.into(schema::podcast::table)
.execute(con)?;
diesel::insert_into(podcast).values(pd).execute(con)?;
}
}
Ok(())
}
fn index_episode(con: &SqliteConnection, ep: &NewEpisode) -> Result<()> {
// TODO: Currently using diesel from master git.
// Watch out for v0.99.0 beta and change the toml.
fn index_episode(con: &SqliteConnection, ep: &NewEpisode) -> QueryResult<()> {
use schema::episode::dsl::*;
match dbqueries::get_episode_from_uri(con, ep.uri.unwrap()) {
Ok(mut foo) => if foo.title() != ep.title
Ok(foo) => if foo.title() != ep.title
|| foo.published_date() != ep.published_date.as_ref().map(|x| x.as_str())
{
foo.set_title(ep.title);
foo.set_description(ep.description);
foo.set_published_date(ep.published_date.as_ref().map(|x| x.as_str()));
foo.set_guid(ep.guid);
foo.set_length(ep.length);
foo.set_epoch(ep.epoch);
foo.save_changes::<Episode>(con)?;
diesel::replace_into(episode).values(ep).execute(con)?;
},
Err(_) => {
diesel::insert(ep)
.into(schema::episode::table)
.execute(con)?;
diesel::insert_into(episode).values(ep).execute(con)?;
}
}
Ok(())
......@@ -65,7 +58,7 @@ fn index_episode(con: &SqliteConnection, ep: &NewEpisode) -> Result<()> {
pub fn insert_return_source(con: &SqliteConnection, url: &str) -> Result<Source> {
let foo = NewSource::new_with_uri(url);
index_source(con, &foo)?;
index_source(con, &foo);
Ok(dbqueries::get_source_from_uri(con, foo.uri)?)
}
......@@ -270,7 +263,7 @@ mod tests {
inpt.iter().for_each(|feed| {
let tempdb = db.lock().unwrap();
index_source(&tempdb, &NewSource::new_with_uri(feed)).unwrap();
index_source(&tempdb, &NewSource::new_with_uri(feed));
});
full_index_loop(&db).unwrap();
......
......@@ -15,7 +15,7 @@ tempdir = "0.3.5"
[dependencies.diesel]
features = ["sqlite"]
version = "0.16.0"
git = "https://github.com/diesel-rs/diesel.git"
[dependencies.hammond-data]
path = "../hammond-data"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment