Skip to content
Snippets Groups Projects
Commit 6f1458af authored by Stephen D's avatar Stephen D
Browse files

add statsd

parent 6b9b40ce
No related branches found
No related tags found
1 merge request: !1 "Postgres"
...@@ -145,6 +145,7 @@ name = "commoncrawl_graph" ...@@ -145,6 +145,7 @@ name = "commoncrawl_graph"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"datadog-statsd",
"flate2", "flate2",
"futures", "futures",
"itertools", "itertools",
...@@ -242,6 +243,15 @@ dependencies = [ ...@@ -242,6 +243,15 @@ dependencies = [
"typenum", "typenum",
] ]
[[package]]
name = "datadog-statsd"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03956bab8c09234121d7338656b3abeb22e85b886f05c1d61ad82c73f2f78301"
dependencies = [
"rand 0.3.23",
]
[[package]] [[package]]
name = "der" name = "der"
version = "0.7.9" version = "0.7.9"
...@@ -384,6 +394,12 @@ dependencies = [ ...@@ -384,6 +394,12 @@ dependencies = [
"percent-encoding", "percent-encoding",
] ]
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
[[package]] [[package]]
name = "futures" name = "futures"
version = "0.3.31" version = "0.3.31"
...@@ -904,7 +920,7 @@ dependencies = [ ...@@ -904,7 +920,7 @@ dependencies = [
"num-integer", "num-integer",
"num-iter", "num-iter",
"num-traits", "num-traits",
"rand", "rand 0.8.5",
"smallvec", "smallvec",
"zeroize", "zeroize",
] ]
...@@ -1114,6 +1130,29 @@ dependencies = [ ...@@ -1114,6 +1130,29 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "rand"
version = "0.3.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64ac302d8f83c0c1974bf758f6b041c6c8ada916fbb44a609158ca8b064cc76c"
dependencies = [
"libc",
"rand 0.4.6",
]
[[package]]
name = "rand"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293"
dependencies = [
"fuchsia-cprng",
"libc",
"rand_core 0.3.1",
"rdrand",
"winapi",
]
[[package]] [[package]]
name = "rand" name = "rand"
version = "0.8.5" version = "0.8.5"
...@@ -1122,7 +1161,7 @@ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" ...@@ -1122,7 +1161,7 @@ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [ dependencies = [
"libc", "libc",
"rand_chacha", "rand_chacha",
"rand_core", "rand_core 0.6.4",
] ]
[[package]] [[package]]
...@@ -1132,9 +1171,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1132,9 +1171,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [ dependencies = [
"ppv-lite86", "ppv-lite86",
"rand_core", "rand_core 0.6.4",
] ]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
dependencies = [
"rand_core 0.4.2",
]
[[package]]
name = "rand_core"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
[[package]] [[package]]
name = "rand_core" name = "rand_core"
version = "0.6.4" version = "0.6.4"
...@@ -1144,6 +1198,15 @@ dependencies = [ ...@@ -1144,6 +1198,15 @@ dependencies = [
"getrandom", "getrandom",
] ]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
dependencies = [
"rand_core 0.3.1",
]
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.5.7" version = "0.5.7"
...@@ -1224,7 +1287,7 @@ dependencies = [ ...@@ -1224,7 +1287,7 @@ dependencies = [
"num-traits", "num-traits",
"pkcs1", "pkcs1",
"pkcs8", "pkcs8",
"rand_core", "rand_core 0.6.4",
"signature", "signature",
"spki", "spki",
"subtle", "subtle",
...@@ -1421,7 +1484,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1421,7 +1484,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
dependencies = [ dependencies = [
"digest", "digest",
"rand_core", "rand_core 0.6.4",
] ]
[[package]] [[package]]
...@@ -1601,7 +1664,7 @@ dependencies = [ ...@@ -1601,7 +1664,7 @@ dependencies = [
"memchr", "memchr",
"once_cell", "once_cell",
"percent-encoding", "percent-encoding",
"rand", "rand 0.8.5",
"rsa", "rsa",
"serde", "serde",
"sha1", "sha1",
...@@ -1640,7 +1703,7 @@ dependencies = [ ...@@ -1640,7 +1703,7 @@ dependencies = [
"md-5", "md-5",
"memchr", "memchr",
"once_cell", "once_cell",
"rand", "rand 0.8.5",
"serde", "serde",
"serde_json", "serde_json",
"sha2", "sha2",
...@@ -2075,6 +2138,28 @@ dependencies = [ ...@@ -2075,6 +2138,28 @@ dependencies = [
"wasite", "wasite",
] ]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows-registry" name = "windows-registry"
version = "0.2.0" version = "0.2.0"
......
...@@ -5,6 +5,7 @@ edition = "2021" ...@@ -5,6 +5,7 @@ edition = "2021"
[dependencies] [dependencies]
anyhow = "1.0.89" anyhow = "1.0.89"
datadog-statsd = "0.1.2"
flate2 = "1.0.34" flate2 = "1.0.34"
futures = "0.3.31" futures = "0.3.31"
itertools = "0.13.0" itertools = "0.13.0"
......
use std::{ use std::{
env, env,
future::Future,
io::{Cursor, Read}, io::{Cursor, Read},
time::Instant,
}; };
use anyhow::bail; use anyhow::bail;
use datadog_statsd::Client;
use flate2::read::GzDecoder; use flate2::read::GzDecoder;
use reqwest::Url; use reqwest::Url;
use sqlx::{ use sqlx::{
...@@ -24,25 +27,32 @@ async fn main() -> anyhow::Result<()> { ...@@ -24,25 +27,32 @@ async fn main() -> anyhow::Result<()> {
.connect("postgres://localhost/commoncrawl_graph") .connect("postgres://localhost/commoncrawl_graph")
.await?; .await?;
let client = Client::new("localhost:8125", "commoncrawl_graph", None)?;
println!("Truncate tables"); println!("Truncate tables");
truncate_tables(&pool).await?; measure_async(&client, "truncate_tables", truncate_tables(&pool)).await?;
println!("Populate tables"); println!("Populate tables");
populate_tables(&pool, vertices_path, edges_path).await?; measure_async(
&client,
"populate_tables",
populate_tables(&pool, vertices_path, edges_path),
)
.await?;
println!("Create indexes"); println!("Create indexes");
create_indexes(&pool).await?; measure_async(&client, "create_indexes", create_indexes(&pool)).await?;
println!("Swap tables"); println!("Swap tables");
swap_tables(&pool).await?; measure_async(&client, "swap_tables", swap_tables(&pool)).await?;
println!("Create tables"); println!("Create tables");
create_tables(&pool).await?; measure_async(&client, "create_tables", create_tables(&pool)).await?;
Ok(()) Ok(())
} }
...@@ -150,9 +160,15 @@ async fn populate_tables( ...@@ -150,9 +160,15 @@ async fn populate_tables(
} }
async fn create_indexes(pool: &Pool<Postgres>) -> anyhow::Result<()> { async fn create_indexes(pool: &Pool<Postgres>) -> anyhow::Result<()> {
// TODO primary keys? query!("ALTER TABLE nodes_dupe ADD CONSTRAINT nodes_dupe_pkey PRIMARY KEY (id);")
.execute(pool)
.await?;
query!("ALTER TABLE edges_dupe ADD CONSTRAINT edges_dupe_pkey PRIMARY KEY (from_id, to_id);")
.execute(pool)
.await?;
query!("CREATE INDEX nodes_text_idx_dupe ON nodes(name, text_pattern_ops);") query!("CREATE INDEX nodes_text_idx_dupe ON nodes_dupe(name text_pattern_ops);")
.execute(pool) .execute(pool)
.await?; .await?;
...@@ -218,3 +234,23 @@ CREATE TABLE edges_dupe( ...@@ -218,3 +234,23 @@ CREATE TABLE edges_dupe(
Ok(()) Ok(())
} }
/// Awaits `f` and reports how long it took as a statsd timer named `metric_name`.
///
/// The wall-clock duration of the awaited future is sent in whole
/// milliseconds, tagged `success:true` or `success:false` according to
/// whether the future resolved to `Ok`. The future's result is handed back
/// untouched, so callers can keep using `?` on it.
async fn measure_async<T, Fut: Future<Output = anyhow::Result<T>>>(
    client: &Client,
    metric_name: &str,
    f: Fut,
) -> anyhow::Result<T> {
    let started = Instant::now();
    let outcome = f.await;
    // Read the clock right after the future completes so that building the
    // tag below is not counted in the measurement.
    let elapsed_ms = started.elapsed().as_millis() as f64;

    let success_tag = format!("success:{}", outcome.is_ok());
    client.timer(metric_name, elapsed_ms, &Some(vec![&success_tag]));

    outcome
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment