Skip to content
Snippets Groups Projects
Commit 6f1458af authored by Stephen D's avatar Stephen D
Browse files

add statsd

parent 6b9b40ce
No related branches found
No related tags found
1 merge request: !1 Postgres
......@@ -145,6 +145,7 @@ name = "commoncrawl_graph"
version = "0.1.0"
dependencies = [
"anyhow",
"datadog-statsd",
"flate2",
"futures",
"itertools",
......@@ -242,6 +243,15 @@ dependencies = [
"typenum",
]
[[package]]
name = "datadog-statsd"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03956bab8c09234121d7338656b3abeb22e85b886f05c1d61ad82c73f2f78301"
dependencies = [
"rand 0.3.23",
]
[[package]]
name = "der"
version = "0.7.9"
......@@ -384,6 +394,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
[[package]]
name = "futures"
version = "0.3.31"
......@@ -904,7 +920,7 @@ dependencies = [
"num-integer",
"num-iter",
"num-traits",
"rand",
"rand 0.8.5",
"smallvec",
"zeroize",
]
......@@ -1114,6 +1130,29 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.3.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64ac302d8f83c0c1974bf758f6b041c6c8ada916fbb44a609158ca8b064cc76c"
dependencies = [
"libc",
"rand 0.4.6",
]
[[package]]
name = "rand"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293"
dependencies = [
"fuchsia-cprng",
"libc",
"rand_core 0.3.1",
"rdrand",
"winapi",
]
[[package]]
name = "rand"
version = "0.8.5"
......@@ -1122,7 +1161,7 @@ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
"rand_core 0.6.4",
]
[[package]]
......@@ -1132,9 +1171,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
"rand_core 0.6.4",
]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
dependencies = [
"rand_core 0.4.2",
]
[[package]]
name = "rand_core"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
[[package]]
name = "rand_core"
version = "0.6.4"
......@@ -1144,6 +1198,15 @@ dependencies = [
"getrandom",
]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "redox_syscall"
version = "0.5.7"
......@@ -1224,7 +1287,7 @@ dependencies = [
"num-traits",
"pkcs1",
"pkcs8",
"rand_core",
"rand_core 0.6.4",
"signature",
"spki",
"subtle",
......@@ -1421,7 +1484,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
dependencies = [
"digest",
"rand_core",
"rand_core 0.6.4",
]
[[package]]
......@@ -1601,7 +1664,7 @@ dependencies = [
"memchr",
"once_cell",
"percent-encoding",
"rand",
"rand 0.8.5",
"rsa",
"serde",
"sha1",
......@@ -1640,7 +1703,7 @@ dependencies = [
"md-5",
"memchr",
"once_cell",
"rand",
"rand 0.8.5",
"serde",
"serde_json",
"sha2",
......@@ -2075,6 +2138,28 @@ dependencies = [
"wasite",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-registry"
version = "0.2.0"
......
......@@ -5,6 +5,7 @@ edition = "2021"
[dependencies]
anyhow = "1.0.89"
datadog-statsd = "0.1.2"
flate2 = "1.0.34"
futures = "0.3.31"
itertools = "0.13.0"
......
use std::{
env,
future::Future,
io::{Cursor, Read},
time::Instant,
};
use anyhow::bail;
use datadog_statsd::Client;
use flate2::read::GzDecoder;
use reqwest::Url;
use sqlx::{
......@@ -24,25 +27,32 @@ async fn main() -> anyhow::Result<()> {
.connect("postgres://localhost/commoncrawl_graph")
.await?;
let client = Client::new("localhost:8125", "commoncrawl_graph", None)?;
println!("Truncate tables");
truncate_tables(&pool).await?;
measure_async(&client, "truncate_tables", truncate_tables(&pool)).await?;
println!("Populate tables");
populate_tables(&pool, vertices_path, edges_path).await?;
measure_async(
&client,
"populate_tables",
populate_tables(&pool, vertices_path, edges_path),
)
.await?;
println!("Create indexes");
create_indexes(&pool).await?;
measure_async(&client, "create_indexes", create_indexes(&pool)).await?;
println!("Swap tables");
swap_tables(&pool).await?;
measure_async(&client, "swap_tables", swap_tables(&pool)).await?;
println!("Create tables");
create_tables(&pool).await?;
measure_async(&client, "create_tables", create_tables(&pool)).await?;
Ok(())
}
......@@ -150,9 +160,15 @@ async fn populate_tables(
}
async fn create_indexes(pool: &Pool<Postgres>) -> anyhow::Result<()> {
// TODO primary keys?
query!("ALTER TABLE nodes_dupe ADD CONSTRAINT nodes_dupe_pkey PRIMARY KEY (id);")
.execute(pool)
.await?;
query!("ALTER TABLE edges_dupe ADD CONSTRAINT edges_dupe_pkey PRIMARY KEY (from_id, to_id);")
.execute(pool)
.await?;
query!("CREATE INDEX nodes_text_idx_dupe ON nodes(name, text_pattern_ops);")
query!("CREATE INDEX nodes_text_idx_dupe ON nodes_dupe(name text_pattern_ops);")
.execute(pool)
.await?;
......@@ -218,3 +234,23 @@ CREATE TABLE edges_dupe(
Ok(())
}
/// Awaits `f` and reports how long it took as a statsd timer.
///
/// The elapsed wall-clock time, truncated to whole milliseconds, is sent to
/// `client` under `metric_name` with a single `success:<bool>` tag that
/// reflects whether `f` resolved to `Ok`. The result of `f` is handed back
/// to the caller unchanged.
async fn measure_async<T, Fut: Future<Output = anyhow::Result<T>>>(
    client: &Client,
    metric_name: &str,
    f: Fut,
) -> anyhow::Result<T> {
    let started_at = Instant::now();
    let outcome = f.await;
    let elapsed_ms = started_at.elapsed().as_millis() as f64;

    // Tag the timing with the outcome so successful and failed runs can be
    // told apart on the metrics side.
    let success_tag = format!("success:{}", outcome.is_ok());
    client.timer(metric_name, elapsed_ms, &Some(vec![success_tag.as_str()]));

    outcome
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment