Skip to content
Snippets Groups Projects
serve.rs 3.67 KiB
Newer Older
Stephen D's avatar
Stephen D committed
use std::{fmt::Write, sync::Arc};

use axum::{
    extract::{Query, State},
    response::{Html, IntoResponse},
    routing::get,
    Router,
};
use commoncrawl_graph::{lookup::DomainView, measure_async};
use datadog_statsd::Client;
use reqwest::StatusCode;
use serde::Deserialize;
use sqlx::{postgres::PgPoolOptions, Pool, Postgres};

/// Shared state handed to the router via `with_state` and read by the handlers.
struct AppStateInner {
    /// Postgres connection pool passed to `DomainView::lookup`.
    pool: Pool<Postgres>,
    /// StatsD client passed to `measure_async` when handling a request.
    statsd: Arc<Client>,
}

/// Reference-counted handle to the shared state; this is the type the
/// handlers extract with `State<AppState>`.
type AppState = Arc<AppStateInner>;

/// Query-string parameters accepted by the `/` route.
#[derive(Deserialize)]
struct RootQuery {
    /// Search key typed into the "Domain" form field. Because of
    /// `#[serde(default)]` it is the empty string when `k` is absent from
    /// the query string, which the page treats as "no search yet".
    #[serde(default)]
    k: String,
}
Stephen D's avatar
Stephen D committed

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let pool = PgPoolOptions::new()
        .max_connections(48)
        .connect("postgres://localhost/commoncrawl_graph")
        .await?;

Stephen D's avatar
Stephen D committed
    let statsd = Arc::new(Client::new(
        "127.0.0.1:8125",
        "commoncrawl_graph_import",
        None,
    )?);
Stephen D's avatar
Stephen D committed

Stephen D's avatar
Stephen D committed
    let app = Router::new()
        .route("/", get(root))
        .with_state(Arc::new(AppStateInner { pool, statsd }));

    let listener = tokio::net::TcpListener::bind("0.0.0.0:3120").await.unwrap();
    axum::serve(listener, app).await.unwrap();
Stephen D's avatar
Stephen D committed

    Ok(())
}
Stephen D's avatar
Stephen D committed

async fn root(State(state): State<AppState>, Query(query): Query<RootQuery>) -> impl IntoResponse {
    match measure_async(&state.statsd, "root.latency", root_inner(&state, query.k)).await {
        Ok(x) => (StatusCode::OK, x),
        Err(e) => (
            StatusCode::INTERNAL_SERVER_ERROR,
            Html::from(format!("Error: {e}")),
        ),
    }
}

async fn root_inner(state: &AppState, k: String) -> anyhow::Result<Html<String>> {
    let results = if k.is_empty() {
        "".to_string()
    } else {
        let d = DomainView::lookup(&state.pool, &k).await?;

        let inner_k = html_escape::encode_text(&d.query);

        let res = if d.is_empty() {
            "No results ):".to_string()
        } else {
            let from_table: String = d
                .links_from
                .into_iter()
                .fold(String::new(), |mut cur, (from, to)| {
                    write!(&mut cur, r#"<tr><td><a href="/?k={from}">{from}</a></td><td><a href="/?k={to}">{to}</a></td></tr>"#).unwrap();

                    cur
                })
;

            let to_table: String = d
                .links_to
                .into_iter()
                .fold(String::new(), |mut cur, (from, to)| {
                    write!(&mut cur, r#"<tr><td><a href="/?k={from}">{from}</a></td><td><a href="/?k={to}">{to}</a></td></tr>"#).unwrap();

                        cur
                });

            format!(
                r#"<div class="table-container"><div><table><tr><th>Source</th><th>Destination</th></tr>{to_table}</table></div><div><table><tr><th>Source</th><th>Destination</th></tr>{from_table}</table></div></div>"#
            )
        };

        format!("<h2>Results for {inner_k}:</h2>{res}")
    };

    let inner_k = html_escape::encode_double_quoted_attribute(&k);

    let preamble = r#"<h1>Who's Linking to Me?</h1><p>This site uses <a href="https://commoncrawl.org/">Common Crawl</a> data to find all hosts that link to a site (and all sites linked to by that site). Wildcards are supported at the beginning of domain names, e.g. '*.scd31.com'. Only 1 000 maximum wildcard matches are shown, and a maximum of 10 000 edges (5 000 in either direction)."#;

    let html = format!(
        r#"<!DOCTYPE html><head><style>.table-container{{display: flex;justify-content: space-around;}}tr:nth-of-type(odd){{ background-color:#ccc;}}td{{padding:0px 8px;}}</style></head><body>{preamble}<form><label for="k">Domain: </label><input name="k" id="k" value="{inner_k}" /><input type="submit" value="Find" /></form><br>{results}</body></html>"#
    );

    Ok(Html::from(html.to_string()))
}