diff --git a/january/src/util/mod.rs b/january/src/util/mod.rs new file mode 100644 index 0000000..10b3328 --- /dev/null +++ b/january/src/util/mod.rs @@ -0,0 +1,3 @@ +pub mod request; +pub mod result; +pub mod variables; diff --git a/january/src/util/request.rs b/january/src/util/request.rs new file mode 100644 index 0000000..eb3af26 --- /dev/null +++ b/january/src/util/request.rs @@ -0,0 +1,123 @@ +use std::time::Duration; + +use actix_web::web::Bytes; +use encoding_rs::{Encoding, UTF_8_INIT}; +use mime::Mime; +use reqwest::{ + header::{self, CONTENT_TYPE}, + Client, Response, +}; +use scraper::Html; +use std::io::Write; +use tempfile::NamedTempFile; + +use super::{result::Error, variables::MAX_BYTES}; + +lazy_static! { + static ref CLIENT: Client = reqwest::Client::builder() + .user_agent("Mozilla/5.0 (compatible; January/1.0; +https://github.com/revoltchat/january)") + .timeout(Duration::from_secs(15)) + .connect_timeout(Duration::from_secs(5)) + .build() + .expect("reqwest Client"); +} + +pub async fn fetch(url: &str) -> Result<(Response, Mime), Error> { + let resp = CLIENT + .get(url) + .send() + .await + .map_err(|_| Error::ReqwestFailed)?; + + if !resp.status().is_success() { + return Err(Error::RequestFailed); + } + + let content_type = resp + .headers() + .get(CONTENT_TYPE) + .ok_or(Error::MissingContentType)? + .to_str() + .map_err(|_| Error::ConversionFailed)?; + + let mime: mime::Mime = content_type + .parse() + .map_err(|_| Error::FailedToParseContentType)?; + + Ok((resp, mime)) +} + +pub async fn get_bytes(resp: &mut Response) -> Result { + let content_length = resp.content_length().unwrap_or(0) as usize; + if content_length > *MAX_BYTES { + return Err(Error::ExceedsMaxBytes); + } + let mut bytes = Vec::with_capacity(content_length); + while let Some(chunk) = resp + .chunk() + .await + .map_err(|_| Error::FailedToConsumeBytes)? + { + if bytes.len() + chunk.len() > *MAX_BYTES { + return Err(Error::ExceedsMaxBytes); + } + bytes.extend(chunk) + } + Ok(Bytes::from(bytes)) +} + +pub async fn consume_fragment(mut resp: Response) -> Result { + let bytes = get_bytes(&mut resp).await?; + + let content_type = resp + .headers() + .get(header::CONTENT_TYPE) + .and_then(|value| value.to_str().ok()) + .and_then(|value| value.parse::().ok()); + let encoding_name = content_type + .as_ref() + .and_then(|mime| mime.get_param("charset").map(|charset| charset.as_str())) + .unwrap_or("utf-8"); + let encoding = Encoding::for_label(encoding_name.as_bytes()).unwrap_or(&UTF_8_INIT); + + let (text, _, _) = encoding.decode(&bytes); + Ok(Html::parse_document(&text)) +} + +pub fn determine_video_size(path: &std::path::Path) -> Result<(isize, isize), Error> { + let data = ffprobe::ffprobe(path).map_err(|_| Error::ProbeError)?; + + // Take the first valid stream. + for stream in data.streams { + if let (Some(w), Some(h)) = (stream.width, stream.height) { + if let (Ok(w), Ok(h)) = (w.try_into(), h.try_into()) { + return Ok((w, h)); + } + } + } + + Err(Error::ProbeError) +} + +pub async fn consume_size(mut resp: Response, mime: Mime) -> Result<(isize, isize), Error> { + let bytes = get_bytes(&mut resp).await?; + + match mime.type_() { + mime::IMAGE => { + if let Ok(size) = imagesize::blob_size(&bytes) { + Ok((size.width as isize, size.height as isize)) + } else { + Err(Error::CouldNotDetermineImageSize) + } + } + mime::VIDEO => { + let mut tmp = NamedTempFile::new().map_err(|_| Error::CouldNotDetermineVideoSize)?; + + tmp.write_all(&bytes) + .map_err(|_| Error::CouldNotDetermineVideoSize)?; + + determine_video_size(tmp.path()) + } + _ => unreachable!(), + } +} diff --git a/january/src/util/result.rs b/january/src/util/result.rs new file mode 100644 index 0000000..1c107cf --- /dev/null +++ b/january/src/util/result.rs @@ -0,0 +1,65 @@ +use actix_web::http::StatusCode; +use actix_web::{HttpResponse, ResponseError}; +use serde::Serialize; +use serde_json; +use std::fmt::Display; +use validator::ValidationErrors; + +#[derive(Clone, Serialize, Debug)] +#[serde(tag = "type")] +pub enum Error { + CouldNotDetermineImageSize, + CouldNotDetermineVideoSize, + FailedToParseContentType, + FailedToConsumeBytes, + FailedToConsumeText, + MetaSelectionFailed, + MissingContentType, + NotAllowedToProxy, + ConversionFailed, + ExceedsMaxBytes, + ReqwestFailed, + RequestFailed, + ProbeError, + LabelMe, + FailedValidation { + #[serde(skip_serializing, skip_deserializing)] + error: ValidationErrors, + }, +} + +impl Display for Error { + fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl ResponseError for Error { + fn status_code(&self) -> StatusCode { + match *self { + Error::CouldNotDetermineImageSize => StatusCode::INTERNAL_SERVER_ERROR, + Error::CouldNotDetermineVideoSize => StatusCode::INTERNAL_SERVER_ERROR, + Error::FailedToParseContentType => StatusCode::INTERNAL_SERVER_ERROR, + Error::FailedToConsumeBytes => StatusCode::INTERNAL_SERVER_ERROR, + Error::FailedToConsumeText => StatusCode::INTERNAL_SERVER_ERROR, + Error::MetaSelectionFailed => StatusCode::INTERNAL_SERVER_ERROR, + Error::MissingContentType => StatusCode::BAD_REQUEST, + Error::NotAllowedToProxy => StatusCode::BAD_REQUEST, + Error::ConversionFailed => StatusCode::INTERNAL_SERVER_ERROR, + Error::ExceedsMaxBytes => StatusCode::BAD_REQUEST, + Error::ReqwestFailed => StatusCode::INTERNAL_SERVER_ERROR, + Error::RequestFailed => StatusCode::BAD_REQUEST, + Error::ProbeError => StatusCode::INTERNAL_SERVER_ERROR, + Error::LabelMe => StatusCode::INTERNAL_SERVER_ERROR, + Error::FailedValidation { .. } => StatusCode::BAD_REQUEST, + } + } + + fn error_response(&self) -> HttpResponse { + let body = serde_json::to_string(&self).unwrap(); + + HttpResponse::build(self.status_code()) + .content_type("application/json") + .body(body) + } +} diff --git a/january/src/util/variables.rs b/january/src/util/variables.rs new file mode 100644 index 0000000..c472715 --- /dev/null +++ b/january/src/util/variables.rs @@ -0,0 +1,9 @@ +use std::env; + +lazy_static! { + // Application Settings + pub static ref HOST: String = + env::var("JANUARY_HOST").expect("Missing JANUARY_HOST environment variable."); + pub static ref MAX_BYTES: usize = + env::var("JANUARY_MAX_BYTES").unwrap_or("104857600".to_string()).parse().expect("Invalid JANUARY_MAX_BYTES environment variable."); +}