From e7758fdaa9c7b103c58f94b3f7a35e168d6c5abf Mon Sep 17 00:00:00 2001 From: harmless-tech Date: Thu, 16 Jan 2025 11:27:32 -0500 Subject: [PATCH 1/4] Optionally record redirect history --- src/config.rs | 26 ++++++++++++++++++++++ src/lib.rs | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ src/response.rs | 14 ++++++++++++ src/run.rs | 13 ++++++++++- 4 files changed, 111 insertions(+), 1 deletion(-) diff --git a/src/config.rs b/src/config.rs index cec4c7ee..874e4945 100644 --- a/src/config.rs +++ b/src/config.rs @@ -145,6 +145,7 @@ pub struct Config { max_redirects: u32, max_redirects_will_error: bool, redirect_auth_headers: RedirectAuthHeaders, + save_redirect_history: bool, user_agent: AutoHeaderValue, accept: AutoHeaderValue, accept_encoding: AutoHeaderValue, @@ -277,6 +278,17 @@ impl Config { self.redirect_auth_headers } + /// If we should record a history of every redirect location, + /// including the request and final locations. + /// + /// Comes at the cost of allocating/retaining the Uri for + /// every redirect loop. + /// + /// Defaults to false + pub fn save_redirect_history(&self) -> bool { + self.save_redirect_history + } + /// Value to use for the `User-Agent` header. /// /// This can be overridden by setting a `user-agent` header on the request @@ -482,6 +494,18 @@ impl ConfigBuilder { self } + /// If we should record a history of every redirect location, + /// including the request and final locations. + /// + /// Comes at the cost of allocating/retaining the Uri for + /// every redirect loop. + /// + /// Defaults to false + pub fn save_redirect_history(mut self, v: bool) -> Self { + self.config().save_redirect_history = v; + self + } + /// Value to use for the `User-Agent` header. /// /// This can be overridden by setting a `user-agent` header on the request @@ -809,6 +833,7 @@ impl Default for Config { max_redirects: 10, max_redirects_will_error: true, redirect_auth_headers: RedirectAuthHeaders::Never, + save_redirect_history: false, user_agent: AutoHeaderValue::default(), accept: AutoHeaderValue::default(), accept_encoding: AutoHeaderValue::default(), @@ -884,6 +909,7 @@ impl fmt::Debug for Config { .field("no_delay", &self.no_delay) .field("max_redirects", &self.max_redirects) .field("redirect_auth_headers", &self.redirect_auth_headers) + .field("save_redirect_history", &self.save_redirect_history) .field("user_agent", &self.user_agent) .field("timeouts", &self.timeouts) .field("max_response_header_size", &self.max_response_header_size) diff --git a/src/lib.rs b/src/lib.rs index c8460fc5..20a427e5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -847,6 +847,65 @@ pub(crate) mod test { assert_eq!(response_uri.path(), "/get") } + #[test] + fn redirect_history_none() { + init_test_log(); + let res = get("http://httpbin.org/redirect-to?url=%2Fget") + .call() + .unwrap(); + let redirect_history = res.get_redirect_history(); + assert_eq!(redirect_history, None) + } + + #[test] + fn redirect_history_some() { + init_test_log(); + let agent: Agent = Config::builder() + .max_redirects(3) + .max_redirects_will_error(false) + .save_redirect_history(true) + .build() + .into(); + let res = agent + .get("http://httpbin.org/redirect-to?url=%2Fget") + .call() + .unwrap(); + let redirect_history = res.get_redirect_history(); + assert_eq!( + redirect_history, + Some( + vec![ + "http://httpbin.org/redirect-to?url=%2Fget".parse().unwrap(), + "http://httpbin.org/get".parse().unwrap() + ] + .as_ref() + ) + ); + let res = agent + .get( + "http://httpbin.org/redirect-to?url=%2Fredirect-to%3F\ + url%3D%2Fredirect-to%3Furl%3D%252Fredirect-to%253Furl%253D", + ) + .call() + .unwrap(); + let redirect_history = res.get_redirect_history(); + assert_eq!( + redirect_history, + Some(vec![ + "http://httpbin.org/redirect-to?url=%2Fredirect-to%3Furl%3D%2Fredirect-to%3Furl%3D%252Fredirect-to%253Furl%253D".parse().unwrap(), + "http://httpbin.org/redirect-to?url=/redirect-to?url=%2Fredirect-to%3Furl%3D".parse().unwrap(), + "http://httpbin.org/redirect-to?url=/redirect-to?url=".parse().unwrap(), + "http://httpbin.org/redirect-to?url=".parse().unwrap(), + ].as_ref()) + ); + let res = agent.get("https://www.google.com/").call().unwrap(); + let redirect_history = res.get_redirect_history(); + assert_eq!( + redirect_history, + Some(vec!["https://www.google.com/".parse().unwrap()].as_ref()) + ); + } + #[test] fn connect_https_invalid_name() { let result = get("https://example.com{REQUEST_URI}/").call(); diff --git a/src/response.rs b/src/response.rs index cfc091d9..aa366efe 100644 --- a/src/response.rs +++ b/src/response.rs @@ -6,12 +6,20 @@ use crate::http; #[derive(Debug, Clone)] pub(crate) struct ResponseUri(pub http::Uri); +#[derive(Debug, Clone)] +pub(crate) struct RedirectHistory(pub Vec); + /// Extension trait for `http::Response` objects /// /// Allows the user to access the `Uri` in http::Response pub trait ResponseExt { /// The Uri we ended up at. This can differ from the request uri when we have followed redirects. fn get_uri(&self) -> &Uri; + + /// The full history of uris, including the request and final uri. + /// + /// Returns None when `save_redirect_history` is false. + fn get_redirect_history(&self) -> Option<&[Uri]>; } impl ResponseExt for http::Response { @@ -22,4 +30,10 @@ impl ResponseExt for http::Response { .expect("uri to have been set") .0 } + + fn get_redirect_history(&self) -> Option<&[Uri]> { + self.extensions() + .get::() + .map(|r| r.0.as_ref()) + } } diff --git a/src/run.rs b/src/run.rs index a3c77e9d..2816712c 100644 --- a/src/run.rs +++ b/src/run.rs @@ -14,7 +14,7 @@ use crate::body::ResponseInfo; use crate::config::{Config, RequestLevelConfig, DEFAULT_USER_AGENT}; use crate::http; use crate::pool::Connection; -use crate::response::ResponseUri; +use crate::response::{RedirectHistory, ResponseUri}; use crate::timings::{CallTimings, CurrentTime}; use crate::transport::time::{Duration, Instant}; use crate::transport::ConnectionDetails; @@ -41,6 +41,9 @@ pub(crate) fn run( .map(Arc::new) .unwrap_or_else(|| agent.config.clone()); + let mut redirect_history: Option> = + config.save_redirect_history().then_some(Vec::new()); + let timeouts = config.timeouts(); let mut timings = CallTimings::new(timeouts, CurrentTime::default()); @@ -67,6 +70,7 @@ pub(crate) fn run( flow, &mut body, redirect_count, + &mut redirect_history, &mut timings, )? { // Follow redirect @@ -112,6 +116,7 @@ fn flow_run( mut flow: Flow, body: &mut SendBody, redirect_count: u32, + redirect_history: &mut Option>, timings: &mut CallTimings, ) -> Result { let uri = flow.uri().clone(); @@ -166,6 +171,12 @@ fn flow_run( jar.store_response_cookies(iter, &uri); } + if let Some(history) = redirect_history.as_mut() { + history.push(uri.clone()); + response + .extensions_mut() + .insert(RedirectHistory(history.clone())); + } response.extensions_mut().insert(ResponseUri(uri)); let ret = match response_result { From c22ed8dee34dbcc1f514c36773e9dca7774058bf Mon Sep 17 00:00:00 2001 From: Martin Algesten Date: Fri, 17 Jan 2025 09:25:26 +0100 Subject: [PATCH 2/4] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6951de1a..91ae6fe3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ # 3.0.0-rc5 + * Feature `Config::save_redirect_history` (#939) * `TlsConfig::unversioned_rustls_crypto_provider()` (#931) * Feature `rustls-no-provider` to compile without ring (#931) * Fix CONNECT proxy Host header (#936) From 7532260bb264ef231351d02877bb3268645a1804 Mon Sep 17 00:00:00 2001 From: Martin Algesten Date: Fri, 17 Jan 2025 09:41:39 +0100 Subject: [PATCH 3/4] ResponseExt doc improvements --- src/config.rs | 12 ++++++++---- src/response.rs | 40 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/config.rs b/src/config.rs index 874e4945..dd559973 100644 --- a/src/config.rs +++ b/src/config.rs @@ -281,10 +281,12 @@ impl Config { /// If we should record a history of every redirect location, /// including the request and final locations. /// - /// Comes at the cost of allocating/retaining the Uri for + /// Comes at the cost of allocating/retaining the `Uri` for /// every redirect loop. /// - /// Defaults to false + /// See [`ResponseExt::get_redirect_history()`][crate::ResponseExt::get_redirect_history]. + /// + /// Defaults to `false`. pub fn save_redirect_history(&self) -> bool { self.save_redirect_history } @@ -497,10 +499,12 @@ impl ConfigBuilder { /// If we should record a history of every redirect location, /// including the request and final locations. /// - /// Comes at the cost of allocating/retaining the Uri for + /// Comes at the cost of allocating/retaining the `Uri` for /// every redirect loop. /// - /// Defaults to false + /// See [`ResponseExt::get_redirect_history()`][crate::ResponseExt::get_redirect_history]. + /// + /// Defaults to `false`. pub fn save_redirect_history(mut self, v: bool) -> Self { self.config().save_redirect_history = v; self diff --git a/src/response.rs b/src/response.rs index aa366efe..9a050e8b 100644 --- a/src/response.rs +++ b/src/response.rs @@ -9,16 +9,48 @@ pub(crate) struct ResponseUri(pub http::Uri); #[derive(Debug, Clone)] pub(crate) struct RedirectHistory(pub Vec); -/// Extension trait for `http::Response` objects +/// Extension trait for [`http::Response`]. /// -/// Allows the user to access the `Uri` in http::Response +/// Adds additional convenience methods to the `Response` that are not available +/// in the plain http API. pub trait ResponseExt { - /// The Uri we ended up at. This can differ from the request uri when we have followed redirects. + /// The Uri that ultimately this Response is about. + /// + /// This can differ from the request uri when we have followed redirects. + /// + /// ``` + /// use ureq::ResponseExt; + /// + /// let res = ureq::get("https://httpbin.org/redirect-to?url=%2Fget") + /// .call().unwrap(); + /// + /// assert_eq!(res.get_uri(), "https://httpbin.org/get"); + /// ``` fn get_uri(&self) -> &Uri; /// The full history of uris, including the request and final uri. /// - /// Returns None when `save_redirect_history` is false. + /// Returns `None` when [`Config::save_redirect_history`][crate::config::Config::save_redirect_history] + /// is `false`. + /// + /// + /// ``` + /// # use ureq::http::Uri; + /// use ureq::ResponseExt; + /// + /// let uri1: Uri = "https://httpbin.org/redirect-to?url=%2Fget".parse().unwrap(); + /// let uri2: Uri = "https://httpbin.org/get".parse::().unwrap(); + /// + /// let res = ureq::get(&uri1) + /// .config() + /// .save_redirect_history(true) + /// .build() + /// .call().unwrap(); + /// + /// let history = res.get_redirect_history().unwrap(); + /// + /// assert_eq!(history, &[uri1, uri2]); + /// ``` fn get_redirect_history(&self) -> Option<&[Uri]>; } From 67e819938858104c269aa7d01f7eb17bf29f99f4 Mon Sep 17 00:00:00 2001 From: Martin Algesten Date: Fri, 17 Jan 2025 09:44:08 +0100 Subject: [PATCH 4/4] Fix clippy --- src/body/lossy.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/body/lossy.rs b/src/body/lossy.rs index 7bdd1bd3..0bc6e232 100644 --- a/src/body/lossy.rs +++ b/src/body/lossy.rs @@ -58,7 +58,7 @@ impl io::Read for LossyUtf8Reader { invalid_sequence, .. } => { - let valid_len = valid_prefix.as_bytes().len(); + let valid_len = valid_prefix.len(); let invalid_len = invalid_sequence.len(); // Switch out the problem input chars