From ac064fe7738a7f1a1c87f2bcf7f5648a3f3da60c Mon Sep 17 00:00:00 2001 From: Alexey <247128645+axkurcom@users.noreply.github.com> Date: Fri, 27 Feb 2026 15:59:27 +0300 Subject: [PATCH 01/10] STUN switch + Ad-tag fixes + DNS-overrides --- src/config/hot_reload.rs | 20 ++++ src/config/load.rs | 115 +++++++++++++++++++- src/config/types.rs | 12 +++ src/main.rs | 39 +++++-- src/metrics.rs | 99 +++++++++++++++--- src/network/dns_overrides.rs | 197 +++++++++++++++++++++++++++++++++++ src/network/mod.rs | 1 + src/network/probe.rs | 5 +- src/network/stun.rs | 11 ++ src/proxy/masking.rs | 7 +- src/tls_front/fetcher.rs | 34 ++++-- src/transport/upstream.rs | 46 ++++---- 12 files changed, 530 insertions(+), 56 deletions(-) create mode 100644 src/network/dns_overrides.rs diff --git a/src/config/hot_reload.rs b/src/config/hot_reload.rs index c949104..acc64cd 100644 --- a/src/config/hot_reload.rs +++ b/src/config/hot_reload.rs @@ -16,6 +16,7 @@ //! | `general` | `me_pool_drain_ttl_secs` | Applied on next ME map update | //! | `general` | `me_pool_min_fresh_ratio` | Applied on next ME map update | //! | `general` | `me_reinit_drain_timeout_secs`| Applied on next ME map update | +//! | `network` | `dns_overrides` | Applied immediately | //! | `access` | All user/quota fields | Effective immediately | //! //! 
Fields that require re-binding sockets (`server.port`, `censorship.*`, @@ -39,6 +40,7 @@ use super::load::ProxyConfig; pub struct HotFields { pub log_level: LogLevel, pub ad_tag: Option, + pub dns_overrides: Vec, pub middle_proxy_pool_size: usize, pub desync_all_full: bool, pub update_every_secs: u64, @@ -58,6 +60,7 @@ impl HotFields { Self { log_level: cfg.general.log_level.clone(), ad_tag: cfg.general.ad_tag.clone(), + dns_overrides: cfg.network.dns_overrides.clone(), middle_proxy_pool_size: cfg.general.middle_proxy_pool_size, desync_all_full: cfg.general.desync_all_full, update_every_secs: cfg.general.effective_update_every_secs(), @@ -189,6 +192,13 @@ fn log_changes( ); } + if old_hot.dns_overrides != new_hot.dns_overrides { + info!( + "config reload: network.dns_overrides updated ({} entries)", + new_hot.dns_overrides.len() + ); + } + if old_hot.middle_proxy_pool_size != new_hot.middle_proxy_pool_size { info!( "config reload: middle_proxy_pool_size: {} → {}", @@ -354,6 +364,16 @@ fn reload_config( return; } + if old_hot.dns_overrides != new_hot.dns_overrides + && let Err(e) = crate::network::dns_overrides::install_entries(&new_hot.dns_overrides) + { + error!( + "config reload: invalid network.dns_overrides: {}; keeping old config", + e + ); + return; + } + warn_non_hot_changes(&old_cfg, &new_cfg); log_changes(&old_hot, &new_hot, &new_cfg, log_tx, detected_ip_v4, detected_ip_v6); config_tx.send(Arc::new(new_cfg)).ok(); diff --git a/src/config/load.rs b/src/config/load.rs index 4e0e104..c1bbdef 100644 --- a/src/config/load.rs +++ b/src/config/load.rs @@ -75,6 +75,23 @@ fn push_unique_nonempty(target: &mut Vec, value: String) { } } +fn is_valid_ad_tag(tag: &str) -> bool { + tag.len() == 32 && tag.chars().all(|ch| ch.is_ascii_hexdigit()) +} + +fn sanitize_ad_tag(ad_tag: &mut Option) { + let Some(tag) = ad_tag.as_ref() else { + return; + }; + + if !is_valid_ad_tag(tag) { + warn!( + "Invalid general.ad_tag value, expected exactly 32 hex chars; ad_tag is disabled" + 
); + *ad_tag = None; + } +} + // ============= Main Config ============= #[derive(Debug, Clone, Serialize, Deserialize, Default)] @@ -184,6 +201,8 @@ impl ProxyConfig { } } + sanitize_ad_tag(&mut config.general.ad_tag); + if let Some(update_every) = config.general.update_every { if update_every == 0 { return Err(ProxyError::Config( @@ -380,6 +399,7 @@ impl ProxyConfig { } validate_network_cfg(&mut config.network)?; + crate::network::dns_overrides::validate_entries(&config.network.dns_overrides)?; if config.general.use_middle_proxy && config.network.ipv6 == Some(true) { warn!("IPv6 with Middle Proxy is experimental and may cause KDF address mismatch; consider disabling IPv6 or ME"); @@ -482,14 +502,18 @@ impl ProxyConfig { if let Some(tag) = &self.general.ad_tag { let zeros = "00000000000000000000000000000000"; + if !is_valid_ad_tag(tag) { + return Err(ProxyError::Config( + "general.ad_tag must be exactly 32 hex characters".to_string(), + )); + } if tag == zeros { warn!("ad_tag is all zeros; register a valid proxy tag via @MTProxybot to enable sponsored channel"); } - if tag.len() != 32 || tag.chars().any(|c| !c.is_ascii_hexdigit()) { - warn!("ad_tag is not a 32-char hex string; ensure you use value issued by @MTProxybot"); - } } + crate::network::dns_overrides::validate_entries(&self.network.dns_overrides)?; + Ok(()) } } @@ -509,6 +533,7 @@ mod tests { let cfg: ProxyConfig = toml::from_str(toml).unwrap(); assert_eq!(cfg.network.ipv6, default_network_ipv6()); + assert_eq!(cfg.network.stun_use, default_true()); assert_eq!(cfg.network.stun_tcp_fallback, default_stun_tcp_fallback()); assert_eq!( cfg.general.middle_proxy_warm_standby, @@ -532,6 +557,7 @@ mod tests { fn impl_defaults_are_sourced_from_default_helpers() { let network = NetworkConfig::default(); assert_eq!(network.ipv6, default_network_ipv6()); + assert_eq!(network.stun_use, default_true()); assert_eq!(network.stun_tcp_fallback, default_stun_tcp_fallback()); let general = GeneralConfig::default(); @@ -934,4 
+960,87 @@ mod tests { assert_eq!(cfg.general.me_reinit_drain_timeout_secs, 90); let _ = std::fs::remove_file(path); } + + #[test] + fn invalid_ad_tag_is_disabled_during_load() { + let toml = r#" + [general] + ad_tag = "not_hex" + + [censorship] + tls_domain = "example.com" + + [access.users] + user = "00000000000000000000000000000000" + "#; + let dir = std::env::temp_dir(); + let path = dir.join("telemt_invalid_ad_tag_test.toml"); + std::fs::write(&path, toml).unwrap(); + let cfg = ProxyConfig::load(&path).unwrap(); + assert!(cfg.general.ad_tag.is_none()); + let _ = std::fs::remove_file(path); + } + + #[test] + fn valid_ad_tag_is_preserved_during_load() { + let toml = r#" + [general] + ad_tag = "00112233445566778899aabbccddeeff" + + [censorship] + tls_domain = "example.com" + + [access.users] + user = "00000000000000000000000000000000" + "#; + let dir = std::env::temp_dir(); + let path = dir.join("telemt_valid_ad_tag_test.toml"); + std::fs::write(&path, toml).unwrap(); + let cfg = ProxyConfig::load(&path).unwrap(); + assert_eq!( + cfg.general.ad_tag.as_deref(), + Some("00112233445566778899aabbccddeeff") + ); + let _ = std::fs::remove_file(path); + } + + #[test] + fn invalid_dns_override_is_rejected() { + let toml = r#" + [network] + dns_overrides = ["example.com:443:2001:db8::10"] + + [censorship] + tls_domain = "example.com" + + [access.users] + user = "00000000000000000000000000000000" + "#; + let dir = std::env::temp_dir(); + let path = dir.join("telemt_invalid_dns_override_test.toml"); + std::fs::write(&path, toml).unwrap(); + let err = ProxyConfig::load(&path).unwrap_err().to_string(); + assert!(err.contains("must be bracketed")); + let _ = std::fs::remove_file(path); + } + + #[test] + fn valid_dns_override_is_accepted() { + let toml = r#" + [network] + dns_overrides = ["example.com:443:127.0.0.1", "example.net:443:[2001:db8::10]"] + + [censorship] + tls_domain = "example.com" + + [access.users] + user = "00000000000000000000000000000000" + "#; + let dir = 
std::env::temp_dir(); + let path = dir.join("telemt_valid_dns_override_test.toml"); + std::fs::write(&path, toml).unwrap(); + let cfg = ProxyConfig::load(&path).unwrap(); + assert_eq!(cfg.network.dns_overrides.len(), 2); + let _ = std::fs::remove_file(path); + } } diff --git a/src/config/types.rs b/src/config/types.rs index 68086be..7d9f13a 100644 --- a/src/config/types.rs +++ b/src/config/types.rs @@ -97,6 +97,11 @@ pub struct NetworkConfig { #[serde(default)] pub multipath: bool, + /// Global switch for STUN probing. + /// When false, STUN is fully disabled and only non-STUN detection remains. + #[serde(default = "default_true")] + pub stun_use: bool, + /// STUN servers list for public IP discovery. #[serde(default = "default_stun_servers")] pub stun_servers: Vec, @@ -112,6 +117,11 @@ pub struct NetworkConfig { /// Cache file path for detected public IP. #[serde(default = "default_cache_public_ip_path")] pub cache_public_ip_path: String, + + /// Runtime DNS overrides in `host:port:ip` format. + /// IPv6 IP values must be bracketed: `[2001:db8::1]`. 
+ #[serde(default)] + pub dns_overrides: Vec, } impl Default for NetworkConfig { @@ -121,10 +131,12 @@ impl Default for NetworkConfig { ipv6: default_network_ipv6(), prefer: default_prefer_4(), multipath: false, + stun_use: default_true(), stun_servers: default_stun_servers(), stun_tcp_fallback: default_stun_tcp_fallback(), http_ip_detect_urls: default_http_ip_detect_urls(), cache_public_ip_path: default_cache_public_ip_path(), + dns_overrides: Vec::new(), } } } diff --git a/src/main.rs b/src/main.rs index 95f7e5a..7389117 100644 --- a/src/main.rs +++ b/src/main.rs @@ -193,6 +193,11 @@ async fn main() -> std::result::Result<(), Box> { std::process::exit(1); } + if let Err(e) = crate::network::dns_overrides::install_entries(&config.network.dns_overrides) { + eprintln!("[telemt] Invalid network.dns_overrides: {}", e); + std::process::exit(1); + } + let has_rust_log = std::env::var("RUST_LOG").is_ok(); let effective_log_level = if cli_silent { LogLevel::Silent @@ -403,6 +408,12 @@ async fn main() -> std::result::Result<(), Box> { if !config.access.user_max_unique_ips.is_empty() { info!("IP limits configured for {} users", config.access.user_max_unique_ips.len()); } + if !config.network.dns_overrides.is_empty() { + info!( + "Runtime DNS overrides configured: {} entries", + config.network.dns_overrides.len() + ); + } // Connection concurrency limit let max_connections = Arc::new(Semaphore::new(10_000)); @@ -417,14 +428,17 @@ async fn main() -> std::result::Result<(), Box> { // ===================================================================== let me_pool: Option> = if use_middle_proxy { info!("=== Middle Proxy Mode ==="); + let me_nat_probe = config.general.middle_proxy_nat_probe && config.network.stun_use; + if config.general.middle_proxy_nat_probe && !config.network.stun_use { + info!("Middle-proxy STUN probing disabled by network.stun_use=false"); + } // ad_tag (proxy_tag) for advertising - let proxy_tag = config.general.ad_tag.as_ref().map(|tag| { - 
hex::decode(tag).unwrap_or_else(|_| { - warn!("Invalid ad_tag hex, middle proxy ad_tag will be empty"); - Vec::new() - }) - }); + let proxy_tag = config + .general + .ad_tag + .as_ref() + .map(|tag| hex::decode(tag).expect("general.ad_tag must be validated before startup")); // ============================================================= // CRITICAL: Download Telegram proxy-secret (NOT user secret!) @@ -484,7 +498,7 @@ async fn main() -> std::result::Result<(), Box> { proxy_tag, proxy_secret, config.general.middle_proxy_nat_ip, - config.general.middle_proxy_nat_probe, + me_nat_probe, None, config.network.stun_servers.clone(), config.general.stun_nat_probe_concurrency, @@ -1037,9 +1051,18 @@ async fn main() -> std::result::Result<(), Box> { let stats = stats.clone(); let beobachten = beobachten.clone(); let config_rx_metrics = config_rx.clone(); + let ip_tracker_metrics = ip_tracker.clone(); let whitelist = config.server.metrics_whitelist.clone(); tokio::spawn(async move { - metrics::serve(port, stats, beobachten, config_rx_metrics, whitelist).await; + metrics::serve( + port, + stats, + beobachten, + ip_tracker_metrics, + config_rx_metrics, + whitelist, + ) + .await; }); } diff --git a/src/metrics.rs b/src/metrics.rs index 08abb2d..63b337b 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -1,4 +1,5 @@ use std::convert::Infallible; +use std::collections::{BTreeSet, HashMap}; use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; @@ -13,6 +14,7 @@ use tokio::net::TcpListener; use tracing::{info, warn, debug}; use crate::config::ProxyConfig; +use crate::ip_tracker::UserIpTracker; use crate::stats::beobachten::BeobachtenStore; use crate::stats::Stats; @@ -20,6 +22,7 @@ pub async fn serve( port: u16, stats: Arc, beobachten: Arc, + ip_tracker: Arc, config_rx: tokio::sync::watch::Receiver>, whitelist: Vec, ) { @@ -49,13 +52,15 @@ pub async fn serve( let stats = stats.clone(); let beobachten = beobachten.clone(); + let ip_tracker = ip_tracker.clone(); let 
config_rx_conn = config_rx.clone(); tokio::spawn(async move { let svc = service_fn(move |req| { let stats = stats.clone(); let beobachten = beobachten.clone(); + let ip_tracker = ip_tracker.clone(); let config = config_rx_conn.borrow().clone(); - async move { handle(req, &stats, &beobachten, &config) } + async move { handle(req, &stats, &beobachten, &ip_tracker, &config).await } }); if let Err(e) = http1::Builder::new() .serve_connection(hyper_util::rt::TokioIo::new(stream), svc) @@ -67,14 +72,15 @@ pub async fn serve( } } -fn handle( +async fn handle( req: Request, stats: &Stats, beobachten: &BeobachtenStore, + ip_tracker: &UserIpTracker, config: &ProxyConfig, ) -> Result>, Infallible> { if req.uri().path() == "/metrics" { - let body = render_metrics(stats); + let body = render_metrics(stats, config, ip_tracker).await; let resp = Response::builder() .status(StatusCode::OK) .header("content-type", "text/plain; version=0.0.4; charset=utf-8") @@ -109,7 +115,7 @@ fn render_beobachten(beobachten: &BeobachtenStore, config: &ProxyConfig) -> Stri beobachten.snapshot_text(ttl) } -fn render_metrics(stats: &Stats) -> String { +async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIpTracker) -> String { use std::fmt::Write; let mut out = String::with_capacity(4096); @@ -349,6 +355,41 @@ fn render_metrics(stats: &Stats) -> String { let _ = writeln!(out, "telemt_user_msgs_to_client{{user=\"{}\"}} {}", user, s.msgs_to_client.load(std::sync::atomic::Ordering::Relaxed)); } + let ip_stats = ip_tracker.get_stats().await; + let ip_counts: HashMap = ip_stats + .into_iter() + .map(|(user, count, _)| (user, count)) + .collect(); + + let mut unique_users = BTreeSet::new(); + unique_users.extend(config.access.user_max_unique_ips.keys().cloned()); + unique_users.extend(ip_counts.keys().cloned()); + + let _ = writeln!(out, "# HELP telemt_user_unique_ips_current Per-user current number of unique active IPs"); + let _ = writeln!(out, "# TYPE 
telemt_user_unique_ips_current gauge"); + let _ = writeln!(out, "# HELP telemt_user_unique_ips_limit Per-user configured unique IP limit (0 means unlimited)"); + let _ = writeln!(out, "# TYPE telemt_user_unique_ips_limit gauge"); + let _ = writeln!(out, "# HELP telemt_user_unique_ips_utilization Per-user unique IP usage ratio (0 for unlimited)"); + let _ = writeln!(out, "# TYPE telemt_user_unique_ips_utilization gauge"); + + for user in unique_users { + let current = ip_counts.get(&user).copied().unwrap_or(0); + let limit = config.access.user_max_unique_ips.get(&user).copied().unwrap_or(0); + let utilization = if limit > 0 { + current as f64 / limit as f64 + } else { + 0.0 + }; + let _ = writeln!(out, "telemt_user_unique_ips_current{{user=\"{}\"}} {}", user, current); + let _ = writeln!(out, "telemt_user_unique_ips_limit{{user=\"{}\"}} {}", user, limit); + let _ = writeln!( + out, + "telemt_user_unique_ips_utilization{{user=\"{}\"}} {:.6}", + user, + utilization + ); + } + out } @@ -358,9 +399,16 @@ mod tests { use std::net::IpAddr; use http_body_util::BodyExt; - #[test] - fn test_render_metrics_format() { + #[tokio::test] + async fn test_render_metrics_format() { let stats = Arc::new(Stats::new()); + let tracker = UserIpTracker::new(); + let mut config = ProxyConfig::default(); + config + .access + .user_max_unique_ips + .insert("alice".to_string(), 4); + stats.increment_connects_all(); stats.increment_connects_all(); stats.increment_connects_bad(); @@ -372,8 +420,12 @@ mod tests { stats.increment_user_msgs_from("alice"); stats.increment_user_msgs_to("alice"); stats.increment_user_msgs_to("alice"); + tracker + .check_and_add("alice", "203.0.113.10".parse().unwrap()) + .await + .unwrap(); - let output = render_metrics(&stats); + let output = render_metrics(&stats, &config, &tracker).await; assert!(output.contains("telemt_connections_total 2")); assert!(output.contains("telemt_connections_bad_total 1")); @@ -384,22 +436,29 @@ mod tests { 
assert!(output.contains("telemt_user_octets_to_client{user=\"alice\"} 2048")); assert!(output.contains("telemt_user_msgs_from_client{user=\"alice\"} 1")); assert!(output.contains("telemt_user_msgs_to_client{user=\"alice\"} 2")); + assert!(output.contains("telemt_user_unique_ips_current{user=\"alice\"} 1")); + assert!(output.contains("telemt_user_unique_ips_limit{user=\"alice\"} 4")); + assert!(output.contains("telemt_user_unique_ips_utilization{user=\"alice\"} 0.250000")); } - #[test] - fn test_render_empty_stats() { + #[tokio::test] + async fn test_render_empty_stats() { let stats = Stats::new(); - let output = render_metrics(&stats); + let tracker = UserIpTracker::new(); + let config = ProxyConfig::default(); + let output = render_metrics(&stats, &config, &tracker).await; assert!(output.contains("telemt_connections_total 0")); assert!(output.contains("telemt_connections_bad_total 0")); assert!(output.contains("telemt_handshake_timeouts_total 0")); assert!(!output.contains("user=")); } - #[test] - fn test_render_has_type_annotations() { + #[tokio::test] + async fn test_render_has_type_annotations() { let stats = Stats::new(); - let output = render_metrics(&stats); + let tracker = UserIpTracker::new(); + let config = ProxyConfig::default(); + let output = render_metrics(&stats, &config, &tracker).await; assert!(output.contains("# TYPE telemt_uptime_seconds gauge")); assert!(output.contains("# TYPE telemt_connections_total counter")); assert!(output.contains("# TYPE telemt_connections_bad_total counter")); @@ -408,12 +467,16 @@ mod tests { assert!(output.contains( "# TYPE telemt_me_writer_removed_unexpected_minus_restored_total gauge" )); + assert!(output.contains("# TYPE telemt_user_unique_ips_current gauge")); + assert!(output.contains("# TYPE telemt_user_unique_ips_limit gauge")); + assert!(output.contains("# TYPE telemt_user_unique_ips_utilization gauge")); } #[tokio::test] async fn test_endpoint_integration() { let stats = Arc::new(Stats::new()); let beobachten 
= Arc::new(BeobachtenStore::new()); + let tracker = UserIpTracker::new(); let mut config = ProxyConfig::default(); stats.increment_connects_all(); stats.increment_connects_all(); @@ -423,7 +486,7 @@ mod tests { .uri("/metrics") .body(()) .unwrap(); - let resp = handle(req, &stats, &beobachten, &config).unwrap(); + let resp = handle(req, &stats, &beobachten, &tracker, &config).await.unwrap(); assert_eq!(resp.status(), StatusCode::OK); let body = resp.into_body().collect().await.unwrap().to_bytes(); assert!(std::str::from_utf8(body.as_ref()).unwrap().contains("telemt_connections_total 3")); @@ -439,7 +502,9 @@ mod tests { .uri("/beobachten") .body(()) .unwrap(); - let resp_beob = handle(req_beob, &stats, &beobachten, &config).unwrap(); + let resp_beob = handle(req_beob, &stats, &beobachten, &tracker, &config) + .await + .unwrap(); assert_eq!(resp_beob.status(), StatusCode::OK); let body_beob = resp_beob.into_body().collect().await.unwrap().to_bytes(); let beob_text = std::str::from_utf8(body_beob.as_ref()).unwrap(); @@ -450,7 +515,9 @@ mod tests { .uri("/other") .body(()) .unwrap(); - let resp404 = handle(req404, &stats, &beobachten, &config).unwrap(); + let resp404 = handle(req404, &stats, &beobachten, &tracker, &config) + .await + .unwrap(); assert_eq!(resp404.status(), StatusCode::NOT_FOUND); } } diff --git a/src/network/dns_overrides.rs b/src/network/dns_overrides.rs new file mode 100644 index 0000000..447863a --- /dev/null +++ b/src/network/dns_overrides.rs @@ -0,0 +1,197 @@ +//! Runtime DNS overrides for `host:port` targets. 
+ +use std::collections::HashMap; +use std::net::{IpAddr, Ipv6Addr, SocketAddr}; +use std::sync::{OnceLock, RwLock}; + +use crate::error::{ProxyError, Result}; + +type OverrideMap = HashMap<(String, u16), IpAddr>; + +static DNS_OVERRIDES: OnceLock> = OnceLock::new(); + +fn overrides_store() -> &'static RwLock { + DNS_OVERRIDES.get_or_init(|| RwLock::new(HashMap::new())) +} + +fn parse_ip_spec(ip_spec: &str) -> Result { + if ip_spec.starts_with('[') && ip_spec.ends_with(']') { + let inner = &ip_spec[1..ip_spec.len() - 1]; + let ipv6 = inner.parse::().map_err(|_| { + ProxyError::Config(format!( + "network.dns_overrides IPv6 override is invalid: '{ip_spec}'" + )) + })?; + return Ok(IpAddr::V6(ipv6)); + } + + let ip = ip_spec.parse::().map_err(|_| { + ProxyError::Config(format!( + "network.dns_overrides IP is invalid: '{ip_spec}'" + )) + })?; + if matches!(ip, IpAddr::V6(_)) { + return Err(ProxyError::Config(format!( + "network.dns_overrides IPv6 must be bracketed: '{ip_spec}'" + ))); + } + Ok(ip) +} + +fn parse_entry(entry: &str) -> Result<((String, u16), IpAddr)> { + let trimmed = entry.trim(); + if trimmed.is_empty() { + return Err(ProxyError::Config( + "network.dns_overrides entry cannot be empty".to_string(), + )); + } + + let first_sep = trimmed.find(':').ok_or_else(|| { + ProxyError::Config(format!( + "network.dns_overrides entry must use host:port:ip format: '{trimmed}'" + )) + })?; + let second_sep = trimmed[first_sep + 1..] 
+ .find(':') + .map(|idx| first_sep + 1 + idx) + .ok_or_else(|| { + ProxyError::Config(format!( + "network.dns_overrides entry must use host:port:ip format: '{trimmed}'" + )) + })?; + + let host = trimmed[..first_sep].trim(); + let port_str = trimmed[first_sep + 1..second_sep].trim(); + let ip_str = trimmed[second_sep + 1..].trim(); + + if host.is_empty() { + return Err(ProxyError::Config(format!( + "network.dns_overrides host cannot be empty: '{trimmed}'" + ))); + } + if host.contains(':') { + return Err(ProxyError::Config(format!( + "network.dns_overrides host must be a domain name without ':' in this format: '{trimmed}'" + ))); + } + + let port = port_str.parse::().map_err(|_| { + ProxyError::Config(format!( + "network.dns_overrides port is invalid: '{trimmed}'" + )) + })?; + let ip = parse_ip_spec(ip_str)?; + + Ok(((host.to_ascii_lowercase(), port), ip)) +} + +fn parse_entries(entries: &[String]) -> Result { + let mut parsed = HashMap::new(); + for entry in entries { + let (key, ip) = parse_entry(entry)?; + parsed.insert(key, ip); + } + Ok(parsed) +} + +/// Validate `network.dns_overrides` entries without updating runtime state. +pub fn validate_entries(entries: &[String]) -> Result<()> { + let _ = parse_entries(entries)?; + Ok(()) +} + +/// Replace runtime DNS overrides with a new validated snapshot. +pub fn install_entries(entries: &[String]) -> Result<()> { + let parsed = parse_entries(entries)?; + let mut guard = overrides_store() + .write() + .map_err(|_| ProxyError::Config("network.dns_overrides runtime lock is poisoned".to_string()))?; + *guard = parsed; + Ok(()) +} + +/// Resolve a hostname override for `(host, port)` if present. +pub fn resolve(host: &str, port: u16) -> Option { + let key = (host.to_ascii_lowercase(), port); + overrides_store() + .read() + .ok() + .and_then(|guard| guard.get(&key).copied()) +} + +/// Resolve a hostname override and construct a socket address when present. 
+pub fn resolve_socket_addr(host: &str, port: u16) -> Option { + resolve(host, port).map(|ip| SocketAddr::new(ip, port)) +} + +/// Parse a runtime endpoint in `host:port` format. +/// +/// Supports: +/// - `example.com:443` +/// - `[2001:db8::1]:443` +pub fn split_host_port(endpoint: &str) -> Option<(String, u16)> { + if endpoint.starts_with('[') { + let bracket_end = endpoint.find(']')?; + if endpoint.as_bytes().get(bracket_end + 1) != Some(&b':') { + return None; + } + let host = endpoint[1..bracket_end].trim(); + let port = endpoint[bracket_end + 2..].trim().parse::().ok()?; + if host.is_empty() { + return None; + } + return Some((host.to_ascii_lowercase(), port)); + } + + let split_idx = endpoint.rfind(':')?; + let host = endpoint[..split_idx].trim(); + let port = endpoint[split_idx + 1..].trim().parse::().ok()?; + if host.is_empty() || host.contains(':') { + return None; + } + + Some((host.to_ascii_lowercase(), port)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn validate_accepts_ipv4_and_bracketed_ipv6() { + let entries = vec![ + "example.com:443:127.0.0.1".to_string(), + "example.net:8443:[2001:db8::10]".to_string(), + ]; + assert!(validate_entries(&entries).is_ok()); + } + + #[test] + fn validate_rejects_unbracketed_ipv6() { + let entries = vec!["example.net:443:2001:db8::10".to_string()]; + let err = validate_entries(&entries).unwrap_err().to_string(); + assert!(err.contains("must be bracketed")); + } + + #[test] + fn install_and_resolve_are_case_insensitive_for_host() { + let entries = vec!["MyPetrovich.ru:8443:127.0.0.1".to_string()]; + install_entries(&entries).unwrap(); + + let resolved = resolve("mypetrovich.ru", 8443); + assert_eq!(resolved, Some("127.0.0.1".parse().unwrap())); + } + + #[test] + fn split_host_port_parses_supported_shapes() { + assert_eq!( + split_host_port("example.com:443"), + Some(("example.com".to_string(), 443)) + ); + assert_eq!( + split_host_port("[2001:db8::1]:443"), + Some(("2001:db8::1".to_string(), 443)) 
+ ); + assert_eq!(split_host_port("2001:db8::1:443"), None); + } +} diff --git a/src/network/mod.rs b/src/network/mod.rs index 78a1040..e57622d 100644 --- a/src/network/mod.rs +++ b/src/network/mod.rs @@ -1,3 +1,4 @@ +pub mod dns_overrides; pub mod probe; pub mod stun; diff --git a/src/network/probe.rs b/src/network/probe.rs index 6e84682..2ceeb2c 100644 --- a/src/network/probe.rs +++ b/src/network/probe.rs @@ -68,7 +68,7 @@ pub async fn run_probe( probe.ipv4_is_bogon = probe.detected_ipv4.map(is_bogon_v4).unwrap_or(false); probe.ipv6_is_bogon = probe.detected_ipv6.map(is_bogon_v6).unwrap_or(false); - let stun_res = if nat_probe { + let stun_res = if nat_probe && config.stun_use { let servers = collect_stun_servers(config); if servers.is_empty() { warn!("STUN probe is enabled but network.stun_servers is empty"); @@ -80,6 +80,9 @@ pub async fn run_probe( ) .await } + } else if nat_probe { + info!("STUN probe is disabled by network.stun_use=false"); + DualStunResult::default() } else { DualStunResult::default() }; diff --git a/src/network/stun.rs b/src/network/stun.rs index 5bda495..bb5a873 100644 --- a/src/network/stun.rs +++ b/src/network/stun.rs @@ -7,6 +7,7 @@ use tokio::net::{lookup_host, UdpSocket}; use tokio::time::{timeout, Duration, sleep}; use crate::error::{ProxyError, Result}; +use crate::network::dns_overrides::{resolve, split_host_port}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum IpFamily { @@ -198,6 +199,16 @@ async fn resolve_stun_addr(stun_addr: &str, family: IpFamily) -> Result Some(addr), + _ => None, + }); + } + let mut addrs = lookup_host(stun_addr) .await .map_err(|e| ProxyError::Proxy(format!("STUN resolve failed: {e}")))?; diff --git a/src/proxy/masking.rs b/src/proxy/masking.rs index d12cf41..8f19b40 100644 --- a/src/proxy/masking.rs +++ b/src/proxy/masking.rs @@ -10,6 +10,7 @@ use tokio::io::{AsyncRead, AsyncWrite, AsyncReadExt, AsyncWriteExt}; use tokio::time::timeout; use tracing::debug; use crate::config::ProxyConfig; 
+use crate::network::dns_overrides::resolve_socket_addr; use crate::stats::beobachten::BeobachtenStore; use crate::transport::proxy_protocol::{ProxyProtocolV1Builder, ProxyProtocolV2Builder}; @@ -115,8 +116,10 @@ where "Forwarding bad client to mask host" ); - // Connect to mask host - let mask_addr = format!("{}:{}", mask_host, mask_port); + // Apply runtime DNS override for mask target when configured. + let mask_addr = resolve_socket_addr(mask_host, mask_port) + .map(|addr| addr.to_string()) + .unwrap_or_else(|| format!("{}:{}", mask_host, mask_port)); let connect_result = timeout(MASK_TIMEOUT, TcpStream::connect(&mask_addr)).await; match connect_result { Ok(Ok(stream)) => { diff --git a/src/tls_front/fetcher.rs b/src/tls_front/fetcher.rs index 561d4cc..ba80332 100644 --- a/src/tls_front/fetcher.rs +++ b/src/tls_front/fetcher.rs @@ -18,6 +18,7 @@ use x509_parser::prelude::FromDer; use x509_parser::certificate::X509Certificate; use crate::crypto::SecureRandom; +use crate::network::dns_overrides::resolve_socket_addr; use crate::protocol::constants::{TLS_RECORD_APPLICATION, TLS_RECORD_HANDSHAKE}; use crate::transport::proxy_protocol::{ProxyProtocolV1Builder, ProxyProtocolV2Builder}; use crate::tls_front::types::{ @@ -333,6 +334,17 @@ fn u24_bytes(value: usize) -> Option<[u8; 3]> { ]) } +async fn connect_with_dns_override( + host: &str, + port: u16, + connect_timeout: Duration, +) -> Result { + if let Some(addr) = resolve_socket_addr(host, port) { + return Ok(timeout(connect_timeout, TcpStream::connect(addr)).await??); + } + Ok(timeout(connect_timeout, TcpStream::connect((host, port))).await??) 
+} + fn encode_tls13_certificate_message(cert_chain_der: &[Vec]) -> Option> { if cert_chain_der.is_empty() { return None; @@ -369,8 +381,7 @@ async fn fetch_via_raw_tls( connect_timeout: Duration, proxy_protocol: u8, ) -> Result { - let addr = format!("{host}:{port}"); - let mut stream = timeout(connect_timeout, TcpStream::connect(addr)).await??; + let mut stream = connect_with_dns_override(host, port, connect_timeout).await?; let rng = SecureRandom::new(); let client_hello = build_client_hello(sni, &rng); @@ -437,24 +448,31 @@ async fn fetch_via_rustls( ) -> Result { // rustls handshake path for certificate and basic negotiated metadata. let mut stream = if let Some(manager) = upstream { - // Resolve host to SocketAddr - if let Ok(mut addrs) = tokio::net::lookup_host((host, port)).await { + if let Some(addr) = resolve_socket_addr(host, port) { + match manager.connect(addr, None, None).await { + Ok(s) => s, + Err(e) => { + warn!(sni = %sni, error = %e, "Upstream connect failed, using direct connect"); + connect_with_dns_override(host, port, connect_timeout).await? + } + } + } else if let Ok(mut addrs) = tokio::net::lookup_host((host, port)).await { if let Some(addr) = addrs.find(|a| a.is_ipv4()) { match manager.connect(addr, None, None).await { Ok(s) => s, Err(e) => { warn!(sni = %sni, error = %e, "Upstream connect failed, using direct connect"); - timeout(connect_timeout, TcpStream::connect((host, port))).await?? + connect_with_dns_override(host, port, connect_timeout).await? } } } else { - timeout(connect_timeout, TcpStream::connect((host, port))).await?? + connect_with_dns_override(host, port, connect_timeout).await? } } else { - timeout(connect_timeout, TcpStream::connect((host, port))).await?? + connect_with_dns_override(host, port, connect_timeout).await? } } else { - timeout(connect_timeout, TcpStream::connect((host, port))).await?? + connect_with_dns_override(host, port, connect_timeout).await? 
}; if proxy_protocol > 0 { diff --git a/src/transport/upstream.rs b/src/transport/upstream.rs index e2198a8..a442597 100644 --- a/src/transport/upstream.rs +++ b/src/transport/upstream.rs @@ -17,6 +17,7 @@ use tracing::{debug, warn, info, trace}; use crate::config::{UpstreamConfig, UpstreamType}; use crate::error::{Result, ProxyError}; +use crate::network::dns_overrides::{resolve_socket_addr, split_host_port}; use crate::protocol::constants::{TG_DATACENTERS_V4, TG_DATACENTERS_V6, TG_DATACENTER_PORT}; use crate::transport::socket::{create_outgoing_socket_bound, resolve_interface_ip}; use crate::transport::socks::{connect_socks4, connect_socks5}; @@ -209,6 +210,31 @@ impl UpstreamManager { None } + async fn connect_hostname_with_dns_override( + address: &str, + connect_timeout: Duration, + ) -> Result { + if let Some((host, port)) = split_host_port(address) + && let Some(addr) = resolve_socket_addr(&host, port) + { + return match tokio::time::timeout(connect_timeout, TcpStream::connect(addr)).await { + Ok(Ok(stream)) => Ok(stream), + Ok(Err(e)) => Err(ProxyError::Io(e)), + Err(_) => Err(ProxyError::ConnectionTimeout { + addr: addr.to_string(), + }), + }; + } + + match tokio::time::timeout(connect_timeout, TcpStream::connect(address)).await { + Ok(Ok(stream)) => Ok(stream), + Ok(Err(e)) => Err(ProxyError::Io(e)), + Err(_) => Err(ProxyError::ConnectionTimeout { + addr: address.to_string(), + }), + } + } + /// Select upstream using latency-weighted random selection. 
async fn select_upstream(&self, dc_idx: Option, scope: Option<&str>) -> Option { let upstreams = self.upstreams.read().await; @@ -433,15 +459,7 @@ impl UpstreamManager { if interface.is_some() { warn!("SOCKS4 interface binding is not supported for hostname addresses, ignoring"); } - match tokio::time::timeout(connect_timeout, TcpStream::connect(address)).await { - Ok(Ok(stream)) => stream, - Ok(Err(e)) => return Err(ProxyError::Io(e)), - Err(_) => { - return Err(ProxyError::ConnectionTimeout { - addr: address.clone(), - }); - } - } + Self::connect_hostname_with_dns_override(address, connect_timeout).await? }; // replace socks user_id with config.selected_scope, if set @@ -503,15 +521,7 @@ impl UpstreamManager { if interface.is_some() { warn!("SOCKS5 interface binding is not supported for hostname addresses, ignoring"); } - match tokio::time::timeout(connect_timeout, TcpStream::connect(address)).await { - Ok(Ok(stream)) => stream, - Ok(Err(e)) => return Err(ProxyError::Io(e)), - Err(_) => { - return Err(ProxyError::ConnectionTimeout { - addr: address.clone(), - }); - } - } + Self::connect_hostname_with_dns_override(address, connect_timeout).await? 
}; debug!(config = ?config, "Socks5 connection"); From 3d9660f83eaaf003d60e4e00ee2833d9189af0a8 Mon Sep 17 00:00:00 2001 From: Alexey <247128645+axkurcom@users.noreply.github.com> Date: Sat, 28 Feb 2026 01:20:17 +0300 Subject: [PATCH 02/10] Upstreams for ME + Egress-data from UM + ME-over-SOCKS + Bind-aware STUN --- src/main.rs | 1 + src/network/stun.rs | 27 +++- src/transport/middle_proxy/handshake.rs | 160 ++++++++++++++++++---- src/transport/middle_proxy/ping.rs | 4 +- src/transport/middle_proxy/pool.rs | 4 + src/transport/middle_proxy/pool_nat.rs | 57 +++++--- src/transport/middle_proxy/pool_writer.rs | 4 +- src/transport/mod.rs | 2 +- src/transport/socks.rs | 42 ++++-- src/transport/upstream.rs | 84 ++++++++++-- 10 files changed, 307 insertions(+), 78 deletions(-) diff --git a/src/main.rs b/src/main.rs index 7389117..b065d4e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -509,6 +509,7 @@ async fn main() -> std::result::Result<(), Box> { cfg_v6.map.clone(), cfg_v4.default_dc.or(cfg_v6.default_dc), decision.clone(), + Some(upstream_manager.clone()), rng.clone(), stats.clone(), config.general.me_keepalive_enabled, diff --git a/src/network/stun.rs b/src/network/stun.rs index bb5a873..c3a235f 100644 --- a/src/network/stun.rs +++ b/src/network/stun.rs @@ -41,16 +41,31 @@ pub async fn stun_probe_dual(stun_addr: &str) -> Result { } pub async fn stun_probe_family(stun_addr: &str, family: IpFamily) -> Result> { + stun_probe_family_with_bind(stun_addr, family, None).await +} + +pub async fn stun_probe_family_with_bind( + stun_addr: &str, + family: IpFamily, + bind_ip: Option, +) -> Result> { use rand::RngCore; - let bind_addr = match family { - IpFamily::V4 => "0.0.0.0:0", - IpFamily::V6 => "[::]:0", + let bind_addr = match (family, bind_ip) { + (IpFamily::V4, Some(IpAddr::V4(ip))) => SocketAddr::new(IpAddr::V4(ip), 0), + (IpFamily::V6, Some(IpAddr::V6(ip))) => SocketAddr::new(IpAddr::V6(ip), 0), + (IpFamily::V4, Some(IpAddr::V6(_))) | (IpFamily::V6, Some(IpAddr::V4(_))) 
=> { + return Ok(None); + } + (IpFamily::V4, None) => SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), 0), + (IpFamily::V6, None) => SocketAddr::new(IpAddr::V6(Ipv6Addr::UNSPECIFIED), 0), }; - let socket = UdpSocket::bind(bind_addr) - .await - .map_err(|e| ProxyError::Proxy(format!("STUN bind failed: {e}")))?; + let socket = match UdpSocket::bind(bind_addr).await { + Ok(socket) => socket, + Err(_) if bind_ip.is_some() => return Ok(None), + Err(e) => return Err(ProxyError::Proxy(format!("STUN bind failed: {e}"))), + }; let target_addr = resolve_stun_addr(stun_addr, family).await?; if let Some(addr) = target_addr { diff --git a/src/transport/middle_proxy/handshake.rs b/src/transport/middle_proxy/handshake.rs index d9bcdde..988834a 100644 --- a/src/transport/middle_proxy/handshake.rs +++ b/src/transport/middle_proxy/handshake.rs @@ -17,10 +17,12 @@ use tracing::{debug, info, warn}; use crate::crypto::{SecureRandom, build_middleproxy_prekey, derive_middleproxy_keys, sha256}; use crate::error::{ProxyError, Result}; use crate::network::IpFamily; +use crate::network::probe::is_bogon; use crate::protocol::constants::{ ME_CONNECT_TIMEOUT_SECS, ME_HANDSHAKE_TIMEOUT_SECS, RPC_CRYPTO_AES_U32, RPC_HANDSHAKE_ERROR_U32, rpc_crypto_flags, }; +use crate::transport::{UpstreamEgressInfo, UpstreamRouteKind}; use super::codec::{ RpcChecksumMode, build_handshake_payload, build_nonce_payload, build_rpc_frame, @@ -43,33 +45,125 @@ pub(crate) struct HandshakeOutput { } impl MePool { - /// TCP connect with timeout + return RTT in milliseconds. 
- pub(crate) async fn connect_tcp(&self, addr: SocketAddr) -> Result<(TcpStream, f64)> { - let start = Instant::now(); - let connect_fut = async { - if addr.is_ipv6() - && let Some(v6) = self.detected_ipv6 - { - match TcpSocket::new_v6() { - Ok(sock) => { - if let Err(e) = sock.bind(SocketAddr::new(IpAddr::V6(v6), 0)) { - debug!(error = %e, bind_ip = %v6, "ME IPv6 bind failed, falling back to default bind"); - } else { - match sock.connect(addr).await { - Ok(stream) => return Ok(stream), - Err(e) => debug!(error = %e, target = %addr, "ME IPv6 bound connect failed, retrying default connect"), - } - } + async fn resolve_dc_idx_for_endpoint(&self, addr: SocketAddr) -> Option { + if addr.is_ipv4() { + let map = self.proxy_map_v4.read().await; + for (dc, addrs) in map.iter() { + if addrs + .iter() + .any(|(ip, port)| SocketAddr::new(*ip, *port) == addr) + { + let abs_dc = dc.abs(); + if abs_dc > 0 + && let Ok(dc_idx) = i16::try_from(abs_dc) + { + return Some(dc_idx); } - Err(e) => debug!(error = %e, "ME IPv6 socket creation failed, falling back to default connect"), } } - TcpStream::connect(addr).await + } else { + let map = self.proxy_map_v6.read().await; + for (dc, addrs) in map.iter() { + if addrs + .iter() + .any(|(ip, port)| SocketAddr::new(*ip, *port) == addr) + { + let abs_dc = dc.abs(); + if abs_dc > 0 + && let Ok(dc_idx) = i16::try_from(abs_dc) + { + return Some(dc_idx); + } + } + } + } + None + } + + fn direct_bind_ip_for_stun( + family: IpFamily, + upstream_egress: Option, + ) -> Option { + let info = upstream_egress?; + if info.route_kind != UpstreamRouteKind::Direct { + return None; + } + match (family, info.direct_bind_ip) { + (IpFamily::V4, Some(IpAddr::V4(ip))) => Some(IpAddr::V4(ip)), + (IpFamily::V6, Some(IpAddr::V6(ip))) => Some(IpAddr::V6(ip)), + _ => None, + } + } + + fn select_socks_bound_addr( + family: IpFamily, + upstream_egress: Option, + ) -> Option { + let info = upstream_egress?; + if !matches!( + info.route_kind, + UpstreamRouteKind::Socks4 
| UpstreamRouteKind::Socks5 + ) { + return None; + } + let bound = info.socks_bound_addr?; + let family_matches = matches!( + (family, bound.ip()), + (IpFamily::V4, IpAddr::V4(_)) | (IpFamily::V6, IpAddr::V6(_)) + ); + if !family_matches || is_bogon(bound.ip()) || bound.ip().is_unspecified() { + return None; + } + Some(bound) + } + + /// TCP connect with timeout + return RTT in milliseconds. + pub(crate) async fn connect_tcp( + &self, + addr: SocketAddr, + ) -> Result<(TcpStream, f64, Option)> { + let start = Instant::now(); + let (stream, upstream_egress) = if let Some(upstream) = &self.upstream { + let dc_idx = self.resolve_dc_idx_for_endpoint(addr).await; + let (stream, egress) = timeout( + Duration::from_secs(ME_CONNECT_TIMEOUT_SECS), + upstream.connect_with_details(addr, dc_idx, None), + ) + .await + .map_err(|_| ProxyError::ConnectionTimeout { + addr: addr.to_string(), + })??; + (stream, Some(egress)) + } else { + let connect_fut = async { + if addr.is_ipv6() + && let Some(v6) = self.detected_ipv6 + { + match TcpSocket::new_v6() { + Ok(sock) => { + if let Err(e) = sock.bind(SocketAddr::new(IpAddr::V6(v6), 0)) { + debug!(error = %e, bind_ip = %v6, "ME IPv6 bind failed, falling back to default bind"); + } else { + match sock.connect(addr).await { + Ok(stream) => return Ok(stream), + Err(e) => debug!(error = %e, target = %addr, "ME IPv6 bound connect failed, retrying default connect"), + } + } + } + Err(e) => debug!(error = %e, "ME IPv6 socket creation failed, falling back to default connect"), + } + } + TcpStream::connect(addr).await + }; + + let stream = timeout(Duration::from_secs(ME_CONNECT_TIMEOUT_SECS), connect_fut) + .await + .map_err(|_| ProxyError::ConnectionTimeout { + addr: addr.to_string(), + })??; + (stream, None) }; - let stream = timeout(Duration::from_secs(ME_CONNECT_TIMEOUT_SECS), connect_fut) - .await - .map_err(|_| ProxyError::ConnectionTimeout { addr: addr.to_string() })??; let connect_ms = start.elapsed().as_secs_f64() * 1000.0; 
stream.set_nodelay(true).ok(); if let Err(e) = Self::configure_keepalive(&stream) { @@ -79,7 +173,7 @@ impl MePool { if let Err(e) = Self::configure_user_timeout(stream.as_raw_fd()) { warn!(error = %e, "ME TCP_USER_TIMEOUT setup failed"); } - Ok((stream, connect_ms)) + Ok((stream, connect_ms, upstream_egress)) } fn configure_keepalive(stream: &TcpStream) -> std::io::Result<()> { @@ -117,12 +211,14 @@ impl MePool { &self, stream: TcpStream, addr: SocketAddr, + upstream_egress: Option, rng: &SecureRandom, ) -> Result { let hs_start = Instant::now(); let local_addr = stream.local_addr().map_err(ProxyError::Io)?; - let peer_addr = stream.peer_addr().map_err(ProxyError::Io)?; + let transport_peer_addr = stream.peer_addr().map_err(ProxyError::Io)?; + let peer_addr = addr; let _ = self.maybe_detect_nat_ip(local_addr.ip()).await; let family = if local_addr.ip().is_ipv4() { @@ -130,8 +226,12 @@ impl MePool { } else { IpFamily::V6 }; - let reflected = if self.nat_probe { - self.maybe_reflect_public_addr(family).await + let socks_bound_addr = Self::select_socks_bound_addr(family, upstream_egress); + let reflected = if let Some(bound) = socks_bound_addr { + Some(bound) + } else if self.nat_probe { + let bind_ip = Self::direct_bind_ip_for_stun(family, upstream_egress); + self.maybe_reflect_public_addr(family, bind_ip).await } else { None }; @@ -197,7 +297,9 @@ impl MePool { %local_addr_nat, reflected_ip = reflected.map(|r| r.ip()).as_ref().map(ToString::to_string), %peer_addr, + %transport_peer_addr, %peer_addr_nat, + socks_bound_addr = socks_bound_addr.map(|v| v.to_string()), key_selector = format_args!("0x{ks:08x}"), crypto_schema = format_args!("0x{schema:08x}"), skew_secs = skew, @@ -206,7 +308,11 @@ impl MePool { let ts_bytes = crypto_ts.to_le_bytes(); let server_port_bytes = peer_addr_nat.port().to_le_bytes(); - let client_port_bytes = local_addr_nat.port().to_le_bytes(); + let client_port_for_kdf = socks_bound_addr + .map(|bound| bound.port()) + .filter(|port| *port != 
0) + .unwrap_or(local_addr_nat.port()); + let client_port_bytes = client_port_for_kdf.to_le_bytes(); let server_ip = extract_ip_material(peer_addr_nat); let client_ip = extract_ip_material(local_addr_nat); diff --git a/src/transport/middle_proxy/ping.rs b/src/transport/middle_proxy/ping.rs index a1dd1e6..aae11e6 100644 --- a/src/transport/middle_proxy/ping.rs +++ b/src/transport/middle_proxy/ping.rs @@ -122,9 +122,9 @@ pub async fn run_me_ping(pool: &Arc, rng: &SecureRandom) -> Vec { + Ok((stream, conn_rtt, upstream_egress)) => { connect_ms = Some(conn_rtt); - match pool.handshake_only(stream, addr, rng).await { + match pool.handshake_only(stream, addr, upstream_egress, rng).await { Ok(hs) => { handshake_ms = Some(hs.handshake_ms); // drop halves to close diff --git a/src/transport/middle_proxy/pool.rs b/src/transport/middle_proxy/pool.rs index 1e43628..d2e8fa4 100644 --- a/src/transport/middle_proxy/pool.rs +++ b/src/transport/middle_proxy/pool.rs @@ -10,6 +10,7 @@ use tokio_util::sync::CancellationToken; use crate::crypto::SecureRandom; use crate::network::IpFamily; use crate::network::probe::NetworkDecision; +use crate::transport::UpstreamManager; use super::ConnRegistry; use super::codec::WriterCommand; @@ -33,6 +34,7 @@ pub struct MePool { pub(super) writers: Arc>>, pub(super) rr: AtomicU64, pub(super) decision: NetworkDecision, + pub(super) upstream: Option>, pub(super) rng: Arc, pub(super) proxy_tag: Option>, pub(super) proxy_secret: Arc>>, @@ -121,6 +123,7 @@ impl MePool { proxy_map_v6: HashMap>, default_dc: Option, decision: NetworkDecision, + upstream: Option>, rng: Arc, stats: Arc, me_keepalive_enabled: bool, @@ -148,6 +151,7 @@ impl MePool { writers: Arc::new(RwLock::new(Vec::new())), rr: AtomicU64::new(0), decision, + upstream, rng, proxy_tag, proxy_secret: Arc::new(RwLock::new(proxy_secret)), diff --git a/src/transport/middle_proxy/pool_nat.rs b/src/transport/middle_proxy/pool_nat.rs index 7141236..bfcb0e2 100644 --- 
a/src/transport/middle_proxy/pool_nat.rs +++ b/src/transport/middle_proxy/pool_nat.rs @@ -8,7 +8,7 @@ use tracing::{debug, info, warn}; use crate::error::{ProxyError, Result}; use crate::network::probe::is_bogon; -use crate::network::stun::{stun_probe_dual, IpFamily, StunProbeResult}; +use crate::network::stun::{stun_probe_dual, stun_probe_family_with_bind, IpFamily}; use super::MePool; use std::time::Instant; @@ -52,6 +52,7 @@ impl MePool { servers: &[String], family: IpFamily, attempt: u8, + bind_ip: Option, ) -> (Vec, Option) { let mut join_set = JoinSet::new(); let mut next_idx = 0usize; @@ -64,7 +65,11 @@ impl MePool { let stun_addr = servers[next_idx].clone(); next_idx += 1; join_set.spawn(async move { - let res = timeout(STUN_BATCH_TIMEOUT, stun_probe_dual(&stun_addr)).await; + let res = timeout( + STUN_BATCH_TIMEOUT, + stun_probe_family_with_bind(&stun_addr, family, bind_ip), + ) + .await; (stun_addr, res) }); } @@ -74,12 +79,7 @@ impl MePool { }; match task { - Ok((stun_addr, Ok(Ok(res)))) => { - let picked: Option = match family { - IpFamily::V4 => res.v4, - IpFamily::V6 => res.v6, - }; - + Ok((stun_addr, Ok(Ok(picked)))) => { if let Some(result) = picked { live_servers.push(stun_addr.clone()); let entry = best_by_ip @@ -207,10 +207,21 @@ impl MePool { pub(super) async fn maybe_reflect_public_addr( &self, family: IpFamily, + bind_ip: Option, ) -> Option { const STUN_CACHE_TTL: Duration = Duration::from_secs(600); + let use_shared_cache = bind_ip.is_none(); + if !use_shared_cache { + match (family, bind_ip) { + (IpFamily::V4, Some(IpAddr::V4(_))) + | (IpFamily::V6, Some(IpAddr::V6(_))) + | (_, None) => {} + _ => return None, + } + } // Backoff window - if let Some(until) = *self.stun_backoff_until.read().await + if use_shared_cache + && let Some(until) = *self.stun_backoff_until.read().await && Instant::now() < until { if let Ok(cache) = self.nat_reflection_cache.try_lock() { @@ -223,7 +234,9 @@ impl MePool { return None; } - if let Ok(mut cache) = 
self.nat_reflection_cache.try_lock() { + if use_shared_cache + && let Ok(mut cache) = self.nat_reflection_cache.try_lock() + { let slot = match family { IpFamily::V4 => &mut cache.v4, IpFamily::V6 => &mut cache.v6, @@ -235,7 +248,11 @@ impl MePool { } } - let attempt = self.nat_probe_attempts.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + let attempt = if use_shared_cache { + self.nat_probe_attempts.fetch_add(1, std::sync::atomic::Ordering::Relaxed) + } else { + 0 + }; let configured_servers = self.configured_stun_servers(); let live_snapshot = self.nat_stun_live_servers.read().await.clone(); let primary_servers = if live_snapshot.is_empty() { @@ -245,12 +262,12 @@ impl MePool { }; let (mut live_servers, mut selected_reflected) = self - .probe_stun_batch_for_family(&primary_servers, family, attempt) + .probe_stun_batch_for_family(&primary_servers, family, attempt, bind_ip) .await; if selected_reflected.is_none() && !configured_servers.is_empty() && primary_servers != configured_servers { let (rediscovered_live, rediscovered_reflected) = self - .probe_stun_batch_for_family(&configured_servers, family, attempt) + .probe_stun_batch_for_family(&configured_servers, family, attempt, bind_ip) .await; live_servers = rediscovered_live; selected_reflected = rediscovered_reflected; @@ -264,14 +281,18 @@ impl MePool { } if let Some(reflected_addr) = selected_reflected { - self.nat_probe_attempts.store(0, std::sync::atomic::Ordering::Relaxed); + if use_shared_cache { + self.nat_probe_attempts.store(0, std::sync::atomic::Ordering::Relaxed); + } info!( family = ?family, live_servers = live_server_count, "STUN-Quorum reached, IP: {}", reflected_addr.ip() ); - if let Ok(mut cache) = self.nat_reflection_cache.try_lock() { + if use_shared_cache + && let Ok(mut cache) = self.nat_reflection_cache.try_lock() + { let slot = match family { IpFamily::V4 => &mut cache.v4, IpFamily::V6 => &mut cache.v6, @@ -281,8 +302,10 @@ impl MePool { return Some(reflected_addr); } - let backoff = 
Duration::from_secs(60 * 2u64.pow((attempt as u32).min(6))); - *self.stun_backoff_until.write().await = Some(Instant::now() + backoff); + if use_shared_cache { + let backoff = Duration::from_secs(60 * 2u64.pow((attempt as u32).min(6))); + *self.stun_backoff_until.write().await = Some(Instant::now() + backoff); + } None } } diff --git a/src/transport/middle_proxy/pool_writer.rs b/src/transport/middle_proxy/pool_writer.rs index 942ddaf..28f5538 100644 --- a/src/transport/middle_proxy/pool_writer.rs +++ b/src/transport/middle_proxy/pool_writer.rs @@ -47,8 +47,8 @@ impl MePool { return Err(ProxyError::Proxy("proxy-secret too short for ME auth".into())); } - let (stream, _connect_ms) = self.connect_tcp(addr).await?; - let hs = self.handshake_only(stream, addr, rng).await?; + let (stream, _connect_ms, upstream_egress) = self.connect_tcp(addr).await?; + let hs = self.handshake_only(stream, addr, upstream_egress, rng).await?; let writer_id = self.next_writer_id.fetch_add(1, Ordering::Relaxed); let generation = self.current_generation(); diff --git a/src/transport/mod.rs b/src/transport/mod.rs index ead0565..cba5465 100644 --- a/src/transport/mod.rs +++ b/src/transport/mod.rs @@ -14,5 +14,5 @@ pub use socket::*; #[allow(unused_imports)] pub use socks::*; #[allow(unused_imports)] -pub use upstream::{DcPingResult, StartupPingResult, UpstreamManager}; +pub use upstream::{DcPingResult, StartupPingResult, UpstreamEgressInfo, UpstreamManager, UpstreamRouteKind}; pub mod middle_proxy; diff --git a/src/transport/socks.rs b/src/transport/socks.rs index 8196b52..5369787 100644 --- a/src/transport/socks.rs +++ b/src/transport/socks.rs @@ -5,11 +5,16 @@ use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::net::TcpStream; use crate::error::{ProxyError, Result}; +#[derive(Debug, Clone, Copy)] +pub struct SocksBoundAddr { + pub addr: SocketAddr, +} + pub async fn connect_socks4( stream: &mut TcpStream, target: SocketAddr, user_id: Option<&str>, -) -> Result<()> { +) -> Result { let ip 
= match target.ip() { IpAddr::V4(ip) => ip, IpAddr::V6(_) => return Err(ProxyError::Proxy("SOCKS4 does not support IPv6".to_string())), @@ -36,8 +41,13 @@ pub async fn connect_socks4( if resp[1] != 90 { return Err(ProxyError::Proxy(format!("SOCKS4 request rejected: code {}", resp[1]))); } - - Ok(()) + + let bound_port = u16::from_be_bytes([resp[2], resp[3]]); + let bound_ip = IpAddr::from([resp[4], resp[5], resp[6], resp[7]]); + + Ok(SocksBoundAddr { + addr: SocketAddr::new(bound_ip, bound_port), + }) } pub async fn connect_socks5( @@ -45,7 +55,7 @@ pub async fn connect_socks5( target: SocketAddr, username: Option<&str>, password: Option<&str>, -) -> Result<()> { +) -> Result { // 1. Auth negotiation // VER (1) | NMETHODS (1) | METHODS (variable) let mut methods = vec![0u8]; // No auth @@ -122,24 +132,36 @@ pub async fn connect_socks5( return Err(ProxyError::Proxy(format!("SOCKS5 request failed: code {}", head[1]))); } - // Skip address part of response - match head[3] { + // Parse bound address from response. + let bound_addr = match head[3] { 1 => { // IPv4 let mut addr = [0u8; 4 + 2]; stream.read_exact(&mut addr).await.map_err(ProxyError::Io)?; + let ip = IpAddr::from([addr[0], addr[1], addr[2], addr[3]]); + let port = u16::from_be_bytes([addr[4], addr[5]]); + SocketAddr::new(ip, port) }, 3 => { // Domain let mut len = [0u8; 1]; stream.read_exact(&mut len).await.map_err(ProxyError::Io)?; let mut addr = vec![0u8; len[0] as usize + 2]; stream.read_exact(&mut addr).await.map_err(ProxyError::Io)?; + // Domain-bound response is not useful for KDF IP material. 
+ let port_pos = addr.len().saturating_sub(2); + let port = u16::from_be_bytes([addr[port_pos], addr[port_pos + 1]]); + SocketAddr::new(IpAddr::from([0, 0, 0, 0]), port) }, 4 => { // IPv6 let mut addr = [0u8; 16 + 2]; stream.read_exact(&mut addr).await.map_err(ProxyError::Io)?; + let ip = IpAddr::from(<[u8; 16]>::try_from(&addr[..16]).map_err(|_| { + ProxyError::Proxy("Invalid SOCKS5 IPv6 bound address".to_string()) + })?); + let port = u16::from_be_bytes([addr[16], addr[17]]); + SocketAddr::new(ip, port) }, _ => return Err(ProxyError::Proxy("Invalid address type in SOCKS5 response".to_string())), - } - - Ok(()) -} \ No newline at end of file + }; + + Ok(SocksBoundAddr { addr: bound_addr }) +} diff --git a/src/transport/upstream.rs b/src/transport/upstream.rs index a442597..eff05b8 100644 --- a/src/transport/upstream.rs +++ b/src/transport/upstream.rs @@ -151,6 +151,21 @@ pub struct StartupPingResult { pub both_available: bool, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UpstreamRouteKind { + Direct, + Socks4, + Socks5, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct UpstreamEgressInfo { + pub route_kind: UpstreamRouteKind, + pub local_addr: Option, + pub direct_bind_ip: Option, + pub socks_bound_addr: Option, +} + // ============= Upstream Manager ============= #[derive(Clone)] @@ -316,6 +331,17 @@ impl UpstreamManager { /// Connect to target through a selected upstream. pub async fn connect(&self, target: SocketAddr, dc_idx: Option, scope: Option<&str>) -> Result { + let (stream, _) = self.connect_with_details(target, dc_idx, scope).await?; + Ok(stream) + } + + /// Connect to target through a selected upstream and return egress details. 
+ pub async fn connect_with_details( + &self, + target: SocketAddr, + dc_idx: Option, + scope: Option<&str>, + ) -> Result<(TcpStream, UpstreamEgressInfo)> { let idx = self.select_upstream(dc_idx, scope).await .ok_or_else(|| ProxyError::Config("No upstreams available".to_string()))?; @@ -337,7 +363,7 @@ impl UpstreamManager { }; match self.connect_via_upstream(&upstream, target, bind_rr).await { - Ok(stream) => { + Ok((stream, egress)) => { let rtt_ms = start.elapsed().as_secs_f64() * 1000.0; let mut guard = self.upstreams.write().await; if let Some(u) = guard.get_mut(idx) { @@ -351,7 +377,7 @@ impl UpstreamManager { u.dc_latency[di].update(rtt_ms); } } - Ok(stream) + Ok((stream, egress)) }, Err(e) => { let mut guard = self.upstreams.write().await; @@ -373,7 +399,7 @@ impl UpstreamManager { config: &UpstreamConfig, target: SocketAddr, bind_rr: Option>, - ) -> Result { + ) -> Result<(TcpStream, UpstreamEgressInfo)> { match &config.upstream_type { UpstreamType::Direct { interface, bind_addresses } => { let bind_ip = Self::resolve_bind_address( @@ -414,7 +440,16 @@ impl UpstreamManager { return Err(ProxyError::Io(e)); } - Ok(stream) + let local_addr = stream.local_addr().ok(); + Ok(( + stream, + UpstreamEgressInfo { + route_kind: UpstreamRouteKind::Direct, + local_addr, + direct_bind_ip: bind_ip, + socks_bound_addr: None, + }, + )) }, UpstreamType::Socks4 { address, interface, user_id } => { let connect_timeout = Duration::from_secs(DIRECT_CONNECT_TIMEOUT_SECS); @@ -467,16 +502,30 @@ impl UpstreamManager { .filter(|s| !s.is_empty()); let _user_id: Option<&str> = scope.or(user_id.as_deref()); - match tokio::time::timeout(connect_timeout, connect_socks4(&mut stream, target, _user_id)).await { - Ok(Ok(())) => {} + let bound = match tokio::time::timeout( + connect_timeout, + connect_socks4(&mut stream, target, _user_id), + ) + .await + { + Ok(Ok(bound)) => bound, Ok(Err(e)) => return Err(e), Err(_) => { return Err(ProxyError::ConnectionTimeout { addr: target.to_string(), 
}); } - } - Ok(stream) + }; + let local_addr = stream.local_addr().ok(); + Ok(( + stream, + UpstreamEgressInfo { + route_kind: UpstreamRouteKind::Socks4, + local_addr, + direct_bind_ip: None, + socks_bound_addr: Some(bound.addr), + }, + )) }, UpstreamType::Socks5 { address, interface, username, password } => { let connect_timeout = Duration::from_secs(DIRECT_CONNECT_TIMEOUT_SECS); @@ -531,21 +580,30 @@ impl UpstreamManager { let _username: Option<&str> = scope.or(username.as_deref()); let _password: Option<&str> = scope.or(password.as_deref()); - match tokio::time::timeout( + let bound = match tokio::time::timeout( connect_timeout, connect_socks5(&mut stream, target, _username, _password), ) .await { - Ok(Ok(())) => {} + Ok(Ok(bound)) => bound, Ok(Err(e)) => return Err(e), Err(_) => { return Err(ProxyError::ConnectionTimeout { addr: target.to_string(), }); } - } - Ok(stream) + }; + let local_addr = stream.local_addr().ok(); + Ok(( + stream, + UpstreamEgressInfo { + route_kind: UpstreamRouteKind::Socks5, + local_addr, + direct_bind_ip: None, + socks_bound_addr: Some(bound.addr), + }, + )) }, } } @@ -777,7 +835,7 @@ impl UpstreamManager { target: SocketAddr, ) -> Result { let start = Instant::now(); - let _stream = self.connect_via_upstream(config, target, bind_rr).await?; + let _ = self.connect_via_upstream(config, target, bind_rr).await?; Ok(start.elapsed().as_secs_f64() * 1000.0) } From 6b8aa7270ecb18c54836f53bc6612d88cedee9be Mon Sep 17 00:00:00 2001 From: Alexey <247128645+axkurcom@users.noreply.github.com> Date: Sat, 28 Feb 2026 01:54:29 +0300 Subject: [PATCH 03/10] Bind_addresses prio over interfaces --- src/transport/upstream.rs | 79 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/src/transport/upstream.rs b/src/transport/upstream.rs index eff05b8..edcf476 100644 --- a/src/transport/upstream.rs +++ b/src/transport/upstream.rs @@ -185,21 +185,82 @@ impl UpstreamManager { } } + #[cfg(unix)] + fn 
resolve_interface_addrs(name: &str, want_ipv6: bool) -> Vec { + use nix::ifaddrs::getifaddrs; + + let mut out = Vec::new(); + if let Ok(addrs) = getifaddrs() { + for iface in addrs { + if iface.interface_name != name { + continue; + } + if let Some(address) = iface.address { + if let Some(v4) = address.as_sockaddr_in() { + if !want_ipv6 { + out.push(IpAddr::V4(v4.ip())); + } + } else if let Some(v6) = address.as_sockaddr_in6() + && want_ipv6 + { + out.push(IpAddr::V6(v6.ip())); + } + } + } + } + out.sort_unstable(); + out.dedup(); + out + } + fn resolve_bind_address( interface: &Option, bind_addresses: &Option>, target: SocketAddr, rr: Option<&AtomicUsize>, + validate_ip_on_interface: bool, ) -> Option { let want_ipv6 = target.is_ipv6(); if let Some(addrs) = bind_addresses { - let candidates: Vec = addrs + let mut candidates: Vec = addrs .iter() .filter_map(|s| s.parse::().ok()) .filter(|ip| ip.is_ipv6() == want_ipv6) .collect(); + // Explicit bind IP has strict priority over interface auto-selection. 
+ if validate_ip_on_interface + && let Some(iface) = interface + && iface.parse::().is_err() + { + #[cfg(unix)] + { + let iface_addrs = Self::resolve_interface_addrs(iface, want_ipv6); + if !iface_addrs.is_empty() { + candidates.retain(|ip| { + let ok = iface_addrs.contains(ip); + if !ok { + warn!( + interface = %iface, + bind_ip = %ip, + target = %target, + "Configured bind address is not assigned to interface" + ); + } + ok + }); + } else if !candidates.is_empty() { + warn!( + interface = %iface, + target = %target, + "Configured interface has no addresses for target family; falling back to direct connect without bind" + ); + candidates.clear(); + } + } + } + if !candidates.is_empty() { if let Some(counter) = rr { let idx = counter.fetch_add(1, Ordering::Relaxed) % candidates.len(); @@ -207,6 +268,19 @@ impl UpstreamManager { } return candidates.first().copied(); } + + if validate_ip_on_interface + && interface + .as_ref() + .is_some_and(|iface| iface.parse::().is_err()) + { + warn!( + interface = interface.as_deref().unwrap_or(""), + target = %target, + "No valid bind_addresses left for interface; falling back to direct connect without bind" + ); + return None; + } } if let Some(iface) = interface { @@ -407,6 +481,7 @@ impl UpstreamManager { bind_addresses, target, bind_rr.as_deref(), + true, ); let socket = create_outgoing_socket_bound(target, bind_ip)?; @@ -461,6 +536,7 @@ impl UpstreamManager { &None, proxy_addr, bind_rr.as_deref(), + false, ); let socket = create_outgoing_socket_bound(proxy_addr, bind_ip)?; @@ -537,6 +613,7 @@ impl UpstreamManager { &None, proxy_addr, bind_rr.as_deref(), + false, ); let socket = create_outgoing_socket_bound(proxy_addr, bind_ip)?; From e0d5561095d7a3d990ec2c26e9c1933cba59643b Mon Sep 17 00:00:00 2001 From: Alexey <247128645+axkurcom@users.noreply.github.com> Date: Sat, 28 Feb 2026 02:19:19 +0300 Subject: [PATCH 04/10] TUNING.md --- docs/TUNING.de.md | 219 ++++++++++++++++++++++++++++++++++++++++++++++ docs/TUNING.en.md | 219 
++++++++++++++++++++++++++++++++++++++++++++++ docs/TUNING.ru.md | 219 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 657 insertions(+) create mode 100644 docs/TUNING.de.md create mode 100644 docs/TUNING.en.md create mode 100644 docs/TUNING.ru.md diff --git a/docs/TUNING.de.md b/docs/TUNING.de.md new file mode 100644 index 0000000..8c3c950 --- /dev/null +++ b/docs/TUNING.de.md @@ -0,0 +1,219 @@ +# Telemt Tuning-Leitfaden: Middle-End und Upstreams + +Dieses Dokument beschreibt das aktuelle Laufzeitverhalten für Middle-End (ME) und Upstream-Routing basierend auf: +- `src/config/types.rs` +- `src/config/defaults.rs` +- `src/config/load.rs` +- `src/transport/upstream.rs` + +Die unten angegebenen `Default`-Werte sind Code-Defaults (bei fehlendem Schlüssel), nicht zwingend die Werte aus `config.full.toml`. + +## Middle-End-Parameter + +### 1) ME-Grundmodus, NAT und STUN + +| Parameter | Typ | Default | Einschränkungen / Validierung | Laufzeiteffekt | Beispiel | +|---|---|---:|---|---|---| +| `general.use_middle_proxy` | `bool` | `true` | keine | Aktiviert den ME-Transportmodus. Bei `false` wird Direct-Modus verwendet. | `use_middle_proxy = true` | +| `general.proxy_secret_path` | `Option` | `"proxy-secret"` | Pfad kann `null` sein | Pfad zur Telegram-Infrastrukturdatei `proxy-secret`. | `proxy_secret_path = "proxy-secret"` | +| `general.middle_proxy_nat_ip` | `Option` | `null` | gültige IP bei gesetztem Wert | Manueller Override der öffentlichen NAT-IP für ME-Adressmaterial. | `middle_proxy_nat_ip = "203.0.113.10"` | +| `general.middle_proxy_nat_probe` | `bool` | `true` | wird auf `true` erzwungen, wenn `use_middle_proxy=true` | Aktiviert NAT-Probing für ME. | `middle_proxy_nat_probe = true` | +| `general.stun_nat_probe_concurrency` | `usize` | `8` | muss `> 0` sein | Maximale parallele STUN-Probes während NAT-Erkennung. | `stun_nat_probe_concurrency = 16` | +| `network.stun_use` | `bool` | `true` | keine | Globaler STUN-Schalter. 
Bei `false` wird STUN deaktiviert. | `stun_use = true` | +| `network.stun_servers` | `Vec` | integrierter öffentlicher Pool | Duplikate/leer werden entfernt | Primäre STUN-Serverliste für NAT/Public-Endpoint-Erkennung. | `stun_servers = ["stun1.l.google.com:19302"]` | +| `network.stun_tcp_fallback` | `bool` | `true` | keine | Aktiviert TCP-Fallback, wenn UDP-STUN blockiert ist. | `stun_tcp_fallback = true` | +| `network.http_ip_detect_urls` | `Vec` | `ifconfig.me` + `api.ipify.org` | keine | HTTP-Fallback zur öffentlichen IPv4-Erkennung, falls STUN ausfällt. | `http_ip_detect_urls = ["https://api.ipify.org"]` | +| `general.stun_iface_mismatch_ignore` | `bool` | `false` | keine | Reserviertes Feld in der aktuellen Revision (derzeit kein aktiver Runtime-Verbrauch). | `stun_iface_mismatch_ignore = false` | +| `timeouts.me_one_retry` | `u8` | `12` | keine | Anzahl schneller Reconnect-Versuche bei Single-Endpoint-DC-Fällen. | `me_one_retry = 6` | +| `timeouts.me_one_timeout_ms` | `u64` | `1200` | keine | Timeout pro schnellem Einzelversuch (ms). | `me_one_timeout_ms = 1500` | + +### 2) Poolgröße, Keepalive und Reconnect-Policy + +| Parameter | Typ | Default | Einschränkungen / Validierung | Laufzeiteffekt | Beispiel | +|---|---|---:|---|---|---| +| `general.middle_proxy_pool_size` | `usize` | `8` | keine | Zielgröße des aktiven ME-Writer-Pools. | `middle_proxy_pool_size = 12` | +| `general.middle_proxy_warm_standby` | `usize` | `16` | keine | Reserviertes Kompatibilitätsfeld in der aktuellen Revision (kein aktiver Runtime-Consumer). | `middle_proxy_warm_standby = 16` | +| `general.me_keepalive_enabled` | `bool` | `true` | keine | Aktiviert periodischen ME-Keepalive/Ping-Traffic. | `me_keepalive_enabled = true` | +| `general.me_keepalive_interval_secs` | `u64` | `25` | keine | Basisintervall für Keepalive (Sekunden). | `me_keepalive_interval_secs = 20` | +| `general.me_keepalive_jitter_secs` | `u64` | `5` | keine | Keepalive-Jitter zur Vermeidung synchroner Peaks. 
| `me_keepalive_jitter_secs = 3` | +| `general.me_keepalive_payload_random` | `bool` | `true` | keine | Randomisiert Keepalive-Payload-Bytes. | `me_keepalive_payload_random = true` | +| `general.me_warmup_stagger_enabled` | `bool` | `true` | keine | Aktiviert gestaffeltes Warmup zusätzlicher ME-Verbindungen. | `me_warmup_stagger_enabled = true` | +| `general.me_warmup_step_delay_ms` | `u64` | `500` | keine | Basisverzögerung zwischen Warmup-Schritten (ms). | `me_warmup_step_delay_ms = 300` | +| `general.me_warmup_step_jitter_ms` | `u64` | `300` | keine | Zusätzlicher zufälliger Warmup-Jitter (ms). | `me_warmup_step_jitter_ms = 200` | +| `general.me_reconnect_max_concurrent_per_dc` | `u32` | `8` | keine | Begrenzung paralleler Reconnect-Worker pro DC. | `me_reconnect_max_concurrent_per_dc = 12` | +| `general.me_reconnect_backoff_base_ms` | `u64` | `500` | keine | Initiales Reconnect-Backoff (ms). | `me_reconnect_backoff_base_ms = 250` | +| `general.me_reconnect_backoff_cap_ms` | `u64` | `30000` | keine | Maximales Reconnect-Backoff (ms). | `me_reconnect_backoff_cap_ms = 10000` | +| `general.me_reconnect_fast_retry_count` | `u32` | `16` | keine | Budget für Sofort-Retries vor längerem Backoff. | `me_reconnect_fast_retry_count = 8` | + +### 3) Reinit/Hardswap, Secret-Rotation und Degradation + +| Parameter | Typ | Default | Einschränkungen / Validierung | Laufzeiteffekt | Beispiel | +|---|---|---:|---|---|---| +| `general.hardswap` | `bool` | `true` | keine | Aktiviert generation-basierte Hardswap-Strategie für den ME-Pool. | `hardswap = true` | +| `general.me_reinit_every_secs` | `u64` | `900` | muss `> 0` sein | Intervall für periodische ME-Reinitialisierung. | `me_reinit_every_secs = 600` | +| `general.me_hardswap_warmup_delay_min_ms` | `u64` | `1000` | muss `<= me_hardswap_warmup_delay_max_ms` sein | Untere Grenze für Warmup-Dial-Abstände. 
| `me_hardswap_warmup_delay_min_ms = 500` | +| `general.me_hardswap_warmup_delay_max_ms` | `u64` | `2000` | muss `> 0` sein | Obere Grenze für Warmup-Dial-Abstände. | `me_hardswap_warmup_delay_max_ms = 1200` | +| `general.me_hardswap_warmup_extra_passes` | `u8` | `3` | Bereich `[0,10]` | Zusätzliche Warmup-Pässe nach dem Basispass. | `me_hardswap_warmup_extra_passes = 2` | +| `general.me_hardswap_warmup_pass_backoff_base_ms` | `u64` | `500` | muss `> 0` sein | Basis-Backoff zwischen zusätzlichen Warmup-Pässen. | `me_hardswap_warmup_pass_backoff_base_ms = 400` | +| `general.me_config_stable_snapshots` | `u8` | `2` | muss `> 0` sein | Anzahl identischer ME-Config-Snapshots vor Apply. | `me_config_stable_snapshots = 3` | +| `general.me_config_apply_cooldown_secs` | `u64` | `300` | keine | Cooldown zwischen angewendeten ME-Map-Updates. | `me_config_apply_cooldown_secs = 120` | +| `general.proxy_secret_stable_snapshots` | `u8` | `2` | muss `> 0` sein | Anzahl identischer Secret-Snapshots vor Rotation. | `proxy_secret_stable_snapshots = 3` | +| `general.proxy_secret_rotate_runtime` | `bool` | `true` | keine | Aktiviert Runtime-Rotation des Proxy-Secrets. | `proxy_secret_rotate_runtime = true` | +| `general.proxy_secret_len_max` | `usize` | `256` | Bereich `[32,4096]` | Obergrenze für akzeptierte Secret-Länge. | `proxy_secret_len_max = 512` | +| `general.update_every` | `Option` | `300` | wenn gesetzt: `> 0`; bei `null`: Legacy-Min-Fallback | Einheitliches Refresh-Intervall für ME-Config + Secret-Updater. | `update_every = 300` | +| `general.me_pool_drain_ttl_secs` | `u64` | `90` | keine | Zeitraum, in dem stale Writer noch als Fallback zulässig sind. | `me_pool_drain_ttl_secs = 120` | +| `general.me_pool_min_fresh_ratio` | `f32` | `0.8` | Bereich `[0.0,1.0]` | Coverage-Schwelle vor Drain der alten Generation. 
| `me_pool_min_fresh_ratio = 0.9` | +| `general.me_reinit_drain_timeout_secs` | `u64` | `120` | `0` = kein Force-Close; wenn `>0 && < TTL`, dann auf TTL angehoben | Force-Close-Timeout für draining stale Writer. | `me_reinit_drain_timeout_secs = 0` | +| `general.auto_degradation_enabled` | `bool` | `true` | keine | Reserviertes Kompatibilitätsfeld in aktueller Revision (kein aktiver Runtime-Consumer). | `auto_degradation_enabled = true` | +| `general.degradation_min_unavailable_dc_groups` | `u8` | `2` | keine | Reservierter Kompatibilitäts-Schwellenwert in aktueller Revision (kein aktiver Runtime-Consumer). | `degradation_min_unavailable_dc_groups = 2` | + +## Deprecated / Legacy Parameter + +| Parameter | Status | Ersatz | Aktuelles Verhalten | Migrationshinweis | +|---|---|---|---|---| +| `general.middle_proxy_nat_stun` | Deprecated | `network.stun_servers` | Wird nur dann in `network.stun_servers` gemerged, wenn `network.stun_servers` nicht explizit gesetzt ist. | Wert nach `network.stun_servers` verschieben, Legacy-Key entfernen. | +| `general.middle_proxy_nat_stun_servers` | Deprecated | `network.stun_servers` | Wird nur dann in `network.stun_servers` gemerged, wenn `network.stun_servers` nicht explizit gesetzt ist. | Werte nach `network.stun_servers` verschieben, Legacy-Key entfernen. | +| `general.proxy_secret_auto_reload_secs` | Deprecated | `general.update_every` | Nur aktiv, wenn `update_every = null` (Legacy-Fallback). | `general.update_every` explizit setzen, Legacy-Key entfernen. | +| `general.proxy_config_auto_reload_secs` | Deprecated | `general.update_every` | Nur aktiv, wenn `update_every = null` (Legacy-Fallback). | `general.update_every` explizit setzen, Legacy-Key entfernen. | + +## Wie Upstreams konfiguriert werden + +### Upstream-Schema + +| Feld | Gilt für | Typ | Pflicht | Default | Bedeutung | +|---|---|---|---|---|---| +| `[[upstreams]].type` | alle Upstreams | `"direct" \| "socks4" \| "socks5"` | ja | n/a | Upstream-Transporttyp. 
| 
+| `[[upstreams]].weight` | alle Upstreams | `u16` | nein | `1` | Basisgewicht für weighted-random Auswahl. | 
+| `[[upstreams]].enabled` | alle Upstreams | `bool` | nein | `true` | Deaktivierte Einträge werden beim Start ignoriert. | 
+| `[[upstreams]].scopes` | alle Upstreams | `String` | nein | `""` | Komma-separierte Scope-Tags für Request-Routing. | 
+| `interface` | `direct` | `Option<String>` | nein | `null` | Interface-Name (z. B. `eth0`) oder lokale Literal-IP. | 
+| `bind_addresses` | `direct` | `Option<Vec<String>>` | nein | `null` | Explizite Source-IP-Kandidaten (strikter Vorrang vor `interface`). | 
+| `address` | `socks4` | `String` | ja | n/a | SOCKS4-Server (`ip:port` oder `host:port`). | 
+| `interface` | `socks4` | `Option<String>` | nein | `null` | Wird nur genutzt, wenn `address` als `ip:port` angegeben ist. | 
+| `user_id` | `socks4` | `Option<String>` | nein | `null` | SOCKS4 User-ID für CONNECT. | 
+| `address` | `socks5` | `String` | ja | n/a | SOCKS5-Server (`ip:port` oder `host:port`). | 
+| `interface` | `socks5` | `Option<String>` | nein | `null` | Wird nur genutzt, wenn `address` als `ip:port` angegeben ist. | 
+| `username` | `socks5` | `Option<String>` | nein | `null` | SOCKS5 Benutzername. | 
+| `password` | `socks5` | `Option<String>` | nein | `null` | SOCKS5 Passwort. | 
+
+### Runtime-Regeln (wichtig)
+
+1. Wenn `[[upstreams]]` fehlt, injiziert der Loader einen Default-`direct`-Upstream. 
+2. Scope-Filterung basiert auf exaktem Token-Match: 
+- mit Request-Scope -> nur Einträge, deren `scopes` genau dieses Token enthält; 
+- ohne Request-Scope -> nur Einträge mit leerem `scopes`. 
+3. Unter healthy Upstreams erfolgt die Auswahl per weighted random: `weight * latency_factor`. 
+4. Gibt es im gefilterten Set keinen healthy Upstream, wird zufällig aus dem gefilterten Set gewählt. 
+5. 
`direct`-Bind-Auflösung: +- zuerst `bind_addresses` (nur gleiche IP-Familie wie Target); +- bei `interface` (Name) + `bind_addresses` wird jede Candidate-IP gegen Interface-Adressen validiert; +- ungültige Kandidaten werden mit `WARN` verworfen; +- bleiben keine gültigen Kandidaten übrig, erfolgt unbound direct connect (`bind_ip=None`); +- wenn `bind_addresses` nicht passt, wird `interface` verwendet (Literal-IP oder Interface-Primäradresse). +6. Für `socks4/socks5` mit Hostname-`address` ist Interface-Binding nicht unterstützt und wird mit Warnung ignoriert. +7. Runtime DNS Overrides werden für Hostname-Auflösung bei Upstream-Verbindungen genutzt. +8. Im ME-Modus wird der gewählte Upstream auch für den ME-TCP-Dial-Pfad verwendet. +9. Im ME-Modus ist bei `direct` mit bind/interface die STUN-Reflection bind-aware für KDF-Adressmaterial. +10. Im ME-Modus werden bei SOCKS-Upstream `BND.ADDR/BND.PORT` für KDF verwendet, wenn gültig/öffentlich und gleiche IP-Familie. + +## Upstream-Konfigurationsbeispiele + +### Beispiel 1: Minimaler direct Upstream + +```toml +[[upstreams]] +type = "direct" +weight = 1 +enabled = true +``` + +### Beispiel 2: direct mit Interface + expliziten bind IPs + +```toml +[[upstreams]] +type = "direct" +interface = "eth0" +bind_addresses = ["192.168.1.100", "192.168.1.101"] +weight = 3 +enabled = true +``` + +### Beispiel 3: SOCKS5 Upstream mit Authentifizierung + +```toml +[[upstreams]] +type = "socks5" +address = "198.51.100.30:1080" +username = "proxy-user" +password = "proxy-pass" +weight = 2 +enabled = true +``` + +### Beispiel 4: Gemischte Upstreams mit Scopes + +```toml +[[upstreams]] +type = "direct" +weight = 5 +enabled = true +scopes = "" + +[[upstreams]] +type = "socks5" +address = "203.0.113.40:1080" +username = "edge" +password = "edgepass" +weight = 3 +enabled = true +scopes = "premium,me" +``` + +### Beispiel 5: ME-orientiertes Tuning-Profil + +```toml +[general] +use_middle_proxy = true +proxy_secret_path = "proxy-secret" 
+middle_proxy_nat_probe = true +stun_nat_probe_concurrency = 16 +middle_proxy_pool_size = 12 +me_keepalive_enabled = true +me_keepalive_interval_secs = 20 +me_keepalive_jitter_secs = 4 +me_reconnect_max_concurrent_per_dc = 12 +me_reconnect_backoff_base_ms = 300 +me_reconnect_backoff_cap_ms = 10000 +me_reconnect_fast_retry_count = 10 +hardswap = true +me_reinit_every_secs = 600 +me_hardswap_warmup_delay_min_ms = 500 +me_hardswap_warmup_delay_max_ms = 1200 +me_hardswap_warmup_extra_passes = 2 +me_hardswap_warmup_pass_backoff_base_ms = 400 +me_config_stable_snapshots = 3 +me_config_apply_cooldown_secs = 120 +proxy_secret_stable_snapshots = 3 +proxy_secret_rotate_runtime = true +proxy_secret_len_max = 512 +update_every = 300 +me_pool_drain_ttl_secs = 120 +me_pool_min_fresh_ratio = 0.9 +me_reinit_drain_timeout_secs = 180 + +[timeouts] +me_one_retry = 8 +me_one_timeout_ms = 1200 + +[network] +stun_use = true +stun_tcp_fallback = true +stun_servers = [ + "stun1.l.google.com:19302", + "stun2.l.google.com:19302" +] +http_ip_detect_urls = [ + "https://api.ipify.org", + "https://ifconfig.me/ip" +] +``` diff --git a/docs/TUNING.en.md b/docs/TUNING.en.md new file mode 100644 index 0000000..1bbc439 --- /dev/null +++ b/docs/TUNING.en.md @@ -0,0 +1,219 @@ +# Telemt Tuning Guide: Middle-End and Upstreams + +This document describes the current runtime behavior for Middle-End (ME) and upstream routing based on: +- `src/config/types.rs` +- `src/config/defaults.rs` +- `src/config/load.rs` +- `src/transport/upstream.rs` + +Defaults below are code defaults (used when a key is omitted), not necessarily values from `config.full.toml` examples. + +## Middle-End Parameters + +### 1) Core ME mode, NAT, and STUN + +| Parameter | Type | Default | Constraints / validation | Runtime effect | Example | +|---|---|---:|---|---|---| +| `general.use_middle_proxy` | `bool` | `true` | none | Enables ME transport mode. If `false`, Direct mode is used. 
| `use_middle_proxy = true` | +| `general.proxy_secret_path` | `Option` | `"proxy-secret"` | path may be `null` | Path to Telegram infrastructure proxy-secret file. | `proxy_secret_path = "proxy-secret"` | +| `general.middle_proxy_nat_ip` | `Option` | `null` | valid IP when set | Manual public NAT IP override for ME address material. | `middle_proxy_nat_ip = "203.0.113.10"` | +| `general.middle_proxy_nat_probe` | `bool` | `true` | auto-forced to `true` when `use_middle_proxy=true` | Enables ME NAT probing. | `middle_proxy_nat_probe = true` | +| `general.stun_nat_probe_concurrency` | `usize` | `8` | must be `> 0` | Max parallel STUN probes during NAT discovery. | `stun_nat_probe_concurrency = 16` | +| `network.stun_use` | `bool` | `true` | none | Global STUN switch. If `false`, STUN probing is disabled. | `stun_use = true` | +| `network.stun_servers` | `Vec` | built-in public pool | deduplicated + empty values removed | Primary STUN server list for NAT/public endpoint discovery. | `stun_servers = ["stun1.l.google.com:19302"]` | +| `network.stun_tcp_fallback` | `bool` | `true` | none | Enables TCP fallback path when UDP STUN is blocked. | `stun_tcp_fallback = true` | +| `network.http_ip_detect_urls` | `Vec` | `ifconfig.me` + `api.ipify.org` | none | HTTP fallback for public IPv4 detection if STUN is unavailable. | `http_ip_detect_urls = ["https://api.ipify.org"]` | +| `general.stun_iface_mismatch_ignore` | `bool` | `false` | none | Reserved flag in current revision (not consumed by runtime path). | `stun_iface_mismatch_ignore = false` | +| `timeouts.me_one_retry` | `u8` | `12` | none | Fast reconnect attempts for single-endpoint DC cases. | `me_one_retry = 6` | +| `timeouts.me_one_timeout_ms` | `u64` | `1200` | none | Timeout per quick single-endpoint attempt (ms). 
| `me_one_timeout_ms = 1500` | + +### 2) Pool size, keepalive, and reconnect policy + +| Parameter | Type | Default | Constraints / validation | Runtime effect | Example | +|---|---|---:|---|---|---| +| `general.middle_proxy_pool_size` | `usize` | `8` | none | Target active ME writer pool size. | `middle_proxy_pool_size = 12` | +| `general.middle_proxy_warm_standby` | `usize` | `16` | none | Reserved compatibility field in current revision (no active runtime consumer). | `middle_proxy_warm_standby = 16` | +| `general.me_keepalive_enabled` | `bool` | `true` | none | Enables periodic ME keepalive/ping traffic. | `me_keepalive_enabled = true` | +| `general.me_keepalive_interval_secs` | `u64` | `25` | none | Base keepalive interval (seconds). | `me_keepalive_interval_secs = 20` | +| `general.me_keepalive_jitter_secs` | `u64` | `5` | none | Keepalive jitter to avoid synchronization bursts. | `me_keepalive_jitter_secs = 3` | +| `general.me_keepalive_payload_random` | `bool` | `true` | none | Randomizes keepalive payload bytes. | `me_keepalive_payload_random = true` | +| `general.me_warmup_stagger_enabled` | `bool` | `true` | none | Staggers extra ME warmup dials to avoid spikes. | `me_warmup_stagger_enabled = true` | +| `general.me_warmup_step_delay_ms` | `u64` | `500` | none | Base delay between warmup dial steps (ms). | `me_warmup_step_delay_ms = 300` | +| `general.me_warmup_step_jitter_ms` | `u64` | `300` | none | Additional random delay for warmup steps (ms). | `me_warmup_step_jitter_ms = 200` | +| `general.me_reconnect_max_concurrent_per_dc` | `u32` | `8` | none | Limits concurrent reconnect workers per DC in health recovery. | `me_reconnect_max_concurrent_per_dc = 12` | +| `general.me_reconnect_backoff_base_ms` | `u64` | `500` | none | Initial reconnect backoff (ms). | `me_reconnect_backoff_base_ms = 250` | +| `general.me_reconnect_backoff_cap_ms` | `u64` | `30000` | none | Maximum reconnect backoff (ms). 
| `me_reconnect_backoff_cap_ms = 10000` | +| `general.me_reconnect_fast_retry_count` | `u32` | `16` | none | Immediate retry budget before long backoff behavior. | `me_reconnect_fast_retry_count = 8` | + +### 3) Reinit/hardswap, secret rotation, and degradation + +| Parameter | Type | Default | Constraints / validation | Runtime effect | Example | +|---|---|---:|---|---|---| +| `general.hardswap` | `bool` | `true` | none | Enables generation-based ME hardswap strategy. | `hardswap = true` | +| `general.me_reinit_every_secs` | `u64` | `900` | must be `> 0` | Periodic ME reinit interval. | `me_reinit_every_secs = 600` | +| `general.me_hardswap_warmup_delay_min_ms` | `u64` | `1000` | must be `<= me_hardswap_warmup_delay_max_ms` | Lower bound for hardswap warmup dial spacing. | `me_hardswap_warmup_delay_min_ms = 500` | +| `general.me_hardswap_warmup_delay_max_ms` | `u64` | `2000` | must be `> 0` | Upper bound for hardswap warmup dial spacing. | `me_hardswap_warmup_delay_max_ms = 1200` | +| `general.me_hardswap_warmup_extra_passes` | `u8` | `3` | must be within `[0,10]` | Additional warmup passes after base pass. | `me_hardswap_warmup_extra_passes = 2` | +| `general.me_hardswap_warmup_pass_backoff_base_ms` | `u64` | `500` | must be `> 0` | Base backoff between extra warmup passes. | `me_hardswap_warmup_pass_backoff_base_ms = 400` | +| `general.me_config_stable_snapshots` | `u8` | `2` | must be `> 0` | Number of identical ME config snapshots required before apply. | `me_config_stable_snapshots = 3` | +| `general.me_config_apply_cooldown_secs` | `u64` | `300` | none | Cooldown between applied ME map updates. | `me_config_apply_cooldown_secs = 120` | +| `general.proxy_secret_stable_snapshots` | `u8` | `2` | must be `> 0` | Number of identical proxy-secret snapshots required before rotation. | `proxy_secret_stable_snapshots = 3` | +| `general.proxy_secret_rotate_runtime` | `bool` | `true` | none | Enables runtime proxy-secret rotation. 
| `proxy_secret_rotate_runtime = true` | +| `general.proxy_secret_len_max` | `usize` | `256` | must be within `[32,4096]` | Upper limit for accepted proxy-secret length. | `proxy_secret_len_max = 512` | +| `general.update_every` | `Option` | `300` | if set: must be `> 0`; if `null`: legacy min fallback | Unified refresh interval for ME config + secret updater. | `update_every = 300` | +| `general.me_pool_drain_ttl_secs` | `u64` | `90` | none | Time window where stale writers remain fallback-eligible. | `me_pool_drain_ttl_secs = 120` | +| `general.me_pool_min_fresh_ratio` | `f32` | `0.8` | must be within `[0.0,1.0]` | Coverage threshold before stale generation can be drained. | `me_pool_min_fresh_ratio = 0.9` | +| `general.me_reinit_drain_timeout_secs` | `u64` | `120` | `0` means no force-close; if `>0 && < TTL` it is bumped to TTL | Force-close timeout for draining stale writers. | `me_reinit_drain_timeout_secs = 0` | +| `general.auto_degradation_enabled` | `bool` | `true` | none | Reserved compatibility flag in current revision (no active runtime consumer). | `auto_degradation_enabled = true` | +| `general.degradation_min_unavailable_dc_groups` | `u8` | `2` | none | Reserved compatibility threshold in current revision (no active runtime consumer). | `degradation_min_unavailable_dc_groups = 2` | + +## Deprecated / Legacy Parameters + +| Parameter | Status | Replacement | Current behavior | Migration recommendation | +|---|---|---|---|---| +| `general.middle_proxy_nat_stun` | Deprecated | `network.stun_servers` | Merged into `network.stun_servers` only when `network.stun_servers` is not explicitly set. | Move value into `network.stun_servers` and remove legacy key. | +| `general.middle_proxy_nat_stun_servers` | Deprecated | `network.stun_servers` | Merged into `network.stun_servers` only when `network.stun_servers` is not explicitly set. | Move values into `network.stun_servers` and remove legacy key. 
| 
+| `general.proxy_secret_auto_reload_secs` | Deprecated | `general.update_every` | Used only when `update_every = null` (legacy fallback path). | Set `general.update_every` explicitly and remove legacy key. | 
+| `general.proxy_config_auto_reload_secs` | Deprecated | `general.update_every` | Used only when `update_every = null` (legacy fallback path). | Set `general.update_every` explicitly and remove legacy key. | 
+
+## How Upstreams Are Configured
+
+### Upstream schema
+
+| Field | Applies to | Type | Required | Default | Meaning |
+|---|---|---|---|---|---|
+| `[[upstreams]].type` | all upstreams | `"direct" \| "socks4" \| "socks5"` | yes | n/a | Upstream transport type. | 
+| `[[upstreams]].weight` | all upstreams | `u16` | no | `1` | Base weight for weighted-random selection. | 
+| `[[upstreams]].enabled` | all upstreams | `bool` | no | `true` | Disabled entries are ignored at startup. | 
+| `[[upstreams]].scopes` | all upstreams | `String` | no | `""` | Comma-separated scope tags for request-level routing. | 
+| `interface` | `direct` | `Option<String>` | no | `null` | Interface name (e.g. `eth0`) or literal local IP for bind selection. | 
+| `bind_addresses` | `direct` | `Option<Vec<String>>` | no | `null` | Explicit local source IP candidates (strict priority over `interface`). | 
+| `address` | `socks4` | `String` | yes | n/a | SOCKS4 server endpoint (`ip:port` or `host:port`). | 
+| `interface` | `socks4` | `Option<String>` | no | `null` | Used only for SOCKS server `ip:port` dial path. | 
+| `user_id` | `socks4` | `Option<String>` | no | `null` | SOCKS4 user ID for CONNECT request. | 
+| `address` | `socks5` | `String` | yes | n/a | SOCKS5 server endpoint (`ip:port` or `host:port`). | 
+| `interface` | `socks5` | `Option<String>` | no | `null` | Used only for SOCKS server `ip:port` dial path. | 
+| `username` | `socks5` | `Option<String>` | no | `null` | SOCKS5 username auth. | 
+| `password` | `socks5` | `Option<String>` | no | `null` | SOCKS5 password auth. | 
+
+### Runtime rules (important)
+
+1. 
If `[[upstreams]]` is omitted, loader injects one default `direct` upstream. +2. Scope filtering is exact-token based: +- when request scope is set -> only entries whose `scopes` contains that exact token; +- when request scope is not set -> only entries with empty `scopes`. +3. Healthy upstreams are selected by weighted random using: `weight * latency_factor`. +4. If no healthy upstream exists in filtered set, random selection is used among filtered entries. +5. `direct` bind resolution order: +- `bind_addresses` candidates (same IP family as target) first; +- if `interface` is an interface name and `bind_addresses` is set, each candidate IP is validated against addresses currently assigned to that interface; +- invalid candidates are dropped with `WARN`; +- if no valid candidate remains, connection falls back to unbound direct connect (`bind_ip=None`); +- if no `bind_addresses` candidate, `interface` is used (literal IP or resolved interface primary IP). +6. For `socks4/socks5` with `address` as hostname, interface binding is not supported and is ignored with warning. +7. Runtime DNS overrides are used for upstream hostname resolution. +8. In ME mode, the selected upstream is also used for ME TCP dial path. +9. In ME mode for `direct` upstream with bind/interface, STUN reflection logic is bind-aware for KDF source material. +10. In ME mode for SOCKS upstream, SOCKS `BND.ADDR/BND.PORT` is used for KDF when it is valid/public for the same family. 
+ +## Upstream Configuration Examples + +### Example 1: Minimal direct upstream + +```toml +[[upstreams]] +type = "direct" +weight = 1 +enabled = true +``` + +### Example 2: Direct with interface + explicit bind addresses + +```toml +[[upstreams]] +type = "direct" +interface = "eth0" +bind_addresses = ["192.168.1.100", "192.168.1.101"] +weight = 3 +enabled = true +``` + +### Example 3: SOCKS5 upstream with authentication + +```toml +[[upstreams]] +type = "socks5" +address = "198.51.100.30:1080" +username = "proxy-user" +password = "proxy-pass" +weight = 2 +enabled = true +``` + +### Example 4: Mixed upstreams with scopes + +```toml +[[upstreams]] +type = "direct" +weight = 5 +enabled = true +scopes = "" + +[[upstreams]] +type = "socks5" +address = "203.0.113.40:1080" +username = "edge" +password = "edgepass" +weight = 3 +enabled = true +scopes = "premium,me" +``` + +### Example 5: ME-focused tuning profile + +```toml +[general] +use_middle_proxy = true +proxy_secret_path = "proxy-secret" +middle_proxy_nat_probe = true +stun_nat_probe_concurrency = 16 +middle_proxy_pool_size = 12 +me_keepalive_enabled = true +me_keepalive_interval_secs = 20 +me_keepalive_jitter_secs = 4 +me_reconnect_max_concurrent_per_dc = 12 +me_reconnect_backoff_base_ms = 300 +me_reconnect_backoff_cap_ms = 10000 +me_reconnect_fast_retry_count = 10 +hardswap = true +me_reinit_every_secs = 600 +me_hardswap_warmup_delay_min_ms = 500 +me_hardswap_warmup_delay_max_ms = 1200 +me_hardswap_warmup_extra_passes = 2 +me_hardswap_warmup_pass_backoff_base_ms = 400 +me_config_stable_snapshots = 3 +me_config_apply_cooldown_secs = 120 +proxy_secret_stable_snapshots = 3 +proxy_secret_rotate_runtime = true +proxy_secret_len_max = 512 +update_every = 300 +me_pool_drain_ttl_secs = 120 +me_pool_min_fresh_ratio = 0.9 +me_reinit_drain_timeout_secs = 180 + +[timeouts] +me_one_retry = 8 +me_one_timeout_ms = 1200 + +[network] +stun_use = true +stun_tcp_fallback = true +stun_servers = [ + "stun1.l.google.com:19302", + 
"stun2.l.google.com:19302" +] +http_ip_detect_urls = [ + "https://api.ipify.org", + "https://ifconfig.me/ip" +] +``` diff --git a/docs/TUNING.ru.md b/docs/TUNING.ru.md new file mode 100644 index 0000000..48a2b6c --- /dev/null +++ b/docs/TUNING.ru.md @@ -0,0 +1,219 @@ +# Руководство по тюнингу Telemt: Middle-End и Upstreams + +Документ описывает актуальное поведение Middle-End (ME) и маршрутизации через upstream на основе: +- `src/config/types.rs` +- `src/config/defaults.rs` +- `src/config/load.rs` +- `src/transport/upstream.rs` + +Значения `Default` ниже — это значения из кода при отсутствии ключа в конфиге, а не обязательно значения из примеров `config.full.toml`. + +## Параметры Middle-End + +### 1) Базовый режим ME, NAT и STUN + +| Параметр | Тип | Default | Ограничения / валидация | Влияние на runtime | Пример | +|---|---|---:|---|---|---| +| `general.use_middle_proxy` | `bool` | `true` | нет | Включает транспорт ME. При `false` используется Direct-режим. | `use_middle_proxy = true` | +| `general.proxy_secret_path` | `Option` | `"proxy-secret"` | путь может быть `null` | Путь к инфраструктурному proxy-secret Telegram. | `proxy_secret_path = "proxy-secret"` | +| `general.middle_proxy_nat_ip` | `Option` | `null` | валидный IP при задании | Ручной override публичного NAT IP для адресного материала ME. | `middle_proxy_nat_ip = "203.0.113.10"` | +| `general.middle_proxy_nat_probe` | `bool` | `true` | авто-принудительно `true`, если `use_middle_proxy=true` | Включает NAT probing для ME. | `middle_proxy_nat_probe = true` | +| `general.stun_nat_probe_concurrency` | `usize` | `8` | должно быть `> 0` | Максимум параллельных STUN-проб при NAT-детекте. | `stun_nat_probe_concurrency = 16` | +| `network.stun_use` | `bool` | `true` | нет | Глобальный переключатель STUN. При `false` STUN отключен. 
| `stun_use = true` | +| `network.stun_servers` | `Vec` | встроенный публичный пул | удаляются дубликаты и пустые значения | Основной список STUN-серверов для NAT/public endpoint discovery. | `stun_servers = ["stun1.l.google.com:19302"]` | +| `network.stun_tcp_fallback` | `bool` | `true` | нет | Включает TCP fallback, если UDP STUN недоступен. | `stun_tcp_fallback = true` | +| `network.http_ip_detect_urls` | `Vec` | `ifconfig.me` + `api.ipify.org` | нет | HTTP fallback для определения публичного IPv4 при недоступности STUN. | `http_ip_detect_urls = ["https://api.ipify.org"]` | +| `general.stun_iface_mismatch_ignore` | `bool` | `false` | нет | Зарезервированный флаг в текущей ревизии (runtime его не использует). | `stun_iface_mismatch_ignore = false` | +| `timeouts.me_one_retry` | `u8` | `12` | нет | Количество быстрых reconnect-попыток для DC с одним endpoint. | `me_one_retry = 6` | +| `timeouts.me_one_timeout_ms` | `u64` | `1200` | нет | Таймаут одной быстрой попытки (мс). | `me_one_timeout_ms = 1500` | + +### 2) Размер пула, keepalive и reconnect-политика + +| Параметр | Тип | Default | Ограничения / валидация | Влияние на runtime | Пример | +|---|---|---:|---|---|---| +| `general.middle_proxy_pool_size` | `usize` | `8` | нет | Целевой размер активного пула ME-writer соединений. | `middle_proxy_pool_size = 12` | +| `general.middle_proxy_warm_standby` | `usize` | `16` | нет | Зарезервированное поле совместимости в текущей ревизии (активного runtime-consumer нет). | `middle_proxy_warm_standby = 16` | +| `general.me_keepalive_enabled` | `bool` | `true` | нет | Включает периодические keepalive/ping кадры ME. | `me_keepalive_enabled = true` | +| `general.me_keepalive_interval_secs` | `u64` | `25` | нет | Базовый интервал keepalive (сек). | `me_keepalive_interval_secs = 20` | +| `general.me_keepalive_jitter_secs` | `u64` | `5` | нет | Джиттер keepalive для предотвращения синхронных всплесков. 
| `me_keepalive_jitter_secs = 3` | +| `general.me_keepalive_payload_random` | `bool` | `true` | нет | Рандомизирует payload keepalive-кадров. | `me_keepalive_payload_random = true` | +| `general.me_warmup_stagger_enabled` | `bool` | `true` | нет | Включает staggered warmup дополнительных ME-коннектов. | `me_warmup_stagger_enabled = true` | +| `general.me_warmup_step_delay_ms` | `u64` | `500` | нет | Базовая задержка между шагами warmup (мс). | `me_warmup_step_delay_ms = 300` | +| `general.me_warmup_step_jitter_ms` | `u64` | `300` | нет | Дополнительный случайный warmup-джиттер (мс). | `me_warmup_step_jitter_ms = 200` | +| `general.me_reconnect_max_concurrent_per_dc` | `u32` | `8` | нет | Ограничивает параллельные reconnect worker'ы на один DC. | `me_reconnect_max_concurrent_per_dc = 12` | +| `general.me_reconnect_backoff_base_ms` | `u64` | `500` | нет | Начальный backoff reconnect (мс). | `me_reconnect_backoff_base_ms = 250` | +| `general.me_reconnect_backoff_cap_ms` | `u64` | `30000` | нет | Верхняя граница backoff reconnect (мс). | `me_reconnect_backoff_cap_ms = 10000` | +| `general.me_reconnect_fast_retry_count` | `u32` | `16` | нет | Бюджет быстрых retry до длинного backoff. | `me_reconnect_fast_retry_count = 8` | + +### 3) Reinit/hardswap, ротация секрета и деградация + +| Параметр | Тип | Default | Ограничения / валидация | Влияние на runtime | Пример | +|---|---|---:|---|---|---| +| `general.hardswap` | `bool` | `true` | нет | Включает generation-based стратегию hardswap для ME-пула. | `hardswap = true` | +| `general.me_reinit_every_secs` | `u64` | `900` | должно быть `> 0` | Интервал периодического reinit ME-пула. | `me_reinit_every_secs = 600` | +| `general.me_hardswap_warmup_delay_min_ms` | `u64` | `1000` | должно быть `<= me_hardswap_warmup_delay_max_ms` | Нижняя граница пауз между warmup dial попытками. 
| `me_hardswap_warmup_delay_min_ms = 500` | +| `general.me_hardswap_warmup_delay_max_ms` | `u64` | `2000` | должно быть `> 0` | Верхняя граница пауз между warmup dial попытками. | `me_hardswap_warmup_delay_max_ms = 1200` | +| `general.me_hardswap_warmup_extra_passes` | `u8` | `3` | диапазон `[0,10]` | Дополнительные warmup-проходы после базового. | `me_hardswap_warmup_extra_passes = 2` | +| `general.me_hardswap_warmup_pass_backoff_base_ms` | `u64` | `500` | должно быть `> 0` | Базовый backoff между extra-pass в warmup. | `me_hardswap_warmup_pass_backoff_base_ms = 400` | +| `general.me_config_stable_snapshots` | `u8` | `2` | должно быть `> 0` | Количество одинаковых snapshot перед применением ME map update. | `me_config_stable_snapshots = 3` | +| `general.me_config_apply_cooldown_secs` | `u64` | `300` | нет | Cooldown между применёнными обновлениями ME map. | `me_config_apply_cooldown_secs = 120` | +| `general.proxy_secret_stable_snapshots` | `u8` | `2` | должно быть `> 0` | Количество одинаковых snapshot перед runtime-rotation proxy-secret. | `proxy_secret_stable_snapshots = 3` | +| `general.proxy_secret_rotate_runtime` | `bool` | `true` | нет | Включает runtime-ротацию proxy-secret. | `proxy_secret_rotate_runtime = true` | +| `general.proxy_secret_len_max` | `usize` | `256` | диапазон `[32,4096]` | Верхний лимит длины принимаемого proxy-secret. | `proxy_secret_len_max = 512` | +| `general.update_every` | `Option` | `300` | если задано: `> 0`; если `null`: fallback на legacy минимум | Единый интервал refresh для ME config + secret updater. | `update_every = 300` | +| `general.me_pool_drain_ttl_secs` | `u64` | `90` | нет | Время, когда stale writer ещё может использоваться как fallback. | `me_pool_drain_ttl_secs = 120` | +| `general.me_pool_min_fresh_ratio` | `f32` | `0.8` | диапазон `[0.0,1.0]` | Порог покрытия fresh-поколения перед drain старого поколения. 
| `me_pool_min_fresh_ratio = 0.9` | +| `general.me_reinit_drain_timeout_secs` | `u64` | `120` | `0` = без force-close; если `>0 && < TTL`, поднимается до TTL | Таймаут force-close для draining stale writer. | `me_reinit_drain_timeout_secs = 0` | +| `general.auto_degradation_enabled` | `bool` | `true` | нет | Зарезервированный флаг совместимости в текущей ревизии (активного runtime-consumer нет). | `auto_degradation_enabled = true` | +| `general.degradation_min_unavailable_dc_groups` | `u8` | `2` | нет | Зарезервированный порог совместимости в текущей ревизии (активного runtime-consumer нет). | `degradation_min_unavailable_dc_groups = 2` | + +## Устаревшие / legacy параметры + +| Параметр | Статус | Замена | Текущее поведение | Рекомендация миграции | +|---|---|---|---|---| +| `general.middle_proxy_nat_stun` | Deprecated | `network.stun_servers` | Добавляется в `network.stun_servers`, только если `network.stun_servers` не задан явно. | Перенести значение в `network.stun_servers`, legacy-ключ удалить. | +| `general.middle_proxy_nat_stun_servers` | Deprecated | `network.stun_servers` | Добавляется в `network.stun_servers`, только если `network.stun_servers` не задан явно. | Перенести значения в `network.stun_servers`, legacy-ключ удалить. | +| `general.proxy_secret_auto_reload_secs` | Deprecated | `general.update_every` | Используется только если `update_every = null` (legacy fallback). | Явно задать `general.update_every`, legacy-ключ удалить. | +| `general.proxy_config_auto_reload_secs` | Deprecated | `general.update_every` | Используется только если `update_every = null` (legacy fallback). | Явно задать `general.update_every`, legacy-ключ удалить. | + +## Как конфигурируются Upstreams + +### Схема upstream + +| Поле | Применимость | Тип | Обязательно | Default | Назначение | +|---|---|---|---|---|---| +| `[[upstreams]].type` | все upstream | `"direct" \| "socks4" \| "socks5"` | да | n/a | Тип upstream транспорта. 
| +| `[[upstreams]].weight` | все upstream | `u16` | нет | `1` | Базовый вес в weighted-random выборе. | +| `[[upstreams]].enabled` | все upstream | `bool` | нет | `true` | Выключенные записи игнорируются на старте. | +| `[[upstreams]].scopes` | все upstream | `String` | нет | `""` | Список scope-токенов через запятую для маршрутизации. | +| `interface` | `direct` | `Option` | нет | `null` | Имя интерфейса (например `eth0`) или literal локальный IP. | +| `bind_addresses` | `direct` | `Option>` | нет | `null` | Явные кандидаты source IP (имеют приоритет над `interface`). | +| `address` | `socks4` | `String` | да | n/a | Адрес SOCKS4 сервера (`ip:port` или `host:port`). | +| `interface` | `socks4` | `Option` | нет | `null` | Используется только если `address` задан как `ip:port`. | +| `user_id` | `socks4` | `Option` | нет | `null` | SOCKS4 user ID в CONNECT-запросе. | +| `address` | `socks5` | `String` | да | n/a | Адрес SOCKS5 сервера (`ip:port` или `host:port`). | +| `interface` | `socks5` | `Option` | нет | `null` | Используется только если `address` задан как `ip:port`. | +| `username` | `socks5` | `Option` | нет | `null` | Логин SOCKS5 auth. | +| `password` | `socks5` | `Option` | нет | `null` | Пароль SOCKS5 auth. | + +### Runtime-правила (важно) + +1. Если `[[upstreams]]` отсутствует, loader добавляет один upstream `direct` по умолчанию. +2. Scope-фильтрация — по точному совпадению токена: +- если scope запроса задан -> используются только записи, где `scopes` содержит такой же токен; +- если scope запроса не задан -> используются только записи с пустым `scopes`. +3. Среди healthy upstream используется weighted-random выбор: `weight * latency_factor`. +4. Если в отфильтрованном наборе нет healthy upstream, выбирается случайный из отфильтрованных. +5. 
Порядок выбора bind для `direct`: +- сначала `bind_addresses` (только IP нужного семейства); +- если одновременно заданы `interface` (имя) и `bind_addresses`, каждый IP проверяется на принадлежность интерфейсу; +- несовпадающие IP отбрасываются с `WARN`; +- если валидных IP не осталось, используется unbound direct connect (`bind_ip=None`); +- если `bind_addresses` не подходит, применяется `interface` (literal IP или адрес интерфейса). +6. Для `socks4/socks5` с `address` в виде hostname интерфейсный bind не поддерживается и игнорируется с предупреждением. +7. Runtime DNS overrides применяются к резолвингу hostname в upstream-подключениях. +8. В ME-режиме выбранный upstream также используется для ME TCP dial path. +9. В ME-режиме для `direct` upstream с bind/interface STUN-рефлексия выполняется bind-aware для KDF материала. +10. В ME-режиме для SOCKS upstream используются `BND.ADDR/BND.PORT` для KDF, если адрес валиден/публичен и соответствует IP family. + +## Примеры конфигурации Upstreams + +### Пример 1: минимальный direct upstream + +```toml +[[upstreams]] +type = "direct" +weight = 1 +enabled = true +``` + +### Пример 2: direct с interface + явными bind IP + +```toml +[[upstreams]] +type = "direct" +interface = "eth0" +bind_addresses = ["192.168.1.100", "192.168.1.101"] +weight = 3 +enabled = true +``` + +### Пример 3: SOCKS5 upstream с аутентификацией + +```toml +[[upstreams]] +type = "socks5" +address = "198.51.100.30:1080" +username = "proxy-user" +password = "proxy-pass" +weight = 2 +enabled = true +``` + +### Пример 4: смешанные upstream с scopes + +```toml +[[upstreams]] +type = "direct" +weight = 5 +enabled = true +scopes = "" + +[[upstreams]] +type = "socks5" +address = "203.0.113.40:1080" +username = "edge" +password = "edgepass" +weight = 3 +enabled = true +scopes = "premium,me" +``` + +### Пример 5: профиль тюнинга под ME + +```toml +[general] +use_middle_proxy = true +proxy_secret_path = "proxy-secret" +middle_proxy_nat_probe = true 
+stun_nat_probe_concurrency = 16 +middle_proxy_pool_size = 12 +me_keepalive_enabled = true +me_keepalive_interval_secs = 20 +me_keepalive_jitter_secs = 4 +me_reconnect_max_concurrent_per_dc = 12 +me_reconnect_backoff_base_ms = 300 +me_reconnect_backoff_cap_ms = 10000 +me_reconnect_fast_retry_count = 10 +hardswap = true +me_reinit_every_secs = 600 +me_hardswap_warmup_delay_min_ms = 500 +me_hardswap_warmup_delay_max_ms = 1200 +me_hardswap_warmup_extra_passes = 2 +me_hardswap_warmup_pass_backoff_base_ms = 400 +me_config_stable_snapshots = 3 +me_config_apply_cooldown_secs = 120 +proxy_secret_stable_snapshots = 3 +proxy_secret_rotate_runtime = true +proxy_secret_len_max = 512 +update_every = 300 +me_pool_drain_ttl_secs = 120 +me_pool_min_fresh_ratio = 0.9 +me_reinit_drain_timeout_secs = 180 + +[timeouts] +me_one_retry = 8 +me_one_timeout_ms = 1200 + +[network] +stun_use = true +stun_tcp_fallback = true +stun_servers = [ + "stun1.l.google.com:19302", + "stun2.l.google.com:19302" +] +http_ip_detect_urls = [ + "https://api.ipify.org", + "https://ifconfig.me/ip" +] +``` From bf11ebbaa36f601a2cc187b2b4844e773757ade7 Mon Sep 17 00:00:00 2001 From: Alexey <247128645+axkurcom@users.noreply.github.com> Date: Sat, 28 Feb 2026 02:23:34 +0300 Subject: [PATCH 05/10] Update TUNING.ru.md --- docs/TUNING.ru.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/TUNING.ru.md b/docs/TUNING.ru.md index 48a2b6c..6ea4d69 100644 --- a/docs/TUNING.ru.md +++ b/docs/TUNING.ru.md @@ -96,7 +96,7 @@ | `username` | `socks5` | `Option` | нет | `null` | Логин SOCKS5 auth. | | `password` | `socks5` | `Option` | нет | `null` | Пароль SOCKS5 auth. | -### Runtime-правила (важно) +### Runtime-правила 1. Если `[[upstreams]]` отсутствует, loader добавляет один upstream `direct` по умолчанию. 2. 
Scope-фильтрация — по точному совпадению токена: From a61882af6e4f7b6f5849043a589436d294f472d5 Mon Sep 17 00:00:00 2001 From: Alexey <247128645+axkurcom@users.noreply.github.com> Date: Sat, 28 Feb 2026 02:55:21 +0300 Subject: [PATCH 06/10] TLS Fetch on unix-socket --- src/main.rs | 7 ++ src/tls_front/fetcher.rs | 201 ++++++++++++++++++++++++++++++++------- 2 files changed, 171 insertions(+), 37 deletions(-) diff --git a/src/main.rs b/src/main.rs index b065d4e..e759095 100644 --- a/src/main.rs +++ b/src/main.rs @@ -285,17 +285,20 @@ async fn main() -> std::result::Result<(), Box> { .mask_host .clone() .unwrap_or_else(|| config.censorship.tls_domain.clone()); + let mask_unix_sock = config.censorship.mask_unix_sock.clone(); let fetch_timeout = Duration::from_secs(5); let cache_initial = cache.clone(); let domains_initial = tls_domains.clone(); let host_initial = mask_host.clone(); + let unix_sock_initial = mask_unix_sock.clone(); let upstream_initial = upstream_manager.clone(); tokio::spawn(async move { let mut join = tokio::task::JoinSet::new(); for domain in domains_initial { let cache_domain = cache_initial.clone(); let host_domain = host_initial.clone(); + let unix_sock_domain = unix_sock_initial.clone(); let upstream_domain = upstream_initial.clone(); join.spawn(async move { match crate::tls_front::fetcher::fetch_real_tls( @@ -305,6 +308,7 @@ async fn main() -> std::result::Result<(), Box> { fetch_timeout, Some(upstream_domain), proxy_protocol, + unix_sock_domain.as_deref(), ) .await { @@ -344,6 +348,7 @@ async fn main() -> std::result::Result<(), Box> { let cache_refresh = cache.clone(); let domains_refresh = tls_domains.clone(); let host_refresh = mask_host.clone(); + let unix_sock_refresh = mask_unix_sock.clone(); let upstream_refresh = upstream_manager.clone(); tokio::spawn(async move { loop { @@ -355,6 +360,7 @@ async fn main() -> std::result::Result<(), Box> { for domain in domains_refresh.clone() { let cache_domain = cache_refresh.clone(); let host_domain 
= host_refresh.clone(); + let unix_sock_domain = unix_sock_refresh.clone(); let upstream_domain = upstream_refresh.clone(); join.spawn(async move { match crate::tls_front::fetcher::fetch_real_tls( @@ -364,6 +370,7 @@ async fn main() -> std::result::Result<(), Box> { fetch_timeout, Some(upstream_domain), proxy_protocol, + unix_sock_domain.as_deref(), ) .await { diff --git a/src/tls_front/fetcher.rs b/src/tls_front/fetcher.rs index ba80332..1731cdc 100644 --- a/src/tls_front/fetcher.rs +++ b/src/tls_front/fetcher.rs @@ -2,8 +2,10 @@ use std::sync::Arc; use std::time::Duration; use anyhow::{Result, anyhow}; -use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; use tokio::net::TcpStream; +#[cfg(unix)] +use tokio::net::UnixStream; use tokio::time::timeout; use tokio_rustls::client::TlsStream; use tokio_rustls::TlsConnector; @@ -212,7 +214,10 @@ fn gen_key_share(rng: &SecureRandom) -> [u8; 32] { key } -async fn read_tls_record(stream: &mut TcpStream) -> Result<(u8, Vec)> { +async fn read_tls_record(stream: &mut S) -> Result<(u8, Vec)> +where + S: AsyncRead + Unpin, +{ let mut header = [0u8; 5]; stream.read_exact(&mut header).await?; let len = u16::from_be_bytes([header[3], header[4]]) as usize; @@ -345,6 +350,44 @@ async fn connect_with_dns_override( Ok(timeout(connect_timeout, TcpStream::connect((host, port))).await??) 
} +async fn connect_tcp_with_upstream( + host: &str, + port: u16, + connect_timeout: Duration, + upstream: Option>, +) -> Result { + if let Some(manager) = upstream { + if let Some(addr) = resolve_socket_addr(host, port) { + match manager.connect(addr, None, None).await { + Ok(stream) => return Ok(stream), + Err(e) => { + warn!( + host = %host, + port = port, + error = %e, + "Upstream connect failed, using direct connect" + ); + } + } + } else if let Ok(mut addrs) = tokio::net::lookup_host((host, port)).await { + if let Some(addr) = addrs.find(|a| a.is_ipv4()) { + match manager.connect(addr, None, None).await { + Ok(stream) => return Ok(stream), + Err(e) => { + warn!( + host = %host, + port = port, + error = %e, + "Upstream connect failed, using direct connect" + ); + } + } + } + } + } + connect_with_dns_override(host, port, connect_timeout).await +} + fn encode_tls13_certificate_message(cert_chain_der: &[Vec]) -> Option> { if cert_chain_der.is_empty() { return None; @@ -374,15 +417,15 @@ fn encode_tls13_certificate_message(cert_chain_der: &[Vec]) -> Option( + mut stream: S, sni: &str, connect_timeout: Duration, proxy_protocol: u8, -) -> Result { - let mut stream = connect_with_dns_override(host, port, connect_timeout).await?; - +) -> Result +where + S: AsyncRead + AsyncWrite + Unpin, +{ let rng = SecureRandom::new(); let client_hello = build_client_hello(sni, &rng); timeout(connect_timeout, async { @@ -438,43 +481,61 @@ async fn fetch_via_raw_tls( }) } -async fn fetch_via_rustls( +async fn fetch_via_raw_tls( host: &str, port: u16, sni: &str, connect_timeout: Duration, upstream: Option>, proxy_protocol: u8, + unix_sock: Option<&str>, ) -> Result { - // rustls handshake path for certificate and basic negotiated metadata. 
- let mut stream = if let Some(manager) = upstream { - if let Some(addr) = resolve_socket_addr(host, port) { - match manager.connect(addr, None, None).await { - Ok(s) => s, - Err(e) => { - warn!(sni = %sni, error = %e, "Upstream connect failed, using direct connect"); - connect_with_dns_override(host, port, connect_timeout).await? - } + #[cfg(unix)] + if let Some(sock_path) = unix_sock { + match timeout(connect_timeout, UnixStream::connect(sock_path)).await { + Ok(Ok(stream)) => { + debug!( + sni = %sni, + sock = %sock_path, + "Raw TLS fetch using mask unix socket" + ); + return fetch_via_raw_tls_stream(stream, sni, connect_timeout, 0).await; } - } else if let Ok(mut addrs) = tokio::net::lookup_host((host, port)).await { - if let Some(addr) = addrs.find(|a| a.is_ipv4()) { - match manager.connect(addr, None, None).await { - Ok(s) => s, - Err(e) => { - warn!(sni = %sni, error = %e, "Upstream connect failed, using direct connect"); - connect_with_dns_override(host, port, connect_timeout).await? - } - } - } else { - connect_with_dns_override(host, port, connect_timeout).await? + Ok(Err(e)) => { + warn!( + sni = %sni, + sock = %sock_path, + error = %e, + "Raw TLS unix socket connect failed, falling back to TCP" + ); + } + Err(_) => { + warn!( + sni = %sni, + sock = %sock_path, + "Raw TLS unix socket connect timed out, falling back to TCP" + ); } - } else { - connect_with_dns_override(host, port, connect_timeout).await? } - } else { - connect_with_dns_override(host, port, connect_timeout).await? - }; + } + #[cfg(not(unix))] + let _ = unix_sock; + + let stream = connect_tcp_with_upstream(host, port, connect_timeout, upstream).await?; + fetch_via_raw_tls_stream(stream, sni, connect_timeout, proxy_protocol).await +} + +async fn fetch_via_rustls_stream( + mut stream: S, + host: &str, + sni: &str, + proxy_protocol: u8, +) -> Result +where + S: AsyncRead + AsyncWrite + Unpin, +{ + // rustls handshake path for certificate and basic negotiated metadata. 
if proxy_protocol > 0 { let header = match proxy_protocol { 2 => ProxyProtocolV2Builder::new().build(), @@ -491,7 +552,7 @@ async fn fetch_via_rustls( .or_else(|_| ServerName::try_from(host.to_owned())) .map_err(|_| RustlsError::General("invalid SNI".into()))?; - let tls_stream: TlsStream = connector.connect(server_name, stream).await?; + let tls_stream: TlsStream = connector.connect(server_name, stream).await?; // Extract negotiated parameters and certificates let (_io, session) = tls_stream.get_ref(); @@ -552,6 +613,51 @@ async fn fetch_via_rustls( }) } +async fn fetch_via_rustls( + host: &str, + port: u16, + sni: &str, + connect_timeout: Duration, + upstream: Option>, + proxy_protocol: u8, + unix_sock: Option<&str>, +) -> Result { + #[cfg(unix)] + if let Some(sock_path) = unix_sock { + match timeout(connect_timeout, UnixStream::connect(sock_path)).await { + Ok(Ok(stream)) => { + debug!( + sni = %sni, + sock = %sock_path, + "Rustls fetch using mask unix socket" + ); + return fetch_via_rustls_stream(stream, host, sni, 0).await; + } + Ok(Err(e)) => { + warn!( + sni = %sni, + sock = %sock_path, + error = %e, + "Rustls unix socket connect failed, falling back to TCP" + ); + } + Err(_) => { + warn!( + sni = %sni, + sock = %sock_path, + "Rustls unix socket connect timed out, falling back to TCP" + ); + } + } + } + + #[cfg(not(unix))] + let _ = unix_sock; + + let stream = connect_tcp_with_upstream(host, port, connect_timeout, upstream).await?; + fetch_via_rustls_stream(stream, host, sni, proxy_protocol).await +} + /// Fetch real TLS metadata for the given SNI. 
/// /// Strategy: @@ -565,8 +671,19 @@ pub async fn fetch_real_tls( connect_timeout: Duration, upstream: Option>, proxy_protocol: u8, + unix_sock: Option<&str>, ) -> Result { - let raw_result = match fetch_via_raw_tls(host, port, sni, connect_timeout, proxy_protocol).await { + let raw_result = match fetch_via_raw_tls( + host, + port, + sni, + connect_timeout, + upstream.clone(), + proxy_protocol, + unix_sock, + ) + .await + { Ok(res) => Some(res), Err(e) => { warn!(sni = %sni, error = %e, "Raw TLS fetch failed"); @@ -574,7 +691,17 @@ pub async fn fetch_real_tls( } }; - match fetch_via_rustls(host, port, sni, connect_timeout, upstream, proxy_protocol).await { + match fetch_via_rustls( + host, + port, + sni, + connect_timeout, + upstream, + proxy_protocol, + unix_sock, + ) + .await + { Ok(rustls_result) => { if let Some(mut raw) = raw_result { raw.cert_info = rustls_result.cert_info; From fa2423dadfecda9ea61191835aad5b0f9caa60cb Mon Sep 17 00:00:00 2001 From: Alexey <247128645+axkurcom@users.noreply.github.com> Date: Sat, 28 Feb 2026 03:21:22 +0300 Subject: [PATCH 07/10] ME/DC Method Detection fixes Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com> --- src/main.rs | 11 +- src/transport/middle_proxy/mod.rs | 2 +- src/transport/middle_proxy/ping.rs | 158 +++++++++++++++++++++++++++++ src/transport/upstream.rs | 18 +++- 4 files changed, 185 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index e759095..e985051 100644 --- a/src/main.rs +++ b/src/main.rs @@ -40,6 +40,7 @@ use crate::stats::{ReplayChecker, Stats}; use crate::stream::BufferPool; use crate::transport::middle_proxy::{ MePool, fetch_proxy_config, run_me_ping, MePingFamily, MePingSample, format_sample_line, + format_me_route, }; use crate::transport::{ListenOptions, UpstreamManager, create_listener, find_listener_processes}; use crate::tls_front::TlsFrontCache; @@ -624,7 +625,15 @@ async fn main() -> std::result::Result<(), Box> { } else { info!(" No ME connectivity"); } 
- info!(" via direct"); + let me_route = format_me_route( + &config.upstreams, + &me_results, + prefer_ipv6, + v4_ok, + v6_ok, + ) + .await; + info!(" via {}", me_route); info!("============================================================"); use std::collections::BTreeMap; diff --git a/src/transport/middle_proxy/mod.rs b/src/transport/middle_proxy/mod.rs index 3a4ff16..1072ec8 100644 --- a/src/transport/middle_proxy/mod.rs +++ b/src/transport/middle_proxy/mod.rs @@ -23,7 +23,7 @@ use bytes::Bytes; pub use health::me_health_monitor; #[allow(unused_imports)] -pub use ping::{run_me_ping, format_sample_line, MePingReport, MePingSample, MePingFamily}; +pub use ping::{run_me_ping, format_sample_line, format_me_route, MePingReport, MePingSample, MePingFamily}; pub use pool::MePool; #[allow(unused_imports)] pub use pool_nat::{stun_probe, detect_public_ip}; diff --git a/src/transport/middle_proxy/ping.rs b/src/transport/middle_proxy/ping.rs index aae11e6..e90d98f 100644 --- a/src/transport/middle_proxy/ping.rs +++ b/src/transport/middle_proxy/ping.rs @@ -2,6 +2,9 @@ use std::collections::HashMap; use std::net::{IpAddr, SocketAddr}; use std::sync::Arc; +use tokio::net::UdpSocket; + +use crate::config::{UpstreamConfig, UpstreamType}; use crate::crypto::SecureRandom; use crate::error::ProxyError; @@ -50,6 +53,161 @@ pub fn format_sample_line(sample: &MePingSample) -> String { } } +fn format_direct_with_config( + interface: &Option, + bind_addresses: &Option>, +) -> Option { + let mut direct_parts: Vec = Vec::new(); + if let Some(dev) = interface.as_deref().filter(|v| !v.is_empty()) { + direct_parts.push(format!("dev={dev}")); + } + if let Some(src) = bind_addresses.as_ref().filter(|v| !v.is_empty()) { + direct_parts.push(format!("src={}", src.join(","))); + } + if direct_parts.is_empty() { + None + } else { + Some(format!("direct {}", direct_parts.join(" "))) + } +} + +fn pick_target_for_family(reports: &[MePingReport], family: MePingFamily) -> Option { + 
reports.iter().find_map(|report| { + if report.family != family { + return None; + } + report + .samples + .iter() + .find(|s| s.error.is_none() && s.handshake_ms.is_some()) + .map(|s| s.addr) + }) +} + +#[cfg(unix)] +fn detect_interface_for_ip(ip: IpAddr) -> Option { + use nix::ifaddrs::getifaddrs; + + if let Ok(addrs) = getifaddrs() { + for iface in addrs { + if let Some(address) = iface.address { + if let Some(v4) = address.as_sockaddr_in() { + if IpAddr::V4(v4.ip()) == ip { + return Some(iface.interface_name); + } + } else if let Some(v6) = address.as_sockaddr_in6() { + if IpAddr::V6(v6.ip()) == ip { + return Some(iface.interface_name); + } + } + } + } + } + None +} + +#[cfg(not(unix))] +fn detect_interface_for_ip(_ip: IpAddr) -> Option { + None +} + +async fn detect_direct_route_details( + reports: &[MePingReport], + prefer_ipv6: bool, + v4_ok: bool, + v6_ok: bool, +) -> Option { + let target_addr = if prefer_ipv6 && v6_ok { + pick_target_for_family(reports, MePingFamily::V6) + .or_else(|| pick_target_for_family(reports, MePingFamily::V4)) + } else if v4_ok { + pick_target_for_family(reports, MePingFamily::V4) + .or_else(|| pick_target_for_family(reports, MePingFamily::V6)) + } else { + pick_target_for_family(reports, MePingFamily::V6) + .or_else(|| pick_target_for_family(reports, MePingFamily::V4)) + }?; + + let local_ip = if target_addr.is_ipv4() { + let sock = UdpSocket::bind("0.0.0.0:0").await.ok()?; + sock.connect(target_addr).await.ok()?; + sock.local_addr().ok().map(|a| a.ip()) + } else { + let sock = UdpSocket::bind("[::]:0").await.ok()?; + sock.connect(target_addr).await.ok()?; + sock.local_addr().ok().map(|a| a.ip()) + }; + + let mut parts = Vec::new(); + if let Some(ip) = local_ip { + if let Some(dev) = detect_interface_for_ip(ip) { + parts.push(format!("dev={dev}")); + } + parts.push(format!("src={ip}")); + } + + if parts.is_empty() { + None + } else { + Some(format!("direct {}", parts.join(" "))) + } +} + +pub async fn format_me_route( + 
upstreams: &[UpstreamConfig], + reports: &[MePingReport], + prefer_ipv6: bool, + v4_ok: bool, + v6_ok: bool, +) -> String { + let enabled_upstreams: Vec<_> = upstreams.iter().filter(|u| u.enabled).collect(); + if enabled_upstreams.is_empty() { + return detect_direct_route_details(reports, prefer_ipv6, v4_ok, v6_ok) + .await + .unwrap_or_else(|| "direct".to_string()); + } + + if enabled_upstreams.len() == 1 { + return match &enabled_upstreams[0].upstream_type { + UpstreamType::Direct { + interface, + bind_addresses, + } => { + if let Some(route) = format_direct_with_config(interface, bind_addresses) { + route + } else { + detect_direct_route_details(reports, prefer_ipv6, v4_ok, v6_ok) + .await + .unwrap_or_else(|| "direct".to_string()) + } + } + UpstreamType::Socks4 { address, .. } => format!("socks4://{address}"), + UpstreamType::Socks5 { address, .. } => format!("socks5://{address}"), + }; + } + + let has_direct = enabled_upstreams + .iter() + .any(|u| matches!(u.upstream_type, UpstreamType::Direct { .. })); + let has_socks4 = enabled_upstreams + .iter() + .any(|u| matches!(u.upstream_type, UpstreamType::Socks4 { .. })); + let has_socks5 = enabled_upstreams + .iter() + .any(|u| matches!(u.upstream_type, UpstreamType::Socks5 { .. })); + let mut kinds = Vec::new(); + if has_direct { + kinds.push("direct"); + } + if has_socks4 { + kinds.push("socks4"); + } + if has_socks5 { + kinds.push("socks5"); + } + format!("mixed upstreams ({})", kinds.join(", ")) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/transport/upstream.rs b/src/transport/upstream.rs index edcf476..5ab198c 100644 --- a/src/transport/upstream.rs +++ b/src/transport/upstream.rs @@ -707,8 +707,22 @@ impl UpstreamManager { for (upstream_idx, upstream_config, bind_rr) in &upstreams { let upstream_name = match &upstream_config.upstream_type { - UpstreamType::Direct { interface, .. 
} => { - format!("direct{}", interface.as_ref().map(|i| format!(" ({})", i)).unwrap_or_default()) + UpstreamType::Direct { + interface, + bind_addresses, + } => { + let mut direct_parts = Vec::new(); + if let Some(dev) = interface.as_deref().filter(|v| !v.is_empty()) { + direct_parts.push(format!("dev={dev}")); + } + if let Some(src) = bind_addresses.as_ref().filter(|v| !v.is_empty()) { + direct_parts.push(format!("src={}", src.join(","))); + } + if direct_parts.is_empty() { + "direct".to_string() + } else { + format!("direct {}", direct_parts.join(" ")) + } } UpstreamType::Socks4 { address, .. } => format!("socks4://{}", address), UpstreamType::Socks5 { address, .. } => format!("socks5://{}", address), From 8b39a4ef6d924acd35935f9b4c3d1849f6d9d546 Mon Sep 17 00:00:00 2001 From: Alexey <247128645+axkurcom@users.noreply.github.com> Date: Sat, 28 Feb 2026 13:18:31 +0300 Subject: [PATCH 08/10] Statistics on ME + Dynamic backpressure + KDF with SOCKS Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com> --- src/config/defaults.rs | 12 + src/config/hot_reload.rs | 52 ++- src/config/load.rs | 20 + src/config/types.rs | 117 ++++++ src/main.rs | 27 ++ src/metrics.rs | 482 ++++++++++++++++++++---- src/stats/mod.rs | 292 ++++++++++++-- src/stats/telemetry.rs | 29 ++ src/transport/middle_proxy/handshake.rs | 37 +- src/transport/middle_proxy/ping.rs | 44 +++ src/transport/middle_proxy/pool.rs | 37 +- src/transport/middle_proxy/reader.rs | 18 +- src/transport/middle_proxy/registry.rs | 72 +++- 13 files changed, 1108 insertions(+), 131 deletions(-) create mode 100644 src/stats/telemetry.rs diff --git a/src/config/defaults.rs b/src/config/defaults.rs index d82f8ed..dbc251c 100644 --- a/src/config/defaults.rs +++ b/src/config/defaults.rs @@ -170,6 +170,18 @@ pub(crate) fn default_desync_all_full() -> bool { false } +pub(crate) fn default_me_route_backpressure_base_timeout_ms() -> u64 { + 25 +} + +pub(crate) fn default_me_route_backpressure_high_timeout_ms() -> u64 
{ + 120 +} + +pub(crate) fn default_me_route_backpressure_high_watermark_pct() -> u8 { + 80 +} + pub(crate) fn default_beobachten_minutes() -> u64 { 10 } diff --git a/src/config/hot_reload.rs b/src/config/hot_reload.rs index acc64cd..579a9cb 100644 --- a/src/config/hot_reload.rs +++ b/src/config/hot_reload.rs @@ -16,6 +16,7 @@ //! | `general` | `me_pool_drain_ttl_secs` | Applied on next ME map update | //! | `general` | `me_pool_min_fresh_ratio` | Applied on next ME map update | //! | `general` | `me_reinit_drain_timeout_secs`| Applied on next ME map update | +//! | `general` | `telemetry` / `me_*_policy` | Applied immediately | //! | `network` | `dns_overrides` | Applied immediately | //! | `access` | All user/quota fields | Effective immediately | //! @@ -30,7 +31,7 @@ use notify::{EventKind, RecursiveMode, Watcher, recommended_watcher}; use tokio::sync::{mpsc, watch}; use tracing::{error, info, warn}; -use crate::config::LogLevel; +use crate::config::{LogLevel, MeSocksKdfPolicy, MeTelemetryLevel}; use super::load::ProxyConfig; // ── Hot fields ──────────────────────────────────────────────────────────────── @@ -52,6 +53,13 @@ pub struct HotFields { pub me_keepalive_interval_secs: u64, pub me_keepalive_jitter_secs: u64, pub me_keepalive_payload_random: bool, + pub telemetry_core_enabled: bool, + pub telemetry_user_enabled: bool, + pub telemetry_me_level: MeTelemetryLevel, + pub me_socks_kdf_policy: MeSocksKdfPolicy, + pub me_route_backpressure_base_timeout_ms: u64, + pub me_route_backpressure_high_timeout_ms: u64, + pub me_route_backpressure_high_watermark_pct: u8, pub access: crate::config::AccessConfig, } @@ -72,6 +80,13 @@ impl HotFields { me_keepalive_interval_secs: cfg.general.me_keepalive_interval_secs, me_keepalive_jitter_secs: cfg.general.me_keepalive_jitter_secs, me_keepalive_payload_random: cfg.general.me_keepalive_payload_random, + telemetry_core_enabled: cfg.general.telemetry.core_enabled, + telemetry_user_enabled: cfg.general.telemetry.user_enabled, 
+ telemetry_me_level: cfg.general.telemetry.me_level, + me_socks_kdf_policy: cfg.general.me_socks_kdf_policy, + me_route_backpressure_base_timeout_ms: cfg.general.me_route_backpressure_base_timeout_ms, + me_route_backpressure_high_timeout_ms: cfg.general.me_route_backpressure_high_timeout_ms, + me_route_backpressure_high_watermark_pct: cfg.general.me_route_backpressure_high_watermark_pct, access: cfg.access.clone(), } } @@ -262,6 +277,41 @@ fn log_changes( ); } + if old_hot.telemetry_core_enabled != new_hot.telemetry_core_enabled + || old_hot.telemetry_user_enabled != new_hot.telemetry_user_enabled + || old_hot.telemetry_me_level != new_hot.telemetry_me_level + { + info!( + "config reload: telemetry: core_enabled={} user_enabled={} me_level={}", + new_hot.telemetry_core_enabled, + new_hot.telemetry_user_enabled, + new_hot.telemetry_me_level, + ); + } + + if old_hot.me_socks_kdf_policy != new_hot.me_socks_kdf_policy { + info!( + "config reload: me_socks_kdf_policy: {:?} → {:?}", + old_hot.me_socks_kdf_policy, + new_hot.me_socks_kdf_policy, + ); + } + + if old_hot.me_route_backpressure_base_timeout_ms + != new_hot.me_route_backpressure_base_timeout_ms + || old_hot.me_route_backpressure_high_timeout_ms + != new_hot.me_route_backpressure_high_timeout_ms + || old_hot.me_route_backpressure_high_watermark_pct + != new_hot.me_route_backpressure_high_watermark_pct + { + info!( + "config reload: me_route_backpressure: base={}ms high={}ms watermark={}%", + new_hot.me_route_backpressure_base_timeout_ms, + new_hot.me_route_backpressure_high_timeout_ms, + new_hot.me_route_backpressure_high_watermark_pct, + ); + } + if old_hot.access.users != new_hot.access.users { let mut added: Vec<&String> = new_hot.access.users.keys() .filter(|u| !old_hot.access.users.contains_key(*u)) diff --git a/src/config/load.rs b/src/config/load.rs index c1bbdef..7c578a3 100644 --- a/src/config/load.rs +++ b/src/config/load.rs @@ -311,6 +311,26 @@ impl ProxyConfig { )); } + if 
config.general.me_route_backpressure_base_timeout_ms == 0 { + return Err(ProxyError::Config( + "general.me_route_backpressure_base_timeout_ms must be > 0".to_string(), + )); + } + + if config.general.me_route_backpressure_high_timeout_ms + < config.general.me_route_backpressure_base_timeout_ms + { + return Err(ProxyError::Config( + "general.me_route_backpressure_high_timeout_ms must be >= general.me_route_backpressure_base_timeout_ms".to_string(), + )); + } + + if !(1..=100).contains(&config.general.me_route_backpressure_high_watermark_pct) { + return Err(ProxyError::Config( + "general.me_route_backpressure_high_watermark_pct must be within [1, 100]".to_string(), + )); + } + if config.general.effective_me_pool_force_close_secs() > 0 && config.general.effective_me_pool_force_close_secs() < config.general.me_pool_drain_ttl_secs diff --git a/src/config/types.rs b/src/config/types.rs index 7d9f13a..902d816 100644 --- a/src/config/types.rs +++ b/src/config/types.rs @@ -59,6 +59,98 @@ impl std::fmt::Display for LogLevel { } } +/// Middle-End telemetry verbosity level. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +#[serde(rename_all = "lowercase")] +pub enum MeTelemetryLevel { + #[default] + Normal, + Silent, + Debug, +} + +impl MeTelemetryLevel { + pub fn as_u8(self) -> u8 { + match self { + MeTelemetryLevel::Silent => 0, + MeTelemetryLevel::Normal => 1, + MeTelemetryLevel::Debug => 2, + } + } + + pub fn from_u8(raw: u8) -> Self { + match raw { + 0 => MeTelemetryLevel::Silent, + 2 => MeTelemetryLevel::Debug, + _ => MeTelemetryLevel::Normal, + } + } + + pub fn allows_normal(self) -> bool { + !matches!(self, MeTelemetryLevel::Silent) + } + + pub fn allows_debug(self) -> bool { + matches!(self, MeTelemetryLevel::Debug) + } +} + +impl std::fmt::Display for MeTelemetryLevel { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + MeTelemetryLevel::Silent => write!(f, "silent"), + MeTelemetryLevel::Normal => write!(f, "normal"), + MeTelemetryLevel::Debug => write!(f, "debug"), + } + } +} + +/// Middle-End SOCKS KDF fallback policy. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +#[serde(rename_all = "lowercase")] +pub enum MeSocksKdfPolicy { + #[default] + Strict, + Compat, +} + +impl MeSocksKdfPolicy { + pub fn as_u8(self) -> u8 { + match self { + MeSocksKdfPolicy::Strict => 0, + MeSocksKdfPolicy::Compat => 1, + } + } + + pub fn from_u8(raw: u8) -> Self { + match raw { + 1 => MeSocksKdfPolicy::Compat, + _ => MeSocksKdfPolicy::Strict, + } + } +} + +/// Telemetry controls for hot-path counters and ME diagnostics. 
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct TelemetryConfig { + #[serde(default = "default_true")] + pub core_enabled: bool, + #[serde(default = "default_true")] + pub user_enabled: bool, + #[serde(default)] + pub me_level: MeTelemetryLevel, +} + +impl Default for TelemetryConfig { + fn default() -> Self { + Self { + core_enabled: default_true(), + user_enabled: default_true(), + me_level: MeTelemetryLevel::Normal, + } + } +} + // ============= Sub-Configs ============= #[derive(Debug, Clone, Serialize, Deserialize)] @@ -288,6 +380,26 @@ pub struct GeneralConfig { #[serde(default)] pub disable_colors: bool, + /// Runtime telemetry controls for counters/metrics in hot paths. + #[serde(default)] + pub telemetry: TelemetryConfig, + + /// SOCKS-bound KDF policy for Middle-End handshake. + #[serde(default)] + pub me_socks_kdf_policy: MeSocksKdfPolicy, + + /// Base backpressure timeout in milliseconds for ME route channel send. + #[serde(default = "default_me_route_backpressure_base_timeout_ms")] + pub me_route_backpressure_base_timeout_ms: u64, + + /// High backpressure timeout in milliseconds when queue occupancy is above watermark. + #[serde(default = "default_me_route_backpressure_high_timeout_ms")] + pub me_route_backpressure_high_timeout_ms: u64, + + /// Queue occupancy percent threshold for high backpressure timeout. + #[serde(default = "default_me_route_backpressure_high_watermark_pct")] + pub me_route_backpressure_high_watermark_pct: u8, + /// [general.links] — proxy link generation overrides. 
#[serde(default)] pub links: LinksConfig, @@ -414,6 +526,11 @@ impl Default for GeneralConfig { unknown_dc_log_path: default_unknown_dc_log_path(), log_level: LogLevel::Normal, disable_colors: false, + telemetry: TelemetryConfig::default(), + me_socks_kdf_policy: MeSocksKdfPolicy::Strict, + me_route_backpressure_base_timeout_ms: default_me_route_backpressure_base_timeout_ms(), + me_route_backpressure_high_timeout_ms: default_me_route_backpressure_high_timeout_ms(), + me_route_backpressure_high_watermark_pct: default_me_route_backpressure_high_watermark_pct(), links: LinksConfig::default(), crypto_pending_buffer: default_crypto_pending_buffer(), max_client_frame: default_max_client_frame(), diff --git a/src/main.rs b/src/main.rs index e985051..4d4d3f5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -36,6 +36,7 @@ use crate::ip_tracker::UserIpTracker; use crate::network::probe::{decide_network_capabilities, log_probe_result, run_probe}; use crate::proxy::ClientHandler; use crate::stats::beobachten::BeobachtenStore; +use crate::stats::telemetry::TelemetryPolicy; use crate::stats::{ReplayChecker, Stats}; use crate::stream::BufferPool; use crate::transport::middle_proxy::{ @@ -406,6 +407,7 @@ async fn main() -> std::result::Result<(), Box> { let prefer_ipv6 = decision.prefer_ipv6(); let mut use_middle_proxy = config.general.use_middle_proxy && (decision.ipv4_me || decision.ipv6_me); let stats = Arc::new(Stats::new()); + stats.apply_telemetry_policy(TelemetryPolicy::from_config(&config.general.telemetry)); let beobachten = Arc::new(BeobachtenStore::new()); let rng = Arc::new(SecureRandom::new()); @@ -539,6 +541,10 @@ async fn main() -> std::result::Result<(), Box> { config.general.me_hardswap_warmup_delay_max_ms, config.general.me_hardswap_warmup_extra_passes, config.general.me_hardswap_warmup_pass_backoff_base_ms, + config.general.me_socks_kdf_policy, + config.general.me_route_backpressure_base_timeout_ms, + config.general.me_route_backpressure_high_timeout_ms, + 
config.general.me_route_backpressure_high_watermark_pct, ); let pool_size = config.general.middle_proxy_pool_size.max(1); @@ -794,6 +800,27 @@ async fn main() -> std::result::Result<(), Box> { detected_ip_v6, ); + let stats_policy = stats.clone(); + let mut config_rx_policy = config_rx.clone(); + let me_pool_policy = me_pool.clone(); + tokio::spawn(async move { + loop { + if config_rx_policy.changed().await.is_err() { + break; + } + let cfg = config_rx_policy.borrow_and_update().clone(); + stats_policy.apply_telemetry_policy(TelemetryPolicy::from_config(&cfg.general.telemetry)); + if let Some(pool) = &me_pool_policy { + pool.update_runtime_transport_policy( + cfg.general.me_socks_kdf_policy, + cfg.general.me_route_backpressure_base_timeout_ms, + cfg.general.me_route_backpressure_high_timeout_ms, + cfg.general.me_route_backpressure_high_watermark_pct, + ); + } + } + }); + let beobachten_writer = beobachten.clone(); let config_rx_beobachten = config_rx.clone(); tokio::spawn(async move { diff --git a/src/metrics.rs b/src/metrics.rs index 63b337b..35f29ca 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -118,120 +118,394 @@ fn render_beobachten(beobachten: &BeobachtenStore, config: &ProxyConfig) -> Stri async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIpTracker) -> String { use std::fmt::Write; let mut out = String::with_capacity(4096); + let telemetry = stats.telemetry_policy(); + let core_enabled = telemetry.core_enabled; + let user_enabled = telemetry.user_enabled; + let me_allows_normal = telemetry.me_level.allows_normal(); + let me_allows_debug = telemetry.me_level.allows_debug(); let _ = writeln!(out, "# HELP telemt_uptime_seconds Proxy uptime"); let _ = writeln!(out, "# TYPE telemt_uptime_seconds gauge"); let _ = writeln!(out, "telemt_uptime_seconds {:.1}", stats.uptime_secs()); + let _ = writeln!(out, "# HELP telemt_telemetry_core_enabled Runtime core telemetry switch"); + let _ = writeln!(out, "# TYPE 
telemt_telemetry_core_enabled gauge"); + let _ = writeln!( + out, + "telemt_telemetry_core_enabled {}", + if core_enabled { 1 } else { 0 } + ); + + let _ = writeln!(out, "# HELP telemt_telemetry_user_enabled Runtime per-user telemetry switch"); + let _ = writeln!(out, "# TYPE telemt_telemetry_user_enabled gauge"); + let _ = writeln!( + out, + "telemt_telemetry_user_enabled {}", + if user_enabled { 1 } else { 0 } + ); + + let _ = writeln!(out, "# HELP telemt_telemetry_me_level Runtime ME telemetry level flag"); + let _ = writeln!(out, "# TYPE telemt_telemetry_me_level gauge"); + let _ = writeln!( + out, + "telemt_telemetry_me_level{{level=\"silent\"}} {}", + if matches!(telemetry.me_level, crate::config::MeTelemetryLevel::Silent) { + 1 + } else { + 0 + } + ); + let _ = writeln!( + out, + "telemt_telemetry_me_level{{level=\"normal\"}} {}", + if matches!(telemetry.me_level, crate::config::MeTelemetryLevel::Normal) { + 1 + } else { + 0 + } + ); + let _ = writeln!( + out, + "telemt_telemetry_me_level{{level=\"debug\"}} {}", + if matches!(telemetry.me_level, crate::config::MeTelemetryLevel::Debug) { + 1 + } else { + 0 + } + ); + let _ = writeln!(out, "# HELP telemt_connections_total Total accepted connections"); let _ = writeln!(out, "# TYPE telemt_connections_total counter"); - let _ = writeln!(out, "telemt_connections_total {}", stats.get_connects_all()); + let _ = writeln!( + out, + "telemt_connections_total {}", + if core_enabled { stats.get_connects_all() } else { 0 } + ); let _ = writeln!(out, "# HELP telemt_connections_bad_total Bad/rejected connections"); let _ = writeln!(out, "# TYPE telemt_connections_bad_total counter"); - let _ = writeln!(out, "telemt_connections_bad_total {}", stats.get_connects_bad()); + let _ = writeln!( + out, + "telemt_connections_bad_total {}", + if core_enabled { stats.get_connects_bad() } else { 0 } + ); let _ = writeln!(out, "# HELP telemt_handshake_timeouts_total Handshake timeouts"); let _ = writeln!(out, "# TYPE 
telemt_handshake_timeouts_total counter"); - let _ = writeln!(out, "telemt_handshake_timeouts_total {}", stats.get_handshake_timeouts()); + let _ = writeln!( + out, + "telemt_handshake_timeouts_total {}", + if core_enabled { + stats.get_handshake_timeouts() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_me_keepalive_sent_total ME keepalive frames sent"); let _ = writeln!(out, "# TYPE telemt_me_keepalive_sent_total counter"); - let _ = writeln!(out, "telemt_me_keepalive_sent_total {}", stats.get_me_keepalive_sent()); + let _ = writeln!( + out, + "telemt_me_keepalive_sent_total {}", + if me_allows_debug { + stats.get_me_keepalive_sent() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_me_keepalive_failed_total ME keepalive send failures"); let _ = writeln!(out, "# TYPE telemt_me_keepalive_failed_total counter"); - let _ = writeln!(out, "telemt_me_keepalive_failed_total {}", stats.get_me_keepalive_failed()); + let _ = writeln!( + out, + "telemt_me_keepalive_failed_total {}", + if me_allows_normal { + stats.get_me_keepalive_failed() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_me_keepalive_pong_total ME keepalive pong replies"); let _ = writeln!(out, "# TYPE telemt_me_keepalive_pong_total counter"); - let _ = writeln!(out, "telemt_me_keepalive_pong_total {}", stats.get_me_keepalive_pong()); + let _ = writeln!( + out, + "telemt_me_keepalive_pong_total {}", + if me_allows_debug { + stats.get_me_keepalive_pong() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_me_keepalive_timeout_total ME keepalive ping timeouts"); let _ = writeln!(out, "# TYPE telemt_me_keepalive_timeout_total counter"); - let _ = writeln!(out, "telemt_me_keepalive_timeout_total {}", stats.get_me_keepalive_timeout()); + let _ = writeln!( + out, + "telemt_me_keepalive_timeout_total {}", + if me_allows_normal { + stats.get_me_keepalive_timeout() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_me_reconnect_attempts_total ME reconnect 
attempts"); let _ = writeln!(out, "# TYPE telemt_me_reconnect_attempts_total counter"); - let _ = writeln!(out, "telemt_me_reconnect_attempts_total {}", stats.get_me_reconnect_attempts()); + let _ = writeln!( + out, + "telemt_me_reconnect_attempts_total {}", + if me_allows_normal { + stats.get_me_reconnect_attempts() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_me_reconnect_success_total ME reconnect successes"); let _ = writeln!(out, "# TYPE telemt_me_reconnect_success_total counter"); - let _ = writeln!(out, "telemt_me_reconnect_success_total {}", stats.get_me_reconnect_success()); + let _ = writeln!( + out, + "telemt_me_reconnect_success_total {}", + if me_allows_normal { + stats.get_me_reconnect_success() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_me_crc_mismatch_total ME CRC mismatches"); let _ = writeln!(out, "# TYPE telemt_me_crc_mismatch_total counter"); - let _ = writeln!(out, "telemt_me_crc_mismatch_total {}", stats.get_me_crc_mismatch()); + let _ = writeln!( + out, + "telemt_me_crc_mismatch_total {}", + if me_allows_normal { + stats.get_me_crc_mismatch() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_me_seq_mismatch_total ME sequence mismatches"); let _ = writeln!(out, "# TYPE telemt_me_seq_mismatch_total counter"); - let _ = writeln!(out, "telemt_me_seq_mismatch_total {}", stats.get_me_seq_mismatch()); + let _ = writeln!( + out, + "telemt_me_seq_mismatch_total {}", + if me_allows_normal { + stats.get_me_seq_mismatch() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_me_route_drop_no_conn_total ME route drops: no conn"); let _ = writeln!(out, "# TYPE telemt_me_route_drop_no_conn_total counter"); - let _ = writeln!(out, "telemt_me_route_drop_no_conn_total {}", stats.get_me_route_drop_no_conn()); + let _ = writeln!( + out, + "telemt_me_route_drop_no_conn_total {}", + if me_allows_normal { + stats.get_me_route_drop_no_conn() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP 
telemt_me_route_drop_channel_closed_total ME route drops: channel closed"); let _ = writeln!(out, "# TYPE telemt_me_route_drop_channel_closed_total counter"); - let _ = writeln!(out, "telemt_me_route_drop_channel_closed_total {}", stats.get_me_route_drop_channel_closed()); + let _ = writeln!( + out, + "telemt_me_route_drop_channel_closed_total {}", + if me_allows_normal { + stats.get_me_route_drop_channel_closed() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_me_route_drop_queue_full_total ME route drops: queue full"); let _ = writeln!(out, "# TYPE telemt_me_route_drop_queue_full_total counter"); - let _ = writeln!(out, "telemt_me_route_drop_queue_full_total {}", stats.get_me_route_drop_queue_full()); + let _ = writeln!( + out, + "telemt_me_route_drop_queue_full_total {}", + if me_allows_normal { + stats.get_me_route_drop_queue_full() + } else { + 0 + } + ); + + let _ = writeln!( + out, + "# HELP telemt_me_route_drop_queue_full_profile_total ME route drops: queue full by adaptive profile" + ); + let _ = writeln!( + out, + "# TYPE telemt_me_route_drop_queue_full_profile_total counter" + ); + let _ = writeln!( + out, + "telemt_me_route_drop_queue_full_profile_total{{profile=\"base\"}} {}", + if me_allows_normal { + stats.get_me_route_drop_queue_full_base() + } else { + 0 + } + ); + let _ = writeln!( + out, + "telemt_me_route_drop_queue_full_profile_total{{profile=\"high\"}} {}", + if me_allows_normal { + stats.get_me_route_drop_queue_full_high() + } else { + 0 + } + ); + + let _ = writeln!( + out, + "# HELP telemt_me_socks_kdf_policy_total SOCKS KDF policy outcomes" + ); + let _ = writeln!(out, "# TYPE telemt_me_socks_kdf_policy_total counter"); + let _ = writeln!( + out, + "telemt_me_socks_kdf_policy_total{{policy=\"strict\",outcome=\"reject\"}} {}", + if me_allows_normal { + stats.get_me_socks_kdf_strict_reject() + } else { + 0 + } + ); + let _ = writeln!( + out, + "telemt_me_socks_kdf_policy_total{{policy=\"compat\",outcome=\"fallback\"}} {}", + if 
me_allows_debug { + stats.get_me_socks_kdf_compat_fallback() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_secure_padding_invalid_total Invalid secure frame lengths"); let _ = writeln!(out, "# TYPE telemt_secure_padding_invalid_total counter"); - let _ = writeln!(out, "telemt_secure_padding_invalid_total {}", stats.get_secure_padding_invalid()); + let _ = writeln!( + out, + "telemt_secure_padding_invalid_total {}", + if me_allows_normal { + stats.get_secure_padding_invalid() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_desync_total Total crypto-desync detections"); let _ = writeln!(out, "# TYPE telemt_desync_total counter"); - let _ = writeln!(out, "telemt_desync_total {}", stats.get_desync_total()); + let _ = writeln!( + out, + "telemt_desync_total {}", + if me_allows_normal { + stats.get_desync_total() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_desync_full_logged_total Full forensic desync logs emitted"); let _ = writeln!(out, "# TYPE telemt_desync_full_logged_total counter"); - let _ = writeln!(out, "telemt_desync_full_logged_total {}", stats.get_desync_full_logged()); + let _ = writeln!( + out, + "telemt_desync_full_logged_total {}", + if me_allows_normal { + stats.get_desync_full_logged() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_desync_suppressed_total Suppressed desync forensic events"); let _ = writeln!(out, "# TYPE telemt_desync_suppressed_total counter"); - let _ = writeln!(out, "telemt_desync_suppressed_total {}", stats.get_desync_suppressed()); + let _ = writeln!( + out, + "telemt_desync_suppressed_total {}", + if me_allows_normal { + stats.get_desync_suppressed() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_desync_frames_bucket_total Desync count by frames_ok bucket"); let _ = writeln!(out, "# TYPE telemt_desync_frames_bucket_total counter"); let _ = writeln!( out, "telemt_desync_frames_bucket_total{{bucket=\"0\"}} {}", - stats.get_desync_frames_bucket_0() + if 
me_allows_normal { + stats.get_desync_frames_bucket_0() + } else { + 0 + } ); let _ = writeln!( out, "telemt_desync_frames_bucket_total{{bucket=\"1_2\"}} {}", - stats.get_desync_frames_bucket_1_2() + if me_allows_normal { + stats.get_desync_frames_bucket_1_2() + } else { + 0 + } ); let _ = writeln!( out, "telemt_desync_frames_bucket_total{{bucket=\"3_10\"}} {}", - stats.get_desync_frames_bucket_3_10() + if me_allows_normal { + stats.get_desync_frames_bucket_3_10() + } else { + 0 + } ); let _ = writeln!( out, "telemt_desync_frames_bucket_total{{bucket=\"gt_10\"}} {}", - stats.get_desync_frames_bucket_gt_10() + if me_allows_normal { + stats.get_desync_frames_bucket_gt_10() + } else { + 0 + } ); let _ = writeln!(out, "# HELP telemt_pool_swap_total Successful ME pool swaps"); let _ = writeln!(out, "# TYPE telemt_pool_swap_total counter"); - let _ = writeln!(out, "telemt_pool_swap_total {}", stats.get_pool_swap_total()); + let _ = writeln!( + out, + "telemt_pool_swap_total {}", + if me_allows_debug { + stats.get_pool_swap_total() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_pool_drain_active Active draining ME writers"); let _ = writeln!(out, "# TYPE telemt_pool_drain_active gauge"); - let _ = writeln!(out, "telemt_pool_drain_active {}", stats.get_pool_drain_active()); + let _ = writeln!( + out, + "telemt_pool_drain_active {}", + if me_allows_debug { + stats.get_pool_drain_active() + } else { + 0 + } + ); let _ = writeln!(out, "# HELP telemt_pool_force_close_total Forced close events for draining writers"); let _ = writeln!(out, "# TYPE telemt_pool_force_close_total counter"); let _ = writeln!( out, "telemt_pool_force_close_total {}", - stats.get_pool_force_close_total() + if me_allows_normal { + stats.get_pool_force_close_total() + } else { + 0 + } ); let _ = writeln!(out, "# HELP telemt_pool_stale_pick_total Stale writer fallback picks for new binds"); @@ -239,7 +513,11 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp 
let _ = writeln!( out, "telemt_pool_stale_pick_total {}", - stats.get_pool_stale_pick_total() + if me_allows_normal { + stats.get_pool_stale_pick_total() + } else { + 0 + } ); let _ = writeln!(out, "# HELP telemt_me_writer_removed_total Total ME writer removals"); @@ -247,7 +525,11 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp let _ = writeln!( out, "telemt_me_writer_removed_total {}", - stats.get_me_writer_removed_total() + if me_allows_debug { + stats.get_me_writer_removed_total() + } else { + 0 + } ); let _ = writeln!( @@ -258,7 +540,11 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp let _ = writeln!( out, "telemt_me_writer_removed_unexpected_total {}", - stats.get_me_writer_removed_unexpected_total() + if me_allows_normal { + stats.get_me_writer_removed_unexpected_total() + } else { + 0 + } ); let _ = writeln!(out, "# HELP telemt_me_refill_triggered_total Immediate ME refill runs started"); @@ -266,7 +552,11 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp let _ = writeln!( out, "telemt_me_refill_triggered_total {}", - stats.get_me_refill_triggered_total() + if me_allows_debug { + stats.get_me_refill_triggered_total() + } else { + 0 + } ); let _ = writeln!( @@ -277,7 +567,11 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp let _ = writeln!( out, "telemt_me_refill_skipped_inflight_total {}", - stats.get_me_refill_skipped_inflight_total() + if me_allows_debug { + stats.get_me_refill_skipped_inflight_total() + } else { + 0 + } ); let _ = writeln!(out, "# HELP telemt_me_refill_failed_total Immediate ME refill failures"); @@ -285,7 +579,11 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp let _ = writeln!( out, "telemt_me_refill_failed_total {}", - stats.get_me_refill_failed_total() + if me_allows_normal { + stats.get_me_refill_failed_total() + } else { + 0 + } ); let _ = writeln!( @@ -296,7 
+594,11 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp let _ = writeln!( out, "telemt_me_writer_restored_same_endpoint_total {}", - stats.get_me_writer_restored_same_endpoint_total() + if me_allows_normal { + stats.get_me_writer_restored_same_endpoint_total() + } else { + 0 + } ); let _ = writeln!( @@ -307,16 +609,24 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp let _ = writeln!( out, "telemt_me_writer_restored_fallback_total {}", - stats.get_me_writer_restored_fallback_total() + if me_allows_normal { + stats.get_me_writer_restored_fallback_total() + } else { + 0 + } ); - let unresolved_writer_losses = stats - .get_me_writer_removed_unexpected_total() - .saturating_sub( - stats - .get_me_writer_restored_same_endpoint_total() - .saturating_add(stats.get_me_writer_restored_fallback_total()), - ); + let unresolved_writer_losses = if me_allows_normal { + stats + .get_me_writer_removed_unexpected_total() + .saturating_sub( + stats + .get_me_writer_restored_same_endpoint_total() + .saturating_add(stats.get_me_writer_restored_fallback_total()), + ) + } else { + 0 + }; let _ = writeln!( out, "# HELP telemt_me_writer_removed_unexpected_minus_restored_total Unexpected writer removals not yet compensated by restore" @@ -343,51 +653,63 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp let _ = writeln!(out, "# TYPE telemt_user_msgs_from_client counter"); let _ = writeln!(out, "# HELP telemt_user_msgs_to_client Per-user messages sent"); let _ = writeln!(out, "# TYPE telemt_user_msgs_to_client counter"); + let _ = writeln!( + out, + "# HELP telemt_telemetry_user_series_suppressed User-labeled metric series suppression flag" + ); + let _ = writeln!(out, "# TYPE telemt_telemetry_user_series_suppressed gauge"); + let _ = writeln!( + out, + "telemt_telemetry_user_series_suppressed {}", + if user_enabled { 0 } else { 1 } + ); - for entry in stats.iter_user_stats() { - let user 
= entry.key(); - let s = entry.value(); - let _ = writeln!(out, "telemt_user_connections_total{{user=\"{}\"}} {}", user, s.connects.load(std::sync::atomic::Ordering::Relaxed)); - let _ = writeln!(out, "telemt_user_connections_current{{user=\"{}\"}} {}", user, s.curr_connects.load(std::sync::atomic::Ordering::Relaxed)); - let _ = writeln!(out, "telemt_user_octets_from_client{{user=\"{}\"}} {}", user, s.octets_from_client.load(std::sync::atomic::Ordering::Relaxed)); - let _ = writeln!(out, "telemt_user_octets_to_client{{user=\"{}\"}} {}", user, s.octets_to_client.load(std::sync::atomic::Ordering::Relaxed)); - let _ = writeln!(out, "telemt_user_msgs_from_client{{user=\"{}\"}} {}", user, s.msgs_from_client.load(std::sync::atomic::Ordering::Relaxed)); - let _ = writeln!(out, "telemt_user_msgs_to_client{{user=\"{}\"}} {}", user, s.msgs_to_client.load(std::sync::atomic::Ordering::Relaxed)); - } + if user_enabled { + for entry in stats.iter_user_stats() { + let user = entry.key(); + let s = entry.value(); + let _ = writeln!(out, "telemt_user_connections_total{{user=\"{}\"}} {}", user, s.connects.load(std::sync::atomic::Ordering::Relaxed)); + let _ = writeln!(out, "telemt_user_connections_current{{user=\"{}\"}} {}", user, s.curr_connects.load(std::sync::atomic::Ordering::Relaxed)); + let _ = writeln!(out, "telemt_user_octets_from_client{{user=\"{}\"}} {}", user, s.octets_from_client.load(std::sync::atomic::Ordering::Relaxed)); + let _ = writeln!(out, "telemt_user_octets_to_client{{user=\"{}\"}} {}", user, s.octets_to_client.load(std::sync::atomic::Ordering::Relaxed)); + let _ = writeln!(out, "telemt_user_msgs_from_client{{user=\"{}\"}} {}", user, s.msgs_from_client.load(std::sync::atomic::Ordering::Relaxed)); + let _ = writeln!(out, "telemt_user_msgs_to_client{{user=\"{}\"}} {}", user, s.msgs_to_client.load(std::sync::atomic::Ordering::Relaxed)); + } - let ip_stats = ip_tracker.get_stats().await; - let ip_counts: HashMap = ip_stats - .into_iter() - .map(|(user, count, _)| 
(user, count)) - .collect(); + let ip_stats = ip_tracker.get_stats().await; + let ip_counts: HashMap = ip_stats + .into_iter() + .map(|(user, count, _)| (user, count)) + .collect(); - let mut unique_users = BTreeSet::new(); - unique_users.extend(config.access.user_max_unique_ips.keys().cloned()); - unique_users.extend(ip_counts.keys().cloned()); + let mut unique_users = BTreeSet::new(); + unique_users.extend(config.access.user_max_unique_ips.keys().cloned()); + unique_users.extend(ip_counts.keys().cloned()); - let _ = writeln!(out, "# HELP telemt_user_unique_ips_current Per-user current number of unique active IPs"); - let _ = writeln!(out, "# TYPE telemt_user_unique_ips_current gauge"); - let _ = writeln!(out, "# HELP telemt_user_unique_ips_limit Per-user configured unique IP limit (0 means unlimited)"); - let _ = writeln!(out, "# TYPE telemt_user_unique_ips_limit gauge"); - let _ = writeln!(out, "# HELP telemt_user_unique_ips_utilization Per-user unique IP usage ratio (0 for unlimited)"); - let _ = writeln!(out, "# TYPE telemt_user_unique_ips_utilization gauge"); + let _ = writeln!(out, "# HELP telemt_user_unique_ips_current Per-user current number of unique active IPs"); + let _ = writeln!(out, "# TYPE telemt_user_unique_ips_current gauge"); + let _ = writeln!(out, "# HELP telemt_user_unique_ips_limit Per-user configured unique IP limit (0 means unlimited)"); + let _ = writeln!(out, "# TYPE telemt_user_unique_ips_limit gauge"); + let _ = writeln!(out, "# HELP telemt_user_unique_ips_utilization Per-user unique IP usage ratio (0 for unlimited)"); + let _ = writeln!(out, "# TYPE telemt_user_unique_ips_utilization gauge"); - for user in unique_users { - let current = ip_counts.get(&user).copied().unwrap_or(0); - let limit = config.access.user_max_unique_ips.get(&user).copied().unwrap_or(0); - let utilization = if limit > 0 { - current as f64 / limit as f64 - } else { - 0.0 - }; - let _ = writeln!(out, "telemt_user_unique_ips_current{{user=\"{}\"}} {}", user, 
current); - let _ = writeln!(out, "telemt_user_unique_ips_limit{{user=\"{}\"}} {}", user, limit); - let _ = writeln!( - out, - "telemt_user_unique_ips_utilization{{user=\"{}\"}} {:.6}", - user, - utilization - ); + for user in unique_users { + let current = ip_counts.get(&user).copied().unwrap_or(0); + let limit = config.access.user_max_unique_ips.get(&user).copied().unwrap_or(0); + let utilization = if limit > 0 { + current as f64 / limit as f64 + } else { + 0.0 + }; + let _ = writeln!(out, "telemt_user_unique_ips_current{{user=\"{}\"}} {}", user, current); + let _ = writeln!(out, "telemt_user_unique_ips_limit{{user=\"{}\"}} {}", user, limit); + let _ = writeln!( + out, + "telemt_user_unique_ips_utilization{{user=\"{}\"}} {:.6}", + user, + utilization + ); + } } out diff --git a/src/stats/mod.rs b/src/stats/mod.rs index 1e32bb7..f5aa2b7 100644 --- a/src/stats/mod.rs +++ b/src/stats/mod.rs @@ -3,8 +3,9 @@ #![allow(dead_code)] pub mod beobachten; +pub mod telemetry; -use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering}; use std::time::{Instant, Duration}; use dashmap::DashMap; use parking_lot::Mutex; @@ -15,6 +16,9 @@ use std::collections::hash_map::DefaultHasher; use std::collections::VecDeque; use tracing::debug; +use crate::config::MeTelemetryLevel; +use self::telemetry::TelemetryPolicy; + // ============= Stats ============= #[derive(Default)] @@ -33,6 +37,10 @@ pub struct Stats { me_route_drop_no_conn: AtomicU64, me_route_drop_channel_closed: AtomicU64, me_route_drop_queue_full: AtomicU64, + me_route_drop_queue_full_base: AtomicU64, + me_route_drop_queue_full_high: AtomicU64, + me_socks_kdf_strict_reject: AtomicU64, + me_socks_kdf_compat_fallback: AtomicU64, secure_padding_invalid: AtomicU64, desync_total: AtomicU64, desync_full_logged: AtomicU64, @@ -52,6 +60,9 @@ pub struct Stats { me_refill_failed_total: AtomicU64, me_writer_restored_same_endpoint_total: AtomicU64, 
me_writer_restored_fallback_total: AtomicU64, + telemetry_core_enabled: AtomicBool, + telemetry_user_enabled: AtomicBool, + telemetry_me_level: AtomicU8, user_stats: DashMap, start_time: parking_lot::RwLock>, } @@ -69,44 +80,167 @@ pub struct UserStats { impl Stats { pub fn new() -> Self { let stats = Self::default(); + stats.apply_telemetry_policy(TelemetryPolicy::default()); *stats.start_time.write() = Some(Instant::now()); stats } - - pub fn increment_connects_all(&self) { self.connects_all.fetch_add(1, Ordering::Relaxed); } - pub fn increment_connects_bad(&self) { self.connects_bad.fetch_add(1, Ordering::Relaxed); } - pub fn increment_handshake_timeouts(&self) { self.handshake_timeouts.fetch_add(1, Ordering::Relaxed); } - pub fn increment_me_keepalive_sent(&self) { self.me_keepalive_sent.fetch_add(1, Ordering::Relaxed); } - pub fn increment_me_keepalive_failed(&self) { self.me_keepalive_failed.fetch_add(1, Ordering::Relaxed); } - pub fn increment_me_keepalive_pong(&self) { self.me_keepalive_pong.fetch_add(1, Ordering::Relaxed); } - pub fn increment_me_keepalive_timeout(&self) { self.me_keepalive_timeout.fetch_add(1, Ordering::Relaxed); } - pub fn increment_me_keepalive_timeout_by(&self, value: u64) { - self.me_keepalive_timeout.fetch_add(value, Ordering::Relaxed); + + fn telemetry_me_level(&self) -> MeTelemetryLevel { + MeTelemetryLevel::from_u8(self.telemetry_me_level.load(Ordering::Relaxed)) + } + + fn telemetry_core_enabled(&self) -> bool { + self.telemetry_core_enabled.load(Ordering::Relaxed) + } + + fn telemetry_user_enabled(&self) -> bool { + self.telemetry_user_enabled.load(Ordering::Relaxed) + } + + fn telemetry_me_allows_normal(&self) -> bool { + self.telemetry_me_level().allows_normal() + } + + fn telemetry_me_allows_debug(&self) -> bool { + self.telemetry_me_level().allows_debug() + } + + pub fn apply_telemetry_policy(&self, policy: TelemetryPolicy) { + self.telemetry_core_enabled + .store(policy.core_enabled, Ordering::Relaxed); + 
self.telemetry_user_enabled + .store(policy.user_enabled, Ordering::Relaxed); + self.telemetry_me_level + .store(policy.me_level.as_u8(), Ordering::Relaxed); + } + + pub fn telemetry_policy(&self) -> TelemetryPolicy { + TelemetryPolicy { + core_enabled: self.telemetry_core_enabled(), + user_enabled: self.telemetry_user_enabled(), + me_level: self.telemetry_me_level(), + } + } + + pub fn increment_connects_all(&self) { + if self.telemetry_core_enabled() { + self.connects_all.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_connects_bad(&self) { + if self.telemetry_core_enabled() { + self.connects_bad.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_handshake_timeouts(&self) { + if self.telemetry_core_enabled() { + self.handshake_timeouts.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_keepalive_sent(&self) { + if self.telemetry_me_allows_debug() { + self.me_keepalive_sent.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_keepalive_failed(&self) { + if self.telemetry_me_allows_normal() { + self.me_keepalive_failed.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_keepalive_pong(&self) { + if self.telemetry_me_allows_debug() { + self.me_keepalive_pong.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_keepalive_timeout(&self) { + if self.telemetry_me_allows_normal() { + self.me_keepalive_timeout.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_keepalive_timeout_by(&self, value: u64) { + if self.telemetry_me_allows_normal() { + self.me_keepalive_timeout.fetch_add(value, Ordering::Relaxed); + } + } + pub fn increment_me_reconnect_attempt(&self) { + if self.telemetry_me_allows_normal() { + self.me_reconnect_attempts.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_reconnect_success(&self) { + if self.telemetry_me_allows_normal() { + self.me_reconnect_success.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_crc_mismatch(&self) { + if 
self.telemetry_me_allows_normal() { + self.me_crc_mismatch.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_seq_mismatch(&self) { + if self.telemetry_me_allows_normal() { + self.me_seq_mismatch.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_route_drop_no_conn(&self) { + if self.telemetry_me_allows_normal() { + self.me_route_drop_no_conn.fetch_add(1, Ordering::Relaxed); + } } - pub fn increment_me_reconnect_attempt(&self) { self.me_reconnect_attempts.fetch_add(1, Ordering::Relaxed); } - pub fn increment_me_reconnect_success(&self) { self.me_reconnect_success.fetch_add(1, Ordering::Relaxed); } - pub fn increment_me_crc_mismatch(&self) { self.me_crc_mismatch.fetch_add(1, Ordering::Relaxed); } - pub fn increment_me_seq_mismatch(&self) { self.me_seq_mismatch.fetch_add(1, Ordering::Relaxed); } - pub fn increment_me_route_drop_no_conn(&self) { self.me_route_drop_no_conn.fetch_add(1, Ordering::Relaxed); } pub fn increment_me_route_drop_channel_closed(&self) { - self.me_route_drop_channel_closed.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_normal() { + self.me_route_drop_channel_closed.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_me_route_drop_queue_full(&self) { - self.me_route_drop_queue_full.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_normal() { + self.me_route_drop_queue_full.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_route_drop_queue_full_base(&self) { + if self.telemetry_me_allows_normal() { + self.me_route_drop_queue_full_base.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_route_drop_queue_full_high(&self) { + if self.telemetry_me_allows_normal() { + self.me_route_drop_queue_full_high.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_socks_kdf_strict_reject(&self) { + if self.telemetry_me_allows_normal() { + self.me_socks_kdf_strict_reject.fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_socks_kdf_compat_fallback(&self) { + if 
self.telemetry_me_allows_debug() { + self.me_socks_kdf_compat_fallback.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_secure_padding_invalid(&self) { - self.secure_padding_invalid.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_normal() { + self.secure_padding_invalid.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_desync_total(&self) { - self.desync_total.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_normal() { + self.desync_total.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_desync_full_logged(&self) { - self.desync_full_logged.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_normal() { + self.desync_full_logged.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_desync_suppressed(&self) { - self.desync_suppressed.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_normal() { + self.desync_suppressed.fetch_add(1, Ordering::Relaxed); + } } pub fn observe_desync_frames_ok(&self, frames_ok: u64) { + if !self.telemetry_me_allows_normal() { + return; + } match frames_ok { 0 => { self.desync_frames_bucket_0.fetch_add(1, Ordering::Relaxed); @@ -123,12 +257,19 @@ impl Stats { } } pub fn increment_pool_swap_total(&self) { - self.pool_swap_total.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_debug() { + self.pool_swap_total.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_pool_drain_active(&self) { - self.pool_drain_active.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_debug() { + self.pool_drain_active.fetch_add(1, Ordering::Relaxed); + } } pub fn decrement_pool_drain_active(&self) { + if !self.telemetry_me_allows_debug() { + return; + } let mut current = self.pool_drain_active.load(Ordering::Relaxed); loop { if current == 0 { @@ -146,31 +287,51 @@ impl Stats { } } pub fn increment_pool_force_close_total(&self) { - self.pool_force_close_total.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_normal() { + 
self.pool_force_close_total.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_pool_stale_pick_total(&self) { - self.pool_stale_pick_total.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_normal() { + self.pool_stale_pick_total.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_me_writer_removed_total(&self) { - self.me_writer_removed_total.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_debug() { + self.me_writer_removed_total.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_me_writer_removed_unexpected_total(&self) { - self.me_writer_removed_unexpected_total.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_normal() { + self.me_writer_removed_unexpected_total.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_me_refill_triggered_total(&self) { - self.me_refill_triggered_total.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_debug() { + self.me_refill_triggered_total.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_me_refill_skipped_inflight_total(&self) { - self.me_refill_skipped_inflight_total.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_debug() { + self.me_refill_skipped_inflight_total.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_me_refill_failed_total(&self) { - self.me_refill_failed_total.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_normal() { + self.me_refill_failed_total.fetch_add(1, Ordering::Relaxed); + } } pub fn increment_me_writer_restored_same_endpoint_total(&self) { - self.me_writer_restored_same_endpoint_total.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_normal() { + self.me_writer_restored_same_endpoint_total + .fetch_add(1, Ordering::Relaxed); + } } pub fn increment_me_writer_restored_fallback_total(&self) { - self.me_writer_restored_fallback_total.fetch_add(1, Ordering::Relaxed); + if self.telemetry_me_allows_normal() { + self.me_writer_restored_fallback_total + .fetch_add(1, Ordering::Relaxed); + } 
} pub fn get_connects_all(&self) -> u64 { self.connects_all.load(Ordering::Relaxed) } pub fn get_connects_bad(&self) -> u64 { self.connects_bad.load(Ordering::Relaxed) } @@ -189,6 +350,18 @@ impl Stats { pub fn get_me_route_drop_queue_full(&self) -> u64 { self.me_route_drop_queue_full.load(Ordering::Relaxed) } + pub fn get_me_route_drop_queue_full_base(&self) -> u64 { + self.me_route_drop_queue_full_base.load(Ordering::Relaxed) + } + pub fn get_me_route_drop_queue_full_high(&self) -> u64 { + self.me_route_drop_queue_full_high.load(Ordering::Relaxed) + } + pub fn get_me_socks_kdf_strict_reject(&self) -> u64 { + self.me_socks_kdf_strict_reject.load(Ordering::Relaxed) + } + pub fn get_me_socks_kdf_compat_fallback(&self) -> u64 { + self.me_socks_kdf_compat_fallback.load(Ordering::Relaxed) + } pub fn get_secure_padding_invalid(&self) -> u64 { self.secure_padding_invalid.load(Ordering::Relaxed) } @@ -248,11 +421,17 @@ impl Stats { } pub fn increment_user_connects(&self, user: &str) { + if !self.telemetry_user_enabled() { + return; + } self.user_stats.entry(user.to_string()).or_default() .connects.fetch_add(1, Ordering::Relaxed); } pub fn increment_user_curr_connects(&self, user: &str) { + if !self.telemetry_user_enabled() { + return; + } self.user_stats.entry(user.to_string()).or_default() .curr_connects.fetch_add(1, Ordering::Relaxed); } @@ -285,21 +464,33 @@ impl Stats { } pub fn add_user_octets_from(&self, user: &str, bytes: u64) { + if !self.telemetry_user_enabled() { + return; + } self.user_stats.entry(user.to_string()).or_default() .octets_from_client.fetch_add(bytes, Ordering::Relaxed); } pub fn add_user_octets_to(&self, user: &str, bytes: u64) { + if !self.telemetry_user_enabled() { + return; + } self.user_stats.entry(user.to_string()).or_default() .octets_to_client.fetch_add(bytes, Ordering::Relaxed); } pub fn increment_user_msgs_from(&self, user: &str) { + if !self.telemetry_user_enabled() { + return; + } self.user_stats.entry(user.to_string()).or_default() 
.msgs_from_client.fetch_add(1, Ordering::Relaxed); } pub fn increment_user_msgs_to(&self, user: &str) { + if !self.telemetry_user_enabled() { + return; + } self.user_stats.entry(user.to_string()).or_default() .msgs_to_client.fetch_add(1, Ordering::Relaxed); } @@ -548,6 +739,7 @@ impl ReplayStats { #[cfg(test)] mod tests { use super::*; + use crate::config::MeTelemetryLevel; use std::sync::Arc; #[test] @@ -558,6 +750,40 @@ mod tests { stats.increment_connects_all(); assert_eq!(stats.get_connects_all(), 3); } + + #[test] + fn test_telemetry_policy_disables_core_and_user_counters() { + let stats = Stats::new(); + stats.apply_telemetry_policy(TelemetryPolicy { + core_enabled: false, + user_enabled: false, + me_level: MeTelemetryLevel::Normal, + }); + + stats.increment_connects_all(); + stats.increment_user_connects("alice"); + stats.add_user_octets_from("alice", 1024); + assert_eq!(stats.get_connects_all(), 0); + assert_eq!(stats.get_user_curr_connects("alice"), 0); + assert_eq!(stats.get_user_total_octets("alice"), 0); + } + + #[test] + fn test_telemetry_policy_me_silent_blocks_me_counters() { + let stats = Stats::new(); + stats.apply_telemetry_policy(TelemetryPolicy { + core_enabled: true, + user_enabled: true, + me_level: MeTelemetryLevel::Silent, + }); + + stats.increment_me_crc_mismatch(); + stats.increment_me_keepalive_sent(); + stats.increment_me_route_drop_queue_full(); + assert_eq!(stats.get_me_crc_mismatch(), 0); + assert_eq!(stats.get_me_keepalive_sent(), 0); + assert_eq!(stats.get_me_route_drop_queue_full(), 0); + } #[test] fn test_replay_checker_basic() { diff --git a/src/stats/telemetry.rs b/src/stats/telemetry.rs new file mode 100644 index 0000000..e29fa44 --- /dev/null +++ b/src/stats/telemetry.rs @@ -0,0 +1,29 @@ +use crate::config::{MeTelemetryLevel, TelemetryConfig}; + +/// Runtime telemetry policy used by hot-path counters. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct TelemetryPolicy { + pub core_enabled: bool, + pub user_enabled: bool, + pub me_level: MeTelemetryLevel, +} + +impl Default for TelemetryPolicy { + fn default() -> Self { + Self { + core_enabled: true, + user_enabled: true, + me_level: MeTelemetryLevel::Normal, + } + } +} + +impl TelemetryPolicy { + pub fn from_config(cfg: &TelemetryConfig) -> Self { + Self { + core_enabled: cfg.core_enabled, + user_enabled: cfg.user_enabled, + me_level: cfg.me_level, + } + } +} diff --git a/src/transport/middle_proxy/handshake.rs b/src/transport/middle_proxy/handshake.rs index 988834a..384ecc9 100644 --- a/src/transport/middle_proxy/handshake.rs +++ b/src/transport/middle_proxy/handshake.rs @@ -14,6 +14,7 @@ use tokio::net::{TcpStream, TcpSocket}; use tokio::time::timeout; use tracing::{debug, info, warn}; +use crate::config::MeSocksKdfPolicy; use crate::crypto::{SecureRandom, build_middleproxy_prekey, derive_middleproxy_keys, sha256}; use crate::error::{ProxyError, Result}; use crate::network::IpFamily; @@ -117,6 +118,13 @@ impl MePool { Some(bound) } + fn is_socks_route(upstream_egress: Option) -> bool { + matches!( + upstream_egress.map(|info| info.route_kind), + Some(UpstreamRouteKind::Socks4 | UpstreamRouteKind::Socks5) + ) + } + /// TCP connect with timeout + return RTT in milliseconds. 
pub(crate) async fn connect_tcp( &self, @@ -125,14 +133,7 @@ impl MePool { let start = Instant::now(); let (stream, upstream_egress) = if let Some(upstream) = &self.upstream { let dc_idx = self.resolve_dc_idx_for_endpoint(addr).await; - let (stream, egress) = timeout( - Duration::from_secs(ME_CONNECT_TIMEOUT_SECS), - upstream.connect_with_details(addr, dc_idx, None), - ) - .await - .map_err(|_| ProxyError::ConnectionTimeout { - addr: addr.to_string(), - })??; + let (stream, egress) = upstream.connect_with_details(addr, dc_idx, None).await?; (stream, Some(egress)) } else { let connect_fut = async { @@ -226,9 +227,29 @@ impl MePool { } else { IpFamily::V6 }; + let is_socks_route = Self::is_socks_route(upstream_egress); let socks_bound_addr = Self::select_socks_bound_addr(family, upstream_egress); let reflected = if let Some(bound) = socks_bound_addr { Some(bound) + } else if is_socks_route { + match self.socks_kdf_policy() { + MeSocksKdfPolicy::Strict => { + self.stats.increment_me_socks_kdf_strict_reject(); + return Err(ProxyError::InvalidHandshake( + "SOCKS route returned no valid BND.ADDR for ME KDF (strict policy)" + .to_string(), + )); + } + MeSocksKdfPolicy::Compat => { + self.stats.increment_me_socks_kdf_compat_fallback(); + if self.nat_probe { + let bind_ip = Self::direct_bind_ip_for_stun(family, upstream_egress); + self.maybe_reflect_public_addr(family, bind_ip).await + } else { + None + } + } + } } else if self.nat_probe { let bind_ip = Self::direct_bind_ip_for_stun(family, upstream_egress); self.maybe_reflect_public_addr(family, bind_ip).await diff --git a/src/transport/middle_proxy/ping.rs b/src/transport/middle_proxy/ping.rs index e90d98f..a05e44d 100644 --- a/src/transport/middle_proxy/ping.rs +++ b/src/transport/middle_proxy/ping.rs @@ -7,6 +7,7 @@ use tokio::net::UdpSocket; use crate::config::{UpstreamConfig, UpstreamType}; use crate::crypto::SecureRandom; use crate::error::ProxyError; +use crate::transport::{UpstreamEgressInfo, UpstreamRouteKind}; 
use super::MePool; @@ -20,6 +21,7 @@ pub enum MePingFamily { pub struct MePingSample { pub dc: i32, pub addr: SocketAddr, + pub route: Option, pub connect_ms: Option, pub handshake_ms: Option, pub error: Option, @@ -84,6 +86,34 @@ fn pick_target_for_family(reports: &[MePingReport], family: MePingFamily) -> Opt }) } +fn route_from_egress(egress: Option) -> Option { + let info = egress?; + match info.route_kind { + UpstreamRouteKind::Direct => { + let src_ip = info + .direct_bind_ip + .or_else(|| info.local_addr.map(|addr| addr.ip())); + let ip = src_ip?; + let mut parts = Vec::new(); + if let Some(dev) = detect_interface_for_ip(ip) { + parts.push(format!("dev={dev}")); + } + parts.push(format!("src={ip}")); + Some(format!("direct {}", parts.join(" "))) + } + UpstreamRouteKind::Socks4 => Some( + info.socks_bound_addr + .map(|addr| format!("socks4 bnd={addr}")) + .unwrap_or_else(|| "socks4".to_string()), + ), + UpstreamRouteKind::Socks5 => Some( + info.socks_bound_addr + .map(|addr| format!("socks5 bnd={addr}")) + .unwrap_or_else(|| "socks5".to_string()), + ), + } +} + #[cfg(unix)] fn detect_interface_for_ip(ip: IpAddr) -> Option { use nix::ifaddrs::getifaddrs; @@ -160,6 +190,15 @@ pub async fn format_me_route( v4_ok: bool, v6_ok: bool, ) -> String { + if let Some(route) = reports + .iter() + .flat_map(|report| report.samples.iter()) + .find(|sample| sample.error.is_none() && sample.handshake_ms.is_some()) + .and_then(|sample| sample.route.clone()) + { + return route; + } + let enabled_upstreams: Vec<_> = upstreams.iter().filter(|u| u.enabled).collect(); if enabled_upstreams.is_empty() { return detect_direct_route_details(reports, prefer_ipv6, v4_ok, v6_ok) @@ -222,6 +261,7 @@ mod tests { let s = sample(MePingSample { dc: 4, addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)), 8888), + route: Some("direct src=1.2.3.4".to_string()), connect_ms: Some(12.3), handshake_ms: Some(34.7), error: None, @@ -238,6 +278,7 @@ mod tests { let s = sample(MePingSample { dc: 
-5, addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(5, 6, 7, 8)), 80), + route: Some("socks5".to_string()), connect_ms: Some(10.0), handshake_ms: None, error: Some("handshake timeout".to_string()), @@ -278,10 +319,12 @@ pub async fn run_me_ping(pool: &Arc, rng: &SecureRandom) -> Vec { connect_ms = Some(conn_rtt); + route = route_from_egress(upstream_egress); match pool.handshake_only(stream, addr, upstream_egress, rng).await { Ok(hs) => { handshake_ms = Some(hs.handshake_ms); @@ -302,6 +345,7 @@ pub async fn run_me_ping(pool: &Arc, rng: &SecureRandom) -> Vec Arc { + let registry = Arc::new(ConnRegistry::new()); + registry.update_route_backpressure_policy( + me_route_backpressure_base_timeout_ms, + me_route_backpressure_high_timeout_ms, + me_route_backpressure_high_watermark_pct, + ); Arc::new(Self { - registry: Arc::new(ConnRegistry::new()), + registry, writers: Arc::new(RwLock::new(Vec::new())), rr: AtomicU64::new(0), decision, @@ -204,6 +216,7 @@ impl MePool { me_hardswap_warmup_pass_backoff_base_ms: AtomicU64::new( me_hardswap_warmup_pass_backoff_base_ms, ), + me_socks_kdf_policy: AtomicU8::new(me_socks_kdf_policy.as_u8()), }) } @@ -260,6 +273,26 @@ impl MePool { &self.registry } + pub fn update_runtime_transport_policy( + &self, + socks_kdf_policy: MeSocksKdfPolicy, + route_backpressure_base_timeout_ms: u64, + route_backpressure_high_timeout_ms: u64, + route_backpressure_high_watermark_pct: u8, + ) { + self.me_socks_kdf_policy + .store(socks_kdf_policy.as_u8(), Ordering::Relaxed); + self.registry.update_route_backpressure_policy( + route_backpressure_base_timeout_ms, + route_backpressure_high_timeout_ms, + route_backpressure_high_watermark_pct, + ); + } + + pub(super) fn socks_kdf_policy(&self) -> MeSocksKdfPolicy { + MeSocksKdfPolicy::from_u8(self.me_socks_kdf_policy.load(Ordering::Relaxed)) + } + pub(super) fn writers_arc(&self) -> Arc>> { self.writers.clone() } diff --git a/src/transport/middle_proxy/reader.rs b/src/transport/middle_proxy/reader.rs index 
95bd0d8..ea0dd75 100644 --- a/src/transport/middle_proxy/reader.rs +++ b/src/transport/middle_proxy/reader.rs @@ -124,7 +124,14 @@ pub(crate) async fn reader_loop( match routed { RouteResult::NoConn => stats.increment_me_route_drop_no_conn(), RouteResult::ChannelClosed => stats.increment_me_route_drop_channel_closed(), - RouteResult::QueueFull => stats.increment_me_route_drop_queue_full(), + RouteResult::QueueFullBase => { + stats.increment_me_route_drop_queue_full(); + stats.increment_me_route_drop_queue_full_base(); + } + RouteResult::QueueFullHigh => { + stats.increment_me_route_drop_queue_full(); + stats.increment_me_route_drop_queue_full_high(); + } RouteResult::Routed => {} } reg.unregister(cid).await; @@ -140,7 +147,14 @@ pub(crate) async fn reader_loop( match routed { RouteResult::NoConn => stats.increment_me_route_drop_no_conn(), RouteResult::ChannelClosed => stats.increment_me_route_drop_channel_closed(), - RouteResult::QueueFull => stats.increment_me_route_drop_queue_full(), + RouteResult::QueueFullBase => { + stats.increment_me_route_drop_queue_full(); + stats.increment_me_route_drop_queue_full_base(); + } + RouteResult::QueueFullHigh => { + stats.increment_me_route_drop_queue_full(); + stats.increment_me_route_drop_queue_full_high(); + } RouteResult::Routed => {} } reg.unregister(cid).await; diff --git a/src/transport/middle_proxy/registry.rs b/src/transport/middle_proxy/registry.rs index 2122ed8..223fa71 100644 --- a/src/transport/middle_proxy/registry.rs +++ b/src/transport/middle_proxy/registry.rs @@ -1,6 +1,6 @@ use std::collections::{HashMap, HashSet}; use std::net::SocketAddr; -use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::atomic::{AtomicU8, AtomicU64, Ordering}; use std::time::Duration; use tokio::sync::{mpsc, RwLock}; @@ -10,14 +10,17 @@ use super::codec::WriterCommand; use super::MeResponse; const ROUTE_CHANNEL_CAPACITY: usize = 4096; -const ROUTE_BACKPRESSURE_TIMEOUT: Duration = Duration::from_millis(25); +const 
ROUTE_BACKPRESSURE_BASE_TIMEOUT_MS: u64 = 25; +const ROUTE_BACKPRESSURE_HIGH_TIMEOUT_MS: u64 = 120; +const ROUTE_BACKPRESSURE_HIGH_WATERMARK_PCT: u8 = 80; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum RouteResult { Routed, NoConn, ChannelClosed, - QueueFull, + QueueFullBase, + QueueFullHigh, } #[derive(Clone)] @@ -65,6 +68,9 @@ impl RegistryInner { pub struct ConnRegistry { inner: RwLock, next_id: AtomicU64, + route_backpressure_base_timeout_ms: AtomicU64, + route_backpressure_high_timeout_ms: AtomicU64, + route_backpressure_high_watermark_pct: AtomicU8, } impl ConnRegistry { @@ -73,9 +79,35 @@ impl ConnRegistry { Self { inner: RwLock::new(RegistryInner::new()), next_id: AtomicU64::new(start), + route_backpressure_base_timeout_ms: AtomicU64::new( + ROUTE_BACKPRESSURE_BASE_TIMEOUT_MS, + ), + route_backpressure_high_timeout_ms: AtomicU64::new( + ROUTE_BACKPRESSURE_HIGH_TIMEOUT_MS, + ), + route_backpressure_high_watermark_pct: AtomicU8::new( + ROUTE_BACKPRESSURE_HIGH_WATERMARK_PCT, + ), } } + pub fn update_route_backpressure_policy( + &self, + base_timeout_ms: u64, + high_timeout_ms: u64, + high_watermark_pct: u8, + ) { + let base = base_timeout_ms.max(1); + let high = high_timeout_ms.max(base); + let watermark = high_watermark_pct.clamp(1, 100); + self.route_backpressure_base_timeout_ms + .store(base, Ordering::Relaxed); + self.route_backpressure_high_timeout_ms + .store(high, Ordering::Relaxed); + self.route_backpressure_high_watermark_pct + .store(watermark, Ordering::Relaxed); + } + pub async fn register(&self) -> (u64, mpsc::Receiver) { let id = self.next_id.fetch_add(1, Ordering::Relaxed); let (tx, rx) = mpsc::channel(ROUTE_CHANNEL_CAPACITY); @@ -112,10 +144,40 @@ impl ConnRegistry { Err(TrySendError::Closed(_)) => RouteResult::ChannelClosed, Err(TrySendError::Full(resp)) => { // Absorb short bursts without dropping/closing the session immediately. 
- match tokio::time::timeout(ROUTE_BACKPRESSURE_TIMEOUT, tx.send(resp)).await { + let base_timeout_ms = + self.route_backpressure_base_timeout_ms.load(Ordering::Relaxed).max(1); + let high_timeout_ms = self + .route_backpressure_high_timeout_ms + .load(Ordering::Relaxed) + .max(base_timeout_ms); + let high_watermark_pct = self + .route_backpressure_high_watermark_pct + .load(Ordering::Relaxed) + .clamp(1, 100); + let used = ROUTE_CHANNEL_CAPACITY.saturating_sub(tx.capacity()); + let used_pct = if ROUTE_CHANNEL_CAPACITY == 0 { + 100 + } else { + (used.saturating_mul(100) / ROUTE_CHANNEL_CAPACITY) as u8 + }; + let high_profile = used_pct >= high_watermark_pct; + let timeout_ms = if high_profile { + high_timeout_ms + } else { + base_timeout_ms + }; + let timeout_dur = Duration::from_millis(timeout_ms); + + match tokio::time::timeout(timeout_dur, tx.send(resp)).await { Ok(Ok(())) => RouteResult::Routed, Ok(Err(_)) => RouteResult::ChannelClosed, - Err(_) => RouteResult::QueueFull, + Err(_) => { + if high_profile { + RouteResult::QueueFullHigh + } else { + RouteResult::QueueFullBase + } + } } } } From 6c12af2b941f7d66deb0c2879f5c932ca6b29cf0 Mon Sep 17 00:00:00 2001 From: Alexey <247128645+axkurcom@users.noreply.github.com> Date: Sat, 28 Feb 2026 13:38:30 +0300 Subject: [PATCH 09/10] ME Connectivity: socks-url Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com> --- src/transport/middle_proxy/ping.rs | 30 ++++++++++++++++++++---------- src/transport/upstream.rs | 6 ++++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/transport/middle_proxy/ping.rs b/src/transport/middle_proxy/ping.rs index a05e44d..b9f0836 100644 --- a/src/transport/middle_proxy/ping.rs +++ b/src/transport/middle_proxy/ping.rs @@ -101,16 +101,26 @@ fn route_from_egress(egress: Option) -> Option { parts.push(format!("src={ip}")); Some(format!("direct {}", parts.join(" "))) } - UpstreamRouteKind::Socks4 => Some( - info.socks_bound_addr - .map(|addr| format!("socks4 
bnd={addr}")) - .unwrap_or_else(|| "socks4".to_string()), - ), - UpstreamRouteKind::Socks5 => Some( - info.socks_bound_addr - .map(|addr| format!("socks5 bnd={addr}")) - .unwrap_or_else(|| "socks5".to_string()), - ), + UpstreamRouteKind::Socks4 => { + let route = info + .socks_proxy_addr + .map(|addr| format!("socks4://{addr}")) + .unwrap_or_else(|| "socks4://unknown".to_string()); + Some(match info.socks_bound_addr { + Some(bound) => format!("{route} bnd={bound}"), + None => route, + }) + } + UpstreamRouteKind::Socks5 => { + let route = info + .socks_proxy_addr + .map(|addr| format!("socks5://{addr}")) + .unwrap_or_else(|| "socks5://unknown".to_string()); + Some(match info.socks_bound_addr { + Some(bound) => format!("{route} bnd={bound}"), + None => route, + }) + } } } diff --git a/src/transport/upstream.rs b/src/transport/upstream.rs index 5ab198c..1918fdc 100644 --- a/src/transport/upstream.rs +++ b/src/transport/upstream.rs @@ -164,6 +164,7 @@ pub struct UpstreamEgressInfo { pub local_addr: Option, pub direct_bind_ip: Option, pub socks_bound_addr: Option, + pub socks_proxy_addr: Option, } // ============= Upstream Manager ============= @@ -523,6 +524,7 @@ impl UpstreamManager { local_addr, direct_bind_ip: bind_ip, socks_bound_addr: None, + socks_proxy_addr: None, }, )) }, @@ -593,6 +595,7 @@ impl UpstreamManager { } }; let local_addr = stream.local_addr().ok(); + let socks_proxy_addr = stream.peer_addr().ok(); Ok(( stream, UpstreamEgressInfo { @@ -600,6 +603,7 @@ impl UpstreamManager { local_addr, direct_bind_ip: None, socks_bound_addr: Some(bound.addr), + socks_proxy_addr, }, )) }, @@ -672,6 +676,7 @@ impl UpstreamManager { } }; let local_addr = stream.local_addr().ok(); + let socks_proxy_addr = stream.peer_addr().ok(); Ok(( stream, UpstreamEgressInfo { @@ -679,6 +684,7 @@ impl UpstreamManager { local_addr, direct_bind_ip: None, socks_bound_addr: Some(bound.addr), + socks_proxy_addr, }, )) }, From 9afaa28add0df19e40e196bb6e08583ad93e4ed3 Mon Sep 17 00:00:00 
2001 From: Alexey <247128645+axkurcom@users.noreply.github.com> Date: Sat, 28 Feb 2026 14:21:09 +0300 Subject: [PATCH 10/10] UpstreamManager: Backoff Retries --- src/config/defaults.rs | 14 +++++ src/config/hot_reload.rs | 8 +++ src/config/load.rs | 76 +++++++++++++++++++++++ src/config/types.rs | 15 +++++ src/main.rs | 7 ++- src/transport/upstream.rs | 127 ++++++++++++++++++++++++++++---------- 6 files changed, 214 insertions(+), 33 deletions(-) diff --git a/src/config/defaults.rs b/src/config/defaults.rs index dbc251c..ab087fd 100644 --- a/src/config/defaults.rs +++ b/src/config/defaults.rs @@ -8,6 +8,8 @@ const DEFAULT_STUN_TCP_FALLBACK: bool = true; const DEFAULT_MIDDLE_PROXY_WARM_STANDBY: usize = 16; const DEFAULT_ME_RECONNECT_MAX_CONCURRENT_PER_DC: u32 = 8; const DEFAULT_ME_RECONNECT_FAST_RETRY_COUNT: u32 = 16; +const DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS: u32 = 3; +const DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD: u32 = 4; const DEFAULT_LISTEN_ADDR_IPV6: &str = "::"; const DEFAULT_ACCESS_USER: &str = "default"; const DEFAULT_ACCESS_SECRET: &str = "00000000000000000000000000000000"; @@ -158,6 +160,18 @@ pub(crate) fn default_me_reconnect_fast_retry_count() -> u32 { DEFAULT_ME_RECONNECT_FAST_RETRY_COUNT } +pub(crate) fn default_upstream_connect_retry_attempts() -> u32 { + DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS +} + +pub(crate) fn default_upstream_connect_retry_backoff_ms() -> u64 { + 250 +} + +pub(crate) fn default_upstream_unhealthy_fail_threshold() -> u32 { + DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD +} + pub(crate) fn default_crypto_pending_buffer() -> usize { 256 * 1024 } diff --git a/src/config/hot_reload.rs b/src/config/hot_reload.rs index 579a9cb..eec6b8c 100644 --- a/src/config/hot_reload.rs +++ b/src/config/hot_reload.rs @@ -117,6 +117,14 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig) { if old.general.stun_nat_probe_concurrency != new.general.stun_nat_probe_concurrency { warn!("config reload: general.stun_nat_probe_concurrency 
changed; restart required"); } + if old.general.upstream_connect_retry_attempts != new.general.upstream_connect_retry_attempts + || old.general.upstream_connect_retry_backoff_ms + != new.general.upstream_connect_retry_backoff_ms + || old.general.upstream_unhealthy_fail_threshold + != new.general.upstream_unhealthy_fail_threshold + { + warn!("config reload: general.upstream_* changed; restart required"); + } } /// Resolve the public host for link generation — mirrors the logic in main.rs. diff --git a/src/config/load.rs b/src/config/load.rs index 7c578a3..3aafda2 100644 --- a/src/config/load.rs +++ b/src/config/load.rs @@ -237,6 +237,18 @@ impl ProxyConfig { )); } + if config.general.upstream_connect_retry_attempts == 0 { + return Err(ProxyError::Config( + "general.upstream_connect_retry_attempts must be > 0".to_string(), + )); + } + + if config.general.upstream_unhealthy_fail_threshold == 0 { + return Err(ProxyError::Config( + "general.upstream_unhealthy_fail_threshold must be > 0".to_string(), + )); + } + if config.general.me_reinit_every_secs == 0 { return Err(ProxyError::Config( "general.me_reinit_every_secs must be > 0".to_string(), @@ -567,6 +579,18 @@ mod tests { cfg.general.me_reconnect_fast_retry_count, default_me_reconnect_fast_retry_count() ); + assert_eq!( + cfg.general.upstream_connect_retry_attempts, + default_upstream_connect_retry_attempts() + ); + assert_eq!( + cfg.general.upstream_connect_retry_backoff_ms, + default_upstream_connect_retry_backoff_ms() + ); + assert_eq!( + cfg.general.upstream_unhealthy_fail_threshold, + default_upstream_unhealthy_fail_threshold() + ); assert_eq!(cfg.general.update_every, default_update_every()); assert_eq!(cfg.server.listen_addr_ipv4, default_listen_addr_ipv4()); assert_eq!(cfg.server.listen_addr_ipv6, default_listen_addr_ipv6_opt()); @@ -593,6 +617,18 @@ mod tests { general.me_reconnect_fast_retry_count, default_me_reconnect_fast_retry_count() ); + assert_eq!( + general.upstream_connect_retry_attempts, + 
default_upstream_connect_retry_attempts() + ); + assert_eq!( + general.upstream_connect_retry_backoff_ms, + default_upstream_connect_retry_backoff_ms() + ); + assert_eq!( + general.upstream_unhealthy_fail_threshold, + default_upstream_unhealthy_fail_threshold() + ); assert_eq!(general.update_every, default_update_every()); let server = ServerConfig::default(); @@ -765,6 +801,46 @@ mod tests { let _ = std::fs::remove_file(path); } + #[test] + fn upstream_connect_retry_attempts_zero_is_rejected() { + let toml = r#" + [general] + upstream_connect_retry_attempts = 0 + + [censorship] + tls_domain = "example.com" + + [access.users] + user = "00000000000000000000000000000000" + "#; + let dir = std::env::temp_dir(); + let path = dir.join("telemt_upstream_connect_retry_attempts_zero_test.toml"); + std::fs::write(&path, toml).unwrap(); + let err = ProxyConfig::load(&path).unwrap_err().to_string(); + assert!(err.contains("general.upstream_connect_retry_attempts must be > 0")); + let _ = std::fs::remove_file(path); + } + + #[test] + fn upstream_unhealthy_fail_threshold_zero_is_rejected() { + let toml = r#" + [general] + upstream_unhealthy_fail_threshold = 0 + + [censorship] + tls_domain = "example.com" + + [access.users] + user = "00000000000000000000000000000000" + "#; + let dir = std::env::temp_dir(); + let path = dir.join("telemt_upstream_unhealthy_fail_threshold_zero_test.toml"); + std::fs::write(&path, toml).unwrap(); + let err = ProxyConfig::load(&path).unwrap_err().to_string(); + assert!(err.contains("general.upstream_unhealthy_fail_threshold must be > 0")); + let _ = std::fs::remove_file(path); + } + #[test] fn me_hardswap_warmup_defaults_are_set() { let toml = r#" diff --git a/src/config/types.rs b/src/config/types.rs index 902d816..7a3f6e9 100644 --- a/src/config/types.rs +++ b/src/config/types.rs @@ -365,6 +365,18 @@ pub struct GeneralConfig { #[serde(default = "default_me_reconnect_fast_retry_count")] pub me_reconnect_fast_retry_count: u32, + /// Connect attempts 
for the selected upstream before returning error/fallback. + #[serde(default = "default_upstream_connect_retry_attempts")] + pub upstream_connect_retry_attempts: u32, + + /// Delay in milliseconds between upstream connect attempts. + #[serde(default = "default_upstream_connect_retry_backoff_ms")] + pub upstream_connect_retry_backoff_ms: u64, + + /// Consecutive failed requests before upstream is marked unhealthy. + #[serde(default = "default_upstream_unhealthy_fail_threshold")] + pub upstream_unhealthy_fail_threshold: u32, + /// Ignore STUN/interface IP mismatch (keep using Middle Proxy even if NAT detected). #[serde(default)] pub stun_iface_mismatch_ignore: bool, @@ -522,6 +534,9 @@ impl Default for GeneralConfig { me_reconnect_backoff_base_ms: default_reconnect_backoff_base_ms(), me_reconnect_backoff_cap_ms: default_reconnect_backoff_cap_ms(), me_reconnect_fast_retry_count: default_me_reconnect_fast_retry_count(), + upstream_connect_retry_attempts: default_upstream_connect_retry_attempts(), + upstream_connect_retry_backoff_ms: default_upstream_connect_retry_backoff_ms(), + upstream_unhealthy_fail_threshold: default_upstream_unhealthy_fail_threshold(), stun_iface_mismatch_ignore: false, unknown_dc_log_path: default_unknown_dc_log_path(), log_level: LogLevel::Normal, diff --git a/src/main.rs b/src/main.rs index 4d4d3f5..a87dd99 100644 --- a/src/main.rs +++ b/src/main.rs @@ -261,7 +261,12 @@ async fn main() -> std::result::Result<(), Box> { warn!("Using default tls_domain. 
Consider setting a custom domain."); } - let upstream_manager = Arc::new(UpstreamManager::new(config.upstreams.clone())); + let upstream_manager = Arc::new(UpstreamManager::new( + config.upstreams.clone(), + config.general.upstream_connect_retry_attempts, + config.general.upstream_connect_retry_backoff_ms, + config.general.upstream_unhealthy_fail_threshold, + )); let mut tls_domains = Vec::with_capacity(1 + config.censorship.tls_domains.len()); tls_domains.push(config.censorship.tls_domain.clone()); diff --git a/src/transport/upstream.rs b/src/transport/upstream.rs index 1918fdc..8411f5a 100644 --- a/src/transport/upstream.rs +++ b/src/transport/upstream.rs @@ -172,10 +172,18 @@ pub struct UpstreamEgressInfo { #[derive(Clone)] pub struct UpstreamManager { upstreams: Arc>>, + connect_retry_attempts: u32, + connect_retry_backoff: Duration, + unhealthy_fail_threshold: u32, } impl UpstreamManager { - pub fn new(configs: Vec) -> Self { + pub fn new( + configs: Vec, + connect_retry_attempts: u32, + connect_retry_backoff_ms: u64, + unhealthy_fail_threshold: u32, + ) -> Self { let states = configs.into_iter() .filter(|c| c.enabled) .map(UpstreamState::new) @@ -183,6 +191,9 @@ impl UpstreamManager { Self { upstreams: Arc::new(RwLock::new(states)), + connect_retry_attempts: connect_retry_attempts.max(1), + connect_retry_backoff: Duration::from_millis(connect_retry_backoff_ms), + unhealthy_fail_threshold: unhealthy_fail_threshold.max(1), } } @@ -430,43 +441,83 @@ impl UpstreamManager { upstream.selected_scope = s.to_string(); } - let start = Instant::now(); - let bind_rr = { let guard = self.upstreams.read().await; guard.get(idx).map(|u| u.bind_rr.clone()) }; - match self.connect_via_upstream(&upstream, target, bind_rr).await { - Ok((stream, egress)) => { - let rtt_ms = start.elapsed().as_secs_f64() * 1000.0; - let mut guard = self.upstreams.write().await; - if let Some(u) = guard.get_mut(idx) { - if !u.healthy { - debug!(rtt_ms = format!("{:.1}", rtt_ms), "Upstream 
recovered"); - } - u.healthy = true; - u.fails = 0; + let mut last_error: Option = None; + for attempt in 1..=self.connect_retry_attempts { + let start = Instant::now(); + match self + .connect_via_upstream(&upstream, target, bind_rr.clone()) + .await + { + Ok((stream, egress)) => { + let rtt_ms = start.elapsed().as_secs_f64() * 1000.0; + let mut guard = self.upstreams.write().await; + if let Some(u) = guard.get_mut(idx) { + if !u.healthy { + debug!(rtt_ms = format!("{:.1}", rtt_ms), "Upstream recovered"); + } + if attempt > 1 { + debug!( + attempt, + attempts = self.connect_retry_attempts, + rtt_ms = format!("{:.1}", rtt_ms), + "Upstream connect recovered after retry" + ); + } + u.healthy = true; + u.fails = 0; - if let Some(di) = dc_idx.and_then(UpstreamState::dc_array_idx) { - u.dc_latency[di].update(rtt_ms); + if let Some(di) = dc_idx.and_then(UpstreamState::dc_array_idx) { + u.dc_latency[di].update(rtt_ms); + } } + return Ok((stream, egress)); } - Ok((stream, egress)) - }, - Err(e) => { - let mut guard = self.upstreams.write().await; - if let Some(u) = guard.get_mut(idx) { - u.fails += 1; - warn!(fails = u.fails, "Upstream failed: {}", e); - if u.fails > 3 { - u.healthy = false; - warn!("Upstream marked unhealthy"); + Err(e) => { + if attempt < self.connect_retry_attempts { + debug!( + attempt, + attempts = self.connect_retry_attempts, + target = %target, + error = %e, + "Upstream connect attempt failed, retrying" + ); + if !self.connect_retry_backoff.is_zero() { + tokio::time::sleep(self.connect_retry_backoff).await; + } } + last_error = Some(e); } - Err(e) } } + + let error = last_error.unwrap_or_else(|| { + ProxyError::Config("Upstream connect attempts exhausted".to_string()) + }); + + let mut guard = self.upstreams.write().await; + if let Some(u) = guard.get_mut(idx) { + u.fails += 1; + warn!( + fails = u.fails, + attempts = self.connect_retry_attempts, + "Upstream failed after retries: {}", + error + ); + if u.fails >= self.unhealthy_fail_threshold { + 
u.healthy = false; + warn!( + fails = u.fails, + threshold = self.unhealthy_fail_threshold, + "Upstream marked unhealthy" + ); + } + } + Err(error) } async fn connect_via_upstream( @@ -1035,18 +1086,26 @@ impl UpstreamManager { u.fails += 1; debug!(dc = dc_zero_idx + 1, fails = u.fails, "Health check failed (both): {}", e); - if u.fails > 3 { + if u.fails >= self.unhealthy_fail_threshold { u.healthy = false; - warn!("Upstream unhealthy (fails)"); + warn!( + fails = u.fails, + threshold = self.unhealthy_fail_threshold, + "Upstream unhealthy (fails)" + ); } } Err(_) => { u.fails += 1; debug!(dc = dc_zero_idx + 1, fails = u.fails, "Health check timeout (both)"); - if u.fails > 3 { + if u.fails >= self.unhealthy_fail_threshold { u.healthy = false; - warn!("Upstream unhealthy (timeout)"); + warn!( + fails = u.fails, + threshold = self.unhealthy_fail_threshold, + "Upstream unhealthy (timeout)" + ); } } } @@ -1057,9 +1116,13 @@ impl UpstreamManager { let mut guard = self.upstreams.write().await; let u = &mut guard[i]; u.fails += 1; - if u.fails > 3 { + if u.fails >= self.unhealthy_fail_threshold { u.healthy = false; - warn!("Upstream unhealthy (no fallback family)"); + warn!( + fails = u.fails, + threshold = self.unhealthy_fail_threshold, + "Upstream unhealthy (no fallback family)" + ); } u.last_check = std::time::Instant::now(); }