From 9f9ca9f2702f28bb726d081805d0ff249e2b9795 Mon Sep 17 00:00:00 2001 From: astronaut808 <38975427+astronaut808@users.noreply.github.com> Date: Sun, 3 May 2026 18:06:59 +0500 Subject: [PATCH] Add TLS front profile health metrics --- .../TLS_FRONT_PROFILE_FIDELITY.en.md | 49 +++- .../TLS_FRONT_PROFILE_FIDELITY.ru.md | 51 +++- src/maestro/mod.rs | 1 + src/maestro/runtime_tasks.rs | 4 + src/metrics.rs | 255 +++++++++++++++++- src/tls_front/cache.rs | 71 ++++- 6 files changed, 416 insertions(+), 15 deletions(-) diff --git a/docs/Architecture/Fronting-splitting/TLS_FRONT_PROFILE_FIDELITY.en.md b/docs/Architecture/Fronting-splitting/TLS_FRONT_PROFILE_FIDELITY.en.md index 21d6558..4c445ad 100644 --- a/docs/Architecture/Fronting-splitting/TLS_FRONT_PROFILE_FIDELITY.en.md +++ b/docs/Architecture/Fronting-splitting/TLS_FRONT_PROFILE_FIDELITY.en.md @@ -128,7 +128,48 @@ Recommended for cleaner testing: Persisted cache artifacts are useful, but they are not required if packet captures already demonstrate the runtime result. -### 4. Capture a direct-origin trace +### 4. Check TLS-front profile health metrics + +If the metrics endpoint is enabled, check the TLS-front profile health before packet-capture validation: + +```bash +curl -s http://127.0.0.1:9999/metrics | grep -E 'telemt_tls_front_profile|telemt_tls_fetch_profile_cache|telemt_tls_front_full_cert' +``` + +The profile-health metrics expose the runtime state of configured TLS front domains: + +- `telemt_tls_front_profile_domains` shows configured, emitted, and suppressed domain series. +- `telemt_tls_front_profile_info` shows profile source and feature flags per domain. +- `telemt_tls_front_profile_age_seconds` shows cached profile age. +- `telemt_tls_front_profile_app_data_records` shows cached AppData record count. +- `telemt_tls_front_profile_ticket_records` shows cached ticket-like tail record count. +- `telemt_tls_front_profile_change_cipher_spec_records` shows cached ChangeCipherSpec count. +- `telemt_tls_front_profile_app_data_bytes` shows total cached AppData bytes. + +Interpretation: + +- `source="merged"` or `source="raw"` means real TLS profile data is being used. +- `source="default"` or `is_default="true"` means the domain currently uses the synthetic default fallback. +- `has_cert_payload="true"` means certificate payload data is available for TLS emulation. +- Non-zero AppData/ticket/CCS counters show captured server-flight shape. + +Example healthy output: + +```text +telemt_tls_front_profile_domains{status="configured"} 1 +telemt_tls_front_profile_domains{status="emitted"} 1 +telemt_tls_front_profile_domains{status="suppressed"} 0 +telemt_tls_front_profile_info{domain="itunes.apple.com",source="merged",is_default="false",has_cert_info="true",has_cert_payload="true"} 1 +telemt_tls_front_profile_age_seconds{domain="itunes.apple.com"} 20 +telemt_tls_front_profile_app_data_records{domain="itunes.apple.com"} 3 +telemt_tls_front_profile_ticket_records{domain="itunes.apple.com"} 1 +telemt_tls_front_profile_change_cipher_spec_records{domain="itunes.apple.com"} 1 +telemt_tls_front_profile_app_data_bytes{domain="itunes.apple.com"} 5240 +``` + +These metrics do not prove byte-level origin equivalence. They are an operational health signal that the configured domain is backed by real cached profile data instead of default fallback data. + +### 5. Capture a direct-origin trace From a separate client host, connect directly to the origin: @@ -142,7 +183,7 @@ Capture with: sudo tcpdump -i any -w origin-direct.pcap host ORIGIN_IP and port 443 ``` -### 5. Capture a Telemt FakeTLS success-path trace +### 6. Capture a Telemt FakeTLS success-path trace Now connect to Telemt with a real Telegram client through an `ee` proxy link that targets the Telemt instance. @@ -154,7 +195,7 @@ Capture with: sudo tcpdump -i any -w telemt-emulated.pcap host TELEMT_IP and port 443 ``` -### 6. Decode TLS record structure +### 7. Decode TLS record structure Use `tshark` to print record-level structure: @@ -182,7 +223,7 @@ Focus on the server flight after ClientHello: - `20` = ChangeCipherSpec - `23` = ApplicationData -### 7. Build a comparison table +### 8. Build a comparison table A compact table like the following is usually enough: diff --git a/docs/Architecture/Fronting-splitting/TLS_FRONT_PROFILE_FIDELITY.ru.md b/docs/Architecture/Fronting-splitting/TLS_FRONT_PROFILE_FIDELITY.ru.md index be68ad7..b0214fc 100644 --- a/docs/Architecture/Fronting-splitting/TLS_FRONT_PROFILE_FIDELITY.ru.md +++ b/docs/Architecture/Fronting-splitting/TLS_FRONT_PROFILE_FIDELITY.ru.md @@ -126,9 +126,50 @@ openssl s_client -connect ORIGIN_IP:443 -servername YOUR_DOMAIN , shared_state: Arc, ip_tracker: Arc, + tls_cache: Option>, config_rx: watch::Receiver>, ) { // metrics_listen takes precedence; fall back to metrics_port for backward compat. @@ -363,6 +365,7 @@ pub(crate) async fn spawn_metrics_if_configured( let shared_state = shared_state.clone(); let config_rx_metrics = config_rx.clone(); let ip_tracker_metrics = ip_tracker.clone(); + let tls_cache_metrics = tls_cache.clone(); let whitelist = config.server.metrics_whitelist.clone(); let listen_backlog = config.server.listen_backlog; tokio::spawn(async move { @@ -374,6 +377,7 @@ pub(crate) async fn spawn_metrics_if_configured( beobachten, shared_state, ip_tracker_metrics, + tls_cache_metrics, config_rx_metrics, whitelist, ) diff --git a/src/metrics.rs b/src/metrics.rs index 670d0e0..e100d2d 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -18,12 +18,15 @@ use crate::ip_tracker::UserIpTracker; use crate::proxy::shared_state::ProxySharedState; use crate::stats::Stats; use crate::stats::beobachten::BeobachtenStore; +use crate::tls_front::TlsFrontCache; use crate::tls_front::cache; use crate::tls_front::fetcher; use crate::transport::{ListenOptions, create_listener}; // Keeps `/metrics` response size bounded when per-user telemetry is enabled. const USER_LABELED_METRICS_MAX_USERS: usize = 4096; +// Keeps TLS-front per-domain health series bounded for large generated configs. +const TLS_FRONT_PROFILE_HEALTH_MAX_DOMAINS: usize = 256; pub async fn serve( port: u16, @@ -33,6 +36,7 @@ pub async fn serve( beobachten: Arc, shared_state: Arc, ip_tracker: Arc, + tls_cache: Option>, config_rx: tokio::sync::watch::Receiver>, whitelist: Vec, ) { @@ -57,6 +61,7 @@ pub async fn serve( beobachten, shared_state, ip_tracker, + tls_cache, config_rx, whitelist, ) @@ -112,6 +117,7 @@ pub async fn serve( beobachten, shared_state, ip_tracker, + tls_cache, config_rx, whitelist, ) @@ -122,6 +128,7 @@ pub async fn serve( let beobachten_v6 = beobachten.clone(); let shared_state_v6 = shared_state.clone(); let ip_tracker_v6 = ip_tracker.clone(); + let tls_cache_v6 = tls_cache.clone(); let config_rx_v6 = config_rx.clone(); let whitelist_v6 = whitelist.clone(); tokio::spawn(async move { @@ -131,6 +138,7 @@ pub async fn serve( beobachten_v6, shared_state_v6, ip_tracker_v6, + tls_cache_v6, config_rx_v6, whitelist_v6, ) @@ -142,6 +150,7 @@ pub async fn serve( beobachten, shared_state, ip_tracker, + tls_cache, config_rx, whitelist, ) @@ -171,6 +180,7 @@ async fn serve_listener( beobachten: Arc, shared_state: Arc, ip_tracker: Arc, + tls_cache: Option>, config_rx: tokio::sync::watch::Receiver>, whitelist: Arc>, ) { @@ -192,6 +202,7 @@ async fn serve_listener( let beobachten = beobachten.clone(); let shared_state = shared_state.clone(); let ip_tracker = ip_tracker.clone(); + let tls_cache = tls_cache.clone(); let config_rx_conn = config_rx.clone(); tokio::spawn(async move { let svc = service_fn(move |req| { @@ -199,6 +210,7 @@ async fn serve_listener( let beobachten = beobachten.clone(); let shared_state = shared_state.clone(); let ip_tracker = ip_tracker.clone(); + let tls_cache = tls_cache.clone(); let config = config_rx_conn.borrow().clone(); async move { handle( @@ -207,6 +219,7 @@ async fn serve_listener( &beobachten, &shared_state, &ip_tracker, + tls_cache.as_deref(), &config, ) .await @@ -228,10 +241,11 @@ async fn handle( beobachten: &BeobachtenStore, shared_state: &ProxySharedState, ip_tracker: &UserIpTracker, + tls_cache: Option<&TlsFrontCache>, config: &ProxyConfig, ) -> Result>, Infallible> { if req.uri().path() == "/metrics" { - let body = render_metrics(stats, shared_state, config, ip_tracker).await; + let body = render_metrics(stats, shared_state, config, ip_tracker, tls_cache).await; let resp = Response::builder() .status(StatusCode::OK) .header("content-type", "text/plain; version=0.0.4; charset=utf-8") @@ -266,11 +280,151 @@ fn render_beobachten(beobachten: &BeobachtenStore, config: &ProxyConfig) -> Stri beobachten.snapshot_text(ttl) } +fn tls_front_domains(config: &ProxyConfig) -> Vec { + let mut domains = Vec::with_capacity(1 + config.censorship.tls_domains.len()); + if !config.censorship.tls_domain.is_empty() { + domains.push(config.censorship.tls_domain.clone()); + } + for domain in &config.censorship.tls_domains { + if !domain.is_empty() && !domains.contains(domain) { + domains.push(domain.clone()); + } + } + domains +} + +fn prometheus_label_value(value: &str) -> String { + value.replace('\\', "\\\\").replace('"', "\\\"") +} + +async fn render_tls_front_profile_health( + out: &mut String, + config: &ProxyConfig, + tls_cache: Option<&TlsFrontCache>, +) { + use std::fmt::Write; + + let domains = tls_front_domains(config); + let (health, suppressed) = match (config.censorship.tls_emulation, tls_cache) { + (true, Some(cache)) => { + cache + .profile_health_snapshot(&domains, TLS_FRONT_PROFILE_HEALTH_MAX_DOMAINS) + .await + } + _ => (Vec::new(), domains.len()), + }; + + let _ = writeln!( + out, + "# HELP telemt_tls_front_profile_domains TLS front configured profile domains by export status" + ); + let _ = writeln!(out, "# TYPE telemt_tls_front_profile_domains gauge"); + let _ = writeln!( + out, + "telemt_tls_front_profile_domains{{status=\"configured\"}} {}", + domains.len() + ); + let _ = writeln!( + out, + "telemt_tls_front_profile_domains{{status=\"emitted\"}} {}", + health.len() + ); + let _ = writeln!( + out, + "telemt_tls_front_profile_domains{{status=\"suppressed\"}} {}", + suppressed + ); + let _ = writeln!( + out, + "# HELP telemt_tls_front_profile_info TLS front profile source and feature flags per configured domain" + ); + let _ = writeln!(out, "# TYPE telemt_tls_front_profile_info gauge"); + let _ = writeln!( + out, + "# HELP telemt_tls_front_profile_age_seconds Age of cached TLS front profile data per configured domain" + ); + let _ = writeln!( + out, + "# TYPE telemt_tls_front_profile_age_seconds gauge" + ); + let _ = writeln!( + out, + "# HELP telemt_tls_front_profile_app_data_records TLS front cached app-data record count per configured domain" + ); + let _ = writeln!( + out, + "# TYPE telemt_tls_front_profile_app_data_records gauge" + ); + let _ = writeln!( + out, + "# HELP telemt_tls_front_profile_ticket_records TLS front cached ticket-like tail record count per configured domain" + ); + let _ = writeln!( + out, + "# TYPE telemt_tls_front_profile_ticket_records gauge" + ); + let _ = writeln!( + out, + "# HELP telemt_tls_front_profile_change_cipher_spec_records TLS front cached ChangeCipherSpec record count per configured domain" + ); + let _ = writeln!( + out, + "# TYPE telemt_tls_front_profile_change_cipher_spec_records gauge" + ); + let _ = writeln!( + out, + "# HELP telemt_tls_front_profile_app_data_bytes TLS front cached total app-data bytes per configured domain" + ); + let _ = writeln!( + out, + "# TYPE telemt_tls_front_profile_app_data_bytes gauge" + ); + + for item in health { + let domain = prometheus_label_value(&item.domain); + let _ = writeln!( + out, + "telemt_tls_front_profile_info{{domain=\"{}\",source=\"{}\",is_default=\"{}\",has_cert_info=\"{}\",has_cert_payload=\"{}\"}} 1", + domain, + item.source, + item.is_default, + item.has_cert_info, + item.has_cert_payload + ); + let _ = writeln!( + out, + "telemt_tls_front_profile_age_seconds{{domain=\"{}\"}} {}", + domain, item.age_seconds + ); + let _ = writeln!( + out, + "telemt_tls_front_profile_app_data_records{{domain=\"{}\"}} {}", + domain, item.app_data_records + ); + let _ = writeln!( + out, + "telemt_tls_front_profile_ticket_records{{domain=\"{}\"}} {}", + domain, item.ticket_records + ); + let _ = writeln!( + out, + "telemt_tls_front_profile_change_cipher_spec_records{{domain=\"{}\"}} {}", + domain, item.change_cipher_spec_count + ); + let _ = writeln!( + out, + "telemt_tls_front_profile_app_data_bytes{{domain=\"{}\"}} {}", + domain, item.total_app_data_len + ); + } +} + async fn render_metrics( stats: &Stats, shared_state: &ProxySharedState, config: &ProxyConfig, ip_tracker: &UserIpTracker, + tls_cache: Option<&TlsFrontCache>, ) -> String { use std::fmt::Write; let mut out = String::with_capacity(4096); @@ -423,6 +577,7 @@ async fn render_metrics( "telemt_tls_front_full_cert_budget_cap_drops_total {}", cache::full_cert_sent_cap_drops_for_metrics() ); + render_tls_front_profile_health(&mut out, config, tls_cache).await; let _ = writeln!( out, @@ -3361,6 +3516,11 @@ mod tests { use super::*; use http_body_util::BodyExt; use std::net::IpAddr; + use std::time::SystemTime; + + use crate::tls_front::types::{ + CachedTlsData, ParsedServerHello, TlsBehaviorProfile, TlsCertPayload, TlsProfileSource, + }; #[tokio::test] async fn test_render_metrics_format() { @@ -3429,7 +3589,7 @@ mod tests { .await .unwrap(); - let output = render_metrics(&stats, shared_state.as_ref(), &config, &tracker).await; + let output = render_metrics(&stats, shared_state.as_ref(), &config, &tracker, None).await; assert!(output.contains(&format!( "telemt_build_info{{version=\"{}\"}} 1", @@ -3494,13 +3654,86 @@ mod tests { assert!(output.contains("telemt_ip_tracker_cleanup_queue_len 0")); } + #[tokio::test] + async fn test_render_tls_front_profile_health() { + let stats = Stats::new(); + let shared_state = ProxySharedState::new(); + let tracker = UserIpTracker::new(); + let mut config = ProxyConfig::default(); + config.censorship.tls_domain = "primary.example".to_string(); + config.censorship.tls_domains = vec!["fallback.example".to_string()]; + + let cache = TlsFrontCache::new( + &[ + "primary.example".to_string(), + "fallback.example".to_string(), + ], + 1024, + "tlsfront-profile-health-test", + ); + cache + .set( + "primary.example", + CachedTlsData { + server_hello_template: ParsedServerHello { + version: [0x03, 0x03], + random: [0u8; 32], + session_id: Vec::new(), + cipher_suite: [0x13, 0x01], + compression: 0, + extensions: Vec::new(), + }, + cert_info: None, + cert_payload: Some(TlsCertPayload { + cert_chain_der: vec![vec![0x30, 0x01]], + certificate_message: vec![0x0b, 0x00, 0x00, 0x00], + }), + app_data_records_sizes: vec![1024, 512], + total_app_data_len: 1536, + behavior_profile: TlsBehaviorProfile { + change_cipher_spec_count: 1, + app_data_record_sizes: vec![1024, 512], + ticket_record_sizes: vec![69], + source: TlsProfileSource::Merged, + }, + fetched_at: SystemTime::now(), + domain: "primary.example".to_string(), + }, + ) + .await; + + let output = render_metrics(&stats, &shared_state, &config, &tracker, Some(&cache)).await; + + assert!(output.contains("telemt_tls_front_profile_domains{status=\"configured\"} 2")); + assert!(output.contains("telemt_tls_front_profile_domains{status=\"emitted\"} 2")); + assert!(output.contains("telemt_tls_front_profile_domains{status=\"suppressed\"} 0")); + assert!( + output.contains("telemt_tls_front_profile_info{domain=\"primary.example\",source=\"merged\",is_default=\"false\",has_cert_info=\"false\",has_cert_payload=\"true\"} 1") + ); + assert!( + output.contains("telemt_tls_front_profile_info{domain=\"fallback.example\",source=\"default\",is_default=\"true\",has_cert_info=\"false\",has_cert_payload=\"false\"} 1") + ); + assert!( + output.contains("telemt_tls_front_profile_app_data_records{domain=\"primary.example\"} 2") + ); + assert!( + output.contains("telemt_tls_front_profile_ticket_records{domain=\"primary.example\"} 1") + ); + assert!( + output.contains("telemt_tls_front_profile_change_cipher_spec_records{domain=\"primary.example\"} 1") + ); + assert!( + output.contains("telemt_tls_front_profile_app_data_bytes{domain=\"primary.example\"} 1536") + ); + } + #[tokio::test] async fn test_render_empty_stats() { let stats = Stats::new(); let shared_state = ProxySharedState::new(); let tracker = UserIpTracker::new(); let config = ProxyConfig::default(); - let output = render_metrics(&stats, &shared_state, &config, &tracker).await; + let output = render_metrics(&stats, &shared_state, &config, &tracker, None).await; assert!(output.contains("telemt_connections_total 0")); assert!(output.contains("telemt_connections_bad_total 0")); assert!(output.contains("telemt_handshake_timeouts_total 0")); @@ -3524,7 +3757,7 @@ mod tests { let mut config = ProxyConfig::default(); config.access.user_max_unique_ips_global_each = 2; - let output = render_metrics(&stats, &shared_state, &config, &tracker).await; + let output = render_metrics(&stats, &shared_state, &config, &tracker, None).await; assert!(output.contains("telemt_user_unique_ips_limit{user=\"alice\"} 2")); assert!(output.contains("telemt_user_unique_ips_utilization{user=\"alice\"} 0.500000")); @@ -3536,7 +3769,7 @@ mod tests { let shared_state = ProxySharedState::new(); let tracker = UserIpTracker::new(); let config = ProxyConfig::default(); - let output = render_metrics(&stats, &shared_state, &config, &tracker).await; + let output = render_metrics(&stats, &shared_state, &config, &tracker, None).await; assert!(output.contains("# TYPE telemt_uptime_seconds gauge")); assert!(output.contains("# TYPE telemt_connections_total counter")); assert!(output.contains("# TYPE telemt_connections_bad_total counter")); @@ -3585,6 +3818,15 @@ mod tests { assert!( output.contains("# TYPE telemt_tls_front_full_cert_budget_cap_drops_total counter") ); + assert!(output.contains("# TYPE telemt_tls_front_profile_domains gauge")); + assert!(output.contains("# TYPE telemt_tls_front_profile_info gauge")); + assert!(output.contains("# TYPE telemt_tls_front_profile_age_seconds gauge")); + assert!(output.contains("# TYPE telemt_tls_front_profile_app_data_records gauge")); + assert!(output.contains("# TYPE telemt_tls_front_profile_ticket_records gauge")); + assert!( + output.contains("# TYPE telemt_tls_front_profile_change_cipher_spec_records gauge") + ); + assert!(output.contains("# TYPE telemt_tls_front_profile_app_data_bytes gauge")); } #[tokio::test] @@ -3605,6 +3847,7 @@ mod tests { &beobachten, shared_state.as_ref(), &tracker, + None, &config, ) .await @@ -3639,6 +3882,7 @@ mod tests { &beobachten, shared_state.as_ref(), &tracker, + None, &config, ) .await @@ -3656,6 +3900,7 @@ mod tests { &beobachten, shared_state.as_ref(), &tracker, + None, &config, ) .await diff --git a/src/tls_front/cache.rs b/src/tls_front/cache.rs index f18084b..dc3d34e 100644 --- a/src/tls_front/cache.rs +++ b/src/tls_front/cache.rs @@ -12,7 +12,7 @@ use tokio::time::sleep; use tracing::{debug, info, warn}; use crate::tls_front::types::{ - CachedTlsData, ParsedServerHello, TlsBehaviorProfile, TlsFetchResult, + CachedTlsData, ParsedServerHello, TlsBehaviorProfile, TlsFetchResult, TlsProfileSource, }; const FULL_CERT_SENT_SWEEP_INTERVAL_SECS: u64 = 30; @@ -42,6 +42,30 @@ pub struct TlsFrontCache { disk_path: PathBuf, } +/// Read-only health view for one configured TLS front domain. +#[derive(Debug, Clone)] +pub(crate) struct TlsFrontProfileHealth { + pub(crate) domain: String, + pub(crate) source: &'static str, + pub(crate) age_seconds: u64, + pub(crate) is_default: bool, + pub(crate) has_cert_info: bool, + pub(crate) has_cert_payload: bool, + pub(crate) app_data_records: usize, + pub(crate) ticket_records: usize, + pub(crate) change_cipher_spec_count: u8, + pub(crate) total_app_data_len: usize, +} + +fn profile_source_label(source: TlsProfileSource) -> &'static str { + match source { + TlsProfileSource::Default => "default", + TlsProfileSource::Raw => "raw", + TlsProfileSource::Rustls => "rustls", + TlsProfileSource::Merged => "merged", + } +} + #[allow(dead_code)] impl TlsFrontCache { pub fn new(domains: &[String], default_len: usize, disk_path: impl AsRef) -> Self { @@ -93,6 +117,51 @@ impl TlsFrontCache { self.memory.read().await.contains_key(domain) } + pub(crate) async fn profile_health_snapshot( + &self, + domains: &[String], + max_domains: usize, + ) -> (Vec, usize) { + let guard = self.memory.read().await; + let now = SystemTime::now(); + let mut snapshot = Vec::with_capacity(domains.len().min(max_domains)); + let mut suppressed = 0usize; + + for domain in domains { + if snapshot.len() >= max_domains { + suppressed = suppressed.saturating_add(1); + continue; + } + + let cached = guard + .get(domain) + .cloned() + .unwrap_or_else(|| self.default.clone()); + let behavior = &cached.behavior_profile; + let age_seconds = now + .duration_since(cached.fetched_at) + .map(|duration| duration.as_secs()) + .unwrap_or(0); + + snapshot.push(TlsFrontProfileHealth { + domain: domain.clone(), + source: profile_source_label(behavior.source), + age_seconds, + is_default: cached.domain == "default", + has_cert_info: cached.cert_info.is_some(), + has_cert_payload: cached.cert_payload.is_some(), + app_data_records: cached.app_data_records_sizes.len().max( + behavior.app_data_record_sizes.len(), + ), + ticket_records: behavior.ticket_record_sizes.len(), + change_cipher_spec_count: behavior.change_cipher_spec_count, + total_app_data_len: cached.total_app_data_len, + }); + } + + (snapshot, suppressed) + } + fn full_cert_sent_shard_index(client_ip: IpAddr) -> usize { let mut hasher = DefaultHasher::new(); client_ip.hash(&mut hasher);