mirror of
https://github.com/telemt/telemt.git
synced 2026-04-16 18:14:10 +03:00
Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
342b0119dd | ||
|
|
2605929b93 | ||
|
|
36814b6355 | ||
|
|
269ba537ad | ||
|
|
5c0eb6dbe8 | ||
|
|
dd07fa9453 | ||
|
|
bb1a372ac4 | ||
|
|
6661401a34 | ||
|
|
cd65fb432b | ||
|
|
caf0717789 | ||
|
|
4a610d83a3 | ||
|
|
aba4205dcc | ||
|
|
ef9b7b1492 |
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "telemt"
|
name = "telemt"
|
||||||
version = "3.3.26"
|
version = "3.3.28"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
@@ -21,9 +21,7 @@
|
|||||||
|
|
||||||
#### О релизах
|
#### О релизах
|
||||||
|
|
||||||
[3.3.15](https://github.com/telemt/telemt/releases/tag/3.3.15) по итогам работы в продакшене признан одним из самых стабильных и рекомендуется к использованию, когда cutting-edge фичи некритичны!
|
[3.3.27](https://github.com/telemt/telemt/releases/tag/3.3.27) даёт баланс стабильности и передового функционала, а также последние исправления по безопасности и багам
|
||||||
|
|
||||||
[3.3.24](https://github.com/telemt/telemt/releases/tag/3.3.24) даёт баланс стабильности и передового функционала, а также последние исправления по безопасности и багам
|
|
||||||
|
|
||||||
Будем рады вашему фидбеку и предложениям по улучшению — особенно в части **API**, **статистики**, **UX**
|
Будем рады вашему фидбеку и предложениям по улучшению — особенно в части **API**, **статистики**, **UX**
|
||||||
|
|
||||||
@@ -44,9 +42,7 @@
|
|||||||
|
|
||||||
#### About releases
|
#### About releases
|
||||||
|
|
||||||
[3.3.15](https://github.com/telemt/telemt/releases/tag/3.3.15) has proven itself in production as one of the most stable releases and is recommended for use when cutting-edge features are not critical!
|
[3.3.27](https://github.com/telemt/telemt/releases/tag/3.3.27) provides a balance of stability and advanced functionality, as well as the latest security and bug fixes
|
||||||
|
|
||||||
[3.3.24](https://github.com/telemt/telemt/releases/tag/3.3.24) provides a balance of stability and advanced functionality, as well as the latest security and bug fixes
|
|
||||||
|
|
||||||
We are looking forward to your feedback and improvement proposals — especially regarding **API**, **statistics**, **UX**
|
We are looking forward to your feedback and improvement proposals — especially regarding **API**, **statistics**, **UX**
|
||||||
|
|
||||||
|
|||||||
@@ -205,6 +205,16 @@ pub(super) struct ZeroPoolData {
|
|||||||
pub(super) refill_failed_total: u64,
|
pub(super) refill_failed_total: u64,
|
||||||
pub(super) writer_restored_same_endpoint_total: u64,
|
pub(super) writer_restored_same_endpoint_total: u64,
|
||||||
pub(super) writer_restored_fallback_total: u64,
|
pub(super) writer_restored_fallback_total: u64,
|
||||||
|
pub(super) teardown_attempt_total_normal: u64,
|
||||||
|
pub(super) teardown_attempt_total_hard_detach: u64,
|
||||||
|
pub(super) teardown_success_total_normal: u64,
|
||||||
|
pub(super) teardown_success_total_hard_detach: u64,
|
||||||
|
pub(super) teardown_timeout_total: u64,
|
||||||
|
pub(super) teardown_escalation_total: u64,
|
||||||
|
pub(super) teardown_noop_total: u64,
|
||||||
|
pub(super) teardown_cleanup_side_effect_failures_total: u64,
|
||||||
|
pub(super) teardown_duration_count_total: u64,
|
||||||
|
pub(super) teardown_duration_sum_seconds_total: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Clone)]
|
#[derive(Serialize, Clone)]
|
||||||
|
|||||||
@@ -4,6 +4,9 @@ use std::time::{SystemTime, UNIX_EPOCH};
|
|||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
use crate::config::ProxyConfig;
|
use crate::config::ProxyConfig;
|
||||||
|
use crate::stats::{
|
||||||
|
MeWriterCleanupSideEffectStep, MeWriterTeardownMode, MeWriterTeardownReason, Stats,
|
||||||
|
};
|
||||||
|
|
||||||
use super::ApiShared;
|
use super::ApiShared;
|
||||||
|
|
||||||
@@ -98,6 +101,50 @@ pub(super) struct RuntimeMeQualityCountersData {
|
|||||||
pub(super) reconnect_success_total: u64,
|
pub(super) reconnect_success_total: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub(super) struct RuntimeMeQualityTeardownAttemptData {
|
||||||
|
pub(super) reason: &'static str,
|
||||||
|
pub(super) mode: &'static str,
|
||||||
|
pub(super) total: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub(super) struct RuntimeMeQualityTeardownSuccessData {
|
||||||
|
pub(super) mode: &'static str,
|
||||||
|
pub(super) total: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub(super) struct RuntimeMeQualityTeardownSideEffectData {
|
||||||
|
pub(super) step: &'static str,
|
||||||
|
pub(super) total: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub(super) struct RuntimeMeQualityTeardownDurationBucketData {
|
||||||
|
pub(super) le_seconds: &'static str,
|
||||||
|
pub(super) total: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub(super) struct RuntimeMeQualityTeardownDurationData {
|
||||||
|
pub(super) mode: &'static str,
|
||||||
|
pub(super) count: u64,
|
||||||
|
pub(super) sum_seconds: f64,
|
||||||
|
pub(super) buckets: Vec<RuntimeMeQualityTeardownDurationBucketData>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub(super) struct RuntimeMeQualityTeardownData {
|
||||||
|
pub(super) attempts: Vec<RuntimeMeQualityTeardownAttemptData>,
|
||||||
|
pub(super) success: Vec<RuntimeMeQualityTeardownSuccessData>,
|
||||||
|
pub(super) timeout_total: u64,
|
||||||
|
pub(super) escalation_total: u64,
|
||||||
|
pub(super) noop_total: u64,
|
||||||
|
pub(super) cleanup_side_effect_failures: Vec<RuntimeMeQualityTeardownSideEffectData>,
|
||||||
|
pub(super) duration: Vec<RuntimeMeQualityTeardownDurationData>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
pub(super) struct RuntimeMeQualityRouteDropData {
|
pub(super) struct RuntimeMeQualityRouteDropData {
|
||||||
pub(super) no_conn_total: u64,
|
pub(super) no_conn_total: u64,
|
||||||
@@ -107,6 +154,25 @@ pub(super) struct RuntimeMeQualityRouteDropData {
|
|||||||
pub(super) queue_full_high_total: u64,
|
pub(super) queue_full_high_total: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub(super) struct RuntimeMeQualityFamilyStateData {
|
||||||
|
pub(super) family: &'static str,
|
||||||
|
pub(super) state: &'static str,
|
||||||
|
pub(super) state_since_epoch_secs: u64,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub(super) suppressed_until_epoch_secs: Option<u64>,
|
||||||
|
pub(super) fail_streak: u32,
|
||||||
|
pub(super) recover_success_streak: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub(super) struct RuntimeMeQualityDrainGateData {
|
||||||
|
pub(super) route_quorum_ok: bool,
|
||||||
|
pub(super) redundancy_ok: bool,
|
||||||
|
pub(super) block_reason: &'static str,
|
||||||
|
pub(super) updated_at_epoch_secs: u64,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
pub(super) struct RuntimeMeQualityDcRttData {
|
pub(super) struct RuntimeMeQualityDcRttData {
|
||||||
pub(super) dc: i16,
|
pub(super) dc: i16,
|
||||||
@@ -120,7 +186,10 @@ pub(super) struct RuntimeMeQualityDcRttData {
|
|||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
pub(super) struct RuntimeMeQualityPayload {
|
pub(super) struct RuntimeMeQualityPayload {
|
||||||
pub(super) counters: RuntimeMeQualityCountersData,
|
pub(super) counters: RuntimeMeQualityCountersData,
|
||||||
|
pub(super) teardown: RuntimeMeQualityTeardownData,
|
||||||
pub(super) route_drops: RuntimeMeQualityRouteDropData,
|
pub(super) route_drops: RuntimeMeQualityRouteDropData,
|
||||||
|
pub(super) family_states: Vec<RuntimeMeQualityFamilyStateData>,
|
||||||
|
pub(super) drain_gate: RuntimeMeQualityDrainGateData,
|
||||||
pub(super) dc_rtt: Vec<RuntimeMeQualityDcRttData>,
|
pub(super) dc_rtt: Vec<RuntimeMeQualityDcRttData>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -361,6 +430,19 @@ pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> Runtime
|
|||||||
};
|
};
|
||||||
|
|
||||||
let status = pool.api_status_snapshot().await;
|
let status = pool.api_status_snapshot().await;
|
||||||
|
let family_states = pool
|
||||||
|
.api_family_state_snapshot()
|
||||||
|
.into_iter()
|
||||||
|
.map(|entry| RuntimeMeQualityFamilyStateData {
|
||||||
|
family: entry.family,
|
||||||
|
state: entry.state,
|
||||||
|
state_since_epoch_secs: entry.state_since_epoch_secs,
|
||||||
|
suppressed_until_epoch_secs: entry.suppressed_until_epoch_secs,
|
||||||
|
fail_streak: entry.fail_streak,
|
||||||
|
recover_success_streak: entry.recover_success_streak,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
let drain_gate_snapshot = pool.api_drain_gate_snapshot();
|
||||||
RuntimeMeQualityData {
|
RuntimeMeQualityData {
|
||||||
enabled: true,
|
enabled: true,
|
||||||
reason: None,
|
reason: None,
|
||||||
@@ -374,6 +456,7 @@ pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> Runtime
|
|||||||
reconnect_attempt_total: shared.stats.get_me_reconnect_attempts(),
|
reconnect_attempt_total: shared.stats.get_me_reconnect_attempts(),
|
||||||
reconnect_success_total: shared.stats.get_me_reconnect_success(),
|
reconnect_success_total: shared.stats.get_me_reconnect_success(),
|
||||||
},
|
},
|
||||||
|
teardown: build_runtime_me_teardown_data(shared),
|
||||||
route_drops: RuntimeMeQualityRouteDropData {
|
route_drops: RuntimeMeQualityRouteDropData {
|
||||||
no_conn_total: shared.stats.get_me_route_drop_no_conn(),
|
no_conn_total: shared.stats.get_me_route_drop_no_conn(),
|
||||||
channel_closed_total: shared.stats.get_me_route_drop_channel_closed(),
|
channel_closed_total: shared.stats.get_me_route_drop_channel_closed(),
|
||||||
@@ -381,6 +464,13 @@ pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> Runtime
|
|||||||
queue_full_base_total: shared.stats.get_me_route_drop_queue_full_base(),
|
queue_full_base_total: shared.stats.get_me_route_drop_queue_full_base(),
|
||||||
queue_full_high_total: shared.stats.get_me_route_drop_queue_full_high(),
|
queue_full_high_total: shared.stats.get_me_route_drop_queue_full_high(),
|
||||||
},
|
},
|
||||||
|
family_states,
|
||||||
|
drain_gate: RuntimeMeQualityDrainGateData {
|
||||||
|
route_quorum_ok: drain_gate_snapshot.route_quorum_ok,
|
||||||
|
redundancy_ok: drain_gate_snapshot.redundancy_ok,
|
||||||
|
block_reason: drain_gate_snapshot.block_reason,
|
||||||
|
updated_at_epoch_secs: drain_gate_snapshot.updated_at_epoch_secs,
|
||||||
|
},
|
||||||
dc_rtt: status
|
dc_rtt: status
|
||||||
.dcs
|
.dcs
|
||||||
.into_iter()
|
.into_iter()
|
||||||
@@ -397,6 +487,81 @@ pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> Runtime
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_runtime_me_teardown_data(shared: &ApiShared) -> RuntimeMeQualityTeardownData {
|
||||||
|
let attempts = MeWriterTeardownReason::ALL
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.flat_map(|reason| {
|
||||||
|
MeWriterTeardownMode::ALL
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.map(move |mode| RuntimeMeQualityTeardownAttemptData {
|
||||||
|
reason: reason.as_str(),
|
||||||
|
mode: mode.as_str(),
|
||||||
|
total: shared.stats.get_me_writer_teardown_attempt_total(reason, mode),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let success = MeWriterTeardownMode::ALL
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.map(|mode| RuntimeMeQualityTeardownSuccessData {
|
||||||
|
mode: mode.as_str(),
|
||||||
|
total: shared.stats.get_me_writer_teardown_success_total(mode),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let cleanup_side_effect_failures = MeWriterCleanupSideEffectStep::ALL
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.map(|step| RuntimeMeQualityTeardownSideEffectData {
|
||||||
|
step: step.as_str(),
|
||||||
|
total: shared
|
||||||
|
.stats
|
||||||
|
.get_me_writer_cleanup_side_effect_failures_total(step),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let duration = MeWriterTeardownMode::ALL
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.map(|mode| {
|
||||||
|
let count = shared.stats.get_me_writer_teardown_duration_count(mode);
|
||||||
|
let mut buckets: Vec<RuntimeMeQualityTeardownDurationBucketData> = Stats::me_writer_teardown_duration_bucket_labels()
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(bucket_idx, label)| RuntimeMeQualityTeardownDurationBucketData {
|
||||||
|
le_seconds: label,
|
||||||
|
total: shared
|
||||||
|
.stats
|
||||||
|
.get_me_writer_teardown_duration_bucket_total(mode, bucket_idx),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
buckets.push(RuntimeMeQualityTeardownDurationBucketData {
|
||||||
|
le_seconds: "+Inf",
|
||||||
|
total: count,
|
||||||
|
});
|
||||||
|
RuntimeMeQualityTeardownDurationData {
|
||||||
|
mode: mode.as_str(),
|
||||||
|
count,
|
||||||
|
sum_seconds: shared.stats.get_me_writer_teardown_duration_sum_seconds(mode),
|
||||||
|
buckets,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
RuntimeMeQualityTeardownData {
|
||||||
|
attempts,
|
||||||
|
success,
|
||||||
|
timeout_total: shared.stats.get_me_writer_teardown_timeout_total(),
|
||||||
|
escalation_total: shared.stats.get_me_writer_teardown_escalation_total(),
|
||||||
|
noop_total: shared.stats.get_me_writer_teardown_noop_total(),
|
||||||
|
cleanup_side_effect_failures,
|
||||||
|
duration,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(super) async fn build_runtime_upstream_quality_data(
|
pub(super) async fn build_runtime_upstream_quality_data(
|
||||||
shared: &ApiShared,
|
shared: &ApiShared,
|
||||||
) -> RuntimeUpstreamQualityData {
|
) -> RuntimeUpstreamQualityData {
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
use crate::config::ApiConfig;
|
use crate::config::ApiConfig;
|
||||||
use crate::stats::Stats;
|
use crate::stats::{MeWriterTeardownMode, Stats};
|
||||||
use crate::transport::upstream::IpPreference;
|
use crate::transport::upstream::IpPreference;
|
||||||
use crate::transport::UpstreamRouteKind;
|
use crate::transport::UpstreamRouteKind;
|
||||||
|
|
||||||
@@ -106,6 +106,29 @@ pub(super) fn build_zero_all_data(stats: &Stats, configured_users: usize) -> Zer
|
|||||||
refill_failed_total: stats.get_me_refill_failed_total(),
|
refill_failed_total: stats.get_me_refill_failed_total(),
|
||||||
writer_restored_same_endpoint_total: stats.get_me_writer_restored_same_endpoint_total(),
|
writer_restored_same_endpoint_total: stats.get_me_writer_restored_same_endpoint_total(),
|
||||||
writer_restored_fallback_total: stats.get_me_writer_restored_fallback_total(),
|
writer_restored_fallback_total: stats.get_me_writer_restored_fallback_total(),
|
||||||
|
teardown_attempt_total_normal: stats
|
||||||
|
.get_me_writer_teardown_attempt_total_by_mode(MeWriterTeardownMode::Normal),
|
||||||
|
teardown_attempt_total_hard_detach: stats
|
||||||
|
.get_me_writer_teardown_attempt_total_by_mode(MeWriterTeardownMode::HardDetach),
|
||||||
|
teardown_success_total_normal: stats
|
||||||
|
.get_me_writer_teardown_success_total(MeWriterTeardownMode::Normal),
|
||||||
|
teardown_success_total_hard_detach: stats
|
||||||
|
.get_me_writer_teardown_success_total(MeWriterTeardownMode::HardDetach),
|
||||||
|
teardown_timeout_total: stats.get_me_writer_teardown_timeout_total(),
|
||||||
|
teardown_escalation_total: stats.get_me_writer_teardown_escalation_total(),
|
||||||
|
teardown_noop_total: stats.get_me_writer_teardown_noop_total(),
|
||||||
|
teardown_cleanup_side_effect_failures_total: stats
|
||||||
|
.get_me_writer_cleanup_side_effect_failures_total_all(),
|
||||||
|
teardown_duration_count_total: stats
|
||||||
|
.get_me_writer_teardown_duration_count(MeWriterTeardownMode::Normal)
|
||||||
|
.saturating_add(
|
||||||
|
stats.get_me_writer_teardown_duration_count(MeWriterTeardownMode::HardDetach),
|
||||||
|
),
|
||||||
|
teardown_duration_sum_seconds_total: stats
|
||||||
|
.get_me_writer_teardown_duration_sum_seconds(MeWriterTeardownMode::Normal)
|
||||||
|
+ stats.get_me_writer_teardown_duration_sum_seconds(
|
||||||
|
MeWriterTeardownMode::HardDetach,
|
||||||
|
),
|
||||||
},
|
},
|
||||||
desync: ZeroDesyncData {
|
desync: ZeroDesyncData {
|
||||||
secure_padding_invalid_total: stats.get_secure_padding_invalid(),
|
secure_padding_invalid_total: stats.get_secure_padding_invalid(),
|
||||||
|
|||||||
@@ -65,6 +65,10 @@ pub(crate) fn default_tls_domain() -> String {
|
|||||||
"petrovich.ru".to_string()
|
"petrovich.ru".to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn default_tls_fetch_scope() -> String {
|
||||||
|
String::new()
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn default_mask_port() -> u16 {
|
pub(crate) fn default_mask_port() -> u16 {
|
||||||
443
|
443
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -623,6 +623,7 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
|
|||||||
}
|
}
|
||||||
if old.censorship.tls_domain != new.censorship.tls_domain
|
if old.censorship.tls_domain != new.censorship.tls_domain
|
||||||
|| old.censorship.tls_domains != new.censorship.tls_domains
|
|| old.censorship.tls_domains != new.censorship.tls_domains
|
||||||
|
|| old.censorship.tls_fetch_scope != new.censorship.tls_fetch_scope
|
||||||
|| old.censorship.mask != new.censorship.mask
|
|| old.censorship.mask != new.censorship.mask
|
||||||
|| old.censorship.mask_host != new.censorship.mask_host
|
|| old.censorship.mask_host != new.censorship.mask_host
|
||||||
|| old.censorship.mask_port != new.censorship.mask_port
|
|| old.censorship.mask_port != new.censorship.mask_port
|
||||||
|
|||||||
@@ -779,6 +779,9 @@ impl ProxyConfig {
|
|||||||
config.censorship.mask_host = Some(config.censorship.tls_domain.clone());
|
config.censorship.mask_host = Some(config.censorship.tls_domain.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Normalize optional TLS fetch scope: whitespace-only values disable scoped routing.
|
||||||
|
config.censorship.tls_fetch_scope = config.censorship.tls_fetch_scope.trim().to_string();
|
||||||
|
|
||||||
// Merge primary + extra TLS domains, deduplicate (primary always first).
|
// Merge primary + extra TLS domains, deduplicate (primary always first).
|
||||||
if !config.censorship.tls_domains.is_empty() {
|
if !config.censorship.tls_domains.is_empty() {
|
||||||
let mut all = Vec::with_capacity(1 + config.censorship.tls_domains.len());
|
let mut all = Vec::with_capacity(1 + config.censorship.tls_domains.len());
|
||||||
@@ -2097,6 +2100,59 @@ mod tests {
|
|||||||
let _ = std::fs::remove_file(path);
|
let _ = std::fs::remove_file(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tls_fetch_scope_default_is_empty() {
|
||||||
|
let toml = r#"
|
||||||
|
[censorship]
|
||||||
|
tls_domain = "example.com"
|
||||||
|
|
||||||
|
[access.users]
|
||||||
|
user = "00000000000000000000000000000000"
|
||||||
|
"#;
|
||||||
|
let dir = std::env::temp_dir();
|
||||||
|
let path = dir.join("telemt_tls_fetch_scope_default_test.toml");
|
||||||
|
std::fs::write(&path, toml).unwrap();
|
||||||
|
let cfg = ProxyConfig::load(&path).unwrap();
|
||||||
|
assert!(cfg.censorship.tls_fetch_scope.is_empty());
|
||||||
|
let _ = std::fs::remove_file(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tls_fetch_scope_is_trimmed_during_load() {
|
||||||
|
let toml = r#"
|
||||||
|
[censorship]
|
||||||
|
tls_domain = "example.com"
|
||||||
|
tls_fetch_scope = " me "
|
||||||
|
|
||||||
|
[access.users]
|
||||||
|
user = "00000000000000000000000000000000"
|
||||||
|
"#;
|
||||||
|
let dir = std::env::temp_dir();
|
||||||
|
let path = dir.join("telemt_tls_fetch_scope_trim_test.toml");
|
||||||
|
std::fs::write(&path, toml).unwrap();
|
||||||
|
let cfg = ProxyConfig::load(&path).unwrap();
|
||||||
|
assert_eq!(cfg.censorship.tls_fetch_scope, "me");
|
||||||
|
let _ = std::fs::remove_file(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tls_fetch_scope_whitespace_becomes_empty() {
|
||||||
|
let toml = r#"
|
||||||
|
[censorship]
|
||||||
|
tls_domain = "example.com"
|
||||||
|
tls_fetch_scope = " "
|
||||||
|
|
||||||
|
[access.users]
|
||||||
|
user = "00000000000000000000000000000000"
|
||||||
|
"#;
|
||||||
|
let dir = std::env::temp_dir();
|
||||||
|
let path = dir.join("telemt_tls_fetch_scope_blank_test.toml");
|
||||||
|
std::fs::write(&path, toml).unwrap();
|
||||||
|
let cfg = ProxyConfig::load(&path).unwrap();
|
||||||
|
assert!(cfg.censorship.tls_fetch_scope.is_empty());
|
||||||
|
let _ = std::fs::remove_file(path);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn invalid_ad_tag_is_disabled_during_load() {
|
fn invalid_ad_tag_is_disabled_during_load() {
|
||||||
let toml = r#"
|
let toml = r#"
|
||||||
|
|||||||
@@ -1308,6 +1308,11 @@ pub struct AntiCensorshipConfig {
|
|||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub tls_domains: Vec<String>,
|
pub tls_domains: Vec<String>,
|
||||||
|
|
||||||
|
/// Upstream scope used for TLS front metadata fetches.
|
||||||
|
/// Empty value keeps default upstream routing behavior.
|
||||||
|
#[serde(default = "default_tls_fetch_scope")]
|
||||||
|
pub tls_fetch_scope: String,
|
||||||
|
|
||||||
#[serde(default = "default_true")]
|
#[serde(default = "default_true")]
|
||||||
pub mask: bool,
|
pub mask: bool,
|
||||||
|
|
||||||
@@ -1365,6 +1370,7 @@ impl Default for AntiCensorshipConfig {
|
|||||||
Self {
|
Self {
|
||||||
tls_domain: default_tls_domain(),
|
tls_domain: default_tls_domain(),
|
||||||
tls_domains: Vec::new(),
|
tls_domains: Vec::new(),
|
||||||
|
tls_fetch_scope: default_tls_fetch_scope(),
|
||||||
mask: default_true(),
|
mask: default_true(),
|
||||||
mask_host: None,
|
mask_host: None,
|
||||||
mask_port: default_mask_port(),
|
mask_port: default_mask_port(),
|
||||||
|
|||||||
@@ -38,12 +38,15 @@ pub(crate) async fn bootstrap_tls_front(
|
|||||||
.clone()
|
.clone()
|
||||||
.unwrap_or_else(|| config.censorship.tls_domain.clone());
|
.unwrap_or_else(|| config.censorship.tls_domain.clone());
|
||||||
let mask_unix_sock = config.censorship.mask_unix_sock.clone();
|
let mask_unix_sock = config.censorship.mask_unix_sock.clone();
|
||||||
|
let tls_fetch_scope = (!config.censorship.tls_fetch_scope.is_empty())
|
||||||
|
.then(|| config.censorship.tls_fetch_scope.clone());
|
||||||
let fetch_timeout = Duration::from_secs(5);
|
let fetch_timeout = Duration::from_secs(5);
|
||||||
|
|
||||||
let cache_initial = cache.clone();
|
let cache_initial = cache.clone();
|
||||||
let domains_initial = tls_domains.to_vec();
|
let domains_initial = tls_domains.to_vec();
|
||||||
let host_initial = mask_host.clone();
|
let host_initial = mask_host.clone();
|
||||||
let unix_sock_initial = mask_unix_sock.clone();
|
let unix_sock_initial = mask_unix_sock.clone();
|
||||||
|
let scope_initial = tls_fetch_scope.clone();
|
||||||
let upstream_initial = upstream_manager.clone();
|
let upstream_initial = upstream_manager.clone();
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
let mut join = tokio::task::JoinSet::new();
|
let mut join = tokio::task::JoinSet::new();
|
||||||
@@ -51,6 +54,7 @@ pub(crate) async fn bootstrap_tls_front(
|
|||||||
let cache_domain = cache_initial.clone();
|
let cache_domain = cache_initial.clone();
|
||||||
let host_domain = host_initial.clone();
|
let host_domain = host_initial.clone();
|
||||||
let unix_sock_domain = unix_sock_initial.clone();
|
let unix_sock_domain = unix_sock_initial.clone();
|
||||||
|
let scope_domain = scope_initial.clone();
|
||||||
let upstream_domain = upstream_initial.clone();
|
let upstream_domain = upstream_initial.clone();
|
||||||
join.spawn(async move {
|
join.spawn(async move {
|
||||||
match crate::tls_front::fetcher::fetch_real_tls(
|
match crate::tls_front::fetcher::fetch_real_tls(
|
||||||
@@ -59,6 +63,7 @@ pub(crate) async fn bootstrap_tls_front(
|
|||||||
&domain,
|
&domain,
|
||||||
fetch_timeout,
|
fetch_timeout,
|
||||||
Some(upstream_domain),
|
Some(upstream_domain),
|
||||||
|
scope_domain.as_deref(),
|
||||||
proxy_protocol,
|
proxy_protocol,
|
||||||
unix_sock_domain.as_deref(),
|
unix_sock_domain.as_deref(),
|
||||||
)
|
)
|
||||||
@@ -100,6 +105,7 @@ pub(crate) async fn bootstrap_tls_front(
|
|||||||
let domains_refresh = tls_domains.to_vec();
|
let domains_refresh = tls_domains.to_vec();
|
||||||
let host_refresh = mask_host.clone();
|
let host_refresh = mask_host.clone();
|
||||||
let unix_sock_refresh = mask_unix_sock.clone();
|
let unix_sock_refresh = mask_unix_sock.clone();
|
||||||
|
let scope_refresh = tls_fetch_scope.clone();
|
||||||
let upstream_refresh = upstream_manager.clone();
|
let upstream_refresh = upstream_manager.clone();
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
loop {
|
loop {
|
||||||
@@ -112,6 +118,7 @@ pub(crate) async fn bootstrap_tls_front(
|
|||||||
let cache_domain = cache_refresh.clone();
|
let cache_domain = cache_refresh.clone();
|
||||||
let host_domain = host_refresh.clone();
|
let host_domain = host_refresh.clone();
|
||||||
let unix_sock_domain = unix_sock_refresh.clone();
|
let unix_sock_domain = unix_sock_refresh.clone();
|
||||||
|
let scope_domain = scope_refresh.clone();
|
||||||
let upstream_domain = upstream_refresh.clone();
|
let upstream_domain = upstream_refresh.clone();
|
||||||
join.spawn(async move {
|
join.spawn(async move {
|
||||||
match crate::tls_front::fetcher::fetch_real_tls(
|
match crate::tls_front::fetcher::fetch_real_tls(
|
||||||
@@ -120,6 +127,7 @@ pub(crate) async fn bootstrap_tls_front(
|
|||||||
&domain,
|
&domain,
|
||||||
fetch_timeout,
|
fetch_timeout,
|
||||||
Some(upstream_domain),
|
Some(upstream_domain),
|
||||||
|
scope_domain.as_deref(),
|
||||||
proxy_protocol,
|
proxy_protocol,
|
||||||
unix_sock_domain.as_deref(),
|
unix_sock_domain.as_deref(),
|
||||||
)
|
)
|
||||||
|
|||||||
178
src/metrics.rs
178
src/metrics.rs
@@ -16,7 +16,9 @@ use tracing::{info, warn, debug};
|
|||||||
use crate::config::ProxyConfig;
|
use crate::config::ProxyConfig;
|
||||||
use crate::ip_tracker::UserIpTracker;
|
use crate::ip_tracker::UserIpTracker;
|
||||||
use crate::stats::beobachten::BeobachtenStore;
|
use crate::stats::beobachten::BeobachtenStore;
|
||||||
use crate::stats::Stats;
|
use crate::stats::{
|
||||||
|
MeWriterCleanupSideEffectStep, MeWriterTeardownMode, MeWriterTeardownReason, Stats,
|
||||||
|
};
|
||||||
use crate::transport::{ListenOptions, create_listener};
|
use crate::transport::{ListenOptions, create_listener};
|
||||||
|
|
||||||
pub async fn serve(
|
pub async fn serve(
|
||||||
@@ -1770,6 +1772,169 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"# HELP telemt_me_writer_teardown_attempt_total ME writer teardown attempts by reason and mode"
|
||||||
|
);
|
||||||
|
let _ = writeln!(out, "# TYPE telemt_me_writer_teardown_attempt_total counter");
|
||||||
|
for reason in MeWriterTeardownReason::ALL {
|
||||||
|
for mode in MeWriterTeardownMode::ALL {
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"telemt_me_writer_teardown_attempt_total{{reason=\"{}\",mode=\"{}\"}} {}",
|
||||||
|
reason.as_str(),
|
||||||
|
mode.as_str(),
|
||||||
|
if me_allows_normal {
|
||||||
|
stats.get_me_writer_teardown_attempt_total(reason, mode)
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"# HELP telemt_me_writer_teardown_success_total ME writer teardown successes by mode"
|
||||||
|
);
|
||||||
|
let _ = writeln!(out, "# TYPE telemt_me_writer_teardown_success_total counter");
|
||||||
|
for mode in MeWriterTeardownMode::ALL {
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"telemt_me_writer_teardown_success_total{{mode=\"{}\"}} {}",
|
||||||
|
mode.as_str(),
|
||||||
|
if me_allows_normal {
|
||||||
|
stats.get_me_writer_teardown_success_total(mode)
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"# HELP telemt_me_writer_teardown_timeout_total Teardown operations that timed out"
|
||||||
|
);
|
||||||
|
let _ = writeln!(out, "# TYPE telemt_me_writer_teardown_timeout_total counter");
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"telemt_me_writer_teardown_timeout_total {}",
|
||||||
|
if me_allows_normal {
|
||||||
|
stats.get_me_writer_teardown_timeout_total()
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"# HELP telemt_me_writer_teardown_escalation_total Watchdog teardown escalations to hard detach"
|
||||||
|
);
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"# TYPE telemt_me_writer_teardown_escalation_total counter"
|
||||||
|
);
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"telemt_me_writer_teardown_escalation_total {}",
|
||||||
|
if me_allows_normal {
|
||||||
|
stats.get_me_writer_teardown_escalation_total()
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"# HELP telemt_me_writer_teardown_noop_total Teardown operations that became no-op"
|
||||||
|
);
|
||||||
|
let _ = writeln!(out, "# TYPE telemt_me_writer_teardown_noop_total counter");
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"telemt_me_writer_teardown_noop_total {}",
|
||||||
|
if me_allows_normal {
|
||||||
|
stats.get_me_writer_teardown_noop_total()
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"# HELP telemt_me_writer_teardown_duration_seconds ME writer teardown latency histogram by mode"
|
||||||
|
);
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"# TYPE telemt_me_writer_teardown_duration_seconds histogram"
|
||||||
|
);
|
||||||
|
let bucket_labels = Stats::me_writer_teardown_duration_bucket_labels();
|
||||||
|
for mode in MeWriterTeardownMode::ALL {
|
||||||
|
for (bucket_idx, label) in bucket_labels.iter().enumerate() {
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"telemt_me_writer_teardown_duration_seconds_bucket{{mode=\"{}\",le=\"{}\"}} {}",
|
||||||
|
mode.as_str(),
|
||||||
|
label,
|
||||||
|
if me_allows_normal {
|
||||||
|
stats.get_me_writer_teardown_duration_bucket_total(mode, bucket_idx)
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"telemt_me_writer_teardown_duration_seconds_bucket{{mode=\"{}\",le=\"+Inf\"}} {}",
|
||||||
|
mode.as_str(),
|
||||||
|
if me_allows_normal {
|
||||||
|
stats.get_me_writer_teardown_duration_count(mode)
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
}
|
||||||
|
);
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"telemt_me_writer_teardown_duration_seconds_sum{{mode=\"{}\"}} {:.6}",
|
||||||
|
mode.as_str(),
|
||||||
|
if me_allows_normal {
|
||||||
|
stats.get_me_writer_teardown_duration_sum_seconds(mode)
|
||||||
|
} else {
|
||||||
|
0.0
|
||||||
|
}
|
||||||
|
);
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"telemt_me_writer_teardown_duration_seconds_count{{mode=\"{}\"}} {}",
|
||||||
|
mode.as_str(),
|
||||||
|
if me_allows_normal {
|
||||||
|
stats.get_me_writer_teardown_duration_count(mode)
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"# HELP telemt_me_writer_cleanup_side_effect_failures_total Failed cleanup side effects by step"
|
||||||
|
);
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"# TYPE telemt_me_writer_cleanup_side_effect_failures_total counter"
|
||||||
|
);
|
||||||
|
for step in MeWriterCleanupSideEffectStep::ALL {
|
||||||
|
let _ = writeln!(
|
||||||
|
out,
|
||||||
|
"telemt_me_writer_cleanup_side_effect_failures_total{{step=\"{}\"}} {}",
|
||||||
|
step.as_str(),
|
||||||
|
if me_allows_normal {
|
||||||
|
stats.get_me_writer_cleanup_side_effect_failures_total(step)
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
let _ = writeln!(out, "# HELP telemt_me_refill_triggered_total Immediate ME refill runs started");
|
let _ = writeln!(out, "# HELP telemt_me_refill_triggered_total Immediate ME refill runs started");
|
||||||
let _ = writeln!(out, "# TYPE telemt_me_refill_triggered_total counter");
|
let _ = writeln!(out, "# TYPE telemt_me_refill_triggered_total counter");
|
||||||
let _ = writeln!(
|
let _ = writeln!(
|
||||||
@@ -2175,6 +2340,17 @@ mod tests {
|
|||||||
assert!(output.contains("# TYPE telemt_me_rpc_proxy_req_signal_sent_total counter"));
|
assert!(output.contains("# TYPE telemt_me_rpc_proxy_req_signal_sent_total counter"));
|
||||||
assert!(output.contains("# TYPE telemt_me_idle_close_by_peer_total counter"));
|
assert!(output.contains("# TYPE telemt_me_idle_close_by_peer_total counter"));
|
||||||
assert!(output.contains("# TYPE telemt_me_writer_removed_total counter"));
|
assert!(output.contains("# TYPE telemt_me_writer_removed_total counter"));
|
||||||
|
assert!(output.contains("# TYPE telemt_me_writer_teardown_attempt_total counter"));
|
||||||
|
assert!(output.contains("# TYPE telemt_me_writer_teardown_success_total counter"));
|
||||||
|
assert!(output.contains("# TYPE telemt_me_writer_teardown_timeout_total counter"));
|
||||||
|
assert!(output.contains("# TYPE telemt_me_writer_teardown_escalation_total counter"));
|
||||||
|
assert!(output.contains("# TYPE telemt_me_writer_teardown_noop_total counter"));
|
||||||
|
assert!(output.contains(
|
||||||
|
"# TYPE telemt_me_writer_teardown_duration_seconds histogram"
|
||||||
|
));
|
||||||
|
assert!(output.contains(
|
||||||
|
"# TYPE telemt_me_writer_cleanup_side_effect_failures_total counter"
|
||||||
|
));
|
||||||
assert!(output.contains("# TYPE telemt_me_writer_close_signal_drop_total counter"));
|
assert!(output.contains("# TYPE telemt_me_writer_close_signal_drop_total counter"));
|
||||||
assert!(output.contains(
|
assert!(output.contains(
|
||||||
"# TYPE telemt_me_writer_close_signal_channel_full_total counter"
|
"# TYPE telemt_me_writer_close_signal_channel_full_total counter"
|
||||||
|
|||||||
357
src/stats/mod.rs
357
src/stats/mod.rs
@@ -19,6 +19,137 @@ use tracing::debug;
|
|||||||
use crate::config::{MeTelemetryLevel, MeWriterPickMode};
|
use crate::config::{MeTelemetryLevel, MeWriterPickMode};
|
||||||
use self::telemetry::TelemetryPolicy;
|
use self::telemetry::TelemetryPolicy;
|
||||||
|
|
||||||
|
/// Number of `MeWriterTeardownMode` variants; sizes the per-mode counter arrays.
const ME_WRITER_TEARDOWN_MODE_COUNT: usize = 2;
/// Number of `MeWriterTeardownReason` variants; sizes the per-reason counter arrays.
const ME_WRITER_TEARDOWN_REASON_COUNT: usize = 11;
/// Number of `MeWriterCleanupSideEffectStep` variants.
const ME_WRITER_CLEANUP_SIDE_EFFECT_STEP_COUNT: usize = 2;
/// Number of finite histogram buckets for teardown latency.
/// Observations above the last bound go into one extra overflow slot.
const ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT: usize = 12;

/// Inclusive upper bounds of the finite teardown-latency buckets, in microseconds.
/// Must stay index-aligned with `ME_WRITER_TEARDOWN_DURATION_BUCKET_LABELS`.
const ME_WRITER_TEARDOWN_DURATION_BUCKET_BOUNDS_MICROS: [u64; ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT] = [
    1_000, 5_000, 10_000, 25_000,
    50_000, 100_000, 250_000, 500_000,
    1_000_000, 2_500_000, 5_000_000, 10_000_000,
];

/// `le` label values (in seconds) for the finite buckets, one per entry of
/// `ME_WRITER_TEARDOWN_DURATION_BUCKET_BOUNDS_MICROS`.
const ME_WRITER_TEARDOWN_DURATION_BUCKET_LABELS: [&str; ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT] = [
    "0.001", "0.005", "0.01", "0.025",
    "0.05", "0.1", "0.25", "0.5",
    "1", "2.5", "5", "10",
];
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||||
|
#[repr(u8)]
|
||||||
|
pub enum MeWriterTeardownMode {
|
||||||
|
Normal = 0,
|
||||||
|
HardDetach = 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MeWriterTeardownMode {
|
||||||
|
pub const ALL: [Self; ME_WRITER_TEARDOWN_MODE_COUNT] =
|
||||||
|
[Self::Normal, Self::HardDetach];
|
||||||
|
|
||||||
|
pub const fn as_str(self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::Normal => "normal",
|
||||||
|
Self::HardDetach => "hard_detach",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const fn idx(self) -> usize {
|
||||||
|
self as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||||
|
#[repr(u8)]
|
||||||
|
pub enum MeWriterTeardownReason {
|
||||||
|
ReaderExit = 0,
|
||||||
|
WriterTaskExit = 1,
|
||||||
|
PingSendFail = 2,
|
||||||
|
SignalSendFail = 3,
|
||||||
|
RouteChannelClosed = 4,
|
||||||
|
CloseRpcChannelClosed = 5,
|
||||||
|
PruneClosedWriter = 6,
|
||||||
|
ReapTimeoutExpired = 7,
|
||||||
|
ReapThresholdForce = 8,
|
||||||
|
ReapEmpty = 9,
|
||||||
|
WatchdogStuckDraining = 10,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MeWriterTeardownReason {
|
||||||
|
pub const ALL: [Self; ME_WRITER_TEARDOWN_REASON_COUNT] = [
|
||||||
|
Self::ReaderExit,
|
||||||
|
Self::WriterTaskExit,
|
||||||
|
Self::PingSendFail,
|
||||||
|
Self::SignalSendFail,
|
||||||
|
Self::RouteChannelClosed,
|
||||||
|
Self::CloseRpcChannelClosed,
|
||||||
|
Self::PruneClosedWriter,
|
||||||
|
Self::ReapTimeoutExpired,
|
||||||
|
Self::ReapThresholdForce,
|
||||||
|
Self::ReapEmpty,
|
||||||
|
Self::WatchdogStuckDraining,
|
||||||
|
];
|
||||||
|
|
||||||
|
pub const fn as_str(self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::ReaderExit => "reader_exit",
|
||||||
|
Self::WriterTaskExit => "writer_task_exit",
|
||||||
|
Self::PingSendFail => "ping_send_fail",
|
||||||
|
Self::SignalSendFail => "signal_send_fail",
|
||||||
|
Self::RouteChannelClosed => "route_channel_closed",
|
||||||
|
Self::CloseRpcChannelClosed => "close_rpc_channel_closed",
|
||||||
|
Self::PruneClosedWriter => "prune_closed_writer",
|
||||||
|
Self::ReapTimeoutExpired => "reap_timeout_expired",
|
||||||
|
Self::ReapThresholdForce => "reap_threshold_force",
|
||||||
|
Self::ReapEmpty => "reap_empty",
|
||||||
|
Self::WatchdogStuckDraining => "watchdog_stuck_draining",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const fn idx(self) -> usize {
|
||||||
|
self as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||||
|
#[repr(u8)]
|
||||||
|
pub enum MeWriterCleanupSideEffectStep {
|
||||||
|
CloseSignalChannelFull = 0,
|
||||||
|
CloseSignalChannelClosed = 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MeWriterCleanupSideEffectStep {
|
||||||
|
pub const ALL: [Self; ME_WRITER_CLEANUP_SIDE_EFFECT_STEP_COUNT] =
|
||||||
|
[Self::CloseSignalChannelFull, Self::CloseSignalChannelClosed];
|
||||||
|
|
||||||
|
pub const fn as_str(self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::CloseSignalChannelFull => "close_signal_channel_full",
|
||||||
|
Self::CloseSignalChannelClosed => "close_signal_channel_closed",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const fn idx(self) -> usize {
|
||||||
|
self as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ============= Stats =============
|
// ============= Stats =============
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
@@ -128,6 +259,18 @@ pub struct Stats {
|
|||||||
me_draining_writers_reap_progress_total: AtomicU64,
|
me_draining_writers_reap_progress_total: AtomicU64,
|
||||||
me_writer_removed_total: AtomicU64,
|
me_writer_removed_total: AtomicU64,
|
||||||
me_writer_removed_unexpected_total: AtomicU64,
|
me_writer_removed_unexpected_total: AtomicU64,
|
||||||
|
me_writer_teardown_attempt_total:
|
||||||
|
[[AtomicU64; ME_WRITER_TEARDOWN_MODE_COUNT]; ME_WRITER_TEARDOWN_REASON_COUNT],
|
||||||
|
me_writer_teardown_success_total: [AtomicU64; ME_WRITER_TEARDOWN_MODE_COUNT],
|
||||||
|
me_writer_teardown_timeout_total: AtomicU64,
|
||||||
|
me_writer_teardown_escalation_total: AtomicU64,
|
||||||
|
me_writer_teardown_noop_total: AtomicU64,
|
||||||
|
me_writer_cleanup_side_effect_failures_total:
|
||||||
|
[AtomicU64; ME_WRITER_CLEANUP_SIDE_EFFECT_STEP_COUNT],
|
||||||
|
me_writer_teardown_duration_bucket_hits:
|
||||||
|
[[AtomicU64; ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT + 1]; ME_WRITER_TEARDOWN_MODE_COUNT],
|
||||||
|
me_writer_teardown_duration_sum_micros: [AtomicU64; ME_WRITER_TEARDOWN_MODE_COUNT],
|
||||||
|
me_writer_teardown_duration_count: [AtomicU64; ME_WRITER_TEARDOWN_MODE_COUNT],
|
||||||
me_refill_triggered_total: AtomicU64,
|
me_refill_triggered_total: AtomicU64,
|
||||||
me_refill_skipped_inflight_total: AtomicU64,
|
me_refill_skipped_inflight_total: AtomicU64,
|
||||||
me_refill_failed_total: AtomicU64,
|
me_refill_failed_total: AtomicU64,
|
||||||
@@ -765,6 +908,74 @@ impl Stats {
|
|||||||
self.me_writer_removed_unexpected_total.fetch_add(1, Ordering::Relaxed);
|
self.me_writer_removed_unexpected_total.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pub fn increment_me_writer_teardown_attempt_total(
|
||||||
|
&self,
|
||||||
|
reason: MeWriterTeardownReason,
|
||||||
|
mode: MeWriterTeardownMode,
|
||||||
|
) {
|
||||||
|
if self.telemetry_me_allows_normal() {
|
||||||
|
self.me_writer_teardown_attempt_total[reason.idx()][mode.idx()]
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn increment_me_writer_teardown_success_total(&self, mode: MeWriterTeardownMode) {
|
||||||
|
if self.telemetry_me_allows_normal() {
|
||||||
|
self.me_writer_teardown_success_total[mode.idx()].fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn increment_me_writer_teardown_timeout_total(&self) {
|
||||||
|
if self.telemetry_me_allows_normal() {
|
||||||
|
self.me_writer_teardown_timeout_total
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn increment_me_writer_teardown_escalation_total(&self) {
|
||||||
|
if self.telemetry_me_allows_normal() {
|
||||||
|
self.me_writer_teardown_escalation_total
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn increment_me_writer_teardown_noop_total(&self) {
|
||||||
|
if self.telemetry_me_allows_normal() {
|
||||||
|
self.me_writer_teardown_noop_total
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn increment_me_writer_cleanup_side_effect_failures_total(
|
||||||
|
&self,
|
||||||
|
step: MeWriterCleanupSideEffectStep,
|
||||||
|
) {
|
||||||
|
if self.telemetry_me_allows_normal() {
|
||||||
|
self.me_writer_cleanup_side_effect_failures_total[step.idx()]
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn observe_me_writer_teardown_duration(
|
||||||
|
&self,
|
||||||
|
mode: MeWriterTeardownMode,
|
||||||
|
duration: Duration,
|
||||||
|
) {
|
||||||
|
if !self.telemetry_me_allows_normal() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let duration_micros = duration.as_micros().min(u64::MAX as u128) as u64;
|
||||||
|
let mut bucket_idx = ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT;
|
||||||
|
for (idx, upper_bound_micros) in ME_WRITER_TEARDOWN_DURATION_BUCKET_BOUNDS_MICROS
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.enumerate()
|
||||||
|
{
|
||||||
|
if duration_micros <= upper_bound_micros {
|
||||||
|
bucket_idx = idx;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.me_writer_teardown_duration_bucket_hits[mode.idx()][bucket_idx]
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
self.me_writer_teardown_duration_sum_micros[mode.idx()]
|
||||||
|
.fetch_add(duration_micros, Ordering::Relaxed);
|
||||||
|
self.me_writer_teardown_duration_count[mode.idx()].fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
pub fn increment_me_refill_triggered_total(&self) {
|
pub fn increment_me_refill_triggered_total(&self) {
|
||||||
if self.telemetry_me_allows_debug() {
|
if self.telemetry_me_allows_debug() {
|
||||||
self.me_refill_triggered_total.fetch_add(1, Ordering::Relaxed);
|
self.me_refill_triggered_total.fetch_add(1, Ordering::Relaxed);
|
||||||
@@ -1297,6 +1508,79 @@ impl Stats {
|
|||||||
pub fn get_me_writer_removed_unexpected_total(&self) -> u64 {
|
pub fn get_me_writer_removed_unexpected_total(&self) -> u64 {
|
||||||
self.me_writer_removed_unexpected_total.load(Ordering::Relaxed)
|
self.me_writer_removed_unexpected_total.load(Ordering::Relaxed)
|
||||||
}
|
}
|
||||||
|
pub fn get_me_writer_teardown_attempt_total(
|
||||||
|
&self,
|
||||||
|
reason: MeWriterTeardownReason,
|
||||||
|
mode: MeWriterTeardownMode,
|
||||||
|
) -> u64 {
|
||||||
|
self.me_writer_teardown_attempt_total[reason.idx()][mode.idx()]
|
||||||
|
.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
pub fn get_me_writer_teardown_attempt_total_by_mode(&self, mode: MeWriterTeardownMode) -> u64 {
|
||||||
|
MeWriterTeardownReason::ALL
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.map(|reason| self.get_me_writer_teardown_attempt_total(reason, mode))
|
||||||
|
.sum()
|
||||||
|
}
|
||||||
|
pub fn get_me_writer_teardown_success_total(&self, mode: MeWriterTeardownMode) -> u64 {
|
||||||
|
self.me_writer_teardown_success_total[mode.idx()].load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
pub fn get_me_writer_teardown_timeout_total(&self) -> u64 {
|
||||||
|
self.me_writer_teardown_timeout_total.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
pub fn get_me_writer_teardown_escalation_total(&self) -> u64 {
|
||||||
|
self.me_writer_teardown_escalation_total
|
||||||
|
.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
pub fn get_me_writer_teardown_noop_total(&self) -> u64 {
|
||||||
|
self.me_writer_teardown_noop_total.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
pub fn get_me_writer_cleanup_side_effect_failures_total(
|
||||||
|
&self,
|
||||||
|
step: MeWriterCleanupSideEffectStep,
|
||||||
|
) -> u64 {
|
||||||
|
self.me_writer_cleanup_side_effect_failures_total[step.idx()]
|
||||||
|
.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
pub fn get_me_writer_cleanup_side_effect_failures_total_all(&self) -> u64 {
|
||||||
|
MeWriterCleanupSideEffectStep::ALL
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.map(|step| self.get_me_writer_cleanup_side_effect_failures_total(step))
|
||||||
|
.sum()
|
||||||
|
}
|
||||||
|
pub fn me_writer_teardown_duration_bucket_labels(
|
||||||
|
) -> &'static [&'static str; ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT] {
|
||||||
|
&ME_WRITER_TEARDOWN_DURATION_BUCKET_LABELS
|
||||||
|
}
|
||||||
|
pub fn get_me_writer_teardown_duration_bucket_hits(
|
||||||
|
&self,
|
||||||
|
mode: MeWriterTeardownMode,
|
||||||
|
bucket_idx: usize,
|
||||||
|
) -> u64 {
|
||||||
|
self.me_writer_teardown_duration_bucket_hits[mode.idx()][bucket_idx]
|
||||||
|
.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
pub fn get_me_writer_teardown_duration_bucket_total(
|
||||||
|
&self,
|
||||||
|
mode: MeWriterTeardownMode,
|
||||||
|
bucket_idx: usize,
|
||||||
|
) -> u64 {
|
||||||
|
let capped_idx = bucket_idx.min(ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT);
|
||||||
|
let mut total = 0u64;
|
||||||
|
for idx in 0..=capped_idx {
|
||||||
|
total = total.saturating_add(self.get_me_writer_teardown_duration_bucket_hits(mode, idx));
|
||||||
|
}
|
||||||
|
total
|
||||||
|
}
|
||||||
|
pub fn get_me_writer_teardown_duration_count(&self, mode: MeWriterTeardownMode) -> u64 {
|
||||||
|
self.me_writer_teardown_duration_count[mode.idx()].load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
pub fn get_me_writer_teardown_duration_sum_seconds(&self, mode: MeWriterTeardownMode) -> f64 {
|
||||||
|
self.me_writer_teardown_duration_sum_micros[mode.idx()].load(Ordering::Relaxed) as f64
|
||||||
|
/ 1_000_000.0
|
||||||
|
}
|
||||||
pub fn get_me_refill_triggered_total(&self) -> u64 {
|
pub fn get_me_refill_triggered_total(&self) -> u64 {
|
||||||
self.me_refill_triggered_total.load(Ordering::Relaxed)
|
self.me_refill_triggered_total.load(Ordering::Relaxed)
|
||||||
}
|
}
|
||||||
@@ -1800,6 +2084,79 @@ mod tests {
|
|||||||
assert_eq!(stats.get_me_keepalive_sent(), 0);
|
assert_eq!(stats.get_me_keepalive_sent(), 0);
|
||||||
assert_eq!(stats.get_me_route_drop_queue_full(), 0);
|
assert_eq!(stats.get_me_route_drop_queue_full(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// With default telemetry, teardown counters and the latency histogram
/// record what is fed into them.
#[test]
fn test_teardown_counters_and_duration() {
    let reason = MeWriterTeardownReason::ReaderExit;
    let mode = MeWriterTeardownMode::Normal;
    let step = MeWriterCleanupSideEffectStep::CloseSignalChannelFull;

    let stats = Stats::new();
    stats.increment_me_writer_teardown_attempt_total(reason, mode);
    stats.increment_me_writer_teardown_success_total(mode);
    stats.observe_me_writer_teardown_duration(mode, Duration::from_millis(3));
    stats.increment_me_writer_cleanup_side_effect_failures_total(step);

    assert_eq!(stats.get_me_writer_teardown_attempt_total(reason, mode), 1);
    assert_eq!(stats.get_me_writer_teardown_success_total(mode), 1);
    assert_eq!(stats.get_me_writer_teardown_duration_count(mode), 1);
    assert!(stats.get_me_writer_teardown_duration_sum_seconds(mode) > 0.0);
    assert_eq!(stats.get_me_writer_cleanup_side_effect_failures_total(step), 1);
}
|
||||||
|
|
||||||
|
/// With the ME telemetry level set to `Silent`, teardown counters and the
/// latency histogram must stay at zero.
#[test]
fn test_teardown_counters_respect_me_silent() {
    let reason = MeWriterTeardownReason::ReaderExit;
    let mode = MeWriterTeardownMode::Normal;

    let stats = Stats::new();
    let silent_policy = TelemetryPolicy {
        core_enabled: true,
        user_enabled: true,
        me_level: MeTelemetryLevel::Silent,
    };
    stats.apply_telemetry_policy(silent_policy);

    stats.increment_me_writer_teardown_attempt_total(reason, mode);
    stats.increment_me_writer_teardown_timeout_total();
    stats.observe_me_writer_teardown_duration(mode, Duration::from_millis(1));

    assert_eq!(stats.get_me_writer_teardown_attempt_total(reason, mode), 0);
    assert_eq!(stats.get_me_writer_teardown_timeout_total(), 0);
    assert_eq!(stats.get_me_writer_teardown_duration_count(mode), 0);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_replay_checker_basic() {
|
fn test_replay_checker_basic() {
|
||||||
|
|||||||
@@ -394,15 +394,17 @@ async fn connect_tcp_with_upstream(
|
|||||||
port: u16,
|
port: u16,
|
||||||
connect_timeout: Duration,
|
connect_timeout: Duration,
|
||||||
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
|
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
|
||||||
|
scope: Option<&str>,
|
||||||
) -> Result<TcpStream> {
|
) -> Result<TcpStream> {
|
||||||
if let Some(manager) = upstream {
|
if let Some(manager) = upstream {
|
||||||
if let Some(addr) = resolve_socket_addr(host, port) {
|
if let Some(addr) = resolve_socket_addr(host, port) {
|
||||||
match manager.connect(addr, None, None).await {
|
match manager.connect(addr, None, scope).await {
|
||||||
Ok(stream) => return Ok(stream),
|
Ok(stream) => return Ok(stream),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!(
|
warn!(
|
||||||
host = %host,
|
host = %host,
|
||||||
port = port,
|
port = port,
|
||||||
|
scope = ?scope,
|
||||||
error = %e,
|
error = %e,
|
||||||
"Upstream connect failed, using direct connect"
|
"Upstream connect failed, using direct connect"
|
||||||
);
|
);
|
||||||
@@ -410,12 +412,13 @@ async fn connect_tcp_with_upstream(
|
|||||||
}
|
}
|
||||||
} else if let Ok(mut addrs) = tokio::net::lookup_host((host, port)).await {
|
} else if let Ok(mut addrs) = tokio::net::lookup_host((host, port)).await {
|
||||||
if let Some(addr) = addrs.find(|a| a.is_ipv4()) {
|
if let Some(addr) = addrs.find(|a| a.is_ipv4()) {
|
||||||
match manager.connect(addr, None, None).await {
|
match manager.connect(addr, None, scope).await {
|
||||||
Ok(stream) => return Ok(stream),
|
Ok(stream) => return Ok(stream),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!(
|
warn!(
|
||||||
host = %host,
|
host = %host,
|
||||||
port = port,
|
port = port,
|
||||||
|
scope = ?scope,
|
||||||
error = %e,
|
error = %e,
|
||||||
"Upstream connect failed, using direct connect"
|
"Upstream connect failed, using direct connect"
|
||||||
);
|
);
|
||||||
@@ -537,6 +540,7 @@ async fn fetch_via_raw_tls(
|
|||||||
sni: &str,
|
sni: &str,
|
||||||
connect_timeout: Duration,
|
connect_timeout: Duration,
|
||||||
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
|
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
|
||||||
|
scope: Option<&str>,
|
||||||
proxy_protocol: u8,
|
proxy_protocol: u8,
|
||||||
unix_sock: Option<&str>,
|
unix_sock: Option<&str>,
|
||||||
) -> Result<TlsFetchResult> {
|
) -> Result<TlsFetchResult> {
|
||||||
@@ -572,7 +576,7 @@ async fn fetch_via_raw_tls(
|
|||||||
#[cfg(not(unix))]
|
#[cfg(not(unix))]
|
||||||
let _ = unix_sock;
|
let _ = unix_sock;
|
||||||
|
|
||||||
let stream = connect_tcp_with_upstream(host, port, connect_timeout, upstream).await?;
|
let stream = connect_tcp_with_upstream(host, port, connect_timeout, upstream, scope).await?;
|
||||||
fetch_via_raw_tls_stream(stream, sni, connect_timeout, proxy_protocol).await
|
fetch_via_raw_tls_stream(stream, sni, connect_timeout, proxy_protocol).await
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -675,6 +679,7 @@ async fn fetch_via_rustls(
|
|||||||
sni: &str,
|
sni: &str,
|
||||||
connect_timeout: Duration,
|
connect_timeout: Duration,
|
||||||
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
|
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
|
||||||
|
scope: Option<&str>,
|
||||||
proxy_protocol: u8,
|
proxy_protocol: u8,
|
||||||
unix_sock: Option<&str>,
|
unix_sock: Option<&str>,
|
||||||
) -> Result<TlsFetchResult> {
|
) -> Result<TlsFetchResult> {
|
||||||
@@ -710,7 +715,7 @@ async fn fetch_via_rustls(
|
|||||||
#[cfg(not(unix))]
|
#[cfg(not(unix))]
|
||||||
let _ = unix_sock;
|
let _ = unix_sock;
|
||||||
|
|
||||||
let stream = connect_tcp_with_upstream(host, port, connect_timeout, upstream).await?;
|
let stream = connect_tcp_with_upstream(host, port, connect_timeout, upstream, scope).await?;
|
||||||
fetch_via_rustls_stream(stream, host, sni, proxy_protocol).await
|
fetch_via_rustls_stream(stream, host, sni, proxy_protocol).await
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -726,6 +731,7 @@ pub async fn fetch_real_tls(
|
|||||||
sni: &str,
|
sni: &str,
|
||||||
connect_timeout: Duration,
|
connect_timeout: Duration,
|
||||||
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
|
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
|
||||||
|
scope: Option<&str>,
|
||||||
proxy_protocol: u8,
|
proxy_protocol: u8,
|
||||||
unix_sock: Option<&str>,
|
unix_sock: Option<&str>,
|
||||||
) -> Result<TlsFetchResult> {
|
) -> Result<TlsFetchResult> {
|
||||||
@@ -735,6 +741,7 @@ pub async fn fetch_real_tls(
|
|||||||
sni,
|
sni,
|
||||||
connect_timeout,
|
connect_timeout,
|
||||||
upstream.clone(),
|
upstream.clone(),
|
||||||
|
scope,
|
||||||
proxy_protocol,
|
proxy_protocol,
|
||||||
unix_sock,
|
unix_sock,
|
||||||
)
|
)
|
||||||
@@ -753,6 +760,7 @@ pub async fn fetch_real_tls(
|
|||||||
sni,
|
sni,
|
||||||
connect_timeout,
|
connect_timeout,
|
||||||
upstream,
|
upstream,
|
||||||
|
scope,
|
||||||
proxy_protocol,
|
proxy_protocol,
|
||||||
unix_sock,
|
unix_sock,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -10,9 +10,10 @@ use tracing::{debug, info, warn};
|
|||||||
use crate::config::MeFloorMode;
|
use crate::config::MeFloorMode;
|
||||||
use crate::crypto::SecureRandom;
|
use crate::crypto::SecureRandom;
|
||||||
use crate::network::IpFamily;
|
use crate::network::IpFamily;
|
||||||
|
use crate::stats::MeWriterTeardownReason;
|
||||||
|
|
||||||
use super::MePool;
|
use super::MePool;
|
||||||
use super::pool::MeWriter;
|
use super::pool::{MeFamilyRuntimeState, MeWriter};
|
||||||
|
|
||||||
const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff
|
const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
@@ -33,6 +34,33 @@ const HEALTH_DRAIN_SOFT_EVICT_BUDGET_MIN: usize = 8;
|
|||||||
const HEALTH_DRAIN_SOFT_EVICT_BUDGET_MAX: usize = 256;
|
const HEALTH_DRAIN_SOFT_EVICT_BUDGET_MAX: usize = 256;
|
||||||
const HEALTH_DRAIN_REAP_OPPORTUNISTIC_INTERVAL_SECS: u64 = 1;
|
const HEALTH_DRAIN_REAP_OPPORTUNISTIC_INTERVAL_SECS: u64 = 1;
|
||||||
const HEALTH_DRAIN_TIMEOUT_ENFORCER_INTERVAL_SECS: u64 = 1;
|
const HEALTH_DRAIN_TIMEOUT_ENFORCER_INTERVAL_SECS: u64 = 1;
|
||||||
|
/// Fail-streak threshold for suppressing an IP family.
const FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD: u32 = 6;
/// Length of one family suppression window, in seconds (two minutes).
const FAMILY_SUPPRESS_WINDOW_SECS: u64 = 2 * 60;
/// Minimum spacing between recovery probes of a suppressed family, in seconds.
const FAMILY_RECOVER_PROBE_INTERVAL_SECS: u64 = 5;
/// Consecutive successful checks a recovering family needs to become healthy.
const FAMILY_RECOVER_SUCCESS_STREAK_REQUIRED: u32 = 3;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct FamilyCircuitState {
|
||||||
|
state: MeFamilyRuntimeState,
|
||||||
|
state_since_at: Instant,
|
||||||
|
suppressed_until: Option<Instant>,
|
||||||
|
next_probe_at: Instant,
|
||||||
|
fail_streak: u32,
|
||||||
|
recover_success_streak: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FamilyCircuitState {
|
||||||
|
fn new(now: Instant) -> Self {
|
||||||
|
Self {
|
||||||
|
state: MeFamilyRuntimeState::Healthy,
|
||||||
|
state_since_at: now,
|
||||||
|
suppressed_until: None,
|
||||||
|
next_probe_at: now,
|
||||||
|
fail_streak: 0,
|
||||||
|
recover_success_streak: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
struct DcFloorPlanEntry {
|
struct DcFloorPlanEntry {
|
||||||
@@ -72,6 +100,25 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
|
|||||||
let mut floor_warn_next_allowed: HashMap<(i32, IpFamily), Instant> = HashMap::new();
|
let mut floor_warn_next_allowed: HashMap<(i32, IpFamily), Instant> = HashMap::new();
|
||||||
let mut drain_warn_next_allowed: HashMap<u64, Instant> = HashMap::new();
|
let mut drain_warn_next_allowed: HashMap<u64, Instant> = HashMap::new();
|
||||||
let mut drain_soft_evict_next_allowed: HashMap<u64, Instant> = HashMap::new();
|
let mut drain_soft_evict_next_allowed: HashMap<u64, Instant> = HashMap::new();
|
||||||
|
let mut family_v4_circuit = FamilyCircuitState::new(Instant::now());
|
||||||
|
let mut family_v6_circuit = FamilyCircuitState::new(Instant::now());
|
||||||
|
let init_epoch_secs = MePool::now_epoch_secs();
|
||||||
|
pool.set_family_runtime_state(
|
||||||
|
IpFamily::V4,
|
||||||
|
family_v4_circuit.state,
|
||||||
|
init_epoch_secs,
|
||||||
|
0,
|
||||||
|
family_v4_circuit.fail_streak,
|
||||||
|
family_v4_circuit.recover_success_streak,
|
||||||
|
);
|
||||||
|
pool.set_family_runtime_state(
|
||||||
|
IpFamily::V6,
|
||||||
|
family_v6_circuit.state,
|
||||||
|
init_epoch_secs,
|
||||||
|
0,
|
||||||
|
family_v6_circuit.fail_streak,
|
||||||
|
family_v6_circuit.recover_success_streak,
|
||||||
|
);
|
||||||
let mut degraded_interval = true;
|
let mut degraded_interval = true;
|
||||||
loop {
|
loop {
|
||||||
let interval = if degraded_interval {
|
let interval = if degraded_interval {
|
||||||
@@ -87,7 +134,9 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
|
|||||||
&mut drain_soft_evict_next_allowed,
|
&mut drain_soft_evict_next_allowed,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
let v4_degraded = check_family(
|
let now = Instant::now();
|
||||||
|
let now_epoch_secs = MePool::now_epoch_secs();
|
||||||
|
let v4_degraded_raw = check_family(
|
||||||
IpFamily::V4,
|
IpFamily::V4,
|
||||||
&pool,
|
&pool,
|
||||||
&rng,
|
&rng,
|
||||||
@@ -106,25 +155,53 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
|
|||||||
&mut drain_soft_evict_next_allowed,
|
&mut drain_soft_evict_next_allowed,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
let v6_degraded = check_family(
|
let v4_degraded = apply_family_circuit_result(
|
||||||
IpFamily::V6,
|
|
||||||
&pool,
|
&pool,
|
||||||
&rng,
|
IpFamily::V4,
|
||||||
&mut backoff,
|
&mut family_v4_circuit,
|
||||||
&mut next_attempt,
|
Some(v4_degraded_raw),
|
||||||
&mut inflight,
|
false,
|
||||||
&mut outage_backoff,
|
now,
|
||||||
&mut outage_next_attempt,
|
now_epoch_secs,
|
||||||
&mut single_endpoint_outage,
|
);
|
||||||
&mut shadow_rotate_deadline,
|
|
||||||
&mut idle_refresh_next_attempt,
|
let v6_check_ran = should_run_family_check(&mut family_v6_circuit, now);
|
||||||
&mut adaptive_idle_since,
|
let v6_degraded_raw = if v6_check_ran {
|
||||||
&mut adaptive_recover_until,
|
check_family(
|
||||||
&mut floor_warn_next_allowed,
|
IpFamily::V6,
|
||||||
&mut drain_warn_next_allowed,
|
&pool,
|
||||||
&mut drain_soft_evict_next_allowed,
|
&rng,
|
||||||
)
|
&mut backoff,
|
||||||
.await;
|
&mut next_attempt,
|
||||||
|
&mut inflight,
|
||||||
|
&mut outage_backoff,
|
||||||
|
&mut outage_next_attempt,
|
||||||
|
&mut single_endpoint_outage,
|
||||||
|
&mut shadow_rotate_deadline,
|
||||||
|
&mut idle_refresh_next_attempt,
|
||||||
|
&mut adaptive_idle_since,
|
||||||
|
&mut adaptive_recover_until,
|
||||||
|
&mut floor_warn_next_allowed,
|
||||||
|
&mut drain_warn_next_allowed,
|
||||||
|
&mut drain_soft_evict_next_allowed,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
};
|
||||||
|
let v6_degraded = apply_family_circuit_result(
|
||||||
|
&pool,
|
||||||
|
IpFamily::V6,
|
||||||
|
&mut family_v6_circuit,
|
||||||
|
if v6_check_ran {
|
||||||
|
Some(v6_degraded_raw)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
now,
|
||||||
|
now_epoch_secs,
|
||||||
|
);
|
||||||
degraded_interval = v4_degraded || v6_degraded;
|
degraded_interval = v4_degraded || v6_degraded;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -146,6 +223,148 @@ pub async fn me_drain_timeout_enforcer(pool: Arc<MePool>) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn should_run_family_check(circuit: &mut FamilyCircuitState, now: Instant) -> bool {
|
||||||
|
match circuit.state {
|
||||||
|
MeFamilyRuntimeState::Suppressed => {
|
||||||
|
if now < circuit.next_probe_at {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
circuit.next_probe_at =
|
||||||
|
now + Duration::from_secs(FAMILY_RECOVER_PROBE_INTERVAL_SECS);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
_ => true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_family_circuit_result(
|
||||||
|
pool: &Arc<MePool>,
|
||||||
|
family: IpFamily,
|
||||||
|
circuit: &mut FamilyCircuitState,
|
||||||
|
degraded: Option<bool>,
|
||||||
|
allow_suppress: bool,
|
||||||
|
now: Instant,
|
||||||
|
now_epoch_secs: u64,
|
||||||
|
) -> bool {
|
||||||
|
let Some(degraded) = degraded else {
|
||||||
|
// Preserve suppression state when probe tick is intentionally skipped.
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
let previous_state = circuit.state;
|
||||||
|
match circuit.state {
|
||||||
|
MeFamilyRuntimeState::Suppressed => {
|
||||||
|
if degraded {
|
||||||
|
circuit.fail_streak = circuit.fail_streak.saturating_add(1);
|
||||||
|
circuit.recover_success_streak = 0;
|
||||||
|
let until = now + Duration::from_secs(FAMILY_SUPPRESS_WINDOW_SECS);
|
||||||
|
circuit.suppressed_until = Some(until);
|
||||||
|
circuit.state_since_at = now;
|
||||||
|
warn!(
|
||||||
|
?family,
|
||||||
|
fail_streak = circuit.fail_streak,
|
||||||
|
suppress_secs = FAMILY_SUPPRESS_WINDOW_SECS,
|
||||||
|
"ME family remains suppressed due to ongoing failures"
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
circuit.fail_streak = 0;
|
||||||
|
circuit.recover_success_streak = 1;
|
||||||
|
circuit.state = MeFamilyRuntimeState::Recovering;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MeFamilyRuntimeState::Recovering => {
|
||||||
|
if degraded {
|
||||||
|
circuit.fail_streak = circuit.fail_streak.saturating_add(1);
|
||||||
|
if allow_suppress {
|
||||||
|
circuit.state = MeFamilyRuntimeState::Suppressed;
|
||||||
|
let until = now + Duration::from_secs(FAMILY_SUPPRESS_WINDOW_SECS);
|
||||||
|
circuit.suppressed_until = Some(until);
|
||||||
|
circuit.next_probe_at =
|
||||||
|
now + Duration::from_secs(FAMILY_RECOVER_PROBE_INTERVAL_SECS);
|
||||||
|
warn!(
|
||||||
|
?family,
|
||||||
|
fail_streak = circuit.fail_streak,
|
||||||
|
suppress_secs = FAMILY_SUPPRESS_WINDOW_SECS,
|
||||||
|
"ME family temporarily suppressed after repeated degradation"
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
circuit.state = MeFamilyRuntimeState::Degraded;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
circuit.recover_success_streak = circuit.recover_success_streak.saturating_add(1);
|
||||||
|
if circuit.recover_success_streak >= FAMILY_RECOVER_SUCCESS_STREAK_REQUIRED {
|
||||||
|
circuit.fail_streak = 0;
|
||||||
|
circuit.recover_success_streak = 0;
|
||||||
|
circuit.suppressed_until = None;
|
||||||
|
circuit.state = MeFamilyRuntimeState::Healthy;
|
||||||
|
info!(
|
||||||
|
?family,
|
||||||
|
"ME family suppression lifted after stable recovery probes"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
if degraded {
|
||||||
|
circuit.fail_streak = circuit.fail_streak.saturating_add(1);
|
||||||
|
circuit.recover_success_streak = 0;
|
||||||
|
circuit.state = MeFamilyRuntimeState::Degraded;
|
||||||
|
if allow_suppress && circuit.fail_streak >= FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD {
|
||||||
|
circuit.state = MeFamilyRuntimeState::Suppressed;
|
||||||
|
let until = now + Duration::from_secs(FAMILY_SUPPRESS_WINDOW_SECS);
|
||||||
|
circuit.suppressed_until = Some(until);
|
||||||
|
circuit.next_probe_at =
|
||||||
|
now + Duration::from_secs(FAMILY_RECOVER_PROBE_INTERVAL_SECS);
|
||||||
|
warn!(
|
||||||
|
?family,
|
||||||
|
fail_streak = circuit.fail_streak,
|
||||||
|
suppress_secs = FAMILY_SUPPRESS_WINDOW_SECS,
|
||||||
|
"ME family temporarily suppressed after repeated degradation"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
circuit.fail_streak = 0;
|
||||||
|
circuit.recover_success_streak = 0;
|
||||||
|
circuit.suppressed_until = None;
|
||||||
|
circuit.state = MeFamilyRuntimeState::Healthy;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if previous_state != circuit.state {
|
||||||
|
circuit.state_since_at = now;
|
||||||
|
}
|
||||||
|
|
||||||
|
let suppressed_until_epoch_secs = circuit
|
||||||
|
.suppressed_until
|
||||||
|
.and_then(|until| {
|
||||||
|
if until > now {
|
||||||
|
Some(
|
||||||
|
now_epoch_secs
|
||||||
|
.saturating_add(until.saturating_duration_since(now).as_secs()),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap_or(0);
|
||||||
|
let state_since_epoch_secs = if previous_state == circuit.state {
|
||||||
|
pool.family_runtime_state_since_epoch_secs(family)
|
||||||
|
} else {
|
||||||
|
now_epoch_secs
|
||||||
|
};
|
||||||
|
pool.set_family_runtime_state(
|
||||||
|
family,
|
||||||
|
circuit.state,
|
||||||
|
state_since_epoch_secs,
|
||||||
|
suppressed_until_epoch_secs,
|
||||||
|
circuit.fail_streak,
|
||||||
|
circuit.recover_success_streak,
|
||||||
|
);
|
||||||
|
|
||||||
|
!matches!(circuit.state, MeFamilyRuntimeState::Suppressed) && degraded
|
||||||
|
}
|
||||||
|
|
||||||
fn draining_writer_timeout_expired(
|
fn draining_writer_timeout_expired(
|
||||||
pool: &MePool,
|
pool: &MePool,
|
||||||
writer: &MeWriter,
|
writer: &MeWriter,
|
||||||
@@ -358,7 +577,8 @@ pub(super) async fn reap_draining_writers(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
pool.stats.increment_pool_force_close_total();
|
pool.stats.increment_pool_force_close_total();
|
||||||
pool.remove_writer_and_close_clients(writer_id).await;
|
pool.remove_writer_and_close_clients(writer_id, MeWriterTeardownReason::ReapTimeoutExpired)
|
||||||
|
.await;
|
||||||
pool.stats
|
pool.stats
|
||||||
.increment_me_draining_writers_reap_progress_total();
|
.increment_me_draining_writers_reap_progress_total();
|
||||||
}
|
}
|
||||||
@@ -376,7 +596,8 @@ pub(super) async fn reap_draining_writers(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
pool.stats.increment_pool_force_close_total();
|
pool.stats.increment_pool_force_close_total();
|
||||||
pool.remove_writer_and_close_clients(writer_id).await;
|
pool.remove_writer_and_close_clients(writer_id, MeWriterTeardownReason::ReapThresholdForce)
|
||||||
|
.await;
|
||||||
pool.stats
|
pool.stats
|
||||||
.increment_me_draining_writers_reap_progress_total();
|
.increment_me_draining_writers_reap_progress_total();
|
||||||
closed_total = closed_total.saturating_add(1);
|
closed_total = closed_total.saturating_add(1);
|
||||||
@@ -388,7 +609,8 @@ pub(super) async fn reap_draining_writers(
|
|||||||
if !closed_writer_ids.insert(writer_id) {
|
if !closed_writer_ids.insert(writer_id) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
pool.remove_writer_and_close_clients(writer_id).await;
|
pool.remove_writer_and_close_clients(writer_id, MeWriterTeardownReason::ReapEmpty)
|
||||||
|
.await;
|
||||||
pool.stats
|
pool.stats
|
||||||
.increment_me_draining_writers_reap_progress_total();
|
.increment_me_draining_writers_reap_progress_total();
|
||||||
closed_total = closed_total.saturating_add(1);
|
closed_total = closed_total.saturating_add(1);
|
||||||
@@ -1646,11 +1868,14 @@ pub async fn me_zombie_writer_watchdog(pool: Arc<MePool>) {
|
|||||||
for (writer_id, had_clients) in &zombie_ids_with_meta {
|
for (writer_id, had_clients) in &zombie_ids_with_meta {
|
||||||
let result = tokio::time::timeout(
|
let result = tokio::time::timeout(
|
||||||
Duration::from_secs(REMOVE_TIMEOUT_SECS),
|
Duration::from_secs(REMOVE_TIMEOUT_SECS),
|
||||||
pool.remove_writer_and_close_clients(*writer_id),
|
pool.remove_writer_and_close_clients(
|
||||||
|
*writer_id,
|
||||||
|
MeWriterTeardownReason::WatchdogStuckDraining,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
match result {
|
match result {
|
||||||
Ok(()) => {
|
Ok(true) => {
|
||||||
removal_timeout_streak.remove(writer_id);
|
removal_timeout_streak.remove(writer_id);
|
||||||
pool.stats.increment_pool_force_close_total();
|
pool.stats.increment_pool_force_close_total();
|
||||||
pool.stats
|
pool.stats
|
||||||
@@ -1661,7 +1886,16 @@ pub async fn me_zombie_writer_watchdog(pool: Arc<MePool>) {
|
|||||||
"Zombie writer removed by watchdog"
|
"Zombie writer removed by watchdog"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
Ok(false) => {
|
||||||
|
removal_timeout_streak.remove(writer_id);
|
||||||
|
debug!(
|
||||||
|
writer_id,
|
||||||
|
had_clients,
|
||||||
|
"Zombie writer watchdog removal became no-op"
|
||||||
|
);
|
||||||
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
|
pool.stats.increment_me_writer_teardown_timeout_total();
|
||||||
let streak = removal_timeout_streak
|
let streak = removal_timeout_streak
|
||||||
.entry(*writer_id)
|
.entry(*writer_id)
|
||||||
.and_modify(|value| *value = value.saturating_add(1))
|
.and_modify(|value| *value = value.saturating_add(1))
|
||||||
@@ -1675,10 +1909,14 @@ pub async fn me_zombie_writer_watchdog(pool: Arc<MePool>) {
|
|||||||
if *streak < HARD_DETACH_TIMEOUT_STREAK {
|
if *streak < HARD_DETACH_TIMEOUT_STREAK {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
pool.stats.increment_me_writer_teardown_escalation_total();
|
||||||
|
|
||||||
let hard_detach = tokio::time::timeout(
|
let hard_detach = tokio::time::timeout(
|
||||||
Duration::from_secs(REMOVE_TIMEOUT_SECS),
|
Duration::from_secs(REMOVE_TIMEOUT_SECS),
|
||||||
pool.remove_draining_writer_hard_detach(*writer_id),
|
pool.remove_draining_writer_hard_detach(
|
||||||
|
*writer_id,
|
||||||
|
MeWriterTeardownReason::WatchdogStuckDraining,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
match hard_detach {
|
match hard_detach {
|
||||||
@@ -1702,6 +1940,7 @@ pub async fn me_zombie_writer_watchdog(pool: Arc<MePool>) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
|
pool.stats.increment_me_writer_teardown_timeout_total();
|
||||||
warn!(
|
warn!(
|
||||||
writer_id,
|
writer_id,
|
||||||
had_clients,
|
had_clients,
|
||||||
@@ -1725,13 +1964,19 @@ mod tests {
|
|||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
|
|
||||||
use super::reap_draining_writers;
|
use super::{
|
||||||
|
FamilyCircuitState, apply_family_circuit_result, reap_draining_writers,
|
||||||
|
should_run_family_check,
|
||||||
|
};
|
||||||
use crate::config::{GeneralConfig, MeRouteNoWriterMode, MeSocksKdfPolicy, MeWriterPickMode};
|
use crate::config::{GeneralConfig, MeRouteNoWriterMode, MeSocksKdfPolicy, MeWriterPickMode};
|
||||||
use crate::crypto::SecureRandom;
|
use crate::crypto::SecureRandom;
|
||||||
|
use crate::network::IpFamily;
|
||||||
use crate::network::probe::NetworkDecision;
|
use crate::network::probe::NetworkDecision;
|
||||||
use crate::stats::Stats;
|
use crate::stats::Stats;
|
||||||
use crate::transport::middle_proxy::codec::WriterCommand;
|
use crate::transport::middle_proxy::codec::WriterCommand;
|
||||||
use crate::transport::middle_proxy::pool::{MePool, MeWriter, WriterContour};
|
use crate::transport::middle_proxy::pool::{
|
||||||
|
MeFamilyRuntimeState, MePool, MeWriter, WriterContour,
|
||||||
|
};
|
||||||
use crate::transport::middle_proxy::registry::ConnMeta;
|
use crate::transport::middle_proxy::registry::ConnMeta;
|
||||||
|
|
||||||
async fn make_pool(me_pool_drain_threshold: u64) -> Arc<MePool> {
|
async fn make_pool(me_pool_drain_threshold: u64) -> Arc<MePool> {
|
||||||
@@ -1909,4 +2154,47 @@ mod tests {
|
|||||||
assert_eq!(pool.registry.get_writer(conn_b).await.unwrap().writer_id, 20);
|
assert_eq!(pool.registry.get_writer(conn_b).await.unwrap().writer_id, 20);
|
||||||
assert_eq!(pool.registry.get_writer(conn_c).await.unwrap().writer_id, 30);
|
assert_eq!(pool.registry.get_writer(conn_c).await.unwrap().writer_id, 30);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn suppressed_family_probe_skip_preserves_suppressed_state() {
|
||||||
|
let pool = make_pool(0).await;
|
||||||
|
let now = Instant::now();
|
||||||
|
let now_epoch_secs = MePool::now_epoch_secs();
|
||||||
|
let suppressed_until_epoch_secs = now_epoch_secs.saturating_add(60);
|
||||||
|
pool.set_family_runtime_state(
|
||||||
|
IpFamily::V6,
|
||||||
|
MeFamilyRuntimeState::Suppressed,
|
||||||
|
now_epoch_secs,
|
||||||
|
suppressed_until_epoch_secs,
|
||||||
|
7,
|
||||||
|
0,
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut circuit = FamilyCircuitState {
|
||||||
|
state: MeFamilyRuntimeState::Suppressed,
|
||||||
|
state_since_at: now,
|
||||||
|
suppressed_until: Some(now + Duration::from_secs(60)),
|
||||||
|
next_probe_at: now + Duration::from_secs(5),
|
||||||
|
fail_streak: 7,
|
||||||
|
recover_success_streak: 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
assert!(!should_run_family_check(&mut circuit, now));
|
||||||
|
assert!(!apply_family_circuit_result(
|
||||||
|
&pool,
|
||||||
|
IpFamily::V6,
|
||||||
|
&mut circuit,
|
||||||
|
None,
|
||||||
|
true,
|
||||||
|
now,
|
||||||
|
now_epoch_secs,
|
||||||
|
));
|
||||||
|
assert_eq!(circuit.state, MeFamilyRuntimeState::Suppressed);
|
||||||
|
assert_eq!(circuit.fail_streak, 7);
|
||||||
|
assert_eq!(circuit.recover_success_streak, 0);
|
||||||
|
assert_eq!(
|
||||||
|
pool.family_runtime_state(IpFamily::V6),
|
||||||
|
MeFamilyRuntimeState::Suppressed,
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -316,7 +316,12 @@ async fn reap_draining_writers_maintains_warn_state_subset_property_under_bulk_c
|
|||||||
|
|
||||||
let ids = sorted_writer_ids(&pool).await;
|
let ids = sorted_writer_ids(&pool).await;
|
||||||
for writer_id in ids.into_iter().take(3) {
|
for writer_id in ids.into_iter().take(3) {
|
||||||
let _ = pool.remove_writer_and_close_clients(writer_id).await;
|
let _ = pool
|
||||||
|
.remove_writer_and_close_clients(
|
||||||
|
writer_id,
|
||||||
|
crate::stats::MeWriterTeardownReason::ReapEmpty,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
|
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
|
||||||
|
|||||||
@@ -197,7 +197,9 @@ async fn reap_draining_writers_drops_warn_state_for_removed_writer() {
|
|||||||
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
|
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
|
||||||
assert!(warn_next_allowed.contains_key(&7));
|
assert!(warn_next_allowed.contains_key(&7));
|
||||||
|
|
||||||
let _ = pool.remove_writer_and_close_clients(7).await;
|
let _ = pool
|
||||||
|
.remove_writer_and_close_clients(7, crate::stats::MeWriterTeardownReason::ReapEmpty)
|
||||||
|
.await;
|
||||||
assert!(pool.registry.get_writer(conn_ids[0]).await.is_none());
|
assert!(pool.registry.get_writer(conn_ids[0]).await.is_none());
|
||||||
|
|
||||||
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
|
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
|
||||||
@@ -527,7 +529,12 @@ async fn reap_draining_writers_warn_state_never_exceeds_live_draining_population
|
|||||||
|
|
||||||
let existing_writer_ids = current_writer_ids(&pool).await;
|
let existing_writer_ids = current_writer_ids(&pool).await;
|
||||||
for writer_id in existing_writer_ids.into_iter().take(4) {
|
for writer_id in existing_writer_ids.into_iter().take(4) {
|
||||||
let _ = pool.remove_writer_and_close_clients(writer_id).await;
|
let _ = pool
|
||||||
|
.remove_writer_and_close_clients(
|
||||||
|
writer_id,
|
||||||
|
crate::stats::MeWriterTeardownReason::ReapEmpty,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
|
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
|
||||||
assert!(warn_next_allowed.len() <= pool.writers.read().await.len());
|
assert!(warn_next_allowed.len() <= pool.writers.read().await.len());
|
||||||
|
|||||||
@@ -74,6 +74,64 @@ impl WriterContour {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Runtime state of one IP family, with fixed `u8` discriminants so the value
/// can be stored as a raw byte and decoded again with [`Self::from_u8`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub(crate) enum MeFamilyRuntimeState {
    Healthy = 0,
    Degraded = 1,
    Suppressed = 2,
    Recovering = 3,
}

impl MeFamilyRuntimeState {
    /// Decodes a raw discriminant. Any byte that does not name a variant
    /// (including `0`) yields `Healthy`.
    pub(crate) fn from_u8(value: u8) -> Self {
        let non_default = [Self::Degraded, Self::Suppressed, Self::Recovering];
        non_default
            .iter()
            .copied()
            .find(|state| *state as u8 == value)
            .unwrap_or(Self::Healthy)
    }

    /// Lower-case label for the state.
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            Self::Recovering => "recovering",
            Self::Suppressed => "suppressed",
            Self::Degraded => "degraded",
            Self::Healthy => "healthy",
        }
    }
}
|
||||||
|
|
||||||
|
/// Reason code recorded for a drain gate decision, with fixed `u8`
/// discriminants so the value can be stored as a raw byte and decoded again
/// with [`Self::from_u8`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub(crate) enum MeDrainGateReason {
    Open = 0,
    CoverageQuorum = 1,
    Redundancy = 2,
    SuppressionActive = 3,
}

impl MeDrainGateReason {
    /// Decodes a raw discriminant. Any byte that does not name a variant
    /// (including `0`) yields `Open`.
    pub(crate) fn from_u8(value: u8) -> Self {
        let non_default = [Self::CoverageQuorum, Self::Redundancy, Self::SuppressionActive];
        non_default
            .iter()
            .copied()
            .find(|reason| *reason as u8 == value)
            .unwrap_or(Self::Open)
    }

    /// Snake-case label for the reason.
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            Self::SuppressionActive => "suppression_active",
            Self::Redundancy => "redundancy",
            Self::CoverageQuorum => "coverage_quorum",
            Self::Open => "open",
        }
    }
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct SecretSnapshot {
|
pub struct SecretSnapshot {
|
||||||
pub epoch: u64,
|
pub epoch: u64,
|
||||||
@@ -203,6 +261,20 @@ pub struct MePool {
|
|||||||
pub(super) me_health_interval_ms_unhealthy: AtomicU64,
|
pub(super) me_health_interval_ms_unhealthy: AtomicU64,
|
||||||
pub(super) me_health_interval_ms_healthy: AtomicU64,
|
pub(super) me_health_interval_ms_healthy: AtomicU64,
|
||||||
pub(super) me_warn_rate_limit_ms: AtomicU64,
|
pub(super) me_warn_rate_limit_ms: AtomicU64,
|
||||||
|
pub(super) me_family_v4_runtime_state: AtomicU8,
|
||||||
|
pub(super) me_family_v6_runtime_state: AtomicU8,
|
||||||
|
pub(super) me_family_v4_state_since_epoch_secs: AtomicU64,
|
||||||
|
pub(super) me_family_v6_state_since_epoch_secs: AtomicU64,
|
||||||
|
pub(super) me_family_v4_suppressed_until_epoch_secs: AtomicU64,
|
||||||
|
pub(super) me_family_v6_suppressed_until_epoch_secs: AtomicU64,
|
||||||
|
pub(super) me_family_v4_fail_streak: AtomicU32,
|
||||||
|
pub(super) me_family_v6_fail_streak: AtomicU32,
|
||||||
|
pub(super) me_family_v4_recover_success_streak: AtomicU32,
|
||||||
|
pub(super) me_family_v6_recover_success_streak: AtomicU32,
|
||||||
|
pub(super) me_last_drain_gate_route_quorum_ok: AtomicBool,
|
||||||
|
pub(super) me_last_drain_gate_redundancy_ok: AtomicBool,
|
||||||
|
pub(super) me_last_drain_gate_block_reason: AtomicU8,
|
||||||
|
pub(super) me_last_drain_gate_updated_at_epoch_secs: AtomicU64,
|
||||||
pub(super) runtime_ready: AtomicBool,
|
pub(super) runtime_ready: AtomicBool,
|
||||||
pool_size: usize,
|
pool_size: usize,
|
||||||
pub(super) preferred_endpoints_by_dc: Arc<RwLock<HashMap<i32, Vec<SocketAddr>>>>,
|
pub(super) preferred_endpoints_by_dc: Arc<RwLock<HashMap<i32, Vec<SocketAddr>>>>,
|
||||||
@@ -518,6 +590,20 @@ impl MePool {
|
|||||||
me_health_interval_ms_unhealthy: AtomicU64::new(me_health_interval_ms_unhealthy.max(1)),
|
me_health_interval_ms_unhealthy: AtomicU64::new(me_health_interval_ms_unhealthy.max(1)),
|
||||||
me_health_interval_ms_healthy: AtomicU64::new(me_health_interval_ms_healthy.max(1)),
|
me_health_interval_ms_healthy: AtomicU64::new(me_health_interval_ms_healthy.max(1)),
|
||||||
me_warn_rate_limit_ms: AtomicU64::new(me_warn_rate_limit_ms.max(1)),
|
me_warn_rate_limit_ms: AtomicU64::new(me_warn_rate_limit_ms.max(1)),
|
||||||
|
me_family_v4_runtime_state: AtomicU8::new(MeFamilyRuntimeState::Healthy as u8),
|
||||||
|
me_family_v6_runtime_state: AtomicU8::new(MeFamilyRuntimeState::Healthy as u8),
|
||||||
|
me_family_v4_state_since_epoch_secs: AtomicU64::new(Self::now_epoch_secs()),
|
||||||
|
me_family_v6_state_since_epoch_secs: AtomicU64::new(Self::now_epoch_secs()),
|
||||||
|
me_family_v4_suppressed_until_epoch_secs: AtomicU64::new(0),
|
||||||
|
me_family_v6_suppressed_until_epoch_secs: AtomicU64::new(0),
|
||||||
|
me_family_v4_fail_streak: AtomicU32::new(0),
|
||||||
|
me_family_v6_fail_streak: AtomicU32::new(0),
|
||||||
|
me_family_v4_recover_success_streak: AtomicU32::new(0),
|
||||||
|
me_family_v6_recover_success_streak: AtomicU32::new(0),
|
||||||
|
me_last_drain_gate_route_quorum_ok: AtomicBool::new(false),
|
||||||
|
me_last_drain_gate_redundancy_ok: AtomicBool::new(false),
|
||||||
|
me_last_drain_gate_block_reason: AtomicU8::new(MeDrainGateReason::Open as u8),
|
||||||
|
me_last_drain_gate_updated_at_epoch_secs: AtomicU64::new(Self::now_epoch_secs()),
|
||||||
runtime_ready: AtomicBool::new(false),
|
runtime_ready: AtomicBool::new(false),
|
||||||
preferred_endpoints_by_dc: Arc::new(RwLock::new(preferred_endpoints_by_dc)),
|
preferred_endpoints_by_dc: Arc::new(RwLock::new(preferred_endpoints_by_dc)),
|
||||||
})
|
})
|
||||||
@@ -535,6 +621,153 @@ impl MePool {
|
|||||||
self.runtime_ready.load(Ordering::Relaxed)
|
self.runtime_ready.load(Ordering::Relaxed)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(super) fn set_family_runtime_state(
|
||||||
|
&self,
|
||||||
|
family: IpFamily,
|
||||||
|
state: MeFamilyRuntimeState,
|
||||||
|
state_since_epoch_secs: u64,
|
||||||
|
suppressed_until_epoch_secs: u64,
|
||||||
|
fail_streak: u32,
|
||||||
|
recover_success_streak: u32,
|
||||||
|
) {
|
||||||
|
match family {
|
||||||
|
IpFamily::V4 => {
|
||||||
|
self.me_family_v4_runtime_state
|
||||||
|
.store(state as u8, Ordering::Relaxed);
|
||||||
|
self.me_family_v4_state_since_epoch_secs
|
||||||
|
.store(state_since_epoch_secs, Ordering::Relaxed);
|
||||||
|
self.me_family_v4_suppressed_until_epoch_secs
|
||||||
|
.store(suppressed_until_epoch_secs, Ordering::Relaxed);
|
||||||
|
self.me_family_v4_fail_streak
|
||||||
|
.store(fail_streak, Ordering::Relaxed);
|
||||||
|
self.me_family_v4_recover_success_streak
|
||||||
|
.store(recover_success_streak, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
IpFamily::V6 => {
|
||||||
|
self.me_family_v6_runtime_state
|
||||||
|
.store(state as u8, Ordering::Relaxed);
|
||||||
|
self.me_family_v6_state_since_epoch_secs
|
||||||
|
.store(state_since_epoch_secs, Ordering::Relaxed);
|
||||||
|
self.me_family_v6_suppressed_until_epoch_secs
|
||||||
|
.store(suppressed_until_epoch_secs, Ordering::Relaxed);
|
||||||
|
self.me_family_v6_fail_streak
|
||||||
|
.store(fail_streak, Ordering::Relaxed);
|
||||||
|
self.me_family_v6_recover_success_streak
|
||||||
|
.store(recover_success_streak, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn family_runtime_state(&self, family: IpFamily) -> MeFamilyRuntimeState {
|
||||||
|
match family {
|
||||||
|
IpFamily::V4 => MeFamilyRuntimeState::from_u8(
|
||||||
|
self.me_family_v4_runtime_state.load(Ordering::Relaxed),
|
||||||
|
),
|
||||||
|
IpFamily::V6 => MeFamilyRuntimeState::from_u8(
|
||||||
|
self.me_family_v6_runtime_state.load(Ordering::Relaxed),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn family_runtime_state_since_epoch_secs(&self, family: IpFamily) -> u64 {
|
||||||
|
match family {
|
||||||
|
IpFamily::V4 => self
|
||||||
|
.me_family_v4_state_since_epoch_secs
|
||||||
|
.load(Ordering::Relaxed),
|
||||||
|
IpFamily::V6 => self
|
||||||
|
.me_family_v6_state_since_epoch_secs
|
||||||
|
.load(Ordering::Relaxed),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn family_suppressed_until_epoch_secs(&self, family: IpFamily) -> u64 {
|
||||||
|
match family {
|
||||||
|
IpFamily::V4 => self
|
||||||
|
.me_family_v4_suppressed_until_epoch_secs
|
||||||
|
.load(Ordering::Relaxed),
|
||||||
|
IpFamily::V6 => self
|
||||||
|
.me_family_v6_suppressed_until_epoch_secs
|
||||||
|
.load(Ordering::Relaxed),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn family_fail_streak(&self, family: IpFamily) -> u32 {
|
||||||
|
match family {
|
||||||
|
IpFamily::V4 => self.me_family_v4_fail_streak.load(Ordering::Relaxed),
|
||||||
|
IpFamily::V6 => self.me_family_v6_fail_streak.load(Ordering::Relaxed),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn family_recover_success_streak(&self, family: IpFamily) -> u32 {
|
||||||
|
match family {
|
||||||
|
IpFamily::V4 => self
|
||||||
|
.me_family_v4_recover_success_streak
|
||||||
|
.load(Ordering::Relaxed),
|
||||||
|
IpFamily::V6 => self
|
||||||
|
.me_family_v6_recover_success_streak
|
||||||
|
.load(Ordering::Relaxed),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn is_family_temporarily_suppressed(
|
||||||
|
&self,
|
||||||
|
family: IpFamily,
|
||||||
|
now_epoch_secs: u64,
|
||||||
|
) -> bool {
|
||||||
|
self.family_suppressed_until_epoch_secs(family) > now_epoch_secs
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) fn family_enabled_for_drain_coverage(
|
||||||
|
&self,
|
||||||
|
family: IpFamily,
|
||||||
|
now_epoch_secs: u64,
|
||||||
|
) -> bool {
|
||||||
|
let configured = match family {
|
||||||
|
IpFamily::V4 => self.decision.ipv4_me,
|
||||||
|
IpFamily::V6 => self.decision.ipv6_me,
|
||||||
|
};
|
||||||
|
configured && !self.is_family_temporarily_suppressed(family, now_epoch_secs)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) fn set_last_drain_gate(
|
||||||
|
&self,
|
||||||
|
route_quorum_ok: bool,
|
||||||
|
redundancy_ok: bool,
|
||||||
|
block_reason: MeDrainGateReason,
|
||||||
|
updated_at_epoch_secs: u64,
|
||||||
|
) {
|
||||||
|
self.me_last_drain_gate_route_quorum_ok
|
||||||
|
.store(route_quorum_ok, Ordering::Relaxed);
|
||||||
|
self.me_last_drain_gate_redundancy_ok
|
||||||
|
.store(redundancy_ok, Ordering::Relaxed);
|
||||||
|
self.me_last_drain_gate_block_reason
|
||||||
|
.store(block_reason as u8, Ordering::Relaxed);
|
||||||
|
self.me_last_drain_gate_updated_at_epoch_secs
|
||||||
|
.store(updated_at_epoch_secs, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn last_drain_gate_route_quorum_ok(&self) -> bool {
|
||||||
|
self.me_last_drain_gate_route_quorum_ok
|
||||||
|
.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn last_drain_gate_redundancy_ok(&self) -> bool {
|
||||||
|
self.me_last_drain_gate_redundancy_ok
|
||||||
|
.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn last_drain_gate_block_reason(&self) -> MeDrainGateReason {
|
||||||
|
MeDrainGateReason::from_u8(
|
||||||
|
self.me_last_drain_gate_block_reason
|
||||||
|
.load(Ordering::Relaxed),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn last_drain_gate_updated_at_epoch_secs(&self) -> u64 {
|
||||||
|
self.me_last_drain_gate_updated_at_epoch_secs
|
||||||
|
.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn update_runtime_reinit_policy(
|
pub fn update_runtime_reinit_policy(
|
||||||
&self,
|
&self,
|
||||||
hardswap: bool,
|
hardswap: bool,
|
||||||
@@ -1021,9 +1254,10 @@ impl MePool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn active_coverage_required_total(&self) -> usize {
|
pub(super) async fn active_coverage_required_total(&self) -> usize {
|
||||||
|
let now_epoch_secs = Self::now_epoch_secs();
|
||||||
let mut endpoints_by_dc = HashMap::<i32, HashSet<SocketAddr>>::new();
|
let mut endpoints_by_dc = HashMap::<i32, HashSet<SocketAddr>>::new();
|
||||||
|
|
||||||
if self.decision.ipv4_me {
|
if self.family_enabled_for_drain_coverage(IpFamily::V4, now_epoch_secs) {
|
||||||
let map = self.proxy_map_v4.read().await;
|
let map = self.proxy_map_v4.read().await;
|
||||||
for (dc, addrs) in map.iter() {
|
for (dc, addrs) in map.iter() {
|
||||||
let entry = endpoints_by_dc.entry(*dc).or_default();
|
let entry = endpoints_by_dc.entry(*dc).or_default();
|
||||||
@@ -1033,7 +1267,7 @@ impl MePool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.decision.ipv6_me {
|
if self.family_enabled_for_drain_coverage(IpFamily::V6, now_epoch_secs) {
|
||||||
let map = self.proxy_map_v6.read().await;
|
let map = self.proxy_map_v6.read().await;
|
||||||
for (dc, addrs) in map.iter() {
|
for (dc, addrs) in map.iter() {
|
||||||
let entry = endpoints_by_dc.entry(*dc).or_default();
|
let entry = endpoints_by_dc.entry(*dc).or_default();
|
||||||
|
|||||||
@@ -164,9 +164,10 @@ impl MePool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn endpoints_for_dc(&self, target_dc: i32) -> Vec<SocketAddr> {
|
async fn endpoints_for_dc(&self, target_dc: i32) -> Vec<SocketAddr> {
|
||||||
|
let now_epoch_secs = Self::now_epoch_secs();
|
||||||
let mut endpoints = HashSet::<SocketAddr>::new();
|
let mut endpoints = HashSet::<SocketAddr>::new();
|
||||||
|
|
||||||
if self.decision.ipv4_me {
|
if self.family_enabled_for_drain_coverage(IpFamily::V4, now_epoch_secs) {
|
||||||
let map = self.proxy_map_v4.read().await;
|
let map = self.proxy_map_v4.read().await;
|
||||||
if let Some(addrs) = map.get(&target_dc) {
|
if let Some(addrs) = map.get(&target_dc) {
|
||||||
for (ip, port) in addrs {
|
for (ip, port) in addrs {
|
||||||
@@ -175,7 +176,7 @@ impl MePool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.decision.ipv6_me {
|
if self.family_enabled_for_drain_coverage(IpFamily::V6, now_epoch_secs) {
|
||||||
let map = self.proxy_map_v6.read().await;
|
let map = self.proxy_map_v6.read().await;
|
||||||
if let Some(addrs) = map.get(&target_dc) {
|
if let Some(addrs) = map.get(&target_dc) {
|
||||||
for (ip, port) in addrs {
|
for (ip, port) in addrs {
|
||||||
|
|||||||
@@ -11,8 +11,9 @@ use tracing::{debug, info, warn};
|
|||||||
use std::collections::hash_map::DefaultHasher;
|
use std::collections::hash_map::DefaultHasher;
|
||||||
|
|
||||||
use crate::crypto::SecureRandom;
|
use crate::crypto::SecureRandom;
|
||||||
|
use crate::network::IpFamily;
|
||||||
|
|
||||||
use super::pool::{MePool, WriterContour};
|
use super::pool::{MeDrainGateReason, MePool, WriterContour};
|
||||||
|
|
||||||
const ME_HARDSWAP_PENDING_TTL_SECS: u64 = 1800;
|
const ME_HARDSWAP_PENDING_TTL_SECS: u64 = 1800;
|
||||||
|
|
||||||
@@ -120,9 +121,10 @@ impl MePool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn desired_dc_endpoints(&self) -> HashMap<i32, HashSet<SocketAddr>> {
|
async fn desired_dc_endpoints(&self) -> HashMap<i32, HashSet<SocketAddr>> {
|
||||||
|
let now_epoch_secs = Self::now_epoch_secs();
|
||||||
let mut out: HashMap<i32, HashSet<SocketAddr>> = HashMap::new();
|
let mut out: HashMap<i32, HashSet<SocketAddr>> = HashMap::new();
|
||||||
|
|
||||||
if self.decision.ipv4_me {
|
if self.family_enabled_for_drain_coverage(IpFamily::V4, now_epoch_secs) {
|
||||||
let map_v4 = self.proxy_map_v4.read().await.clone();
|
let map_v4 = self.proxy_map_v4.read().await.clone();
|
||||||
for (dc, addrs) in map_v4 {
|
for (dc, addrs) in map_v4 {
|
||||||
let entry = out.entry(dc).or_default();
|
let entry = out.entry(dc).or_default();
|
||||||
@@ -132,7 +134,7 @@ impl MePool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.decision.ipv6_me {
|
if self.family_enabled_for_drain_coverage(IpFamily::V6, now_epoch_secs) {
|
||||||
let map_v6 = self.proxy_map_v6.read().await.clone();
|
let map_v6 = self.proxy_map_v6.read().await.clone();
|
||||||
for (dc, addrs) in map_v6 {
|
for (dc, addrs) in map_v6 {
|
||||||
let entry = out.entry(dc).or_default();
|
let entry = out.entry(dc).or_default();
|
||||||
@@ -313,13 +315,23 @@ impl MePool {
|
|||||||
|
|
||||||
pub async fn zero_downtime_reinit_after_map_change(self: &Arc<Self>, rng: &SecureRandom) {
|
pub async fn zero_downtime_reinit_after_map_change(self: &Arc<Self>, rng: &SecureRandom) {
|
||||||
let desired_by_dc = self.desired_dc_endpoints().await;
|
let desired_by_dc = self.desired_dc_endpoints().await;
|
||||||
|
let now_epoch_secs = Self::now_epoch_secs();
|
||||||
|
let v4_suppressed = self.is_family_temporarily_suppressed(IpFamily::V4, now_epoch_secs);
|
||||||
|
let v6_suppressed = self.is_family_temporarily_suppressed(IpFamily::V6, now_epoch_secs);
|
||||||
if desired_by_dc.is_empty() {
|
if desired_by_dc.is_empty() {
|
||||||
warn!("ME endpoint map is empty; skipping stale writer drain");
|
warn!("ME endpoint map is empty; skipping stale writer drain");
|
||||||
|
let reason = if (self.decision.ipv4_me && v4_suppressed)
|
||||||
|
|| (self.decision.ipv6_me && v6_suppressed)
|
||||||
|
{
|
||||||
|
MeDrainGateReason::SuppressionActive
|
||||||
|
} else {
|
||||||
|
MeDrainGateReason::CoverageQuorum
|
||||||
|
};
|
||||||
|
self.set_last_drain_gate(false, false, reason, now_epoch_secs);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
let desired_map_hash = Self::desired_map_hash(&desired_by_dc);
|
let desired_map_hash = Self::desired_map_hash(&desired_by_dc);
|
||||||
let now_epoch_secs = Self::now_epoch_secs();
|
|
||||||
let previous_generation = self.current_generation();
|
let previous_generation = self.current_generation();
|
||||||
let hardswap = self.hardswap.load(Ordering::Relaxed);
|
let hardswap = self.hardswap.load(Ordering::Relaxed);
|
||||||
let generation = if hardswap {
|
let generation = if hardswap {
|
||||||
@@ -390,7 +402,17 @@ impl MePool {
|
|||||||
.load(Ordering::Relaxed),
|
.load(Ordering::Relaxed),
|
||||||
);
|
);
|
||||||
let (coverage_ratio, missing_dc) = Self::coverage_ratio(&desired_by_dc, &active_writer_addrs);
|
let (coverage_ratio, missing_dc) = Self::coverage_ratio(&desired_by_dc, &active_writer_addrs);
|
||||||
|
let mut route_quorum_ok = coverage_ratio >= min_ratio;
|
||||||
|
let mut redundancy_ok = missing_dc.is_empty();
|
||||||
|
let mut redundancy_missing_dc = missing_dc.clone();
|
||||||
|
let mut gate_coverage_ratio = coverage_ratio;
|
||||||
if !hardswap && coverage_ratio < min_ratio {
|
if !hardswap && coverage_ratio < min_ratio {
|
||||||
|
self.set_last_drain_gate(
|
||||||
|
false,
|
||||||
|
redundancy_ok,
|
||||||
|
MeDrainGateReason::CoverageQuorum,
|
||||||
|
now_epoch_secs,
|
||||||
|
);
|
||||||
warn!(
|
warn!(
|
||||||
previous_generation,
|
previous_generation,
|
||||||
generation,
|
generation,
|
||||||
@@ -411,7 +433,17 @@ impl MePool {
|
|||||||
.collect();
|
.collect();
|
||||||
let (fresh_coverage_ratio, fresh_missing_dc) =
|
let (fresh_coverage_ratio, fresh_missing_dc) =
|
||||||
Self::coverage_ratio(&desired_by_dc, &fresh_writer_addrs);
|
Self::coverage_ratio(&desired_by_dc, &fresh_writer_addrs);
|
||||||
if !fresh_missing_dc.is_empty() {
|
route_quorum_ok = fresh_coverage_ratio >= min_ratio;
|
||||||
|
redundancy_ok = fresh_missing_dc.is_empty();
|
||||||
|
redundancy_missing_dc = fresh_missing_dc.clone();
|
||||||
|
gate_coverage_ratio = fresh_coverage_ratio;
|
||||||
|
if fresh_coverage_ratio < min_ratio {
|
||||||
|
self.set_last_drain_gate(
|
||||||
|
false,
|
||||||
|
redundancy_ok,
|
||||||
|
MeDrainGateReason::CoverageQuorum,
|
||||||
|
now_epoch_secs,
|
||||||
|
);
|
||||||
warn!(
|
warn!(
|
||||||
previous_generation,
|
previous_generation,
|
||||||
generation,
|
generation,
|
||||||
@@ -421,13 +453,16 @@ impl MePool {
|
|||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else if !missing_dc.is_empty() {
|
}
|
||||||
|
|
||||||
|
self.set_last_drain_gate(route_quorum_ok, redundancy_ok, MeDrainGateReason::Open, now_epoch_secs);
|
||||||
|
if !redundancy_ok {
|
||||||
warn!(
|
warn!(
|
||||||
missing_dc = ?missing_dc,
|
missing_dc = ?redundancy_missing_dc,
|
||||||
// Keep stale writers alive when fresh coverage is incomplete.
|
coverage_ratio = format_args!("{gate_coverage_ratio:.3}"),
|
||||||
"ME reinit coverage incomplete; keeping stale writers"
|
min_ratio = format_args!("{min_ratio:.3}"),
|
||||||
|
"ME reinit proceeds with weighted quorum while some DC groups remain uncovered"
|
||||||
);
|
);
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if hardswap {
|
if hardswap {
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use super::pool::{MePool, RefillDcKey};
|
use super::pool::{MeDrainGateReason, MePool, RefillDcKey};
|
||||||
use crate::network::IpFamily;
|
use crate::network::IpFamily;
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
@@ -36,6 +36,24 @@ pub(crate) struct MeApiNatStunSnapshot {
|
|||||||
pub stun_backoff_remaining_ms: Option<u64>,
|
pub stun_backoff_remaining_ms: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(crate) struct MeApiFamilyStateSnapshot {
|
||||||
|
pub family: &'static str,
|
||||||
|
pub state: &'static str,
|
||||||
|
pub state_since_epoch_secs: u64,
|
||||||
|
pub suppressed_until_epoch_secs: Option<u64>,
|
||||||
|
pub fail_streak: u32,
|
||||||
|
pub recover_success_streak: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(crate) struct MeApiDrainGateSnapshot {
|
||||||
|
pub route_quorum_ok: bool,
|
||||||
|
pub redundancy_ok: bool,
|
||||||
|
pub block_reason: &'static str,
|
||||||
|
pub updated_at_epoch_secs: u64,
|
||||||
|
}
|
||||||
|
|
||||||
impl MePool {
|
impl MePool {
|
||||||
pub(crate) async fn api_refill_snapshot(&self) -> MeApiRefillSnapshot {
|
pub(crate) async fn api_refill_snapshot(&self) -> MeApiRefillSnapshot {
|
||||||
let inflight_endpoints_total = self.refill_inflight.lock().await.len();
|
let inflight_endpoints_total = self.refill_inflight.lock().await.len();
|
||||||
@@ -125,4 +143,35 @@ impl MePool {
|
|||||||
stun_backoff_remaining_ms,
|
stun_backoff_remaining_ms,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn api_family_state_snapshot(&self) -> Vec<MeApiFamilyStateSnapshot> {
|
||||||
|
[IpFamily::V4, IpFamily::V6]
|
||||||
|
.into_iter()
|
||||||
|
.map(|family| {
|
||||||
|
let state = self.family_runtime_state(family);
|
||||||
|
let suppressed_until = self.family_suppressed_until_epoch_secs(family);
|
||||||
|
MeApiFamilyStateSnapshot {
|
||||||
|
family: match family {
|
||||||
|
IpFamily::V4 => "v4",
|
||||||
|
IpFamily::V6 => "v6",
|
||||||
|
},
|
||||||
|
state: state.as_str(),
|
||||||
|
state_since_epoch_secs: self.family_runtime_state_since_epoch_secs(family),
|
||||||
|
suppressed_until_epoch_secs: (suppressed_until != 0).then_some(suppressed_until),
|
||||||
|
fail_streak: self.family_fail_streak(family),
|
||||||
|
recover_success_streak: self.family_recover_success_streak(family),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn api_drain_gate_snapshot(&self) -> MeApiDrainGateSnapshot {
|
||||||
|
let reason: MeDrainGateReason = self.last_drain_gate_block_reason();
|
||||||
|
MeApiDrainGateSnapshot {
|
||||||
|
route_quorum_ok: self.last_drain_gate_route_quorum_ok(),
|
||||||
|
redundancy_ok: self.last_drain_gate_redundancy_ok(),
|
||||||
|
block_reason: reason.as_str(),
|
||||||
|
updated_at_epoch_secs: self.last_drain_gate_updated_at_epoch_secs(),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,6 +16,9 @@ use crate::config::MeBindStaleMode;
|
|||||||
use crate::crypto::SecureRandom;
|
use crate::crypto::SecureRandom;
|
||||||
use crate::error::{ProxyError, Result};
|
use crate::error::{ProxyError, Result};
|
||||||
use crate::protocol::constants::{RPC_CLOSE_EXT_U32, RPC_PING_U32};
|
use crate::protocol::constants::{RPC_CLOSE_EXT_U32, RPC_PING_U32};
|
||||||
|
use crate::stats::{
|
||||||
|
MeWriterCleanupSideEffectStep, MeWriterTeardownMode, MeWriterTeardownReason,
|
||||||
|
};
|
||||||
|
|
||||||
use super::codec::{RpcWriter, WriterCommand};
|
use super::codec::{RpcWriter, WriterCommand};
|
||||||
use super::pool::{MePool, MeWriter, WriterContour};
|
use super::pool::{MePool, MeWriter, WriterContour};
|
||||||
@@ -28,7 +31,7 @@ const ME_IDLE_KEEPALIVE_MAX_SECS: u64 = 5;
|
|||||||
const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700;
|
const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700;
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
enum WriterTeardownMode {
|
enum WriterRemoveGuardMode {
|
||||||
Any,
|
Any,
|
||||||
DrainingOnly,
|
DrainingOnly,
|
||||||
}
|
}
|
||||||
@@ -49,9 +52,16 @@ impl MePool {
|
|||||||
|
|
||||||
for writer_id in closed_writer_ids {
|
for writer_id in closed_writer_ids {
|
||||||
if self.registry.is_writer_empty(writer_id).await {
|
if self.registry.is_writer_empty(writer_id).await {
|
||||||
let _ = self.remove_writer_only(writer_id).await;
|
let _ = self
|
||||||
|
.remove_writer_only(writer_id, MeWriterTeardownReason::PruneClosedWriter)
|
||||||
|
.await;
|
||||||
} else {
|
} else {
|
||||||
let _ = self.remove_writer_and_close_clients(writer_id).await;
|
let _ = self
|
||||||
|
.remove_writer_and_close_clients(
|
||||||
|
writer_id,
|
||||||
|
MeWriterTeardownReason::PruneClosedWriter,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -173,7 +183,11 @@ impl MePool {
|
|||||||
.is_ok()
|
.is_ok()
|
||||||
{
|
{
|
||||||
if let Some(pool) = pool_writer_task.upgrade() {
|
if let Some(pool) = pool_writer_task.upgrade() {
|
||||||
pool.remove_writer_and_close_clients(writer_id).await;
|
pool.remove_writer_and_close_clients(
|
||||||
|
writer_id,
|
||||||
|
MeWriterTeardownReason::WriterTaskExit,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
} else {
|
} else {
|
||||||
cancel_wr.cancel();
|
cancel_wr.cancel();
|
||||||
}
|
}
|
||||||
@@ -264,7 +278,11 @@ impl MePool {
|
|||||||
.is_ok()
|
.is_ok()
|
||||||
{
|
{
|
||||||
if let Some(pool) = pool.upgrade() {
|
if let Some(pool) = pool.upgrade() {
|
||||||
pool.remove_writer_and_close_clients(writer_id).await;
|
pool.remove_writer_and_close_clients(
|
||||||
|
writer_id,
|
||||||
|
MeWriterTeardownReason::ReaderExit,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
} else {
|
} else {
|
||||||
// Fallback for shutdown races: make writer task exit quickly so stale
|
// Fallback for shutdown races: make writer task exit quickly so stale
|
||||||
// channels are observable by periodic prune.
|
// channels are observable by periodic prune.
|
||||||
@@ -372,7 +390,11 @@ impl MePool {
|
|||||||
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
|
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
|
||||||
.is_ok()
|
.is_ok()
|
||||||
{
|
{
|
||||||
pool.remove_writer_and_close_clients(writer_id).await;
|
pool.remove_writer_and_close_clients(
|
||||||
|
writer_id,
|
||||||
|
MeWriterTeardownReason::PingSendFail,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -465,7 +487,11 @@ impl MePool {
|
|||||||
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
|
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
|
||||||
.is_ok()
|
.is_ok()
|
||||||
{
|
{
|
||||||
pool.remove_writer_and_close_clients(writer_id).await;
|
pool.remove_writer_and_close_clients(
|
||||||
|
writer_id,
|
||||||
|
MeWriterTeardownReason::SignalSendFail,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -499,7 +525,11 @@ impl MePool {
|
|||||||
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
|
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
|
||||||
.is_ok()
|
.is_ok()
|
||||||
{
|
{
|
||||||
pool.remove_writer_and_close_clients(writer_id).await;
|
pool.remove_writer_and_close_clients(
|
||||||
|
writer_id,
|
||||||
|
MeWriterTeardownReason::SignalSendFail,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -512,25 +542,48 @@ impl MePool {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn remove_writer_and_close_clients(self: &Arc<Self>, writer_id: u64) {
|
pub(crate) async fn remove_writer_and_close_clients(
|
||||||
|
self: &Arc<Self>,
|
||||||
|
writer_id: u64,
|
||||||
|
reason: MeWriterTeardownReason,
|
||||||
|
) -> bool {
|
||||||
// Full client cleanup now happens inside `registry.writer_lost` to keep
|
// Full client cleanup now happens inside `registry.writer_lost` to keep
|
||||||
// writer reap/remove paths strictly non-blocking per connection.
|
// writer reap/remove paths strictly non-blocking per connection.
|
||||||
let _ = self
|
self.remove_writer_with_mode(
|
||||||
.remove_writer_with_mode(writer_id, WriterTeardownMode::Any)
|
writer_id,
|
||||||
.await;
|
reason,
|
||||||
|
MeWriterTeardownMode::Normal,
|
||||||
|
WriterRemoveGuardMode::Any,
|
||||||
|
)
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn remove_draining_writer_hard_detach(
|
pub(super) async fn remove_draining_writer_hard_detach(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
writer_id: u64,
|
writer_id: u64,
|
||||||
|
reason: MeWriterTeardownReason,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
self.remove_writer_with_mode(writer_id, WriterTeardownMode::DrainingOnly)
|
self.remove_writer_with_mode(
|
||||||
.await
|
writer_id,
|
||||||
|
reason,
|
||||||
|
MeWriterTeardownMode::HardDetach,
|
||||||
|
WriterRemoveGuardMode::DrainingOnly,
|
||||||
|
)
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn remove_writer_only(self: &Arc<Self>, writer_id: u64) -> bool {
|
async fn remove_writer_only(
|
||||||
self.remove_writer_with_mode(writer_id, WriterTeardownMode::Any)
|
self: &Arc<Self>,
|
||||||
.await
|
writer_id: u64,
|
||||||
|
reason: MeWriterTeardownReason,
|
||||||
|
) -> bool {
|
||||||
|
self.remove_writer_with_mode(
|
||||||
|
writer_id,
|
||||||
|
reason,
|
||||||
|
MeWriterTeardownMode::Normal,
|
||||||
|
WriterRemoveGuardMode::Any,
|
||||||
|
)
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
// Authoritative teardown primitive shared by normal cleanup and watchdog path.
|
// Authoritative teardown primitive shared by normal cleanup and watchdog path.
|
||||||
@@ -542,8 +595,13 @@ impl MePool {
|
|||||||
async fn remove_writer_with_mode(
|
async fn remove_writer_with_mode(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
writer_id: u64,
|
writer_id: u64,
|
||||||
mode: WriterTeardownMode,
|
reason: MeWriterTeardownReason,
|
||||||
|
mode: MeWriterTeardownMode,
|
||||||
|
guard_mode: WriterRemoveGuardMode,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
|
let started_at = Instant::now();
|
||||||
|
self.stats
|
||||||
|
.increment_me_writer_teardown_attempt_total(reason, mode);
|
||||||
let mut close_tx: Option<mpsc::Sender<WriterCommand>> = None;
|
let mut close_tx: Option<mpsc::Sender<WriterCommand>> = None;
|
||||||
let mut removed_addr: Option<SocketAddr> = None;
|
let mut removed_addr: Option<SocketAddr> = None;
|
||||||
let mut removed_dc: Option<i32> = None;
|
let mut removed_dc: Option<i32> = None;
|
||||||
@@ -553,9 +611,12 @@ impl MePool {
|
|||||||
{
|
{
|
||||||
let mut ws = self.writers.write().await;
|
let mut ws = self.writers.write().await;
|
||||||
if let Some(pos) = ws.iter().position(|w| w.id == writer_id) {
|
if let Some(pos) = ws.iter().position(|w| w.id == writer_id) {
|
||||||
if matches!(mode, WriterTeardownMode::DrainingOnly)
|
if matches!(guard_mode, WriterRemoveGuardMode::DrainingOnly)
|
||||||
&& !ws[pos].draining.load(Ordering::Relaxed)
|
&& !ws[pos].draining.load(Ordering::Relaxed)
|
||||||
{
|
{
|
||||||
|
self.stats.increment_me_writer_teardown_noop_total();
|
||||||
|
self.stats
|
||||||
|
.observe_me_writer_teardown_duration(mode, started_at.elapsed());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
let w = ws.remove(pos);
|
let w = ws.remove(pos);
|
||||||
@@ -595,6 +656,9 @@ impl MePool {
|
|||||||
self.stats.increment_me_writer_close_signal_drop_total();
|
self.stats.increment_me_writer_close_signal_drop_total();
|
||||||
self.stats
|
self.stats
|
||||||
.increment_me_writer_close_signal_channel_full_total();
|
.increment_me_writer_close_signal_channel_full_total();
|
||||||
|
self.stats.increment_me_writer_cleanup_side_effect_failures_total(
|
||||||
|
MeWriterCleanupSideEffectStep::CloseSignalChannelFull,
|
||||||
|
);
|
||||||
debug!(
|
debug!(
|
||||||
writer_id,
|
writer_id,
|
||||||
"Skipping close signal for removed writer: command channel is full"
|
"Skipping close signal for removed writer: command channel is full"
|
||||||
@@ -602,6 +666,9 @@ impl MePool {
|
|||||||
}
|
}
|
||||||
Err(TrySendError::Closed(_)) => {
|
Err(TrySendError::Closed(_)) => {
|
||||||
self.stats.increment_me_writer_close_signal_drop_total();
|
self.stats.increment_me_writer_close_signal_drop_total();
|
||||||
|
self.stats.increment_me_writer_cleanup_side_effect_failures_total(
|
||||||
|
MeWriterCleanupSideEffectStep::CloseSignalChannelClosed,
|
||||||
|
);
|
||||||
debug!(
|
debug!(
|
||||||
writer_id,
|
writer_id,
|
||||||
"Skipping close signal for removed writer: command channel is closed"
|
"Skipping close signal for removed writer: command channel is closed"
|
||||||
@@ -619,6 +686,13 @@ impl MePool {
|
|||||||
self.trigger_immediate_refill_for_dc(addr, writer_dc);
|
self.trigger_immediate_refill_for_dc(addr, writer_dc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if removed {
|
||||||
|
self.stats.increment_me_writer_teardown_success_total(mode);
|
||||||
|
} else {
|
||||||
|
self.stats.increment_me_writer_teardown_noop_total();
|
||||||
|
}
|
||||||
|
self.stats
|
||||||
|
.observe_me_writer_teardown_duration(mode, started_at.elapsed());
|
||||||
removed
|
removed
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ use crate::config::{MeRouteNoWriterMode, MeWriterPickMode};
|
|||||||
use crate::error::{ProxyError, Result};
|
use crate::error::{ProxyError, Result};
|
||||||
use crate::network::IpFamily;
|
use crate::network::IpFamily;
|
||||||
use crate::protocol::constants::{RPC_CLOSE_CONN_U32, RPC_CLOSE_EXT_U32};
|
use crate::protocol::constants::{RPC_CLOSE_CONN_U32, RPC_CLOSE_EXT_U32};
|
||||||
|
use crate::stats::MeWriterTeardownReason;
|
||||||
|
|
||||||
use super::MePool;
|
use super::MePool;
|
||||||
use super::codec::WriterCommand;
|
use super::codec::WriterCommand;
|
||||||
@@ -134,7 +135,11 @@ impl MePool {
|
|||||||
Ok(()) => return Ok(()),
|
Ok(()) => return Ok(()),
|
||||||
Err(TimedSendError::Closed(_)) => {
|
Err(TimedSendError::Closed(_)) => {
|
||||||
warn!(writer_id = current.writer_id, "ME writer channel closed");
|
warn!(writer_id = current.writer_id, "ME writer channel closed");
|
||||||
self.remove_writer_and_close_clients(current.writer_id).await;
|
self.remove_writer_and_close_clients(
|
||||||
|
current.writer_id,
|
||||||
|
MeWriterTeardownReason::RouteChannelClosed,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
Err(TimedSendError::Timeout(_)) => {
|
Err(TimedSendError::Timeout(_)) => {
|
||||||
@@ -151,7 +156,11 @@ impl MePool {
|
|||||||
}
|
}
|
||||||
Err(TrySendError::Closed(_)) => {
|
Err(TrySendError::Closed(_)) => {
|
||||||
warn!(writer_id = current.writer_id, "ME writer channel closed");
|
warn!(writer_id = current.writer_id, "ME writer channel closed");
|
||||||
self.remove_writer_and_close_clients(current.writer_id).await;
|
self.remove_writer_and_close_clients(
|
||||||
|
current.writer_id,
|
||||||
|
MeWriterTeardownReason::RouteChannelClosed,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -458,7 +467,11 @@ impl MePool {
|
|||||||
Err(TrySendError::Closed(_)) => {
|
Err(TrySendError::Closed(_)) => {
|
||||||
self.stats.increment_me_writer_pick_closed_total(pick_mode);
|
self.stats.increment_me_writer_pick_closed_total(pick_mode);
|
||||||
warn!(writer_id = w.id, "ME writer channel closed");
|
warn!(writer_id = w.id, "ME writer channel closed");
|
||||||
self.remove_writer_and_close_clients(w.id).await;
|
self.remove_writer_and_close_clients(
|
||||||
|
w.id,
|
||||||
|
MeWriterTeardownReason::RouteChannelClosed,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -503,7 +516,11 @@ impl MePool {
|
|||||||
Err(TimedSendError::Closed(_)) => {
|
Err(TimedSendError::Closed(_)) => {
|
||||||
self.stats.increment_me_writer_pick_closed_total(pick_mode);
|
self.stats.increment_me_writer_pick_closed_total(pick_mode);
|
||||||
warn!(writer_id = w.id, "ME writer channel closed (blocking)");
|
warn!(writer_id = w.id, "ME writer channel closed (blocking)");
|
||||||
self.remove_writer_and_close_clients(w.id).await;
|
self.remove_writer_and_close_clients(
|
||||||
|
w.id,
|
||||||
|
MeWriterTeardownReason::RouteChannelClosed,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
Err(TimedSendError::Timeout(_)) => {
|
Err(TimedSendError::Timeout(_)) => {
|
||||||
self.stats.increment_me_writer_pick_full_total(pick_mode);
|
self.stats.increment_me_writer_pick_full_total(pick_mode);
|
||||||
@@ -654,7 +671,11 @@ impl MePool {
|
|||||||
}
|
}
|
||||||
Err(TrySendError::Closed(_)) => {
|
Err(TrySendError::Closed(_)) => {
|
||||||
debug!("ME close write failed");
|
debug!("ME close write failed");
|
||||||
self.remove_writer_and_close_clients(w.writer_id).await;
|
self.remove_writer_and_close_clients(
|
||||||
|
w.writer_id,
|
||||||
|
MeWriterTeardownReason::CloseRpcChannelClosed,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
Reference in New Issue
Block a user