Teardown Monitoring in API

Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
This commit is contained in:
Alexey
2026-03-20 12:45:53 +03:00
parent b55b264345
commit ef9b7b1492
9 changed files with 496 additions and 35 deletions

View File

@@ -205,6 +205,16 @@ pub(super) struct ZeroPoolData {
pub(super) refill_failed_total: u64,
pub(super) writer_restored_same_endpoint_total: u64,
pub(super) writer_restored_fallback_total: u64,
pub(super) teardown_attempt_total_normal: u64,
pub(super) teardown_attempt_total_hard_detach: u64,
pub(super) teardown_success_total_normal: u64,
pub(super) teardown_success_total_hard_detach: u64,
pub(super) teardown_timeout_total: u64,
pub(super) teardown_escalation_total: u64,
pub(super) teardown_noop_total: u64,
pub(super) teardown_cleanup_side_effect_failures_total: u64,
pub(super) teardown_duration_count_total: u64,
pub(super) teardown_duration_sum_seconds_total: f64,
}
#[derive(Serialize, Clone)]

View File

@@ -4,6 +4,9 @@ use std::time::{SystemTime, UNIX_EPOCH};
use serde::Serialize;
use crate::config::ProxyConfig;
use crate::stats::{
MeWriterCleanupSideEffectStep, MeWriterTeardownMode, MeWriterTeardownReason, Stats,
};
use super::ApiShared;
@@ -98,6 +101,50 @@ pub(super) struct RuntimeMeQualityCountersData {
pub(super) reconnect_success_total: u64,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityTeardownAttemptData {
pub(super) reason: &'static str,
pub(super) mode: &'static str,
pub(super) total: u64,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityTeardownSuccessData {
pub(super) mode: &'static str,
pub(super) total: u64,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityTeardownSideEffectData {
pub(super) step: &'static str,
pub(super) total: u64,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityTeardownDurationBucketData {
pub(super) le_seconds: &'static str,
pub(super) total: u64,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityTeardownDurationData {
pub(super) mode: &'static str,
pub(super) count: u64,
pub(super) sum_seconds: f64,
pub(super) buckets: Vec<RuntimeMeQualityTeardownDurationBucketData>,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityTeardownData {
pub(super) attempts: Vec<RuntimeMeQualityTeardownAttemptData>,
pub(super) success: Vec<RuntimeMeQualityTeardownSuccessData>,
pub(super) timeout_total: u64,
pub(super) escalation_total: u64,
pub(super) noop_total: u64,
pub(super) cleanup_side_effect_failures: Vec<RuntimeMeQualityTeardownSideEffectData>,
pub(super) duration: Vec<RuntimeMeQualityTeardownDurationData>,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityRouteDropData {
pub(super) no_conn_total: u64,
@@ -120,6 +167,7 @@ pub(super) struct RuntimeMeQualityDcRttData {
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityPayload {
pub(super) counters: RuntimeMeQualityCountersData,
pub(super) teardown: RuntimeMeQualityTeardownData,
pub(super) route_drops: RuntimeMeQualityRouteDropData,
pub(super) dc_rtt: Vec<RuntimeMeQualityDcRttData>,
}
@@ -374,6 +422,7 @@ pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> Runtime
reconnect_attempt_total: shared.stats.get_me_reconnect_attempts(),
reconnect_success_total: shared.stats.get_me_reconnect_success(),
},
teardown: build_runtime_me_teardown_data(shared),
route_drops: RuntimeMeQualityRouteDropData {
no_conn_total: shared.stats.get_me_route_drop_no_conn(),
channel_closed_total: shared.stats.get_me_route_drop_channel_closed(),
@@ -397,6 +446,81 @@ pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> Runtime
}
}
fn build_runtime_me_teardown_data(shared: &ApiShared) -> RuntimeMeQualityTeardownData {
let attempts = MeWriterTeardownReason::ALL
.iter()
.copied()
.flat_map(|reason| {
MeWriterTeardownMode::ALL
.iter()
.copied()
.map(move |mode| RuntimeMeQualityTeardownAttemptData {
reason: reason.as_str(),
mode: mode.as_str(),
total: shared.stats.get_me_writer_teardown_attempt_total(reason, mode),
})
})
.collect();
let success = MeWriterTeardownMode::ALL
.iter()
.copied()
.map(|mode| RuntimeMeQualityTeardownSuccessData {
mode: mode.as_str(),
total: shared.stats.get_me_writer_teardown_success_total(mode),
})
.collect();
let cleanup_side_effect_failures = MeWriterCleanupSideEffectStep::ALL
.iter()
.copied()
.map(|step| RuntimeMeQualityTeardownSideEffectData {
step: step.as_str(),
total: shared
.stats
.get_me_writer_cleanup_side_effect_failures_total(step),
})
.collect();
let duration = MeWriterTeardownMode::ALL
.iter()
.copied()
.map(|mode| {
let count = shared.stats.get_me_writer_teardown_duration_count(mode);
let mut buckets: Vec<RuntimeMeQualityTeardownDurationBucketData> = Stats::me_writer_teardown_duration_bucket_labels()
.iter()
.enumerate()
.map(|(bucket_idx, label)| RuntimeMeQualityTeardownDurationBucketData {
le_seconds: label,
total: shared
.stats
.get_me_writer_teardown_duration_bucket_total(mode, bucket_idx),
})
.collect();
buckets.push(RuntimeMeQualityTeardownDurationBucketData {
le_seconds: "+Inf",
total: count,
});
RuntimeMeQualityTeardownDurationData {
mode: mode.as_str(),
count,
sum_seconds: shared.stats.get_me_writer_teardown_duration_sum_seconds(mode),
buckets,
}
})
.collect();
RuntimeMeQualityTeardownData {
attempts,
success,
timeout_total: shared.stats.get_me_writer_teardown_timeout_total(),
escalation_total: shared.stats.get_me_writer_teardown_escalation_total(),
noop_total: shared.stats.get_me_writer_teardown_noop_total(),
cleanup_side_effect_failures,
duration,
}
}
pub(super) async fn build_runtime_upstream_quality_data(
shared: &ApiShared,
) -> RuntimeUpstreamQualityData {

View File

@@ -1,7 +1,7 @@
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use crate::config::ApiConfig;
use crate::stats::Stats;
use crate::stats::{MeWriterTeardownMode, Stats};
use crate::transport::upstream::IpPreference;
use crate::transport::UpstreamRouteKind;
@@ -106,6 +106,29 @@ pub(super) fn build_zero_all_data(stats: &Stats, configured_users: usize) -> Zer
refill_failed_total: stats.get_me_refill_failed_total(),
writer_restored_same_endpoint_total: stats.get_me_writer_restored_same_endpoint_total(),
writer_restored_fallback_total: stats.get_me_writer_restored_fallback_total(),
teardown_attempt_total_normal: stats
.get_me_writer_teardown_attempt_total_by_mode(MeWriterTeardownMode::Normal),
teardown_attempt_total_hard_detach: stats
.get_me_writer_teardown_attempt_total_by_mode(MeWriterTeardownMode::HardDetach),
teardown_success_total_normal: stats
.get_me_writer_teardown_success_total(MeWriterTeardownMode::Normal),
teardown_success_total_hard_detach: stats
.get_me_writer_teardown_success_total(MeWriterTeardownMode::HardDetach),
teardown_timeout_total: stats.get_me_writer_teardown_timeout_total(),
teardown_escalation_total: stats.get_me_writer_teardown_escalation_total(),
teardown_noop_total: stats.get_me_writer_teardown_noop_total(),
teardown_cleanup_side_effect_failures_total: stats
.get_me_writer_cleanup_side_effect_failures_total_all(),
teardown_duration_count_total: stats
.get_me_writer_teardown_duration_count(MeWriterTeardownMode::Normal)
.saturating_add(
stats.get_me_writer_teardown_duration_count(MeWriterTeardownMode::HardDetach),
),
teardown_duration_sum_seconds_total: stats
.get_me_writer_teardown_duration_sum_seconds(MeWriterTeardownMode::Normal)
+ stats.get_me_writer_teardown_duration_sum_seconds(
MeWriterTeardownMode::HardDetach,
),
},
desync: ZeroDesyncData {
secure_padding_invalid_total: stats.get_secure_padding_invalid(),