mirror of
https://github.com/telemt/telemt.git
synced 2026-04-21 04:24:10 +03:00
Add health monitoring tests for draining writers
- Introduced adversarial tests to validate the behavior of the health monitoring system under various conditions, including the management of draining writers. - Implemented integration tests to ensure the health monitor correctly handles expired and empty draining writers. - Added regression tests to verify the functionality of the draining writers' cleanup process, ensuring it adheres to the defined thresholds and budgets. - Updated the module structure to include the new test files for better organization and maintainability.
This commit is contained in:
@@ -25,6 +25,9 @@ const HEALTH_RECONNECT_BUDGET_PER_CORE: usize = 2;
|
||||
/// Reconnect allowance granted per DC each health cycle (see the per-core
/// constant above for the scaling component).
const HEALTH_RECONNECT_BUDGET_PER_DC: usize = 1;
/// Lower clamp for the per-cycle reconnect budget.
const HEALTH_RECONNECT_BUDGET_MIN: usize = 4;
/// Upper clamp for the per-cycle reconnect budget.
const HEALTH_RECONNECT_BUDGET_MAX: usize = 128;
/// Draining-writer close allowance per CPU core per health cycle; multiplied
/// by `available_parallelism()` in `health_drain_close_budget()`.
const HEALTH_DRAIN_CLOSE_BUDGET_PER_CORE: usize = 16;
/// Lower clamp for the per-cycle draining-writer close budget.
const HEALTH_DRAIN_CLOSE_BUDGET_MIN: usize = 16;
/// Upper clamp for the per-cycle draining-writer close budget.
const HEALTH_DRAIN_CLOSE_BUDGET_MAX: usize = 256;
|
||||
#[derive(Debug, Clone)]
|
||||
struct DcFloorPlanEntry {
|
||||
@@ -111,7 +114,7 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
|
||||
}
|
||||
}
|
||||
|
||||
async fn reap_draining_writers(
|
||||
pub(super) async fn reap_draining_writers(
|
||||
pool: &Arc<MePool>,
|
||||
warn_next_allowed: &mut HashMap<u64, Instant>,
|
||||
) {
|
||||
@@ -122,14 +125,22 @@ async fn reap_draining_writers(
|
||||
.me_pool_drain_threshold
|
||||
.load(std::sync::atomic::Ordering::Relaxed);
|
||||
let writers = pool.writers.read().await.clone();
|
||||
let activity = pool.registry.writer_activity_snapshot().await;
|
||||
let mut draining_writers = Vec::new();
|
||||
let mut empty_writer_ids = Vec::<u64>::new();
|
||||
let mut force_close_writer_ids = Vec::<u64>::new();
|
||||
for writer in writers {
|
||||
if !writer.draining.load(std::sync::atomic::Ordering::Relaxed) {
|
||||
continue;
|
||||
}
|
||||
let is_empty = pool.registry.is_writer_empty(writer.id).await;
|
||||
if is_empty {
|
||||
pool.remove_writer_and_close_clients(writer.id).await;
|
||||
if activity
|
||||
.bound_clients_by_writer
|
||||
.get(&writer.id)
|
||||
.copied()
|
||||
.unwrap_or(0)
|
||||
== 0
|
||||
{
|
||||
empty_writer_ids.push(writer.id);
|
||||
continue;
|
||||
}
|
||||
draining_writers.push(writer);
|
||||
@@ -156,12 +167,13 @@ async fn reap_draining_writers(
|
||||
"ME draining writer threshold exceeded, force-closing oldest draining writers"
|
||||
);
|
||||
for writer in draining_writers.drain(..overflow) {
|
||||
pool.stats.increment_pool_force_close_total();
|
||||
pool.remove_writer_and_close_clients(writer.id).await;
|
||||
force_close_writer_ids.push(writer.id);
|
||||
}
|
||||
}
|
||||
|
||||
let mut active_draining_writer_ids = HashSet::with_capacity(draining_writers.len());
|
||||
for writer in draining_writers {
|
||||
active_draining_writer_ids.insert(writer.id);
|
||||
let drain_started_at_epoch_secs = writer
|
||||
.draining_started_at_epoch_secs
|
||||
.load(std::sync::atomic::Ordering::Relaxed);
|
||||
@@ -191,10 +203,59 @@ async fn reap_draining_writers(
|
||||
.load(std::sync::atomic::Ordering::Relaxed);
|
||||
if deadline_epoch_secs != 0 && now_epoch_secs >= deadline_epoch_secs {
|
||||
warn!(writer_id = writer.id, "Drain timeout, force-closing");
|
||||
pool.stats.increment_pool_force_close_total();
|
||||
pool.remove_writer_and_close_clients(writer.id).await;
|
||||
force_close_writer_ids.push(writer.id);
|
||||
active_draining_writer_ids.remove(&writer.id);
|
||||
}
|
||||
}
|
||||
|
||||
warn_next_allowed.retain(|writer_id, _| active_draining_writer_ids.contains(writer_id));
|
||||
|
||||
let close_budget = health_drain_close_budget();
|
||||
let requested_force_close = force_close_writer_ids.len();
|
||||
let requested_empty_close = empty_writer_ids.len();
|
||||
let requested_close_total = requested_force_close.saturating_add(requested_empty_close);
|
||||
let mut closed_writer_ids = HashSet::<u64>::new();
|
||||
let mut closed_total = 0usize;
|
||||
for writer_id in force_close_writer_ids {
|
||||
if closed_total >= close_budget {
|
||||
break;
|
||||
}
|
||||
if !closed_writer_ids.insert(writer_id) {
|
||||
continue;
|
||||
}
|
||||
pool.stats.increment_pool_force_close_total();
|
||||
pool.remove_writer_and_close_clients(writer_id).await;
|
||||
closed_total = closed_total.saturating_add(1);
|
||||
}
|
||||
for writer_id in empty_writer_ids {
|
||||
if closed_total >= close_budget {
|
||||
break;
|
||||
}
|
||||
if !closed_writer_ids.insert(writer_id) {
|
||||
continue;
|
||||
}
|
||||
pool.remove_writer_and_close_clients(writer_id).await;
|
||||
closed_total = closed_total.saturating_add(1);
|
||||
}
|
||||
|
||||
let pending_close_total = requested_close_total.saturating_sub(closed_total);
|
||||
if pending_close_total > 0 {
|
||||
warn!(
|
||||
close_budget,
|
||||
closed_total,
|
||||
pending_close_total,
|
||||
"ME draining close backlog deferred to next health cycle"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn health_drain_close_budget() -> usize {
|
||||
let cpu_cores = std::thread::available_parallelism()
|
||||
.map(std::num::NonZeroUsize::get)
|
||||
.unwrap_or(1);
|
||||
cpu_cores
|
||||
.saturating_mul(HEALTH_DRAIN_CLOSE_BUDGET_PER_CORE)
|
||||
.clamp(HEALTH_DRAIN_CLOSE_BUDGET_MIN, HEALTH_DRAIN_CLOSE_BUDGET_MAX)
|
||||
}
|
||||
|
||||
fn should_emit_writer_warn(
|
||||
|
||||
437
src/transport/middle_proxy/health_adversarial_tests.rs
Normal file
437
src/transport/middle_proxy/health_adversarial_tests.rs
Normal file
@@ -0,0 +1,437 @@
|
||||
use std::collections::HashMap;
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, AtomicU64, Ordering};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use super::codec::WriterCommand;
|
||||
use super::health::{health_drain_close_budget, reap_draining_writers};
|
||||
use super::pool::{MePool, MeWriter, WriterContour};
|
||||
use super::registry::ConnMeta;
|
||||
use super::me_health_monitor;
|
||||
use crate::config::{GeneralConfig, MeRouteNoWriterMode, MeSocksKdfPolicy, MeWriterPickMode};
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::network::probe::NetworkDecision;
|
||||
use crate::stats::Stats;
|
||||
|
||||
/// Builds a test `MePool` from a default `GeneralConfig`, overriding only the
/// drain threshold and the two health-cycle intervals.
///
/// Returns the pool together with the RNG handle that was installed into it,
/// so tests can drive `me_health_monitor` with the same RNG instance.
///
/// NOTE(review): the arguments below are positional and must match
/// `MePool::new`'s parameter order exactly — verify against pool.rs whenever
/// that constructor changes.
async fn make_pool(
    me_pool_drain_threshold: u64,
    me_health_interval_ms_unhealthy: u64,
    me_health_interval_ms_healthy: u64,
) -> (Arc<MePool>, Arc<SecureRandom>) {
    let general = GeneralConfig {
        me_pool_drain_threshold,
        me_health_interval_ms_unhealthy,
        me_health_interval_ms_healthy,
        ..GeneralConfig::default()
    };

    let rng = Arc::new(SecureRandom::new());
    let pool = MePool::new(
        // endpoint/secret/bootstrap placeholders for an offline test pool
        None,
        vec![1u8; 32],
        None,
        false,
        None,
        Vec::new(),
        1,
        None,
        12,
        1200,
        HashMap::new(),
        HashMap::new(),
        None,
        NetworkDecision::default(),
        None,
        rng.clone(),
        Arc::new(Stats::default()),
        // keepalive settings
        general.me_keepalive_enabled,
        general.me_keepalive_interval_secs,
        general.me_keepalive_jitter_secs,
        general.me_keepalive_payload_random,
        general.rpc_proxy_req_every,
        // warmup stagger settings
        general.me_warmup_stagger_enabled,
        general.me_warmup_step_delay_ms,
        general.me_warmup_step_jitter_ms,
        // reconnect settings
        general.me_reconnect_max_concurrent_per_dc,
        general.me_reconnect_backoff_base_ms,
        general.me_reconnect_backoff_cap_ms,
        general.me_reconnect_fast_retry_count,
        // single-endpoint shadow/outage settings
        general.me_single_endpoint_shadow_writers,
        general.me_single_endpoint_outage_mode_enabled,
        general.me_single_endpoint_outage_disable_quarantine,
        general.me_single_endpoint_outage_backoff_min_ms,
        general.me_single_endpoint_outage_backoff_max_ms,
        general.me_single_endpoint_shadow_rotate_every_secs,
        // adaptive floor settings
        general.me_floor_mode,
        general.me_adaptive_floor_idle_secs,
        general.me_adaptive_floor_min_writers_single_endpoint,
        general.me_adaptive_floor_min_writers_multi_endpoint,
        general.me_adaptive_floor_recover_grace_secs,
        general.me_adaptive_floor_writers_per_core_total,
        general.me_adaptive_floor_cpu_cores_override,
        general.me_adaptive_floor_max_extra_writers_single_per_core,
        general.me_adaptive_floor_max_extra_writers_multi_per_core,
        general.me_adaptive_floor_max_active_writers_per_core,
        general.me_adaptive_floor_max_warm_writers_per_core,
        general.me_adaptive_floor_max_active_writers_global,
        general.me_adaptive_floor_max_warm_writers_global,
        // hardswap / drain settings (drain threshold is the value under test)
        general.hardswap,
        general.me_pool_drain_ttl_secs,
        general.me_pool_drain_threshold,
        general.effective_me_pool_force_close_secs(),
        general.me_pool_min_fresh_ratio,
        general.me_hardswap_warmup_delay_min_ms,
        general.me_hardswap_warmup_delay_max_ms,
        general.me_hardswap_warmup_extra_passes,
        general.me_hardswap_warmup_pass_backoff_base_ms,
        // bind-staleness / secret-snapshot / writer-sort settings
        general.me_bind_stale_mode,
        general.me_bind_stale_ttl_secs,
        general.me_secret_atomic_snapshot,
        general.me_deterministic_writer_sort,
        MeWriterPickMode::default(),
        general.me_writer_pick_sample_size,
        MeSocksKdfPolicy::default(),
        // channel capacities and backpressure settings
        general.me_writer_cmd_channel_capacity,
        general.me_route_channel_capacity,
        general.me_route_backpressure_base_timeout_ms,
        general.me_route_backpressure_high_timeout_ms,
        general.me_route_backpressure_high_watermark_pct,
        general.me_reader_route_data_wait_ms,
        // health-cycle intervals under test, plus warn rate limiting
        general.me_health_interval_ms_unhealthy,
        general.me_health_interval_ms_healthy,
        general.me_warn_rate_limit_ms,
        // route / inline-recovery settings
        MeRouteNoWriterMode::default(),
        general.me_route_no_writer_wait_ms,
        general.me_route_inline_recovery_attempts,
        general.me_route_inline_recovery_wait_ms,
    );

    (pool, rng)
}
|
||||
|
||||
/// Registers a synthetic writer that is already in the draining state and,
/// optionally, binds `bound_clients` freshly registered client connections to
/// it in the registry.
///
/// * `drain_started_at_epoch_secs` — epoch timestamp stored in the writer's
///   `draining_started_at_epoch_secs` atomic.
/// * `drain_deadline_epoch_secs` — force-close deadline; `0` means no
///   deadline (the health reaper only force-closes when the deadline is
///   non-zero and in the past).
async fn insert_draining_writer(
    pool: &Arc<MePool>,
    writer_id: u64,
    drain_started_at_epoch_secs: u64,
    bound_clients: usize,
    drain_deadline_epoch_secs: u64,
) {
    // The receive side is intentionally dropped later; the writer never
    // consumes commands in these tests.
    let (tx, _writer_rx) = mpsc::channel::<WriterCommand>(8);
    let writer = MeWriter {
        id: writer_id,
        // Unique localhost address per writer id.
        addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 6000 + writer_id as u16),
        source_ip: IpAddr::V4(Ipv4Addr::LOCALHOST),
        writer_dc: 2,
        generation: 1,
        contour: Arc::new(AtomicU8::new(WriterContour::Draining.as_u8())),
        // Higher ids get an older created_at, giving deterministic age order.
        created_at: Instant::now() - Duration::from_secs(writer_id),
        tx: tx.clone(),
        cancel: CancellationToken::new(),
        degraded: Arc::new(AtomicBool::new(false)),
        rtt_ema_ms_x10: Arc::new(AtomicU32::new(0)),
        // Already draining when inserted — the state the health reaper acts on.
        draining: Arc::new(AtomicBool::new(true)),
        draining_started_at_epoch_secs: Arc::new(AtomicU64::new(drain_started_at_epoch_secs)),
        drain_deadline_epoch_secs: Arc::new(AtomicU64::new(drain_deadline_epoch_secs)),
        allow_drain_fallback: Arc::new(AtomicBool::new(false)),
    };

    pool.writers.write().await.push(writer);
    pool.registry.register_writer(writer_id, tx).await;
    // Account for the writer in the pool's connection count.
    pool.conn_count.fetch_add(1, Ordering::Relaxed);

    for idx in 0..bound_clients {
        let (conn_id, _rx) = pool.registry.register().await;
        // Binding must succeed for the test fixture to be meaningful.
        assert!(
            pool.registry
                .bind_writer(
                    conn_id,
                    writer_id,
                    ConnMeta {
                        target_dc: 2,
                        client_addr: SocketAddr::new(
                            IpAddr::V4(Ipv4Addr::LOCALHOST),
                            8000 + idx as u16,
                        ),
                        our_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 443),
                        proto_flags: 0,
                    },
                )
                .await
        );
    }
}
|
||||
|
||||
async fn writer_count(pool: &Arc<MePool>) -> usize {
|
||||
pool.writers.read().await.len()
|
||||
}
|
||||
|
||||
async fn sorted_writer_ids(pool: &Arc<MePool>) -> Vec<u64> {
|
||||
let mut ids = pool
|
||||
.writers
|
||||
.read()
|
||||
.await
|
||||
.iter()
|
||||
.map(|writer| writer.id)
|
||||
.collect::<Vec<_>>();
|
||||
ids.sort_unstable();
|
||||
ids
|
||||
}
|
||||
|
||||
/// With no draining writers in the pool, a single reap pass must drop every
/// stale warn-rate-limit entry (they reference writers that no longer exist).
#[tokio::test]
async fn reap_draining_writers_clears_warn_state_when_pool_empty() {
    let (pool, _rng) = make_pool(128, 1, 1).await;
    let mut warn_next_allowed = HashMap::new();
    // Entries for writer ids that are not present in the pool.
    warn_next_allowed.insert(11, Instant::now() + Duration::from_secs(5));
    warn_next_allowed.insert(22, Instant::now() + Duration::from_secs(5));

    reap_draining_writers(&pool, &mut warn_next_allowed).await;

    assert!(warn_next_allowed.is_empty());
}
|
||||
|
||||
/// Repeated reap passes over 60 draining writers must converge to exactly the
/// configured threshold, keeping the youngest drains (highest start times).
#[tokio::test]
async fn reap_draining_writers_respects_threshold_across_multiple_overflow_cycles() {
    let threshold = 3u64;
    let (pool, _rng) = make_pool(threshold, 1, 1).await;
    let now_epoch_secs = MePool::now_epoch_secs();

    for writer_id in 1..=60u64 {
        insert_draining_writer(
            &pool,
            writer_id,
            // Larger id => later drain start => "younger" drain.
            now_epoch_secs.saturating_sub(600).saturating_add(writer_id),
            1,
            0,
        )
        .await;
    }

    let mut warn_next_allowed = HashMap::new();
    // Bounded number of cycles: each pass closes at most the per-cycle budget.
    for _ in 0..64 {
        reap_draining_writers(&pool, &mut warn_next_allowed).await;
        if writer_count(&pool).await <= threshold as usize {
            break;
        }
    }

    assert_eq!(writer_count(&pool).await, threshold as usize);
    // The three writers with the most recent drain starts survive.
    assert_eq!(sorted_writer_ids(&pool).await, vec![58, 59, 60]);
}
|
||||
|
||||
/// A population of draining writers with zero bound clients, larger than the
/// per-cycle close budget, must be fully removed within a bounded number of
/// reap cycles.
#[tokio::test]
async fn reap_draining_writers_handles_large_empty_writer_population() {
    let (pool, _rng) = make_pool(128, 1, 1).await;
    let now_epoch_secs = MePool::now_epoch_secs();
    // Deliberately exceeds the budget so multiple cycles are required.
    let total = health_drain_close_budget().saturating_mul(3).saturating_add(27);

    for writer_id in 1..=total as u64 {
        insert_draining_writer(
            &pool,
            writer_id,
            now_epoch_secs.saturating_sub(120),
            0, // no bound clients => eligible for non-forced cleanup
            0, // no deadline
        )
        .await;
    }

    let mut warn_next_allowed = HashMap::new();
    for _ in 0..24 {
        if writer_count(&pool).await == 0 {
            break;
        }
        reap_draining_writers(&pool, &mut warn_next_allowed).await;
    }

    assert_eq!(writer_count(&pool).await, 0);
}
|
||||
|
||||
/// When every draining writer's force-close deadline is already in the past,
/// repeated budgeted reap passes must still drain the whole backlog to zero.
#[tokio::test]
async fn reap_draining_writers_processes_mass_deadline_expiry_without_unbounded_growth() {
    let (pool, _rng) = make_pool(128, 1, 1).await;
    let now_epoch_secs = MePool::now_epoch_secs();
    // Several budgets' worth of writers so progress must span cycles.
    let total = health_drain_close_budget().saturating_mul(4).saturating_add(31);

    for writer_id in 1..=total as u64 {
        insert_draining_writer(
            &pool,
            writer_id,
            now_epoch_secs.saturating_sub(180),
            1,
            now_epoch_secs.saturating_sub(1), // deadline already expired
        )
        .await;
    }

    let mut warn_next_allowed = HashMap::new();
    for _ in 0..40 {
        if writer_count(&pool).await == 0 {
            break;
        }
        reap_draining_writers(&pool, &mut warn_next_allowed).await;
    }

    assert_eq!(writer_count(&pool).await, 0);
}
|
||||
|
||||
/// Invariant check under churn: after every reap pass, the warn-rate-limit
/// map must never hold more entries than there are writers in the pool —
/// even when writers are inserted and removed out-of-band between passes.
#[tokio::test]
async fn reap_draining_writers_maintains_warn_state_subset_property_under_bulk_churn() {
    let (pool, _rng) = make_pool(128, 1, 1).await;
    let now_epoch_secs = MePool::now_epoch_secs();
    let mut warn_next_allowed = HashMap::new();

    for wave in 0..40u64 {
        for offset in 0..8u64 {
            insert_draining_writer(
                &pool,
                wave * 100 + offset, // non-overlapping ids across waves
                now_epoch_secs.saturating_sub(400 + offset),
                1,
                0,
            )
            .await;
        }

        reap_draining_writers(&pool, &mut warn_next_allowed).await;
        assert!(warn_next_allowed.len() <= writer_count(&pool).await);

        // Remove a few writers directly, bypassing the reaper, to simulate
        // concurrent removal paths.
        let ids = sorted_writer_ids(&pool).await;
        for writer_id in ids.into_iter().take(3) {
            let _ = pool.remove_writer_and_close_clients(writer_id).await;
        }

        reap_draining_writers(&pool, &mut warn_next_allowed).await;
        assert!(warn_next_allowed.len() <= writer_count(&pool).await);
    }
}
|
||||
|
||||
/// Monotonicity check: with a backlog far above the threshold, each reap pass
/// may only shrink (or keep) the pool size — never grow it.
#[tokio::test]
async fn reap_draining_writers_budgeted_cleanup_never_increases_pool_size() {
    let (pool, _rng) = make_pool(5, 1, 1).await;
    let now_epoch_secs = MePool::now_epoch_secs();

    for writer_id in 1..=200u64 {
        insert_draining_writer(
            &pool,
            writer_id,
            now_epoch_secs.saturating_sub(240).saturating_add(writer_id),
            1,
            0,
        )
        .await;
    }

    let mut warn_next_allowed = HashMap::new();
    let mut previous = writer_count(&pool).await;
    for _ in 0..32 {
        reap_draining_writers(&pool, &mut warn_next_allowed).await;
        let current = writer_count(&pool).await;
        assert!(current <= previous);
        previous = current;
    }
}
|
||||
|
||||
/// End-to-end: while the health monitor task is running, keep injecting new
/// draining writers; after the injection stops, the pool must still converge
/// to at most the configured threshold.
#[tokio::test]
async fn me_health_monitor_converges_to_threshold_under_live_injection_churn() {
    let threshold = 7u64;
    let (pool, rng) = make_pool(threshold, 1, 1).await;
    let now_epoch_secs = MePool::now_epoch_secs();

    // Initial backlog above the threshold.
    for writer_id in 1..=40u64 {
        insert_draining_writer(
            &pool,
            writer_id,
            now_epoch_secs.saturating_sub(300).saturating_add(writer_id),
            1,
            0,
        )
        .await;
    }

    let monitor = tokio::spawn(me_health_monitor(pool.clone(), rng, 0));

    // Inject fresh draining writers concurrently with the monitor.
    for wave in 0..8u64 {
        for offset in 0..10u64 {
            insert_draining_writer(
                &pool,
                1000 + wave * 100 + offset,
                now_epoch_secs.saturating_sub(120).saturating_add(offset),
                1,
                0,
            )
            .await;
        }
        tokio::time::sleep(Duration::from_millis(5)).await;
    }

    // Give the monitor time to catch up, then stop it.
    tokio::time::sleep(Duration::from_millis(120)).await;
    monitor.abort();
    let _ = monitor.await;

    assert!(writer_count(&pool).await <= threshold as usize);
}
|
||||
|
||||
/// End-to-end: 220 writers with already-expired force-close deadlines must be
/// fully drained by the running health monitor within a short window.
#[tokio::test]
async fn me_health_monitor_drains_deadline_storm_with_budgeted_progress() {
    let (pool, rng) = make_pool(128, 1, 1).await;
    let now_epoch_secs = MePool::now_epoch_secs();

    for writer_id in 1..=220u64 {
        insert_draining_writer(
            &pool,
            writer_id,
            now_epoch_secs.saturating_sub(120),
            1,
            now_epoch_secs.saturating_sub(1), // deadline already expired
        )
        .await;
    }

    let monitor = tokio::spawn(me_health_monitor(pool.clone(), rng, 0));
    tokio::time::sleep(Duration::from_millis(120)).await;
    monitor.abort();
    let _ = monitor.await;

    assert_eq!(writer_count(&pool).await, 0);
}
|
||||
|
||||
/// End-to-end: a mixed backlog (some writers empty, some with clients; some
/// with expired deadlines, some without) must be reduced to at most the
/// configured threshold by the running health monitor.
#[tokio::test]
async fn me_health_monitor_eliminates_mixed_empty_and_deadline_backlog() {
    let threshold = 12u64;
    let (pool, rng) = make_pool(threshold, 1, 1).await;
    let now_epoch_secs = MePool::now_epoch_secs();

    for writer_id in 1..=180u64 {
        // Every third writer is empty; every second has an expired deadline.
        let bound_clients = if writer_id % 3 == 0 { 0 } else { 1 };
        let deadline = if writer_id % 2 == 0 {
            now_epoch_secs.saturating_sub(1)
        } else {
            0
        };
        insert_draining_writer(
            &pool,
            writer_id,
            now_epoch_secs.saturating_sub(250).saturating_add(writer_id),
            bound_clients,
            deadline,
        )
        .await;
    }

    let monitor = tokio::spawn(me_health_monitor(pool.clone(), rng, 0));
    tokio::time::sleep(Duration::from_millis(140)).await;
    monitor.abort();
    let _ = monitor.await;

    assert!(writer_count(&pool).await <= threshold as usize);
}
|
||||
|
||||
/// The computed close budget must always land inside the documented clamp
/// bounds, regardless of how many cores the test machine reports.
#[test]
fn health_drain_close_budget_is_within_expected_bounds() {
    let budget = health_drain_close_budget();
    assert!(budget >= 16 && budget <= 256);
}
|
||||
227
src/transport/middle_proxy/health_integration_tests.rs
Normal file
227
src/transport/middle_proxy/health_integration_tests.rs
Normal file
@@ -0,0 +1,227 @@
|
||||
use std::collections::HashMap;
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, AtomicU64, Ordering};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use super::codec::WriterCommand;
|
||||
use super::health::health_drain_close_budget;
|
||||
use super::pool::{MePool, MeWriter, WriterContour};
|
||||
use super::registry::ConnMeta;
|
||||
use super::me_health_monitor;
|
||||
use crate::config::{GeneralConfig, MeRouteNoWriterMode, MeSocksKdfPolicy, MeWriterPickMode};
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::network::probe::NetworkDecision;
|
||||
use crate::stats::Stats;
|
||||
|
||||
/// Builds a test `MePool` from a default `GeneralConfig`, overriding only the
/// drain threshold and the two health-cycle intervals; returns the pool and
/// the RNG handle installed into it.
///
/// NOTE(review): arguments are positional and must match `MePool::new`'s
/// parameter order exactly — verify against pool.rs whenever it changes.
async fn make_pool(
    me_pool_drain_threshold: u64,
    me_health_interval_ms_unhealthy: u64,
    me_health_interval_ms_healthy: u64,
) -> (Arc<MePool>, Arc<SecureRandom>) {
    let general = GeneralConfig {
        me_pool_drain_threshold,
        me_health_interval_ms_unhealthy,
        me_health_interval_ms_healthy,
        ..GeneralConfig::default()
    };
    let rng = Arc::new(SecureRandom::new());
    let pool = MePool::new(
        // endpoint/secret/bootstrap placeholders for an offline test pool
        None,
        vec![1u8; 32],
        None,
        false,
        None,
        Vec::new(),
        1,
        None,
        12,
        1200,
        HashMap::new(),
        HashMap::new(),
        None,
        NetworkDecision::default(),
        None,
        rng.clone(),
        Arc::new(Stats::default()),
        general.me_keepalive_enabled,
        general.me_keepalive_interval_secs,
        general.me_keepalive_jitter_secs,
        general.me_keepalive_payload_random,
        general.rpc_proxy_req_every,
        general.me_warmup_stagger_enabled,
        general.me_warmup_step_delay_ms,
        general.me_warmup_step_jitter_ms,
        general.me_reconnect_max_concurrent_per_dc,
        general.me_reconnect_backoff_base_ms,
        general.me_reconnect_backoff_cap_ms,
        general.me_reconnect_fast_retry_count,
        general.me_single_endpoint_shadow_writers,
        general.me_single_endpoint_outage_mode_enabled,
        general.me_single_endpoint_outage_disable_quarantine,
        general.me_single_endpoint_outage_backoff_min_ms,
        general.me_single_endpoint_outage_backoff_max_ms,
        general.me_single_endpoint_shadow_rotate_every_secs,
        general.me_floor_mode,
        general.me_adaptive_floor_idle_secs,
        general.me_adaptive_floor_min_writers_single_endpoint,
        general.me_adaptive_floor_min_writers_multi_endpoint,
        general.me_adaptive_floor_recover_grace_secs,
        general.me_adaptive_floor_writers_per_core_total,
        general.me_adaptive_floor_cpu_cores_override,
        general.me_adaptive_floor_max_extra_writers_single_per_core,
        general.me_adaptive_floor_max_extra_writers_multi_per_core,
        general.me_adaptive_floor_max_active_writers_per_core,
        general.me_adaptive_floor_max_warm_writers_per_core,
        general.me_adaptive_floor_max_active_writers_global,
        general.me_adaptive_floor_max_warm_writers_global,
        general.hardswap,
        general.me_pool_drain_ttl_secs,
        general.me_pool_drain_threshold,
        general.effective_me_pool_force_close_secs(),
        general.me_pool_min_fresh_ratio,
        general.me_hardswap_warmup_delay_min_ms,
        general.me_hardswap_warmup_delay_max_ms,
        general.me_hardswap_warmup_extra_passes,
        general.me_hardswap_warmup_pass_backoff_base_ms,
        general.me_bind_stale_mode,
        general.me_bind_stale_ttl_secs,
        general.me_secret_atomic_snapshot,
        general.me_deterministic_writer_sort,
        MeWriterPickMode::default(),
        general.me_writer_pick_sample_size,
        MeSocksKdfPolicy::default(),
        general.me_writer_cmd_channel_capacity,
        general.me_route_channel_capacity,
        general.me_route_backpressure_base_timeout_ms,
        general.me_route_backpressure_high_timeout_ms,
        general.me_route_backpressure_high_watermark_pct,
        general.me_reader_route_data_wait_ms,
        general.me_health_interval_ms_unhealthy,
        general.me_health_interval_ms_healthy,
        general.me_warn_rate_limit_ms,
        MeRouteNoWriterMode::default(),
        general.me_route_no_writer_wait_ms,
        general.me_route_inline_recovery_attempts,
        general.me_route_inline_recovery_wait_ms,
    );
    (pool, rng)
}
|
||||
|
||||
/// Registers a synthetic draining writer and binds `bound_clients` client
/// connections to it. A `drain_deadline_epoch_secs` of `0` means no
/// force-close deadline.
async fn insert_draining_writer(
    pool: &Arc<MePool>,
    writer_id: u64,
    drain_started_at_epoch_secs: u64,
    bound_clients: usize,
    drain_deadline_epoch_secs: u64,
) {
    // Receive side is dropped; the writer never consumes commands here.
    let (tx, _writer_rx) = mpsc::channel::<WriterCommand>(8);
    let writer = MeWriter {
        id: writer_id,
        // Unique localhost address per writer id.
        addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 5500 + writer_id as u16),
        source_ip: IpAddr::V4(Ipv4Addr::LOCALHOST),
        writer_dc: 2,
        generation: 1,
        contour: Arc::new(AtomicU8::new(WriterContour::Draining.as_u8())),
        // Higher ids look older, giving deterministic age ordering.
        created_at: Instant::now() - Duration::from_secs(writer_id),
        tx: tx.clone(),
        cancel: CancellationToken::new(),
        degraded: Arc::new(AtomicBool::new(false)),
        rtt_ema_ms_x10: Arc::new(AtomicU32::new(0)),
        draining: Arc::new(AtomicBool::new(true)),
        draining_started_at_epoch_secs: Arc::new(AtomicU64::new(drain_started_at_epoch_secs)),
        drain_deadline_epoch_secs: Arc::new(AtomicU64::new(drain_deadline_epoch_secs)),
        allow_drain_fallback: Arc::new(AtomicBool::new(false)),
    };
    pool.writers.write().await.push(writer);
    pool.registry.register_writer(writer_id, tx).await;
    // Account for the writer in the pool's connection count.
    pool.conn_count.fetch_add(1, Ordering::Relaxed);
    for idx in 0..bound_clients {
        let (conn_id, _rx) = pool.registry.register().await;
        // Binding must succeed for the fixture to be meaningful.
        assert!(
            pool.registry
                .bind_writer(
                    conn_id,
                    writer_id,
                    ConnMeta {
                        target_dc: 2,
                        client_addr: SocketAddr::new(
                            IpAddr::V4(Ipv4Addr::LOCALHOST),
                            7200 + idx as u16,
                        ),
                        our_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 443),
                        proto_flags: 0,
                    },
                )
                .await
        );
    }
}
|
||||
|
||||
/// The running health monitor must clear a backlog of expired-deadline
/// writers that exceeds the per-cycle close budget (forcing multiple cycles).
#[tokio::test]
async fn me_health_monitor_drains_expired_backlog_over_multiple_cycles() {
    let (pool, rng) = make_pool(128, 1, 1).await;
    let now_epoch_secs = MePool::now_epoch_secs();
    // More writers than one budget's worth, so progress must span cycles.
    let writer_total = health_drain_close_budget().saturating_mul(2).saturating_add(9);
    for writer_id in 1..=writer_total as u64 {
        insert_draining_writer(
            &pool,
            writer_id,
            now_epoch_secs.saturating_sub(120),
            1,
            now_epoch_secs.saturating_sub(1), // deadline already expired
        )
        .await;
    }

    let monitor = tokio::spawn(me_health_monitor(pool.clone(), rng, 0));
    tokio::time::sleep(Duration::from_millis(60)).await;
    monitor.abort();
    let _ = monitor.await;

    assert!(pool.writers.read().await.is_empty());
}
|
||||
|
||||
/// Draining writers with no bound clients and no deadline must still be
/// removed by the health monitor (the empty-writer cleanup path).
#[tokio::test]
async fn me_health_monitor_cleans_empty_draining_writers_without_force_close() {
    let (pool, rng) = make_pool(128, 1, 1).await;
    let now_epoch_secs = MePool::now_epoch_secs();
    for writer_id in 1..=24u64 {
        // 0 bound clients, 0 deadline: only the empty-cleanup path applies.
        insert_draining_writer(&pool, writer_id, now_epoch_secs.saturating_sub(60), 0, 0).await;
    }

    let monitor = tokio::spawn(me_health_monitor(pool.clone(), rng, 0));
    tokio::time::sleep(Duration::from_millis(30)).await;
    monitor.abort();
    let _ = monitor.await;

    assert!(pool.writers.read().await.is_empty());
}
|
||||
|
||||
/// With a low threshold and a backlog larger than threshold + one close
/// budget, the health monitor must eventually empty the pool entirely.
#[tokio::test]
async fn me_health_monitor_converges_retry_like_threshold_backlog_to_empty() {
    let threshold = 4u64;
    let (pool, rng) = make_pool(threshold, 1, 1).await;
    let now_epoch_secs = MePool::now_epoch_secs();
    let writer_total = threshold as usize + health_drain_close_budget().saturating_add(11);
    for writer_id in 1..=writer_total as u64 {
        insert_draining_writer(
            &pool,
            writer_id,
            now_epoch_secs.saturating_sub(300).saturating_add(writer_id),
            1,
            0,
        )
        .await;
    }

    let monitor = tokio::spawn(me_health_monitor(pool.clone(), rng, 0));
    tokio::time::sleep(Duration::from_millis(60)).await;
    monitor.abort();
    let _ = monitor.await;

    assert!(pool.writers.read().await.is_empty());
}
|
||||
462
src/transport/middle_proxy/health_regression_tests.rs
Normal file
462
src/transport/middle_proxy/health_regression_tests.rs
Normal file
@@ -0,0 +1,462 @@
|
||||
use std::collections::HashMap;
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, AtomicU64, Ordering};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use super::codec::WriterCommand;
|
||||
use super::health::{health_drain_close_budget, reap_draining_writers};
|
||||
use super::pool::{MePool, MeWriter, WriterContour};
|
||||
use super::registry::ConnMeta;
|
||||
use crate::config::{GeneralConfig, MeRouteNoWriterMode, MeSocksKdfPolicy, MeWriterPickMode};
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::network::probe::NetworkDecision;
|
||||
use crate::stats::Stats;
|
||||
|
||||
/// Builds a test `MePool` from a default `GeneralConfig`, overriding only the
/// drain threshold. Unlike the other test files' helpers, this one does not
/// expose the RNG — the regression tests drive `reap_draining_writers`
/// directly and never need it.
///
/// NOTE(review): arguments are positional and must match `MePool::new`'s
/// parameter order exactly — verify against pool.rs whenever it changes.
async fn make_pool(me_pool_drain_threshold: u64) -> Arc<MePool> {
    let general = GeneralConfig {
        me_pool_drain_threshold,
        ..GeneralConfig::default()
    };

    MePool::new(
        // endpoint/secret/bootstrap placeholders for an offline test pool
        None,
        vec![1u8; 32],
        None,
        false,
        None,
        Vec::new(),
        1,
        None,
        12,
        1200,
        HashMap::new(),
        HashMap::new(),
        None,
        NetworkDecision::default(),
        None,
        Arc::new(SecureRandom::new()),
        Arc::new(Stats::default()),
        general.me_keepalive_enabled,
        general.me_keepalive_interval_secs,
        general.me_keepalive_jitter_secs,
        general.me_keepalive_payload_random,
        general.rpc_proxy_req_every,
        general.me_warmup_stagger_enabled,
        general.me_warmup_step_delay_ms,
        general.me_warmup_step_jitter_ms,
        general.me_reconnect_max_concurrent_per_dc,
        general.me_reconnect_backoff_base_ms,
        general.me_reconnect_backoff_cap_ms,
        general.me_reconnect_fast_retry_count,
        general.me_single_endpoint_shadow_writers,
        general.me_single_endpoint_outage_mode_enabled,
        general.me_single_endpoint_outage_disable_quarantine,
        general.me_single_endpoint_outage_backoff_min_ms,
        general.me_single_endpoint_outage_backoff_max_ms,
        general.me_single_endpoint_shadow_rotate_every_secs,
        general.me_floor_mode,
        general.me_adaptive_floor_idle_secs,
        general.me_adaptive_floor_min_writers_single_endpoint,
        general.me_adaptive_floor_min_writers_multi_endpoint,
        general.me_adaptive_floor_recover_grace_secs,
        general.me_adaptive_floor_writers_per_core_total,
        general.me_adaptive_floor_cpu_cores_override,
        general.me_adaptive_floor_max_extra_writers_single_per_core,
        general.me_adaptive_floor_max_extra_writers_multi_per_core,
        general.me_adaptive_floor_max_active_writers_per_core,
        general.me_adaptive_floor_max_warm_writers_per_core,
        general.me_adaptive_floor_max_active_writers_global,
        general.me_adaptive_floor_max_warm_writers_global,
        general.hardswap,
        general.me_pool_drain_ttl_secs,
        general.me_pool_drain_threshold,
        general.effective_me_pool_force_close_secs(),
        general.me_pool_min_fresh_ratio,
        general.me_hardswap_warmup_delay_min_ms,
        general.me_hardswap_warmup_delay_max_ms,
        general.me_hardswap_warmup_extra_passes,
        general.me_hardswap_warmup_pass_backoff_base_ms,
        general.me_bind_stale_mode,
        general.me_bind_stale_ttl_secs,
        general.me_secret_atomic_snapshot,
        general.me_deterministic_writer_sort,
        MeWriterPickMode::default(),
        general.me_writer_pick_sample_size,
        MeSocksKdfPolicy::default(),
        general.me_writer_cmd_channel_capacity,
        general.me_route_channel_capacity,
        general.me_route_backpressure_base_timeout_ms,
        general.me_route_backpressure_high_timeout_ms,
        general.me_route_backpressure_high_watermark_pct,
        general.me_reader_route_data_wait_ms,
        general.me_health_interval_ms_unhealthy,
        general.me_health_interval_ms_healthy,
        general.me_warn_rate_limit_ms,
        MeRouteNoWriterMode::default(),
        general.me_route_no_writer_wait_ms,
        general.me_route_inline_recovery_attempts,
        general.me_route_inline_recovery_wait_ms,
    )
}
|
||||
|
||||
async fn insert_draining_writer(
|
||||
pool: &Arc<MePool>,
|
||||
writer_id: u64,
|
||||
drain_started_at_epoch_secs: u64,
|
||||
bound_clients: usize,
|
||||
drain_deadline_epoch_secs: u64,
|
||||
) -> Vec<u64> {
|
||||
let mut conn_ids = Vec::with_capacity(bound_clients);
|
||||
let (tx, _writer_rx) = mpsc::channel::<WriterCommand>(8);
|
||||
let writer = MeWriter {
|
||||
id: writer_id,
|
||||
addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 4500 + writer_id as u16),
|
||||
source_ip: IpAddr::V4(Ipv4Addr::LOCALHOST),
|
||||
writer_dc: 2,
|
||||
generation: 1,
|
||||
contour: Arc::new(AtomicU8::new(WriterContour::Draining.as_u8())),
|
||||
created_at: Instant::now() - Duration::from_secs(writer_id),
|
||||
tx: tx.clone(),
|
||||
cancel: CancellationToken::new(),
|
||||
degraded: Arc::new(AtomicBool::new(false)),
|
||||
rtt_ema_ms_x10: Arc::new(AtomicU32::new(0)),
|
||||
draining: Arc::new(AtomicBool::new(true)),
|
||||
draining_started_at_epoch_secs: Arc::new(AtomicU64::new(drain_started_at_epoch_secs)),
|
||||
drain_deadline_epoch_secs: Arc::new(AtomicU64::new(drain_deadline_epoch_secs)),
|
||||
allow_drain_fallback: Arc::new(AtomicBool::new(false)),
|
||||
};
|
||||
pool.writers.write().await.push(writer);
|
||||
pool.registry.register_writer(writer_id, tx).await;
|
||||
pool.conn_count.fetch_add(1, Ordering::Relaxed);
|
||||
for idx in 0..bound_clients {
|
||||
let (conn_id, _rx) = pool.registry.register().await;
|
||||
assert!(
|
||||
pool.registry
|
||||
.bind_writer(
|
||||
conn_id,
|
||||
writer_id,
|
||||
ConnMeta {
|
||||
target_dc: 2,
|
||||
client_addr: SocketAddr::new(
|
||||
IpAddr::V4(Ipv4Addr::LOCALHOST),
|
||||
6200 + idx as u16,
|
||||
),
|
||||
our_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 443),
|
||||
proto_flags: 0,
|
||||
},
|
||||
)
|
||||
.await
|
||||
);
|
||||
conn_ids.push(conn_id);
|
||||
}
|
||||
conn_ids
|
||||
}
|
||||
|
||||
async fn current_writer_ids(pool: &Arc<MePool>) -> Vec<u64> {
|
||||
let mut writer_ids = pool
|
||||
.writers
|
||||
.read()
|
||||
.await
|
||||
.iter()
|
||||
.map(|writer| writer.id)
|
||||
.collect::<Vec<_>>();
|
||||
writer_ids.sort_unstable();
|
||||
writer_ids
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_drops_warn_state_for_removed_writer() {
|
||||
let pool = make_pool(128).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let conn_ids =
|
||||
insert_draining_writer(&pool, 7, now_epoch_secs.saturating_sub(180), 1, 0).await;
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
assert!(warn_next_allowed.contains_key(&7));
|
||||
|
||||
let _ = pool.remove_writer_and_close_clients(7).await;
|
||||
assert!(pool.registry.get_writer(conn_ids[0]).await.is_none());
|
||||
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
assert!(!warn_next_allowed.contains_key(&7));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_removes_empty_draining_writers() {
|
||||
let pool = make_pool(128).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
insert_draining_writer(&pool, 1, now_epoch_secs.saturating_sub(40), 0, 0).await;
|
||||
insert_draining_writer(&pool, 2, now_epoch_secs.saturating_sub(30), 0, 0).await;
|
||||
insert_draining_writer(&pool, 3, now_epoch_secs.saturating_sub(20), 1, 0).await;
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
|
||||
assert_eq!(current_writer_ids(&pool).await, vec![3]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_overflow_closes_oldest_non_empty_writers() {
|
||||
let pool = make_pool(2).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
insert_draining_writer(&pool, 11, now_epoch_secs.saturating_sub(40), 1, 0).await;
|
||||
insert_draining_writer(&pool, 22, now_epoch_secs.saturating_sub(30), 1, 0).await;
|
||||
insert_draining_writer(&pool, 33, now_epoch_secs.saturating_sub(20), 1, 0).await;
|
||||
insert_draining_writer(&pool, 44, now_epoch_secs.saturating_sub(10), 1, 0).await;
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
|
||||
assert_eq!(current_writer_ids(&pool).await, vec![33, 44]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_deadline_force_close_applies_under_threshold() {
|
||||
let pool = make_pool(128).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
insert_draining_writer(
|
||||
&pool,
|
||||
50,
|
||||
now_epoch_secs.saturating_sub(15),
|
||||
1,
|
||||
now_epoch_secs.saturating_sub(1),
|
||||
)
|
||||
.await;
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
|
||||
assert!(current_writer_ids(&pool).await.is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_limits_closes_per_health_tick() {
|
||||
let pool = make_pool(128).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let close_budget = health_drain_close_budget();
|
||||
let writer_total = close_budget.saturating_add(19);
|
||||
for writer_id in 1..=writer_total as u64 {
|
||||
insert_draining_writer(
|
||||
&pool,
|
||||
writer_id,
|
||||
now_epoch_secs.saturating_sub(20),
|
||||
1,
|
||||
now_epoch_secs.saturating_sub(1),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
|
||||
assert_eq!(pool.writers.read().await.len(), writer_total - close_budget);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_backlog_drains_across_ticks() {
|
||||
let pool = make_pool(128).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let close_budget = health_drain_close_budget();
|
||||
let writer_total = close_budget.saturating_mul(2).saturating_add(7);
|
||||
for writer_id in 1..=writer_total as u64 {
|
||||
insert_draining_writer(
|
||||
&pool,
|
||||
writer_id,
|
||||
now_epoch_secs.saturating_sub(20),
|
||||
1,
|
||||
now_epoch_secs.saturating_sub(1),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
for _ in 0..8 {
|
||||
if pool.writers.read().await.is_empty() {
|
||||
break;
|
||||
}
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
}
|
||||
|
||||
assert!(pool.writers.read().await.is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_threshold_backlog_converges_to_threshold() {
|
||||
let threshold = 5u64;
|
||||
let pool = make_pool(threshold).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let close_budget = health_drain_close_budget();
|
||||
let writer_total = threshold as usize + close_budget.saturating_add(12);
|
||||
for writer_id in 1..=writer_total as u64 {
|
||||
insert_draining_writer(
|
||||
&pool,
|
||||
writer_id,
|
||||
now_epoch_secs.saturating_sub(200).saturating_add(writer_id),
|
||||
1,
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
for _ in 0..16 {
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
if pool.writers.read().await.len() <= threshold as usize {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(pool.writers.read().await.len(), threshold as usize);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_threshold_zero_preserves_non_expired_non_empty_writers() {
|
||||
let pool = make_pool(0).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
insert_draining_writer(&pool, 10, now_epoch_secs.saturating_sub(40), 1, 0).await;
|
||||
insert_draining_writer(&pool, 20, now_epoch_secs.saturating_sub(30), 1, 0).await;
|
||||
insert_draining_writer(&pool, 30, now_epoch_secs.saturating_sub(20), 1, 0).await;
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
|
||||
assert_eq!(current_writer_ids(&pool).await, vec![10, 20, 30]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_prioritizes_force_close_before_empty_cleanup() {
|
||||
let pool = make_pool(128).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let close_budget = health_drain_close_budget();
|
||||
for writer_id in 1..=close_budget as u64 {
|
||||
insert_draining_writer(
|
||||
&pool,
|
||||
writer_id,
|
||||
now_epoch_secs.saturating_sub(20),
|
||||
1,
|
||||
now_epoch_secs.saturating_sub(1),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
let empty_writer_id = close_budget as u64 + 1;
|
||||
insert_draining_writer(&pool, empty_writer_id, now_epoch_secs.saturating_sub(20), 0, 0).await;
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
|
||||
assert_eq!(current_writer_ids(&pool).await, vec![empty_writer_id]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_empty_cleanup_does_not_increment_force_close_metric() {
|
||||
let pool = make_pool(128).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
insert_draining_writer(&pool, 1, now_epoch_secs.saturating_sub(60), 0, 0).await;
|
||||
insert_draining_writer(&pool, 2, now_epoch_secs.saturating_sub(50), 0, 0).await;
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
|
||||
assert!(current_writer_ids(&pool).await.is_empty());
|
||||
assert_eq!(pool.stats.get_pool_force_close_total(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_handles_duplicate_force_close_requests_for_same_writer() {
|
||||
let pool = make_pool(1).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
insert_draining_writer(
|
||||
&pool,
|
||||
10,
|
||||
now_epoch_secs.saturating_sub(30),
|
||||
1,
|
||||
now_epoch_secs.saturating_sub(1),
|
||||
)
|
||||
.await;
|
||||
insert_draining_writer(
|
||||
&pool,
|
||||
20,
|
||||
now_epoch_secs.saturating_sub(20),
|
||||
1,
|
||||
now_epoch_secs.saturating_sub(1),
|
||||
)
|
||||
.await;
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
|
||||
assert!(current_writer_ids(&pool).await.is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_warn_state_never_exceeds_live_draining_population_under_churn() {
|
||||
let pool = make_pool(128).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
for wave in 0..12u64 {
|
||||
for offset in 0..9u64 {
|
||||
insert_draining_writer(
|
||||
&pool,
|
||||
wave * 100 + offset,
|
||||
now_epoch_secs.saturating_sub(120 + offset),
|
||||
1,
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
assert!(warn_next_allowed.len() <= pool.writers.read().await.len());
|
||||
|
||||
let existing_writer_ids = current_writer_ids(&pool).await;
|
||||
for writer_id in existing_writer_ids.into_iter().take(4) {
|
||||
let _ = pool.remove_writer_and_close_clients(writer_id).await;
|
||||
}
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
assert!(warn_next_allowed.len() <= pool.writers.read().await.len());
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_mixed_backlog_converges_without_leaking_warn_state() {
|
||||
let pool = make_pool(6).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
|
||||
for writer_id in 1..=18u64 {
|
||||
let bound_clients = if writer_id % 3 == 0 { 0 } else { 1 };
|
||||
let deadline = if writer_id % 2 == 0 {
|
||||
now_epoch_secs.saturating_sub(1)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
insert_draining_writer(
|
||||
&pool,
|
||||
writer_id,
|
||||
now_epoch_secs.saturating_sub(300).saturating_add(writer_id),
|
||||
bound_clients,
|
||||
deadline,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
for _ in 0..16 {
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||
if pool.writers.read().await.len() <= 6 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert!(pool.writers.read().await.len() <= 6);
|
||||
assert!(warn_next_allowed.len() <= pool.writers.read().await.len());
|
||||
}
|
||||
|
||||
/// Regression guard: the drain threshold ships enabled with a default of 128.
#[test]
fn general_config_default_drain_threshold_remains_enabled() {
    let defaults = GeneralConfig::default();
    assert_eq!(defaults.me_pool_drain_threshold, 128);
}
|
||||
@@ -21,6 +21,12 @@ mod secret;
|
||||
mod selftest;
|
||||
mod wire;
|
||||
mod pool_status;
|
||||
#[cfg(test)]
|
||||
mod health_regression_tests;
|
||||
#[cfg(test)]
|
||||
mod health_integration_tests;
|
||||
#[cfg(test)]
|
||||
mod health_adversarial_tests;
|
||||
|
||||
use bytes::Bytes;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user