mirror of https://github.com/telemt/telemt.git
ME Writer Draining-state fixes
This commit is contained in:
parent
d78360982c
commit
2e8be87ccf
|
|
@ -115,59 +115,109 @@ async fn reap_draining_writers(
|
||||||
pool: &Arc<MePool>,
|
pool: &Arc<MePool>,
|
||||||
warn_next_allowed: &mut HashMap<u64, Instant>,
|
warn_next_allowed: &mut HashMap<u64, Instant>,
|
||||||
) {
|
) {
|
||||||
|
if pool.draining_active_runtime() == 0 {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
let now_epoch_secs = MePool::now_epoch_secs();
|
let now_epoch_secs = MePool::now_epoch_secs();
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
let drain_ttl_secs = pool.me_pool_drain_ttl_secs.load(std::sync::atomic::Ordering::Relaxed);
|
let drain_ttl_secs = pool.me_pool_drain_ttl_secs.load(std::sync::atomic::Ordering::Relaxed);
|
||||||
let drain_threshold = pool
|
let drain_threshold = pool
|
||||||
.me_pool_drain_threshold
|
.me_pool_drain_threshold
|
||||||
.load(std::sync::atomic::Ordering::Relaxed);
|
.load(std::sync::atomic::Ordering::Relaxed);
|
||||||
let writers = pool.writers.read().await.clone();
|
let mut draining_writers = {
|
||||||
let mut draining_writers = Vec::new();
|
let writers = pool.writers.read().await;
|
||||||
for writer in writers {
|
let mut draining_writers = Vec::<DrainingWriterSnapshot>::new();
|
||||||
if !writer.draining.load(std::sync::atomic::Ordering::Relaxed) {
|
for writer in writers.iter() {
|
||||||
continue;
|
if !writer.draining.load(std::sync::atomic::Ordering::Relaxed) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
draining_writers.push(DrainingWriterSnapshot {
|
||||||
|
id: writer.id,
|
||||||
|
writer_dc: writer.writer_dc,
|
||||||
|
addr: writer.addr,
|
||||||
|
generation: writer.generation,
|
||||||
|
created_at: writer.created_at,
|
||||||
|
draining_started_at_epoch_secs: writer
|
||||||
|
.draining_started_at_epoch_secs
|
||||||
|
.load(std::sync::atomic::Ordering::Relaxed),
|
||||||
|
drain_deadline_epoch_secs: writer
|
||||||
|
.drain_deadline_epoch_secs
|
||||||
|
.load(std::sync::atomic::Ordering::Relaxed),
|
||||||
|
allow_drain_fallback: writer
|
||||||
|
.allow_drain_fallback
|
||||||
|
.load(std::sync::atomic::Ordering::Relaxed),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
let is_empty = pool.registry.is_writer_empty(writer.id).await;
|
draining_writers
|
||||||
if is_empty {
|
};
|
||||||
pool.remove_writer_and_close_clients(writer.id).await;
|
|
||||||
continue;
|
if draining_writers.is_empty() {
|
||||||
}
|
return;
|
||||||
draining_writers.push(writer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if drain_threshold > 0 && draining_writers.len() > drain_threshold as usize {
|
let draining_ids: Vec<u64> = draining_writers.iter().map(|writer| writer.id).collect();
|
||||||
draining_writers.sort_by(|left, right| {
|
let non_empty_writer_ids = pool.registry.non_empty_writer_ids(&draining_ids).await;
|
||||||
let left_started = left
|
let mut non_empty_draining_writers =
|
||||||
.draining_started_at_epoch_secs
|
Vec::<DrainingWriterSnapshot>::with_capacity(draining_writers.len());
|
||||||
.load(std::sync::atomic::Ordering::Relaxed);
|
for writer in draining_writers.drain(..) {
|
||||||
let right_started = right
|
if non_empty_writer_ids.contains(&writer.id) {
|
||||||
.draining_started_at_epoch_secs
|
non_empty_draining_writers.push(writer);
|
||||||
.load(std::sync::atomic::Ordering::Relaxed);
|
} else {
|
||||||
left_started
|
|
||||||
.cmp(&right_started)
|
|
||||||
.then_with(|| left.created_at.cmp(&right.created_at))
|
|
||||||
.then_with(|| left.id.cmp(&right.id))
|
|
||||||
});
|
|
||||||
let overflow = draining_writers.len().saturating_sub(drain_threshold as usize);
|
|
||||||
warn!(
|
|
||||||
draining_writers = draining_writers.len(),
|
|
||||||
me_pool_drain_threshold = drain_threshold,
|
|
||||||
removing_writers = overflow,
|
|
||||||
"ME draining writer threshold exceeded, force-closing oldest draining writers"
|
|
||||||
);
|
|
||||||
for writer in draining_writers.drain(..overflow) {
|
|
||||||
pool.stats.increment_pool_force_close_total();
|
|
||||||
pool.remove_writer_and_close_clients(writer.id).await;
|
pool.remove_writer_and_close_clients(writer.id).await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
draining_writers = non_empty_draining_writers;
|
||||||
|
if draining_writers.is_empty() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let overflow = if drain_threshold > 0 && draining_writers.len() > drain_threshold as usize {
|
||||||
|
draining_writers.len().saturating_sub(drain_threshold as usize)
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
let has_deadline_expired = draining_writers.iter().any(|writer| {
|
||||||
|
writer.drain_deadline_epoch_secs != 0 && now_epoch_secs >= writer.drain_deadline_epoch_secs
|
||||||
|
});
|
||||||
|
let can_drop_with_replacement = if overflow > 0 || has_deadline_expired {
|
||||||
|
pool.has_non_draining_writer_per_desired_dc_group().await
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
};
|
||||||
|
|
||||||
|
if overflow > 0 {
|
||||||
|
if can_drop_with_replacement {
|
||||||
|
draining_writers.sort_by(|left, right| {
|
||||||
|
left.draining_started_at_epoch_secs
|
||||||
|
.cmp(&right.draining_started_at_epoch_secs)
|
||||||
|
.then_with(|| left.created_at.cmp(&right.created_at))
|
||||||
|
.then_with(|| left.id.cmp(&right.id))
|
||||||
|
});
|
||||||
|
warn!(
|
||||||
|
draining_writers = draining_writers.len(),
|
||||||
|
me_pool_drain_threshold = drain_threshold,
|
||||||
|
removing_writers = overflow,
|
||||||
|
"ME draining writer threshold exceeded, force-closing oldest draining writers"
|
||||||
|
);
|
||||||
|
for writer in draining_writers.drain(..overflow) {
|
||||||
|
pool.stats.increment_pool_force_close_total();
|
||||||
|
pool.remove_writer_and_close_clients(writer.id).await;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
warn!(
|
||||||
|
draining_writers = draining_writers.len(),
|
||||||
|
me_pool_drain_threshold = drain_threshold,
|
||||||
|
overflow,
|
||||||
|
"ME draining threshold exceeded, but replacement coverage is incomplete; keeping draining writers"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for writer in draining_writers {
|
for writer in draining_writers {
|
||||||
let drain_started_at_epoch_secs = writer
|
|
||||||
.draining_started_at_epoch_secs
|
|
||||||
.load(std::sync::atomic::Ordering::Relaxed);
|
|
||||||
if drain_ttl_secs > 0
|
if drain_ttl_secs > 0
|
||||||
&& drain_started_at_epoch_secs != 0
|
&& writer.draining_started_at_epoch_secs != 0
|
||||||
&& now_epoch_secs.saturating_sub(drain_started_at_epoch_secs) > drain_ttl_secs
|
&& now_epoch_secs.saturating_sub(writer.draining_started_at_epoch_secs) > drain_ttl_secs
|
||||||
&& should_emit_writer_warn(
|
&& should_emit_writer_warn(
|
||||||
warn_next_allowed,
|
warn_next_allowed,
|
||||||
writer.id,
|
writer.id,
|
||||||
|
|
@ -182,21 +232,45 @@ async fn reap_draining_writers(
|
||||||
generation = writer.generation,
|
generation = writer.generation,
|
||||||
drain_ttl_secs,
|
drain_ttl_secs,
|
||||||
force_close_secs = pool.me_pool_force_close_secs.load(std::sync::atomic::Ordering::Relaxed),
|
force_close_secs = pool.me_pool_force_close_secs.load(std::sync::atomic::Ordering::Relaxed),
|
||||||
allow_drain_fallback = writer.allow_drain_fallback.load(std::sync::atomic::Ordering::Relaxed),
|
allow_drain_fallback = writer.allow_drain_fallback,
|
||||||
"ME draining writer remains non-empty past drain TTL"
|
"ME draining writer remains non-empty past drain TTL"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
let deadline_epoch_secs = writer
|
if writer.drain_deadline_epoch_secs != 0 && now_epoch_secs >= writer.drain_deadline_epoch_secs
|
||||||
.drain_deadline_epoch_secs
|
{
|
||||||
.load(std::sync::atomic::Ordering::Relaxed);
|
if can_drop_with_replacement {
|
||||||
if deadline_epoch_secs != 0 && now_epoch_secs >= deadline_epoch_secs {
|
warn!(writer_id = writer.id, "Drain timeout, force-closing");
|
||||||
warn!(writer_id = writer.id, "Drain timeout, force-closing");
|
pool.stats.increment_pool_force_close_total();
|
||||||
pool.stats.increment_pool_force_close_total();
|
pool.remove_writer_and_close_clients(writer.id).await;
|
||||||
pool.remove_writer_and_close_clients(writer.id).await;
|
} else if should_emit_writer_warn(
|
||||||
|
warn_next_allowed,
|
||||||
|
writer.id,
|
||||||
|
now,
|
||||||
|
pool.warn_rate_limit_duration(),
|
||||||
|
) {
|
||||||
|
warn!(
|
||||||
|
writer_id = writer.id,
|
||||||
|
writer_dc = writer.writer_dc,
|
||||||
|
endpoint = %writer.addr,
|
||||||
|
"Drain timeout reached, but replacement coverage is incomplete; keeping draining writer"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct DrainingWriterSnapshot {
|
||||||
|
id: u64,
|
||||||
|
writer_dc: i32,
|
||||||
|
addr: SocketAddr,
|
||||||
|
generation: u64,
|
||||||
|
created_at: Instant,
|
||||||
|
draining_started_at_epoch_secs: u64,
|
||||||
|
drain_deadline_epoch_secs: u64,
|
||||||
|
allow_drain_fallback: bool,
|
||||||
|
}
|
||||||
|
|
||||||
fn should_emit_writer_warn(
|
fn should_emit_writer_warn(
|
||||||
next_allowed: &mut HashMap<u64, Instant>,
|
next_allowed: &mut HashMap<u64, Instant>,
|
||||||
writer_id: u64,
|
writer_id: u64,
|
||||||
|
|
@ -1330,6 +1404,15 @@ mod tests {
|
||||||
me_pool_drain_threshold,
|
me_pool_drain_threshold,
|
||||||
..GeneralConfig::default()
|
..GeneralConfig::default()
|
||||||
};
|
};
|
||||||
|
let mut proxy_map_v4 = HashMap::new();
|
||||||
|
proxy_map_v4.insert(
|
||||||
|
2,
|
||||||
|
vec![(IpAddr::V4(Ipv4Addr::new(203, 0, 113, 10)), 443)],
|
||||||
|
);
|
||||||
|
let decision = NetworkDecision {
|
||||||
|
ipv4_me: true,
|
||||||
|
..NetworkDecision::default()
|
||||||
|
};
|
||||||
MePool::new(
|
MePool::new(
|
||||||
None,
|
None,
|
||||||
vec![1u8; 32],
|
vec![1u8; 32],
|
||||||
|
|
@ -1341,10 +1424,10 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
12,
|
12,
|
||||||
1200,
|
1200,
|
||||||
HashMap::new(),
|
proxy_map_v4,
|
||||||
HashMap::new(),
|
HashMap::new(),
|
||||||
None,
|
None,
|
||||||
NetworkDecision::default(),
|
decision,
|
||||||
None,
|
None,
|
||||||
Arc::new(SecureRandom::new()),
|
Arc::new(SecureRandom::new()),
|
||||||
Arc::new(Stats::default()),
|
Arc::new(Stats::default()),
|
||||||
|
|
@ -1438,6 +1521,7 @@ mod tests {
|
||||||
pool.writers.write().await.push(writer);
|
pool.writers.write().await.push(writer);
|
||||||
pool.registry.register_writer(writer_id, tx).await;
|
pool.registry.register_writer(writer_id, tx).await;
|
||||||
pool.conn_count.fetch_add(1, Ordering::Relaxed);
|
pool.conn_count.fetch_add(1, Ordering::Relaxed);
|
||||||
|
pool.increment_draining_active_runtime();
|
||||||
assert!(
|
assert!(
|
||||||
pool.registry
|
pool.registry
|
||||||
.bind_writer(
|
.bind_writer(
|
||||||
|
|
@ -1455,8 +1539,56 @@ mod tests {
|
||||||
conn_id
|
conn_id
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn insert_live_writer(pool: &Arc<MePool>, writer_id: u64, writer_dc: i32) {
|
||||||
|
let (tx, _writer_rx) = mpsc::channel::<WriterCommand>(8);
|
||||||
|
let writer = MeWriter {
|
||||||
|
id: writer_id,
|
||||||
|
addr: SocketAddr::new(
|
||||||
|
IpAddr::V4(Ipv4Addr::new(203, 0, 113, (writer_id as u8).saturating_add(1))),
|
||||||
|
4000 + writer_id as u16,
|
||||||
|
),
|
||||||
|
source_ip: IpAddr::V4(Ipv4Addr::LOCALHOST),
|
||||||
|
writer_dc,
|
||||||
|
generation: 2,
|
||||||
|
contour: Arc::new(AtomicU8::new(WriterContour::Active.as_u8())),
|
||||||
|
created_at: Instant::now(),
|
||||||
|
tx: tx.clone(),
|
||||||
|
cancel: CancellationToken::new(),
|
||||||
|
degraded: Arc::new(AtomicBool::new(false)),
|
||||||
|
rtt_ema_ms_x10: Arc::new(AtomicU32::new(0)),
|
||||||
|
draining: Arc::new(AtomicBool::new(false)),
|
||||||
|
draining_started_at_epoch_secs: Arc::new(AtomicU64::new(0)),
|
||||||
|
drain_deadline_epoch_secs: Arc::new(AtomicU64::new(0)),
|
||||||
|
allow_drain_fallback: Arc::new(AtomicBool::new(false)),
|
||||||
|
};
|
||||||
|
pool.writers.write().await.push(writer);
|
||||||
|
pool.registry.register_writer(writer_id, tx).await;
|
||||||
|
pool.conn_count.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn reap_draining_writers_force_closes_oldest_over_threshold() {
|
async fn reap_draining_writers_force_closes_oldest_over_threshold() {
|
||||||
|
let pool = make_pool(2).await;
|
||||||
|
insert_live_writer(&pool, 1, 2).await;
|
||||||
|
assert!(pool.has_non_draining_writer_per_desired_dc_group().await);
|
||||||
|
let now_epoch_secs = MePool::now_epoch_secs();
|
||||||
|
let conn_a = insert_draining_writer(&pool, 10, now_epoch_secs.saturating_sub(30)).await;
|
||||||
|
let conn_b = insert_draining_writer(&pool, 20, now_epoch_secs.saturating_sub(20)).await;
|
||||||
|
let conn_c = insert_draining_writer(&pool, 30, now_epoch_secs.saturating_sub(10)).await;
|
||||||
|
let mut warn_next_allowed = HashMap::new();
|
||||||
|
|
||||||
|
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||||
|
|
||||||
|
let mut writer_ids: Vec<u64> = pool.writers.read().await.iter().map(|writer| writer.id).collect();
|
||||||
|
writer_ids.sort_unstable();
|
||||||
|
assert_eq!(writer_ids, vec![1, 20, 30]);
|
||||||
|
assert!(pool.registry.get_writer(conn_a).await.is_none());
|
||||||
|
assert_eq!(pool.registry.get_writer(conn_b).await.unwrap().writer_id, 20);
|
||||||
|
assert_eq!(pool.registry.get_writer(conn_c).await.unwrap().writer_id, 30);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn reap_draining_writers_does_not_force_close_overflow_without_replacement() {
|
||||||
let pool = make_pool(2).await;
|
let pool = make_pool(2).await;
|
||||||
let now_epoch_secs = MePool::now_epoch_secs();
|
let now_epoch_secs = MePool::now_epoch_secs();
|
||||||
let conn_a = insert_draining_writer(&pool, 10, now_epoch_secs.saturating_sub(30)).await;
|
let conn_a = insert_draining_writer(&pool, 10, now_epoch_secs.saturating_sub(30)).await;
|
||||||
|
|
@ -1466,9 +1598,10 @@ mod tests {
|
||||||
|
|
||||||
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
reap_draining_writers(&pool, &mut warn_next_allowed).await;
|
||||||
|
|
||||||
let writer_ids: Vec<u64> = pool.writers.read().await.iter().map(|writer| writer.id).collect();
|
let mut writer_ids: Vec<u64> = pool.writers.read().await.iter().map(|writer| writer.id).collect();
|
||||||
assert_eq!(writer_ids, vec![20, 30]);
|
writer_ids.sort_unstable();
|
||||||
assert!(pool.registry.get_writer(conn_a).await.is_none());
|
assert_eq!(writer_ids, vec![10, 20, 30]);
|
||||||
|
assert_eq!(pool.registry.get_writer(conn_a).await.unwrap().writer_id, 10);
|
||||||
assert_eq!(pool.registry.get_writer(conn_b).await.unwrap().writer_id, 20);
|
assert_eq!(pool.registry.get_writer(conn_b).await.unwrap().writer_id, 20);
|
||||||
assert_eq!(pool.registry.get_writer(conn_c).await.unwrap().writer_id, 30);
|
assert_eq!(pool.registry.get_writer(conn_c).await.unwrap().writer_id, 30);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -160,6 +160,7 @@ pub struct MePool {
|
||||||
pub(super) refill_inflight: Arc<Mutex<HashSet<RefillEndpointKey>>>,
|
pub(super) refill_inflight: Arc<Mutex<HashSet<RefillEndpointKey>>>,
|
||||||
pub(super) refill_inflight_dc: Arc<Mutex<HashSet<RefillDcKey>>>,
|
pub(super) refill_inflight_dc: Arc<Mutex<HashSet<RefillDcKey>>>,
|
||||||
pub(super) conn_count: AtomicUsize,
|
pub(super) conn_count: AtomicUsize,
|
||||||
|
pub(super) draining_active_runtime: AtomicU64,
|
||||||
pub(super) stats: Arc<crate::stats::Stats>,
|
pub(super) stats: Arc<crate::stats::Stats>,
|
||||||
pub(super) generation: AtomicU64,
|
pub(super) generation: AtomicU64,
|
||||||
pub(super) active_generation: AtomicU64,
|
pub(super) active_generation: AtomicU64,
|
||||||
|
|
@ -438,6 +439,7 @@ impl MePool {
|
||||||
refill_inflight: Arc::new(Mutex::new(HashSet::new())),
|
refill_inflight: Arc::new(Mutex::new(HashSet::new())),
|
||||||
refill_inflight_dc: Arc::new(Mutex::new(HashSet::new())),
|
refill_inflight_dc: Arc::new(Mutex::new(HashSet::new())),
|
||||||
conn_count: AtomicUsize::new(0),
|
conn_count: AtomicUsize::new(0),
|
||||||
|
draining_active_runtime: AtomicU64::new(0),
|
||||||
generation: AtomicU64::new(1),
|
generation: AtomicU64::new(1),
|
||||||
active_generation: AtomicU64::new(1),
|
active_generation: AtomicU64::new(1),
|
||||||
warm_generation: AtomicU64::new(0),
|
warm_generation: AtomicU64::new(0),
|
||||||
|
|
@ -690,6 +692,32 @@ impl MePool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(super) fn draining_active_runtime(&self) -> u64 {
|
||||||
|
self.draining_active_runtime.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) fn increment_draining_active_runtime(&self) {
|
||||||
|
self.draining_active_runtime.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) fn decrement_draining_active_runtime(&self) {
|
||||||
|
let mut current = self.draining_active_runtime.load(Ordering::Relaxed);
|
||||||
|
loop {
|
||||||
|
if current == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
match self.draining_active_runtime.compare_exchange_weak(
|
||||||
|
current,
|
||||||
|
current - 1,
|
||||||
|
Ordering::Relaxed,
|
||||||
|
Ordering::Relaxed,
|
||||||
|
) {
|
||||||
|
Ok(_) => break,
|
||||||
|
Err(actual) => current = actual,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(super) async fn key_selector(&self) -> u32 {
|
pub(super) async fn key_selector(&self) -> u32 {
|
||||||
self.proxy_secret.read().await.key_selector
|
self.proxy_secret.read().await.key_selector
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -141,6 +141,38 @@ impl MePool {
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(super) async fn has_non_draining_writer_per_desired_dc_group(&self) -> bool {
|
||||||
|
let desired_by_dc = self.desired_dc_endpoints().await;
|
||||||
|
let required_dcs: HashSet<i32> = desired_by_dc
|
||||||
|
.iter()
|
||||||
|
.filter_map(|(dc, endpoints)| {
|
||||||
|
if endpoints.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(*dc)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
if required_dcs.is_empty() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let ws = self.writers.read().await;
|
||||||
|
let mut covered_dcs = HashSet::<i32>::with_capacity(required_dcs.len());
|
||||||
|
for writer in ws.iter() {
|
||||||
|
if writer.draining.load(Ordering::Relaxed) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if required_dcs.contains(&writer.writer_dc) {
|
||||||
|
covered_dcs.insert(writer.writer_dc);
|
||||||
|
if covered_dcs.len() == required_dcs.len() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
fn hardswap_warmup_connect_delay_ms(&self) -> u64 {
|
fn hardswap_warmup_connect_delay_ms(&self) -> u64 {
|
||||||
let min_ms = self.me_hardswap_warmup_delay_min_ms.load(Ordering::Relaxed);
|
let min_ms = self.me_hardswap_warmup_delay_min_ms.load(Ordering::Relaxed);
|
||||||
let max_ms = self.me_hardswap_warmup_delay_max_ms.load(Ordering::Relaxed);
|
let max_ms = self.me_hardswap_warmup_delay_max_ms.load(Ordering::Relaxed);
|
||||||
|
|
@ -475,12 +507,30 @@ impl MePool {
|
||||||
coverage_ratio = format_args!("{coverage_ratio:.3}"),
|
coverage_ratio = format_args!("{coverage_ratio:.3}"),
|
||||||
min_ratio = format_args!("{min_ratio:.3}"),
|
min_ratio = format_args!("{min_ratio:.3}"),
|
||||||
drain_timeout_secs,
|
drain_timeout_secs,
|
||||||
"ME reinit cycle covered; draining stale writers"
|
"ME reinit cycle covered; processing stale writers"
|
||||||
);
|
);
|
||||||
self.stats.increment_pool_swap_total();
|
self.stats.increment_pool_swap_total();
|
||||||
|
let can_drop_with_replacement = self
|
||||||
|
.has_non_draining_writer_per_desired_dc_group()
|
||||||
|
.await;
|
||||||
|
if can_drop_with_replacement {
|
||||||
|
info!(
|
||||||
|
stale_writers = stale_writer_ids.len(),
|
||||||
|
"ME reinit stale writers: replacement coverage ready, force-closing clients for fast rebind"
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
warn!(
|
||||||
|
stale_writers = stale_writer_ids.len(),
|
||||||
|
"ME reinit stale writers: replacement coverage incomplete, keeping draining fallback"
|
||||||
|
);
|
||||||
|
}
|
||||||
for writer_id in stale_writer_ids {
|
for writer_id in stale_writer_ids {
|
||||||
self.mark_writer_draining_with_timeout(writer_id, drain_timeout, !hardswap)
|
self.mark_writer_draining_with_timeout(writer_id, drain_timeout, !hardswap)
|
||||||
.await;
|
.await;
|
||||||
|
if can_drop_with_replacement {
|
||||||
|
self.stats.increment_pool_force_close_total();
|
||||||
|
self.remove_writer_and_close_clients(writer_id).await;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if hardswap {
|
if hardswap {
|
||||||
self.clear_pending_hardswap_state();
|
self.clear_pending_hardswap_state();
|
||||||
|
|
|
||||||
|
|
@ -514,6 +514,7 @@ impl MePool {
|
||||||
let was_draining = w.draining.load(Ordering::Relaxed);
|
let was_draining = w.draining.load(Ordering::Relaxed);
|
||||||
if was_draining {
|
if was_draining {
|
||||||
self.stats.decrement_pool_drain_active();
|
self.stats.decrement_pool_drain_active();
|
||||||
|
self.decrement_draining_active_runtime();
|
||||||
}
|
}
|
||||||
self.stats.increment_me_writer_removed_total();
|
self.stats.increment_me_writer_removed_total();
|
||||||
w.cancel.cancel();
|
w.cancel.cancel();
|
||||||
|
|
@ -572,6 +573,7 @@ impl MePool {
|
||||||
.store(drain_deadline_epoch_secs, Ordering::Relaxed);
|
.store(drain_deadline_epoch_secs, Ordering::Relaxed);
|
||||||
if !already_draining {
|
if !already_draining {
|
||||||
self.stats.increment_pool_drain_active();
|
self.stats.increment_pool_drain_active();
|
||||||
|
self.increment_draining_active_runtime();
|
||||||
}
|
}
|
||||||
w.contour
|
w.contour
|
||||||
.store(WriterContour::Draining.as_u8(), Ordering::Relaxed);
|
.store(WriterContour::Draining.as_u8(), Ordering::Relaxed);
|
||||||
|
|
|
||||||
|
|
@ -436,6 +436,19 @@ impl ConnRegistry {
|
||||||
.map(|s| s.is_empty())
|
.map(|s| s.is_empty())
|
||||||
.unwrap_or(true)
|
.unwrap_or(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(super) async fn non_empty_writer_ids(&self, writer_ids: &[u64]) -> HashSet<u64> {
|
||||||
|
let inner = self.inner.read().await;
|
||||||
|
let mut out = HashSet::<u64>::with_capacity(writer_ids.len());
|
||||||
|
for writer_id in writer_ids {
|
||||||
|
if let Some(conns) = inner.conns_for_writer.get(writer_id)
|
||||||
|
&& !conns.is_empty()
|
||||||
|
{
|
||||||
|
out.insert(*writer_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
@ -634,4 +647,35 @@ mod tests {
|
||||||
);
|
);
|
||||||
assert!(registry.get_writer(conn_id).await.is_none());
|
assert!(registry.get_writer(conn_id).await.is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn non_empty_writer_ids_returns_only_writers_with_bound_clients() {
|
||||||
|
let registry = ConnRegistry::new();
|
||||||
|
let (conn_id, _rx) = registry.register().await;
|
||||||
|
let (writer_tx_a, _writer_rx_a) = tokio::sync::mpsc::channel(8);
|
||||||
|
let (writer_tx_b, _writer_rx_b) = tokio::sync::mpsc::channel(8);
|
||||||
|
registry.register_writer(10, writer_tx_a).await;
|
||||||
|
registry.register_writer(20, writer_tx_b).await;
|
||||||
|
|
||||||
|
let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 443);
|
||||||
|
assert!(
|
||||||
|
registry
|
||||||
|
.bind_writer(
|
||||||
|
conn_id,
|
||||||
|
10,
|
||||||
|
ConnMeta {
|
||||||
|
target_dc: 2,
|
||||||
|
client_addr: addr,
|
||||||
|
our_addr: addr,
|
||||||
|
proto_flags: 0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
);
|
||||||
|
|
||||||
|
let non_empty = registry.non_empty_writer_ids(&[10, 20, 30]).await;
|
||||||
|
assert!(non_empty.contains(&10));
|
||||||
|
assert!(!non_empty.contains(&20));
|
||||||
|
assert!(!non_empty.contains(&30));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue