mirror of
https://github.com/telemt/telemt.git
synced 2026-04-20 20:14:09 +03:00
Merge branch 'main' into feat/shadowsocks-upstream
This commit is contained in:
@@ -314,6 +314,8 @@ pub(super) async fn reap_draining_writers(
|
||||
}
|
||||
pool.stats.increment_pool_force_close_total();
|
||||
pool.remove_writer_and_close_clients(writer_id).await;
|
||||
pool.stats
|
||||
.increment_me_draining_writers_reap_progress_total();
|
||||
closed_total = closed_total.saturating_add(1);
|
||||
}
|
||||
for writer_id in empty_writer_ids {
|
||||
@@ -324,6 +326,8 @@ pub(super) async fn reap_draining_writers(
|
||||
continue;
|
||||
}
|
||||
pool.remove_writer_and_close_clients(writer_id).await;
|
||||
pool.stats
|
||||
.increment_me_draining_writers_reap_progress_total();
|
||||
closed_total = closed_total.saturating_add(1);
|
||||
}
|
||||
|
||||
@@ -1574,6 +1578,8 @@ mod tests {
|
||||
general.me_warn_rate_limit_ms,
|
||||
MeRouteNoWriterMode::default(),
|
||||
general.me_route_no_writer_wait_ms,
|
||||
general.me_route_hybrid_max_wait_ms,
|
||||
general.me_route_blocking_send_timeout_ms,
|
||||
general.me_route_inline_recovery_attempts,
|
||||
general.me_route_inline_recovery_wait_ms,
|
||||
)
|
||||
|
||||
@@ -111,6 +111,8 @@ async fn make_pool(
|
||||
general.me_warn_rate_limit_ms,
|
||||
MeRouteNoWriterMode::default(),
|
||||
general.me_route_no_writer_wait_ms,
|
||||
general.me_route_hybrid_max_wait_ms,
|
||||
general.me_route_blocking_send_timeout_ms,
|
||||
general.me_route_inline_recovery_attempts,
|
||||
general.me_route_inline_recovery_wait_ms,
|
||||
);
|
||||
|
||||
@@ -110,6 +110,8 @@ async fn make_pool(
|
||||
general.me_warn_rate_limit_ms,
|
||||
MeRouteNoWriterMode::default(),
|
||||
general.me_route_no_writer_wait_ms,
|
||||
general.me_route_hybrid_max_wait_ms,
|
||||
general.me_route_blocking_send_timeout_ms,
|
||||
general.me_route_inline_recovery_attempts,
|
||||
general.me_route_inline_recovery_wait_ms,
|
||||
);
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, AtomicU64, Ordering};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use bytes::Bytes;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
@@ -103,6 +104,8 @@ async fn make_pool(me_pool_drain_threshold: u64) -> Arc<MePool> {
|
||||
general.me_warn_rate_limit_ms,
|
||||
MeRouteNoWriterMode::default(),
|
||||
general.me_route_no_writer_wait_ms,
|
||||
general.me_route_hybrid_max_wait_ms,
|
||||
general.me_route_blocking_send_timeout_ms,
|
||||
general.me_route_inline_recovery_attempts,
|
||||
general.me_route_inline_recovery_wait_ms,
|
||||
)
|
||||
@@ -207,6 +210,89 @@ async fn reap_draining_writers_removes_empty_draining_writers() {
|
||||
assert_eq!(current_writer_ids(&pool).await, vec![3]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_does_not_block_on_stuck_writer_close_signal() {
|
||||
let pool = make_pool(128).await;
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
|
||||
let (blocked_tx, blocked_rx) = mpsc::channel::<WriterCommand>(1);
|
||||
assert!(
|
||||
blocked_tx
|
||||
.try_send(WriterCommand::Data(Bytes::from_static(b"stuck")))
|
||||
.is_ok()
|
||||
);
|
||||
let blocked_rx_guard = tokio::spawn(async move {
|
||||
let _hold_rx = blocked_rx;
|
||||
tokio::time::sleep(Duration::from_secs(30)).await;
|
||||
});
|
||||
|
||||
let blocked_writer_id = 90u64;
|
||||
let blocked_writer = MeWriter {
|
||||
id: blocked_writer_id,
|
||||
addr: SocketAddr::new(
|
||||
IpAddr::V4(Ipv4Addr::LOCALHOST),
|
||||
4500 + blocked_writer_id as u16,
|
||||
),
|
||||
source_ip: IpAddr::V4(Ipv4Addr::LOCALHOST),
|
||||
writer_dc: 2,
|
||||
generation: 1,
|
||||
contour: Arc::new(AtomicU8::new(WriterContour::Draining.as_u8())),
|
||||
created_at: Instant::now() - Duration::from_secs(blocked_writer_id),
|
||||
tx: blocked_tx.clone(),
|
||||
cancel: CancellationToken::new(),
|
||||
degraded: Arc::new(AtomicBool::new(false)),
|
||||
rtt_ema_ms_x10: Arc::new(AtomicU32::new(0)),
|
||||
draining: Arc::new(AtomicBool::new(true)),
|
||||
draining_started_at_epoch_secs: Arc::new(AtomicU64::new(
|
||||
now_epoch_secs.saturating_sub(120),
|
||||
)),
|
||||
drain_deadline_epoch_secs: Arc::new(AtomicU64::new(0)),
|
||||
allow_drain_fallback: Arc::new(AtomicBool::new(false)),
|
||||
};
|
||||
pool.writers.write().await.push(blocked_writer);
|
||||
pool.registry
|
||||
.register_writer(blocked_writer_id, blocked_tx)
|
||||
.await;
|
||||
pool.conn_count.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
insert_draining_writer(&pool, 91, now_epoch_secs.saturating_sub(110), 0, 0).await;
|
||||
|
||||
let mut warn_next_allowed = HashMap::new();
|
||||
let mut soft_evict_next_allowed = HashMap::new();
|
||||
|
||||
let reap_res = tokio::time::timeout(
|
||||
Duration::from_millis(500),
|
||||
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed),
|
||||
)
|
||||
.await;
|
||||
blocked_rx_guard.abort();
|
||||
|
||||
assert!(reap_res.is_ok(), "reap should not block on close signal");
|
||||
assert!(current_writer_ids(&pool).await.is_empty());
|
||||
assert_eq!(pool.stats.get_me_writer_close_signal_drop_total(), 2);
|
||||
assert_eq!(pool.stats.get_me_writer_close_signal_channel_full_total(), 1);
|
||||
assert_eq!(pool.stats.get_me_draining_writers_reap_progress_total(), 2);
|
||||
let activity = pool.registry.writer_activity_snapshot().await;
|
||||
assert!(!activity.bound_clients_by_writer.contains_key(&blocked_writer_id));
|
||||
assert!(!activity.bound_clients_by_writer.contains_key(&91));
|
||||
let (probe_conn_id, _rx) = pool.registry.register().await;
|
||||
assert!(
|
||||
!pool.registry
|
||||
.bind_writer(
|
||||
probe_conn_id,
|
||||
blocked_writer_id,
|
||||
ConnMeta {
|
||||
target_dc: 2,
|
||||
client_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 6400),
|
||||
our_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 443),
|
||||
proto_flags: 0,
|
||||
},
|
||||
)
|
||||
.await
|
||||
);
|
||||
let _ = pool.registry.unregister(probe_conn_id).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reap_draining_writers_overflow_closes_oldest_non_empty_writers() {
|
||||
let pool = make_pool(2).await;
|
||||
|
||||
@@ -193,6 +193,8 @@ pub struct MePool {
|
||||
pub(super) me_reader_route_data_wait_ms: Arc<AtomicU64>,
|
||||
pub(super) me_route_no_writer_mode: AtomicU8,
|
||||
pub(super) me_route_no_writer_wait: Duration,
|
||||
pub(super) me_route_hybrid_max_wait: Duration,
|
||||
pub(super) me_route_blocking_send_timeout: Duration,
|
||||
pub(super) me_route_inline_recovery_attempts: u32,
|
||||
pub(super) me_route_inline_recovery_wait: Duration,
|
||||
pub(super) me_health_interval_ms_unhealthy: AtomicU64,
|
||||
@@ -307,6 +309,8 @@ impl MePool {
|
||||
me_warn_rate_limit_ms: u64,
|
||||
me_route_no_writer_mode: MeRouteNoWriterMode,
|
||||
me_route_no_writer_wait_ms: u64,
|
||||
me_route_hybrid_max_wait_ms: u64,
|
||||
me_route_blocking_send_timeout_ms: u64,
|
||||
me_route_inline_recovery_attempts: u32,
|
||||
me_route_inline_recovery_wait_ms: u64,
|
||||
) -> Arc<Self> {
|
||||
@@ -490,6 +494,10 @@ impl MePool {
|
||||
me_reader_route_data_wait_ms: Arc::new(AtomicU64::new(me_reader_route_data_wait_ms)),
|
||||
me_route_no_writer_mode: AtomicU8::new(me_route_no_writer_mode.as_u8()),
|
||||
me_route_no_writer_wait: Duration::from_millis(me_route_no_writer_wait_ms),
|
||||
me_route_hybrid_max_wait: Duration::from_millis(me_route_hybrid_max_wait_ms),
|
||||
me_route_blocking_send_timeout: Duration::from_millis(
|
||||
me_route_blocking_send_timeout_ms,
|
||||
),
|
||||
me_route_inline_recovery_attempts,
|
||||
me_route_inline_recovery_wait: Duration::from_millis(me_route_inline_recovery_wait_ms),
|
||||
me_health_interval_ms_unhealthy: AtomicU64::new(me_health_interval_ms_unhealthy.max(1)),
|
||||
|
||||
@@ -8,6 +8,7 @@ use bytes::Bytes;
|
||||
use bytes::BytesMut;
|
||||
use rand::Rng;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::sync::mpsc::error::TrySendError;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
@@ -312,41 +313,28 @@ impl MePool {
|
||||
let mut p = Vec::with_capacity(12);
|
||||
p.extend_from_slice(&RPC_PING_U32.to_le_bytes());
|
||||
p.extend_from_slice(&sent_id.to_le_bytes());
|
||||
{
|
||||
let mut tracker = ping_tracker_ping.lock().await;
|
||||
let now_epoch_ms = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_millis() as u64;
|
||||
let mut run_cleanup = false;
|
||||
if let Some(pool) = pool_ping.upgrade() {
|
||||
let last_cleanup_ms = pool
|
||||
let now_epoch_ms = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_millis() as u64;
|
||||
let mut run_cleanup = false;
|
||||
if let Some(pool) = pool_ping.upgrade() {
|
||||
let last_cleanup_ms = pool
|
||||
.ping_tracker_last_cleanup_epoch_ms
|
||||
.load(Ordering::Relaxed);
|
||||
if now_epoch_ms.saturating_sub(last_cleanup_ms) >= 30_000
|
||||
&& pool
|
||||
.ping_tracker_last_cleanup_epoch_ms
|
||||
.load(Ordering::Relaxed);
|
||||
if now_epoch_ms.saturating_sub(last_cleanup_ms) >= 30_000
|
||||
&& pool
|
||||
.ping_tracker_last_cleanup_epoch_ms
|
||||
.compare_exchange(
|
||||
last_cleanup_ms,
|
||||
now_epoch_ms,
|
||||
Ordering::AcqRel,
|
||||
Ordering::Relaxed,
|
||||
)
|
||||
.is_ok()
|
||||
{
|
||||
run_cleanup = true;
|
||||
}
|
||||
.compare_exchange(
|
||||
last_cleanup_ms,
|
||||
now_epoch_ms,
|
||||
Ordering::AcqRel,
|
||||
Ordering::Relaxed,
|
||||
)
|
||||
.is_ok()
|
||||
{
|
||||
run_cleanup = true;
|
||||
}
|
||||
|
||||
if run_cleanup {
|
||||
let before = tracker.len();
|
||||
tracker.retain(|_, (ts, _)| ts.elapsed() < Duration::from_secs(120));
|
||||
let expired = before.saturating_sub(tracker.len());
|
||||
if expired > 0 {
|
||||
stats_ping.increment_me_keepalive_timeout_by(expired as u64);
|
||||
}
|
||||
}
|
||||
tracker.insert(sent_id, (std::time::Instant::now(), writer_id));
|
||||
}
|
||||
ping_id = ping_id.wrapping_add(1);
|
||||
stats_ping.increment_me_keepalive_sent();
|
||||
@@ -367,6 +355,16 @@ impl MePool {
|
||||
}
|
||||
break;
|
||||
}
|
||||
let mut tracker = ping_tracker_ping.lock().await;
|
||||
if run_cleanup {
|
||||
let before = tracker.len();
|
||||
tracker.retain(|_, (ts, _)| ts.elapsed() < Duration::from_secs(120));
|
||||
let expired = before.saturating_sub(tracker.len());
|
||||
if expired > 0 {
|
||||
stats_ping.increment_me_keepalive_timeout_by(expired as u64);
|
||||
}
|
||||
}
|
||||
tracker.insert(sent_id, (std::time::Instant::now(), writer_id));
|
||||
}
|
||||
});
|
||||
|
||||
@@ -494,11 +492,9 @@ impl MePool {
|
||||
}
|
||||
|
||||
pub(crate) async fn remove_writer_and_close_clients(self: &Arc<Self>, writer_id: u64) {
|
||||
let conns = self.remove_writer_only(writer_id).await;
|
||||
for bound in conns {
|
||||
let _ = self.registry.route(bound.conn_id, super::MeResponse::Close).await;
|
||||
let _ = self.registry.unregister(bound.conn_id).await;
|
||||
}
|
||||
// Full client cleanup now happens inside `registry.writer_lost` to keep
|
||||
// writer reap/remove paths strictly non-blocking per connection.
|
||||
let _ = self.remove_writer_only(writer_id).await;
|
||||
}
|
||||
|
||||
async fn remove_writer_only(self: &Arc<Self>, writer_id: u64) -> Vec<BoundConn> {
|
||||
@@ -528,6 +524,11 @@ impl MePool {
|
||||
self.conn_count.fetch_sub(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
// State invariant:
|
||||
// - writer is removed from `self.writers` (pool visibility),
|
||||
// - writer is removed from registry routing/binding maps via `writer_lost`.
|
||||
// The close command below is only a best-effort accelerator for task shutdown.
|
||||
// Cleanup progress must never depend on command-channel availability.
|
||||
let conns = self.registry.writer_lost(writer_id).await;
|
||||
{
|
||||
let mut tracker = self.ping_tracker.lock().await;
|
||||
@@ -535,7 +536,25 @@ impl MePool {
|
||||
}
|
||||
self.rtt_stats.lock().await.remove(&writer_id);
|
||||
if let Some(tx) = close_tx {
|
||||
let _ = tx.send(WriterCommand::Close).await;
|
||||
match tx.try_send(WriterCommand::Close) {
|
||||
Ok(()) => {}
|
||||
Err(TrySendError::Full(_)) => {
|
||||
self.stats.increment_me_writer_close_signal_drop_total();
|
||||
self.stats
|
||||
.increment_me_writer_close_signal_channel_full_total();
|
||||
debug!(
|
||||
writer_id,
|
||||
"Skipping close signal for removed writer: command channel is full"
|
||||
);
|
||||
}
|
||||
Err(TrySendError::Closed(_)) => {
|
||||
self.stats.increment_me_writer_close_signal_drop_total();
|
||||
debug!(
|
||||
writer_id,
|
||||
"Skipping close signal for removed writer: command channel is closed"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
if trigger_refill
|
||||
&& let Some(addr) = removed_addr
|
||||
|
||||
@@ -8,6 +8,7 @@ use bytes::{Bytes, BytesMut};
|
||||
use tokio::io::AsyncReadExt;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::sync::{Mutex, mpsc};
|
||||
use tokio::sync::mpsc::error::TrySendError;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, trace, warn};
|
||||
|
||||
@@ -173,12 +174,12 @@ pub(crate) async fn reader_loop(
|
||||
} else if pt == RPC_CLOSE_EXT_U32 && body.len() >= 8 {
|
||||
let cid = u64::from_le_bytes(body[0..8].try_into().unwrap());
|
||||
debug!(cid, "RPC_CLOSE_EXT from ME");
|
||||
reg.route(cid, MeResponse::Close).await;
|
||||
let _ = reg.route_nowait(cid, MeResponse::Close).await;
|
||||
reg.unregister(cid).await;
|
||||
} else if pt == RPC_CLOSE_CONN_U32 && body.len() >= 8 {
|
||||
let cid = u64::from_le_bytes(body[0..8].try_into().unwrap());
|
||||
debug!(cid, "RPC_CLOSE_CONN from ME");
|
||||
reg.route(cid, MeResponse::Close).await;
|
||||
let _ = reg.route_nowait(cid, MeResponse::Close).await;
|
||||
reg.unregister(cid).await;
|
||||
} else if pt == RPC_PING_U32 && body.len() >= 8 {
|
||||
let ping_id = i64::from_le_bytes(body[0..8].try_into().unwrap());
|
||||
@@ -186,13 +187,15 @@ pub(crate) async fn reader_loop(
|
||||
let mut pong = Vec::with_capacity(12);
|
||||
pong.extend_from_slice(&RPC_PONG_U32.to_le_bytes());
|
||||
pong.extend_from_slice(&ping_id.to_le_bytes());
|
||||
if tx
|
||||
.send(WriterCommand::DataAndFlush(Bytes::from(pong)))
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
warn!("PONG send failed");
|
||||
break;
|
||||
match tx.try_send(WriterCommand::DataAndFlush(Bytes::from(pong))) {
|
||||
Ok(()) => {}
|
||||
Err(TrySendError::Full(_)) => {
|
||||
debug!(ping_id, "PONG dropped: writer command channel is full");
|
||||
}
|
||||
Err(TrySendError::Closed(_)) => {
|
||||
warn!("PONG send failed: writer channel closed");
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if pt == RPC_PONG_U32 && body.len() >= 8 {
|
||||
let ping_id = i64::from_le_bytes(body[0..8].try_into().unwrap());
|
||||
@@ -232,6 +235,13 @@ async fn send_close_conn(tx: &mpsc::Sender<WriterCommand>, conn_id: u64) {
|
||||
let mut p = Vec::with_capacity(12);
|
||||
p.extend_from_slice(&RPC_CLOSE_CONN_U32.to_le_bytes());
|
||||
p.extend_from_slice(&conn_id.to_le_bytes());
|
||||
|
||||
let _ = tx.send(WriterCommand::DataAndFlush(Bytes::from(p))).await;
|
||||
match tx.try_send(WriterCommand::DataAndFlush(Bytes::from(p))) {
|
||||
Ok(()) => {}
|
||||
Err(TrySendError::Full(_)) => {
|
||||
debug!(conn_id, "ME close_conn signal skipped: writer command channel is full");
|
||||
}
|
||||
Err(TrySendError::Closed(_)) => {
|
||||
debug!(conn_id, "ME close_conn signal skipped: writer command channel is closed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -169,6 +169,7 @@ impl ConnRegistry {
|
||||
None
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub async fn route(&self, id: u64, resp: MeResponse) -> RouteResult {
|
||||
let tx = {
|
||||
let inner = self.inner.read().await;
|
||||
@@ -445,30 +446,38 @@ impl ConnRegistry {
|
||||
}
|
||||
|
||||
pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.writers.remove(&writer_id);
|
||||
inner.last_meta_for_writer.remove(&writer_id);
|
||||
inner.writer_idle_since_epoch_secs.remove(&writer_id);
|
||||
let conns = inner
|
||||
.conns_for_writer
|
||||
.remove(&writer_id)
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut close_txs = Vec::<mpsc::Sender<MeResponse>>::new();
|
||||
let mut out = Vec::new();
|
||||
for conn_id in conns {
|
||||
if inner.writer_for_conn.get(&conn_id).copied() != Some(writer_id) {
|
||||
continue;
|
||||
}
|
||||
inner.writer_for_conn.remove(&conn_id);
|
||||
if let Some(m) = inner.meta.get(&conn_id) {
|
||||
out.push(BoundConn {
|
||||
conn_id,
|
||||
meta: m.clone(),
|
||||
});
|
||||
{
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.writers.remove(&writer_id);
|
||||
inner.last_meta_for_writer.remove(&writer_id);
|
||||
inner.writer_idle_since_epoch_secs.remove(&writer_id);
|
||||
let conns = inner
|
||||
.conns_for_writer
|
||||
.remove(&writer_id)
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for conn_id in conns {
|
||||
if inner.writer_for_conn.get(&conn_id).copied() != Some(writer_id) {
|
||||
continue;
|
||||
}
|
||||
inner.writer_for_conn.remove(&conn_id);
|
||||
if let Some(client_tx) = inner.map.remove(&conn_id) {
|
||||
close_txs.push(client_tx);
|
||||
}
|
||||
if let Some(meta) = inner.meta.remove(&conn_id) {
|
||||
out.push(BoundConn { conn_id, meta });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for client_tx in close_txs {
|
||||
let _ = client_tx.try_send(MeResponse::Close);
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
@@ -491,6 +500,7 @@ impl ConnRegistry {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
use std::time::Duration;
|
||||
|
||||
use super::ConnMeta;
|
||||
use super::ConnRegistry;
|
||||
@@ -663,6 +673,39 @@ mod tests {
|
||||
assert!(registry.is_writer_empty(20).await);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn writer_lost_removes_bound_conn_from_registry_and_signals_close() {
|
||||
let registry = ConnRegistry::new();
|
||||
let (conn_id, mut rx) = registry.register().await;
|
||||
let (writer_tx, _writer_rx) = tokio::sync::mpsc::channel(8);
|
||||
registry.register_writer(10, writer_tx).await;
|
||||
let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 443);
|
||||
|
||||
assert!(
|
||||
registry
|
||||
.bind_writer(
|
||||
conn_id,
|
||||
10,
|
||||
ConnMeta {
|
||||
target_dc: 2,
|
||||
client_addr: addr,
|
||||
our_addr: addr,
|
||||
proto_flags: 0,
|
||||
},
|
||||
)
|
||||
.await
|
||||
);
|
||||
|
||||
let lost = registry.writer_lost(10).await;
|
||||
assert_eq!(lost.len(), 1);
|
||||
assert_eq!(lost[0].conn_id, conn_id);
|
||||
assert!(registry.get_writer(conn_id).await.is_none());
|
||||
assert!(registry.get_meta(conn_id).await.is_none());
|
||||
assert_eq!(registry.unregister(conn_id).await, None);
|
||||
let close = tokio::time::timeout(Duration::from_millis(50), rx.recv()).await;
|
||||
assert!(matches!(close, Ok(Some(MeResponse::Close))));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn bind_writer_rejects_unregistered_writer() {
|
||||
let registry = ConnRegistry::new();
|
||||
|
||||
@@ -6,6 +6,7 @@ use std::sync::atomic::Ordering;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use bytes::Bytes;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::sync::mpsc::error::TrySendError;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
@@ -29,6 +30,29 @@ const PICK_PENALTY_DRAINING: u64 = 600;
|
||||
const PICK_PENALTY_STALE: u64 = 300;
|
||||
const PICK_PENALTY_DEGRADED: u64 = 250;
|
||||
|
||||
enum TimedSendError<T> {
|
||||
Closed(T),
|
||||
Timeout(T),
|
||||
}
|
||||
|
||||
async fn send_writer_command_with_timeout(
|
||||
tx: &mpsc::Sender<WriterCommand>,
|
||||
cmd: WriterCommand,
|
||||
timeout: Duration,
|
||||
) -> std::result::Result<(), TimedSendError<WriterCommand>> {
|
||||
if timeout.is_zero() {
|
||||
return tx.send(cmd).await.map_err(|err| TimedSendError::Closed(err.0));
|
||||
}
|
||||
match tokio::time::timeout(timeout, tx.reserve()).await {
|
||||
Ok(Ok(permit)) => {
|
||||
permit.send(cmd);
|
||||
Ok(())
|
||||
}
|
||||
Ok(Err(_)) => Err(TimedSendError::Closed(cmd)),
|
||||
Err(_) => Err(TimedSendError::Timeout(cmd)),
|
||||
}
|
||||
}
|
||||
|
||||
impl MePool {
|
||||
/// Send RPC_PROXY_REQ. `tag_override`: per-user ad_tag (from access.user_ad_tags); if None, uses pool default.
|
||||
pub async fn send_proxy_req(
|
||||
@@ -78,8 +102,18 @@ impl MePool {
|
||||
let mut hybrid_last_recovery_at: Option<Instant> = None;
|
||||
let hybrid_wait_step = self.me_route_no_writer_wait.max(Duration::from_millis(50));
|
||||
let mut hybrid_wait_current = hybrid_wait_step;
|
||||
let hybrid_deadline = Instant::now() + self.me_route_hybrid_max_wait;
|
||||
|
||||
loop {
|
||||
if matches!(no_writer_mode, MeRouteNoWriterMode::HybridAsyncPersistent)
|
||||
&& Instant::now() >= hybrid_deadline
|
||||
{
|
||||
self.stats.increment_me_no_writer_failfast_total();
|
||||
return Err(ProxyError::Proxy(
|
||||
"No ME writer available in hybrid wait window".into(),
|
||||
));
|
||||
}
|
||||
let mut skip_writer_id: Option<u64> = None;
|
||||
let current_meta = self
|
||||
.registry
|
||||
.get_meta(conn_id)
|
||||
@@ -90,12 +124,30 @@ impl MePool {
|
||||
match current.tx.try_send(WriterCommand::Data(current_payload.clone())) {
|
||||
Ok(()) => return Ok(()),
|
||||
Err(TrySendError::Full(cmd)) => {
|
||||
if current.tx.send(cmd).await.is_ok() {
|
||||
return Ok(());
|
||||
match send_writer_command_with_timeout(
|
||||
¤t.tx,
|
||||
cmd,
|
||||
self.me_route_blocking_send_timeout,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => return Ok(()),
|
||||
Err(TimedSendError::Closed(_)) => {
|
||||
warn!(writer_id = current.writer_id, "ME writer channel closed");
|
||||
self.remove_writer_and_close_clients(current.writer_id).await;
|
||||
continue;
|
||||
}
|
||||
Err(TimedSendError::Timeout(_)) => {
|
||||
debug!(
|
||||
conn_id,
|
||||
writer_id = current.writer_id,
|
||||
timeout_ms = self.me_route_blocking_send_timeout.as_millis()
|
||||
as u64,
|
||||
"ME writer send timed out for bound writer, trying reroute"
|
||||
);
|
||||
skip_writer_id = Some(current.writer_id);
|
||||
}
|
||||
}
|
||||
warn!(writer_id = current.writer_id, "ME writer channel closed");
|
||||
self.remove_writer_and_close_clients(current.writer_id).await;
|
||||
continue;
|
||||
}
|
||||
Err(TrySendError::Closed(_)) => {
|
||||
warn!(writer_id = current.writer_id, "ME writer channel closed");
|
||||
@@ -200,6 +252,9 @@ impl MePool {
|
||||
.candidate_indices_for_dc(&writers_snapshot, routed_dc, true)
|
||||
.await;
|
||||
}
|
||||
if let Some(skip_writer_id) = skip_writer_id {
|
||||
candidate_indices.retain(|idx| writers_snapshot[*idx].id != skip_writer_id);
|
||||
}
|
||||
if candidate_indices.is_empty() {
|
||||
let pick_mode = self.writer_pick_mode();
|
||||
match no_writer_mode {
|
||||
@@ -422,7 +477,13 @@ impl MePool {
|
||||
self.stats.increment_me_writer_pick_blocking_fallback_total();
|
||||
let effective_our_addr = SocketAddr::new(w.source_ip, our_addr.port());
|
||||
let (payload, meta) = build_routed_payload(effective_our_addr);
|
||||
match w.tx.send(WriterCommand::Data(payload.clone())).await {
|
||||
match send_writer_command_with_timeout(
|
||||
&w.tx,
|
||||
WriterCommand::Data(payload.clone()),
|
||||
self.me_route_blocking_send_timeout,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => {
|
||||
self.stats
|
||||
.increment_me_writer_pick_success_fallback_total(pick_mode);
|
||||
@@ -439,11 +500,20 @@ impl MePool {
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
Err(_) => {
|
||||
Err(TimedSendError::Closed(_)) => {
|
||||
self.stats.increment_me_writer_pick_closed_total(pick_mode);
|
||||
warn!(writer_id = w.id, "ME writer channel closed (blocking)");
|
||||
self.remove_writer_and_close_clients(w.id).await;
|
||||
}
|
||||
Err(TimedSendError::Timeout(_)) => {
|
||||
self.stats.increment_me_writer_pick_full_total(pick_mode);
|
||||
debug!(
|
||||
conn_id,
|
||||
writer_id = w.id,
|
||||
timeout_ms = self.me_route_blocking_send_timeout.as_millis() as u64,
|
||||
"ME writer blocking fallback send timed out"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -573,13 +643,19 @@ impl MePool {
|
||||
let mut p = Vec::with_capacity(12);
|
||||
p.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
|
||||
p.extend_from_slice(&conn_id.to_le_bytes());
|
||||
if w.tx
|
||||
.send(WriterCommand::DataAndFlush(Bytes::from(p)))
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
debug!("ME close write failed");
|
||||
self.remove_writer_and_close_clients(w.writer_id).await;
|
||||
match w.tx.try_send(WriterCommand::DataAndFlush(Bytes::from(p))) {
|
||||
Ok(()) => {}
|
||||
Err(TrySendError::Full(_)) => {
|
||||
debug!(
|
||||
conn_id,
|
||||
writer_id = w.writer_id,
|
||||
"ME close skipped: writer command channel is full"
|
||||
);
|
||||
}
|
||||
Err(TrySendError::Closed(_)) => {
|
||||
debug!("ME close write failed");
|
||||
self.remove_writer_and_close_clients(w.writer_id).await;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
debug!(conn_id, "ME close skipped (writer missing)");
|
||||
@@ -596,8 +672,12 @@ impl MePool {
|
||||
p.extend_from_slice(&conn_id.to_le_bytes());
|
||||
match w.tx.try_send(WriterCommand::DataAndFlush(Bytes::from(p))) {
|
||||
Ok(()) => {}
|
||||
Err(TrySendError::Full(cmd)) => {
|
||||
let _ = tokio::time::timeout(Duration::from_millis(50), w.tx.send(cmd)).await;
|
||||
Err(TrySendError::Full(_)) => {
|
||||
debug!(
|
||||
conn_id,
|
||||
writer_id = w.writer_id,
|
||||
"ME close_conn skipped: writer command channel is full"
|
||||
);
|
||||
}
|
||||
Err(TrySendError::Closed(_)) => {
|
||||
debug!(conn_id, "ME close_conn skipped: writer channel closed");
|
||||
|
||||
Reference in New Issue
Block a user