ME: Bound writer queue waits under backpressure

This commit is contained in:
Alexey
2026-05-25 00:28:29 +03:00
parent dc8951eae8
commit d4adf0ef9a
9 changed files with 300 additions and 84 deletions
+57 -16
View File
@@ -8,6 +8,7 @@ use std::time::{Duration, Instant};
use bytes::BytesMut;
use rand::RngExt;
use tokio::sync::mpsc;
use tokio::sync::mpsc::error::TrySendError;
use tokio_util::sync::CancellationToken;
use tracing::{debug, info, warn};
@@ -26,6 +27,7 @@ const ME_ACTIVE_PING_JITTER_SECS: i64 = 5;
const ME_IDLE_KEEPALIVE_MAX_SECS: u64 = 5;
const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700;
const ME_PING_TRACKER_CLEANUP_EVERY: u32 = 32;
const ME_SERVICE_SIGNAL_SEND_TIMEOUT_MS: u64 = 50;
#[derive(Clone, Copy)]
enum WriterTeardownMode {
@@ -45,6 +47,11 @@ enum WriterLifecycleExit {
Cancelled,
}
enum ServiceWriterCommandSendError {
Closed,
TimedOut,
}
async fn writer_command_loop(
mut rx: mpsc::Receiver<WriterCommand>,
mut rpc_writer: RpcWriter,
@@ -75,6 +82,27 @@ async fn writer_command_loop(
}
}
async fn send_service_writer_command(
tx: &mpsc::Sender<WriterCommand>,
cmd: WriterCommand,
) -> std::result::Result<(), ServiceWriterCommandSendError> {
match tx.try_send(cmd) {
Ok(()) => Ok(()),
Err(TrySendError::Closed(_)) => Err(ServiceWriterCommandSendError::Closed),
Err(TrySendError::Full(cmd)) => {
let wait = Duration::from_millis(ME_SERVICE_SIGNAL_SEND_TIMEOUT_MS);
match tokio::time::timeout(wait, tx.reserve()).await {
Ok(Ok(permit)) => {
permit.send(cmd);
Ok(())
}
Ok(Err(_)) => Err(ServiceWriterCommandSendError::Closed),
Err(_) => Err(ServiceWriterCommandSendError::TimedOut),
}
}
}
}
#[allow(clippy::too_many_arguments)]
async fn ping_loop(
pool_ping: std::sync::Weak<MePool>,
@@ -154,14 +182,24 @@ async fn ping_loop(
}
ping_id = ping_id.wrapping_add(1);
stats_ping.increment_me_keepalive_sent();
if tx_ping
.send(WriterCommand::ControlAndFlush(payload))
.await
.is_err()
if let Err(error) =
send_service_writer_command(&tx_ping, WriterCommand::ControlAndFlush(payload)).await
{
{
let mut tracker = ping_tracker_ping.lock().await;
tracker.remove(&sent_id);
}
stats_ping.increment_me_keepalive_failed();
debug!("ME ping failed, removing dead writer");
return;
match error {
ServiceWriterCommandSendError::Closed => {
debug!("ME ping failed, removing dead writer");
return;
}
ServiceWriterCommandSendError::TimedOut => {
debug!("ME ping skipped: writer command channel is full");
continue;
}
}
}
}
}
@@ -238,14 +276,15 @@ async fn rpc_proxy_req_signal_loop(
meta.proto_flags,
);
if tx_signal
.send(WriterCommand::DataAndFlush(payload))
.await
.is_err()
if let Err(error) =
send_service_writer_command(&tx_signal, WriterCommand::DataAndFlush(payload)).await
{
stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
let _ = pool.registry.unregister(conn_id).await;
return;
match error {
ServiceWriterCommandSendError::Closed => return,
ServiceWriterCommandSendError::TimedOut => continue,
}
}
stats_signal.increment_me_rpc_proxy_req_signal_sent_total();
@@ -263,14 +302,16 @@ async fn rpc_proxy_req_signal_loop(
let close_payload = build_control_payload(RPC_CLOSE_EXT_U32, conn_id);
if tx_signal
.send(WriterCommand::ControlAndFlush(close_payload))
.await
.is_err()
if let Err(error) =
send_service_writer_command(&tx_signal, WriterCommand::ControlAndFlush(close_payload))
.await
{
stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
let _ = pool.registry.unregister(conn_id).await;
return;
match error {
ServiceWriterCommandSendError::Closed => return,
ServiceWriterCommandSendError::TimedOut => continue,
}
}
stats_signal.increment_me_rpc_proxy_req_signal_close_sent_total();
+13 -8
View File
@@ -14,7 +14,9 @@ use super::{
impl ConnRegistry {
fn set_writer_bound_count(&self, writer_id: u64, count: usize) {
self.binding.bound_clients_by_writer.insert(writer_id, count);
self.binding
.bound_clients_by_writer
.insert(writer_id, count);
if count == 0 {
self.binding
.writer_idle_since_epoch_secs
@@ -38,8 +40,11 @@ impl ConnRegistry {
return;
}
let remove =
if let Some(mut count) = self.binding.active_sessions_by_target_dc.get_mut(&target_dc) {
let remove = if let Some(mut count) = self
.binding
.active_sessions_by_target_dc
.get_mut(&target_dc)
{
let decrement = delta.unsigned_abs();
*count = count.saturating_sub(decrement);
*count == 0
@@ -57,7 +62,10 @@ impl ConnRegistry {
.conns_for_writer
.entry(writer_id)
.or_insert_with(HashSet::new);
self.binding.bound_clients_by_writer.entry(writer_id).or_insert(0);
self.binding
.bound_clients_by_writer
.entry(writer_id)
.or_insert(0);
self.binding
.writer_idle_since_epoch_secs
.entry(writer_id)
@@ -474,10 +482,7 @@ impl ConnRegistry {
self.hot_binding.map.remove(&conn_id);
}
if let Some(m) = meta {
out.push(BoundConn {
conn_id,
meta: m,
});
out.push(BoundConn { conn_id, meta: m });
}
}
out
+80 -32
View File
@@ -6,6 +6,7 @@ use std::sync::Arc;
use std::sync::atomic::Ordering;
use std::time::{Duration, Instant};
use tokio::sync::mpsc;
use tokio::sync::mpsc::error::TrySendError;
use tracing::{debug, warn};
@@ -33,6 +34,11 @@ mod close;
mod recovery;
mod selection;
enum WriterCommandReserveError {
Closed,
TimedOut,
}
fn proxy_tag_array(tag: Option<&[u8]>) -> Option<[u8; 16]> {
tag.and_then(|tag| <[u8; 16]>::try_from(tag).ok())
}
@@ -44,6 +50,21 @@ fn proxy_req_payload_from_command(cmd: WriterCommand) -> Option<PooledBuffer> {
}
}
async fn reserve_writer_command_slot(
tx: &mpsc::Sender<WriterCommand>,
wait: Option<Duration>,
) -> std::result::Result<mpsc::OwnedPermit<WriterCommand>, WriterCommandReserveError> {
let reserve = tx.clone().reserve_owned();
match wait {
Some(wait) => match tokio::time::timeout(wait, reserve).await {
Ok(Ok(permit)) => Ok(permit),
Ok(Err(_)) => Err(WriterCommandReserveError::Closed),
Err(_) => Err(WriterCommandReserveError::TimedOut),
},
None => reserve.await.map_err(|_| WriterCommandReserveError::Closed),
}
}
impl MePool {
/// Send RPC_PROXY_REQ. `tag_override`: per-user ad_tag (from access.user_ad_tags); if None, uses pool default.
pub async fn send_proxy_req(
@@ -104,9 +125,25 @@ impl MePool {
return Ok(());
}
Err(TrySendError::Full(cmd)) => {
if current.tx.send(cmd).await.is_ok() {
self.note_hybrid_route_success();
return Ok(());
match reserve_writer_command_slot(
&current.tx,
self.route_runtime.me_route_blocking_send_timeout,
)
.await
{
Ok(permit) => {
permit.send(cmd);
self.note_hybrid_route_success();
return Ok(());
}
Err(WriterCommandReserveError::TimedOut) => {
self.stats
.increment_me_writer_pick_full_total(self.writer_pick_mode());
return Err(ProxyError::Proxy(
"ME writer channel full within blocking send timeout".into(),
));
}
Err(WriterCommandReserveError::Closed) => {}
}
warn!(writer_id = current.writer_id, "ME writer channel closed");
self.remove_writer_and_close_clients(current.writer_id)
@@ -156,11 +193,7 @@ impl MePool {
for (dc, addrs) in preferred.iter() {
for addr in addrs {
let _ = self
.connect_one_for_dc(
*addr,
*dc,
self.rng.as_ref(),
)
.connect_one_for_dc(*addr, *dc, self.rng.as_ref())
.await;
}
}
@@ -558,33 +591,48 @@ impl MePool {
self.note_hybrid_route_success();
return Ok(());
}
Err(TrySendError::Full(cmd)) => match current.tx.send(cmd).await {
Ok(()) => {
self.note_hybrid_route_success();
return Ok(());
}
Err(send_err) => {
let Some(payload) = proxy_req_payload_from_command(send_err.0) else {
Err(TrySendError::Full(cmd)) => {
match reserve_writer_command_slot(
&current.tx,
self.route_runtime.me_route_blocking_send_timeout,
)
.await
{
Ok(permit) => {
permit.send(cmd);
self.note_hybrid_route_success();
return Ok(());
}
Err(WriterCommandReserveError::TimedOut) => {
self.stats
.increment_me_writer_pick_full_total(self.writer_pick_mode());
return Err(ProxyError::Proxy(
"ME writer rejected unexpected command type".into(),
"ME writer channel full within blocking send timeout".into(),
));
};
warn!(writer_id = current.writer_id, "ME writer channel closed");
self.remove_writer_and_close_clients(current.writer_id)
.await;
return self
.send_proxy_req(
conn_id,
target_dc,
client_addr,
our_addr,
payload.as_ref(),
proto_flags,
tag.as_ref().map(|tag| tag.as_slice()),
)
.await;
}
Err(WriterCommandReserveError::Closed) => {
let Some(payload) = proxy_req_payload_from_command(cmd) else {
return Err(ProxyError::Proxy(
"ME writer rejected unexpected command type".into(),
));
};
warn!(writer_id = current.writer_id, "ME writer channel closed");
self.remove_writer_and_close_clients(current.writer_id)
.await;
return self
.send_proxy_req(
conn_id,
target_dc,
client_addr,
our_addr,
payload.as_ref(),
proto_flags,
tag.as_ref().map(|tag| tag.as_slice()),
)
.await;
}
}
},
}
Err(TrySendError::Closed(cmd)) => {
let Some(payload) = proxy_req_payload_from_command(cmd) else {
return Err(ProxyError::Proxy(
+38 -8
View File
@@ -10,18 +10,43 @@ use crate::protocol::constants::{RPC_CLOSE_CONN_U32, RPC_CLOSE_EXT_U32};
use super::super::MePool;
use super::super::codec::{WriterCommand, build_control_payload};
use super::{WriterCommandReserveError, reserve_writer_command_slot};
const ME_CLOSE_SIGNAL_SEND_TIMEOUT: Duration = Duration::from_millis(50);
impl MePool {
/// Sends an extended close signal for a client-bound ME connection.
pub async fn send_close(self: &Arc<Self>, conn_id: u64) -> Result<()> {
if let Some(w) = self.registry.get_writer(conn_id).await {
let payload = build_control_payload(RPC_CLOSE_EXT_U32, conn_id);
if w.tx
.send(WriterCommand::ControlAndFlush(payload))
.await
.is_err()
{
debug!("ME close write failed");
self.remove_writer_and_close_clients(w.writer_id).await;
match w.tx.try_send(WriterCommand::ControlAndFlush(payload)) {
Ok(()) => {}
Err(TrySendError::Full(cmd)) => {
match reserve_writer_command_slot(&w.tx, Some(ME_CLOSE_SIGNAL_SEND_TIMEOUT))
.await
{
Ok(permit) => {
permit.send(cmd);
}
Err(WriterCommandReserveError::TimedOut) => {
debug!(conn_id, "ME close skipped: writer command channel is full");
}
Err(WriterCommandReserveError::Closed) => {
debug!(
conn_id,
"ME close skipped: writer command channel is closed"
);
self.remove_writer_and_close_clients(w.writer_id).await;
}
}
}
Err(TrySendError::Closed(_)) => {
debug!(
conn_id,
"ME close skipped: writer command channel is closed"
);
self.remove_writer_and_close_clients(w.writer_id).await;
}
}
} else {
debug!(conn_id, "ME close skipped (writer missing)");
@@ -31,13 +56,16 @@ impl MePool {
Ok(())
}
/// Sends the compact close signal used by ME-side forced connection teardown.
pub async fn send_close_conn(self: &Arc<Self>, conn_id: u64) -> Result<()> {
if let Some(w) = self.registry.get_writer(conn_id).await {
let payload = build_control_payload(RPC_CLOSE_CONN_U32, conn_id);
match w.tx.try_send(WriterCommand::ControlAndFlush(payload)) {
Ok(()) => {}
Err(TrySendError::Full(cmd)) => {
let _ = tokio::time::timeout(Duration::from_millis(50), w.tx.send(cmd)).await;
let _ = reserve_writer_command_slot(&w.tx, Some(ME_CLOSE_SIGNAL_SEND_TIMEOUT))
.await
.map(|permit| permit.send(cmd));
}
Err(TrySendError::Closed(_)) => {
debug!(conn_id, "ME close_conn skipped: writer channel closed");
@@ -51,6 +79,7 @@ impl MePool {
Ok(())
}
/// Sends close signals for all currently registered ME-bound connections during shutdown.
pub async fn shutdown_send_close_conn_all(self: &Arc<Self>) -> usize {
let conn_ids = self.registry.active_conn_ids().await;
let total = conn_ids.len();
@@ -60,6 +89,7 @@ impl MePool {
total
}
/// Returns the current number of active ME writers tracked by the pool.
pub fn connection_count(&self) -> usize {
self.conn_count.load(Ordering::Relaxed)
}