Compare commits

..

4 Commits
3.4.12 ... flow

Author SHA1 Message Date
Alexey
d4adf0ef9a ME: Bound writer queue waits under backpressure 2026-05-25 00:28:29 +03:00
Alexey
dc8951eae8 Reduce MR + ME Routing hot-path contention 2026-05-22 20:19:09 +03:00
Alexey
77a7f89075 Reuse ME reader scratch buffer across read loop iterations 2026-05-22 19:56:38 +03:00
Alexey
9abaf9006c Prioritize Cancellation in MP select paths 2026-05-22 16:47:54 +03:00
16 changed files with 594 additions and 360 deletions

View File

@@ -1,6 +1,7 @@
use http_body_util::{BodyExt, Full}; use http_body_util::{BodyExt, Full};
use hyper::StatusCode; use hyper::StatusCode;
use hyper::body::{Bytes, Incoming}; use hyper::body::{Bytes, Incoming};
use hyper::header::ALLOW;
use serde::Serialize; use serde::Serialize;
use serde::de::DeserializeOwned; use serde::de::DeserializeOwned;
@@ -25,6 +26,8 @@ pub(super) fn success_response<T: Serialize>(
} }
pub(super) fn error_response(request_id: u64, failure: ApiFailure) -> hyper::Response<Full<Bytes>> { pub(super) fn error_response(request_id: u64, failure: ApiFailure) -> hyper::Response<Full<Bytes>> {
let status = failure.status;
let allow = failure.allow;
let payload = ErrorResponse { let payload = ErrorResponse {
ok: false, ok: false,
error: ErrorBody { error: ErrorBody {
@@ -40,11 +43,13 @@ pub(super) fn error_response(request_id: u64, failure: ApiFailure) -> hyper::Res
) )
.into_bytes() .into_bytes()
}); });
hyper::Response::builder() let mut builder = hyper::Response::builder()
.status(failure.status) .status(status)
.header("content-type", "application/json; charset=utf-8") .header("content-type", "application/json; charset=utf-8");
.body(Full::new(Bytes::from(body))) if let Some(allow) = allow {
.unwrap() builder = builder.header(ALLOW, allow);
}
builder.body(Full::new(Bytes::from(body))).unwrap()
} }
pub(super) async fn read_json<T: DeserializeOwned>( pub(super) async fn read_json<T: DeserializeOwned>(

View File

@@ -41,7 +41,9 @@ mod runtime_watch;
mod runtime_zero; mod runtime_zero;
mod users; mod users;
use config_store::{current_revision, load_config_from_disk, parse_if_match}; use config_store::{
current_revision, ensure_expected_revision, load_config_from_disk, parse_if_match,
};
use events::ApiEventStore; use events::ApiEventStore;
use http_utils::{error_response, read_json, read_optional_json, success_response}; use http_utils::{error_response, read_json, read_optional_json, success_response};
use model::{ use model::{
@@ -75,6 +77,10 @@ use users::{
const API_MAX_CONTROL_CONNECTIONS: usize = 1024; const API_MAX_CONTROL_CONNECTIONS: usize = 1024;
const API_HTTP_CONNECTION_TIMEOUT: Duration = Duration::from_secs(15); const API_HTTP_CONNECTION_TIMEOUT: Duration = Duration::from_secs(15);
const ROUTE_USERNAME_ERROR: &str = "username must match [A-Za-z0-9_.-] and be 1..64 chars"; const ROUTE_USERNAME_ERROR: &str = "username must match [A-Za-z0-9_.-] and be 1..64 chars";
const ALLOW_GET: &str = "GET";
const ALLOW_POST: &str = "POST";
const ALLOW_GET_POST: &str = "GET, POST";
const ALLOW_GET_PATCH_DELETE: &str = "GET, PATCH, DELETE";
pub(super) struct ApiRuntimeState { pub(super) struct ApiRuntimeState {
pub(super) process_started_at_epoch_secs: u64, pub(super) process_started_at_epoch_secs: u64,
@@ -125,6 +131,57 @@ fn parse_route_username(user: &str) -> Result<&str, ApiFailure> {
} }
} }
fn user_action_route_matches(path: &str, suffix: &str) -> bool {
path.strip_prefix("/v1/users/")
.and_then(|path| path.strip_suffix(suffix))
.map(|user| !user.is_empty() && !user.contains('/'))
.unwrap_or(false)
}
fn allowed_methods_for_path(path: &str) -> Option<&'static str> {
match path {
"/v1/health"
| "/v1/health/ready"
| "/v1/system/info"
| "/v1/runtime/gates"
| "/v1/runtime/initialization"
| "/v1/limits/effective"
| "/v1/security/posture"
| "/v1/security/whitelist"
| "/v1/stats/summary"
| "/v1/stats/zero/all"
| "/v1/stats/upstreams"
| "/v1/stats/minimal/all"
| "/v1/stats/me-writers"
| "/v1/stats/dcs"
| "/v1/runtime/me-pool-state"
| "/v1/runtime/me_pool_state"
| "/v1/runtime/me-quality"
| "/v1/runtime/me_quality"
| "/v1/runtime/upstream-quality"
| "/v1/runtime/upstream_quality"
| "/v1/runtime/nat-stun"
| "/v1/runtime/nat_stun"
| "/v1/runtime/me-selftest"
| "/v1/runtime/connections/summary"
| "/v1/runtime/events/recent"
| "/v1/stats/users/active-ips"
| "/v1/stats/users/quota"
| "/v1/stats/users" => Some(ALLOW_GET),
"/v1/users" => Some(ALLOW_GET_POST),
_ if user_action_route_matches(path, "/reset-quota") => Some(ALLOW_POST),
_ if user_action_route_matches(path, "/rotate-secret") => Some(ALLOW_POST),
_ if path
.strip_prefix("/v1/users/")
.map(|user| !user.is_empty() && !user.contains('/'))
.unwrap_or(false) =>
{
Some(ALLOW_GET_PATCH_DELETE)
}
_ => None,
}
}
pub async fn serve( pub async fn serve(
listen: SocketAddr, listen: SocketAddr,
stats: Arc<Stats>, stats: Arc<Stats>,
@@ -435,22 +492,22 @@ async fn handle(
let data = build_dcs_data(shared.as_ref(), api_cfg).await; let data = build_dcs_data(shared.as_ref(), api_cfg).await;
Ok(success_response(StatusCode::OK, data, revision)) Ok(success_response(StatusCode::OK, data, revision))
} }
("GET", "/v1/runtime/me_pool_state") => { ("GET", "/v1/runtime/me-pool-state") | ("GET", "/v1/runtime/me_pool_state") => {
let revision = current_revision(&shared.config_path).await?; let revision = current_revision(&shared.config_path).await?;
let data = build_runtime_me_pool_state_data(shared.as_ref()).await; let data = build_runtime_me_pool_state_data(shared.as_ref()).await;
Ok(success_response(StatusCode::OK, data, revision)) Ok(success_response(StatusCode::OK, data, revision))
} }
("GET", "/v1/runtime/me_quality") => { ("GET", "/v1/runtime/me-quality") | ("GET", "/v1/runtime/me_quality") => {
let revision = current_revision(&shared.config_path).await?; let revision = current_revision(&shared.config_path).await?;
let data = build_runtime_me_quality_data(shared.as_ref()).await; let data = build_runtime_me_quality_data(shared.as_ref()).await;
Ok(success_response(StatusCode::OK, data, revision)) Ok(success_response(StatusCode::OK, data, revision))
} }
("GET", "/v1/runtime/upstream_quality") => { ("GET", "/v1/runtime/upstream-quality") | ("GET", "/v1/runtime/upstream_quality") => {
let revision = current_revision(&shared.config_path).await?; let revision = current_revision(&shared.config_path).await?;
let data = build_runtime_upstream_quality_data(shared.as_ref()).await; let data = build_runtime_upstream_quality_data(shared.as_ref()).await;
Ok(success_response(StatusCode::OK, data, revision)) Ok(success_response(StatusCode::OK, data, revision))
} }
("GET", "/v1/runtime/nat_stun") => { ("GET", "/v1/runtime/nat-stun") | ("GET", "/v1/runtime/nat_stun") => {
let revision = current_revision(&shared.config_path).await?; let revision = current_revision(&shared.config_path).await?;
let data = build_runtime_nat_stun_data(shared.as_ref()).await; let data = build_runtime_nat_stun_data(shared.as_ref()).await;
Ok(success_response(StatusCode::OK, data, revision)) Ok(success_response(StatusCode::OK, data, revision))
@@ -506,7 +563,7 @@ async fn handle(
.await; .await;
Ok(success_response(StatusCode::OK, users, revision)) Ok(success_response(StatusCode::OK, users, revision))
} }
("GET", "/v1/users/quota") => { ("GET", "/v1/stats/users/quota") => {
let revision = current_revision(&shared.config_path).await?; let revision = current_revision(&shared.config_path).await?;
let disk_cfg = load_config_from_disk(&shared.config_path).await?; let disk_cfg = load_config_from_disk(&shared.config_path).await?;
let data = build_user_quota_list(&disk_cfg, shared.stats.as_ref()); let data = build_user_quota_list(&disk_cfg, shared.stats.as_ref());
@@ -567,6 +624,16 @@ async fn handle(
), ),
)); ));
} }
let expected_revision = parse_if_match(req.headers());
let disk_cfg = load_config_from_disk(&shared.config_path).await?;
ensure_expected_revision(&shared.config_path, expected_revision.as_deref())
.await?;
if !disk_cfg.access.users.contains_key(user) {
return Ok(error_response(
request_id,
ApiFailure::new(StatusCode::NOT_FOUND, "not_found", "User not found"),
));
}
let snapshot = match crate::quota_state::reset_user_quota( let snapshot = match crate::quota_state::reset_user_quota(
&shared.quota_state_path, &shared.quota_state_path,
shared.stats.as_ref(), shared.stats.as_ref(),
@@ -761,16 +828,18 @@ async fn handle(
if method == Method::POST { if method == Method::POST {
return Ok(error_response( return Ok(error_response(
request_id, request_id,
ApiFailure::new(StatusCode::NOT_FOUND, "not_found", "Route not found"), ApiFailure::method_not_allowed(ALLOW_GET_PATCH_DELETE),
)); ));
} }
return Ok(error_response( return Ok(error_response(
request_id, request_id,
ApiFailure::new( ApiFailure::method_not_allowed(ALLOW_GET_PATCH_DELETE),
StatusCode::METHOD_NOT_ALLOWED, ));
"method_not_allowed", }
"Unsupported HTTP method for this route", if let Some(allow) = allowed_methods_for_path(normalized_path) {
), return Ok(error_response(
request_id,
ApiFailure::method_not_allowed(allow),
)); ));
} }
debug!( debug!(

View File

@@ -15,6 +15,7 @@ pub(super) struct ApiFailure {
pub(super) status: StatusCode, pub(super) status: StatusCode,
pub(super) code: &'static str, pub(super) code: &'static str,
pub(super) message: String, pub(super) message: String,
pub(super) allow: Option<&'static str>,
} }
impl ApiFailure { impl ApiFailure {
@@ -23,6 +24,7 @@ impl ApiFailure {
status, status,
code, code,
message: message.into(), message: message.into(),
allow: None,
} }
} }
@@ -33,6 +35,15 @@ impl ApiFailure {
pub(super) fn bad_request(message: impl Into<String>) -> Self { pub(super) fn bad_request(message: impl Into<String>) -> Self {
Self::new(StatusCode::BAD_REQUEST, "bad_request", message) Self::new(StatusCode::BAD_REQUEST, "bad_request", message)
} }
pub(super) fn method_not_allowed(allow: &'static str) -> Self {
Self {
status: StatusCode::METHOD_NOT_ALLOWED,
code: "method_not_allowed",
message: "Unsupported HTTP method for this route".to_string(),
allow: Some(allow),
}
}
} }
#[derive(Serialize)] #[derive(Serialize)]

View File

@@ -1,6 +1,6 @@
use std::collections::BTreeSet;
#[cfg(test)] #[cfg(test)]
use std::collections::hash_map::DefaultHasher; use std::collections::hash_map::DefaultHasher;
use std::collections::{BTreeSet, HashMap};
#[cfg(test)] #[cfg(test)]
use std::future::Future; use std::future::Future;
#[cfg(test)] #[cfg(test)]

View File

@@ -1,4 +1,5 @@
use super::*; use super::*;
use dashmap::DashMap;
mod read; mod read;
@@ -10,10 +11,10 @@ pub(crate) use self::read::{
#[derive(Default)] #[derive(Default)]
pub(crate) struct RelayIdleCandidateRegistry { pub(crate) struct RelayIdleCandidateRegistry {
pub(in crate::proxy::middle_relay) by_conn_id: HashMap<u64, RelayIdleCandidateMeta>, pub(in crate::proxy::middle_relay) by_conn_id: DashMap<u64, RelayIdleCandidateMeta>,
pub(in crate::proxy::middle_relay) ordered: BTreeSet<(u64, u64)>, pub(in crate::proxy::middle_relay) ordered: parking_lot::Mutex<BTreeSet<(u64, u64)>>,
pressure_event_seq: u64, pressure_event_seq: AtomicU64,
pressure_consumed_seq: u64, pressure_consumed_seq: AtomicU64,
} }
/// Queue metadata used to preserve FIFO ordering for idle relay eviction. /// Queue metadata used to preserve FIFO ordering for idle relay eviction.
@@ -23,25 +24,10 @@ pub(in crate::proxy::middle_relay) struct RelayIdleCandidateMeta {
pub(in crate::proxy::middle_relay) mark_pressure_seq: u64, pub(in crate::proxy::middle_relay) mark_pressure_seq: u64,
} }
pub(super) fn relay_idle_candidate_registry_lock_in(
shared: &ProxySharedState,
) -> std::sync::MutexGuard<'_, RelayIdleCandidateRegistry> {
let registry = &shared.middle_relay.relay_idle_registry;
match registry.lock() {
Ok(guard) => guard,
Err(poisoned) => {
let mut guard = poisoned.into_inner();
*guard = RelayIdleCandidateRegistry::default();
registry.clear_poison();
guard
}
}
}
pub(super) fn mark_relay_idle_candidate_in(shared: &ProxySharedState, conn_id: u64) -> bool { pub(super) fn mark_relay_idle_candidate_in(shared: &ProxySharedState, conn_id: u64) -> bool {
let mut guard = relay_idle_candidate_registry_lock_in(shared); let registry = &shared.middle_relay.relay_idle_registry;
if guard.by_conn_id.contains_key(&conn_id) { if registry.by_conn_id.contains_key(&conn_id) {
return false; return false;
} }
@@ -52,24 +38,38 @@ pub(super) fn mark_relay_idle_candidate_in(shared: &ProxySharedState, conn_id: u
.saturating_add(1); .saturating_add(1);
let meta = RelayIdleCandidateMeta { let meta = RelayIdleCandidateMeta {
mark_order_seq, mark_order_seq,
mark_pressure_seq: guard.pressure_event_seq, mark_pressure_seq: registry.pressure_event_seq.load(Ordering::Relaxed),
}; };
guard.by_conn_id.insert(conn_id, meta); match registry.by_conn_id.entry(conn_id) {
guard.ordered.insert((meta.mark_order_seq, conn_id)); dashmap::mapref::entry::Entry::Occupied(_) => false,
dashmap::mapref::entry::Entry::Vacant(entry) => {
entry.insert(meta);
registry
.ordered
.lock()
.insert((meta.mark_order_seq, conn_id));
true true
} }
}
}
pub(super) fn clear_relay_idle_candidate_in(shared: &ProxySharedState, conn_id: u64) { pub(super) fn clear_relay_idle_candidate_in(shared: &ProxySharedState, conn_id: u64) {
let mut guard = relay_idle_candidate_registry_lock_in(shared); let registry = &shared.middle_relay.relay_idle_registry;
if let Some(meta) = guard.by_conn_id.remove(&conn_id) { if let Some((_, meta)) = registry.by_conn_id.remove(&conn_id) {
guard.ordered.remove(&(meta.mark_order_seq, conn_id)); registry
.ordered
.lock()
.remove(&(meta.mark_order_seq, conn_id));
} }
} }
pub(super) fn note_relay_pressure_event_in(shared: &ProxySharedState) { pub(super) fn note_relay_pressure_event_in(shared: &ProxySharedState) {
let mut guard = relay_idle_candidate_registry_lock_in(shared); shared
guard.pressure_event_seq = guard.pressure_event_seq.wrapping_add(1); .middle_relay
.relay_idle_registry
.pressure_event_seq
.fetch_add(1, Ordering::Relaxed);
} }
pub(crate) fn note_global_relay_pressure(shared: &ProxySharedState) { pub(crate) fn note_global_relay_pressure(shared: &ProxySharedState) {
@@ -77,8 +77,11 @@ pub(crate) fn note_global_relay_pressure(shared: &ProxySharedState) {
} }
pub(super) fn relay_pressure_event_seq_in(shared: &ProxySharedState) -> u64 { pub(super) fn relay_pressure_event_seq_in(shared: &ProxySharedState) -> u64 {
let guard = relay_idle_candidate_registry_lock_in(shared); shared
guard.pressure_event_seq .middle_relay
.relay_idle_registry
.pressure_event_seq
.load(Ordering::Relaxed)
} }
pub(super) fn maybe_evict_idle_candidate_on_pressure_in( pub(super) fn maybe_evict_idle_candidate_on_pressure_in(
@@ -87,33 +90,47 @@ pub(super) fn maybe_evict_idle_candidate_on_pressure_in(
seen_pressure_seq: &mut u64, seen_pressure_seq: &mut u64,
stats: &Stats, stats: &Stats,
) -> bool { ) -> bool {
let mut guard = relay_idle_candidate_registry_lock_in(shared); let registry = &shared.middle_relay.relay_idle_registry;
let latest_pressure_seq = guard.pressure_event_seq; let latest_pressure_seq = registry.pressure_event_seq.load(Ordering::Relaxed);
if latest_pressure_seq == *seen_pressure_seq { if latest_pressure_seq == *seen_pressure_seq {
return false; return false;
} }
*seen_pressure_seq = latest_pressure_seq; *seen_pressure_seq = latest_pressure_seq;
if latest_pressure_seq == guard.pressure_consumed_seq { if latest_pressure_seq == registry.pressure_consumed_seq.load(Ordering::Relaxed) {
return false; return false;
} }
if guard.ordered.is_empty() { let oldest = {
guard.pressure_consumed_seq = latest_pressure_seq; let mut ordered = registry.ordered.lock();
loop {
let Some((mark_order_seq, candidate_conn_id)) = ordered.iter().next().copied() else {
registry
.pressure_consumed_seq
.store(latest_pressure_seq, Ordering::Relaxed);
return false; return false;
};
let Some(candidate_meta) = registry.by_conn_id.get(&candidate_conn_id) else {
ordered.remove(&(mark_order_seq, candidate_conn_id));
continue;
};
if candidate_meta.mark_order_seq != mark_order_seq {
ordered.remove(&(mark_order_seq, candidate_conn_id));
continue;
} }
break Some(candidate_conn_id);
let oldest = guard }
.ordered };
.iter()
.next()
.map(|(_, candidate_conn_id)| *candidate_conn_id);
if oldest != Some(conn_id) { if oldest != Some(conn_id) {
return false; return false;
} }
let Some(candidate_meta) = guard.by_conn_id.get(&conn_id).copied() else { let Some(candidate_meta) = registry
.by_conn_id
.get(&conn_id)
.map(|entry| *entry.value())
else {
return false; return false;
}; };
@@ -121,10 +138,15 @@ pub(super) fn maybe_evict_idle_candidate_on_pressure_in(
return false; return false;
} }
if let Some(meta) = guard.by_conn_id.remove(&conn_id) { if let Some((_, meta)) = registry.by_conn_id.remove(&conn_id) {
guard.ordered.remove(&(meta.mark_order_seq, conn_id)); registry
.ordered
.lock()
.remove(&(meta.mark_order_seq, conn_id));
} }
guard.pressure_consumed_seq = latest_pressure_seq; registry
.pressure_consumed_seq
.store(latest_pressure_seq, Ordering::Relaxed);
stats.increment_relay_pressure_evict_total(); stats.increment_relay_pressure_evict_total();
true true
} }
@@ -220,72 +242,32 @@ pub(crate) fn mark_relay_idle_candidate_for_testing(
shared: &ProxySharedState, shared: &ProxySharedState,
conn_id: u64, conn_id: u64,
) -> bool { ) -> bool {
let registry = &shared.middle_relay.relay_idle_registry; mark_relay_idle_candidate_in(shared, conn_id)
let mut guard = match registry.lock() {
Ok(guard) => guard,
Err(poisoned) => {
let mut guard = poisoned.into_inner();
*guard = RelayIdleCandidateRegistry::default();
registry.clear_poison();
guard
}
};
if guard.by_conn_id.contains_key(&conn_id) {
return false;
}
let mark_order_seq = shared
.middle_relay
.relay_idle_mark_seq
.fetch_add(1, Ordering::Relaxed);
let mark_pressure_seq = guard.pressure_event_seq;
let meta = RelayIdleCandidateMeta {
mark_order_seq,
mark_pressure_seq,
};
guard.by_conn_id.insert(conn_id, meta);
guard.ordered.insert((mark_order_seq, conn_id));
true
} }
#[cfg(test)] #[cfg(test)]
pub(crate) fn oldest_relay_idle_candidate_for_testing(shared: &ProxySharedState) -> Option<u64> { pub(crate) fn oldest_relay_idle_candidate_for_testing(shared: &ProxySharedState) -> Option<u64> {
let registry = &shared.middle_relay.relay_idle_registry; let registry = &shared.middle_relay.relay_idle_registry;
let guard = match registry.lock() { registry
Ok(guard) => guard, .ordered
Err(poisoned) => { .lock()
let mut guard = poisoned.into_inner(); .iter()
*guard = RelayIdleCandidateRegistry::default(); .next()
registry.clear_poison(); .map(|(_, conn_id)| *conn_id)
guard
}
};
guard.ordered.iter().next().map(|(_, conn_id)| *conn_id)
} }
#[cfg(test)] #[cfg(test)]
pub(crate) fn clear_relay_idle_candidate_for_testing(shared: &ProxySharedState, conn_id: u64) { pub(crate) fn clear_relay_idle_candidate_for_testing(shared: &ProxySharedState, conn_id: u64) {
let registry = &shared.middle_relay.relay_idle_registry; clear_relay_idle_candidate_in(shared, conn_id);
let mut guard = match registry.lock() {
Ok(guard) => guard,
Err(poisoned) => {
let mut guard = poisoned.into_inner();
*guard = RelayIdleCandidateRegistry::default();
registry.clear_poison();
guard
}
};
if let Some(meta) = guard.by_conn_id.remove(&conn_id) {
guard.ordered.remove(&(meta.mark_order_seq, conn_id));
}
} }
#[cfg(test)] #[cfg(test)]
pub(crate) fn clear_relay_idle_pressure_state_for_testing_in_shared(shared: &ProxySharedState) { pub(crate) fn clear_relay_idle_pressure_state_for_testing_in_shared(shared: &ProxySharedState) {
if let Ok(mut guard) = shared.middle_relay.relay_idle_registry.lock() { let registry = &shared.middle_relay.relay_idle_registry;
*guard = RelayIdleCandidateRegistry::default(); registry.by_conn_id.clear();
} registry.ordered.lock().clear();
registry.pressure_event_seq.store(0, Ordering::Relaxed);
registry.pressure_consumed_seq.store(0, Ordering::Relaxed);
shared shared
.middle_relay .middle_relay
.relay_idle_mark_seq .relay_idle_mark_seq
@@ -327,15 +309,10 @@ pub(crate) fn set_relay_pressure_state_for_testing(
pressure_consumed_seq: u64, pressure_consumed_seq: u64,
) { ) {
let registry = &shared.middle_relay.relay_idle_registry; let registry = &shared.middle_relay.relay_idle_registry;
let mut guard = match registry.lock() { registry
Ok(guard) => guard, .pressure_event_seq
Err(poisoned) => { .store(pressure_event_seq, Ordering::Relaxed);
let mut guard = poisoned.into_inner(); registry
*guard = RelayIdleCandidateRegistry::default(); .pressure_consumed_seq
registry.clear_poison(); .store(pressure_consumed_seq, Ordering::Relaxed);
guard
}
};
guard.pressure_event_seq = pressure_event_seq;
guard.pressure_consumed_seq = pressure_consumed_seq;
} }

View File

@@ -41,11 +41,12 @@ pub(super) async fn reserve_user_quota_with_yield(
return Err(MiddleQuotaReserveError::DeadlineExceeded); return Err(MiddleQuotaReserveError::DeadlineExceeded);
} }
tokio::select! { tokio::select! {
_ = tokio::time::sleep(Duration::from_millis(backoff_ms)) => {} biased;
_ = cancel.cancelled() => { _ = cancel.cancelled() => {
stats.increment_quota_acquire_cancelled_total(); stats.increment_quota_acquire_cancelled_total();
return Err(MiddleQuotaReserveError::Cancelled); return Err(MiddleQuotaReserveError::Cancelled);
} }
_ = tokio::time::sleep(Duration::from_millis(backoff_ms)) => {}
} }
backoff_rounds = backoff_rounds.saturating_add(1); backoff_rounds = backoff_rounds.saturating_add(1);
if backoff_rounds >= QUOTA_RESERVE_MAX_BACKOFF_ROUNDS { if backoff_rounds >= QUOTA_RESERVE_MAX_BACKOFF_ROUNDS {
@@ -128,11 +129,12 @@ pub(super) async fn wait_for_traffic_budget_or_cancel(
return Err(ProxyError::TrafficBudgetWaitDeadlineExceeded); return Err(ProxyError::TrafficBudgetWaitDeadlineExceeded);
} }
tokio::select! { tokio::select! {
_ = tokio::time::sleep(next_refill_delay()) => {} biased;
_ = cancel.cancelled() => { _ = cancel.cancelled() => {
stats.increment_flow_wait_middle_rate_limit_cancelled_total(); stats.increment_flow_wait_middle_rate_limit_cancelled_total();
return Err(ProxyError::TrafficBudgetWaitCancelled); return Err(ProxyError::TrafficBudgetWaitCancelled);
} }
_ = tokio::time::sleep(next_refill_delay()) => {}
} }
let wait_ms = wait_started_at let wait_ms = wait_started_at
.elapsed() .elapsed()

View File

@@ -59,7 +59,7 @@ pub(crate) struct MiddleRelaySharedState {
pub(crate) desync_hasher: RandomState, pub(crate) desync_hasher: RandomState,
pub(crate) desync_full_cache_last_emit_at: Mutex<Option<Instant>>, pub(crate) desync_full_cache_last_emit_at: Mutex<Option<Instant>>,
pub(crate) desync_dedup_rotation_state: Mutex<DesyncDedupRotationState>, pub(crate) desync_dedup_rotation_state: Mutex<DesyncDedupRotationState>,
pub(crate) relay_idle_registry: Mutex<RelayIdleCandidateRegistry>, pub(crate) relay_idle_registry: RelayIdleCandidateRegistry,
pub(crate) relay_idle_mark_seq: AtomicU64, pub(crate) relay_idle_mark_seq: AtomicU64,
} }
@@ -97,7 +97,7 @@ impl ProxySharedState {
desync_hasher: RandomState::new(), desync_hasher: RandomState::new(),
desync_full_cache_last_emit_at: Mutex::new(None), desync_full_cache_last_emit_at: Mutex::new(None),
desync_dedup_rotation_state: Mutex::new(DesyncDedupRotationState::default()), desync_dedup_rotation_state: Mutex::new(DesyncDedupRotationState::default()),
relay_idle_registry: Mutex::new(RelayIdleCandidateRegistry::default()), relay_idle_registry: RelayIdleCandidateRegistry::default(),
relay_idle_mark_seq: AtomicU64::new(0), relay_idle_mark_seq: AtomicU64::new(0),
}, },
traffic_limiter: TrafficLimiter::new(), traffic_limiter: TrafficLimiter::new(),

View File

@@ -1,33 +1,21 @@
use super::*; use super::*;
use std::panic::{AssertUnwindSafe, catch_unwind};
#[test] #[test]
fn blackhat_registry_poison_recovers_with_fail_closed_reset_and_pressure_accounting() { fn blackhat_registry_stale_order_entry_is_skipped_and_pressure_accounting_continues() {
let shared = ProxySharedState::new(); let shared = ProxySharedState::new();
clear_relay_idle_pressure_state_for_testing_in_shared(shared.as_ref()); clear_relay_idle_pressure_state_for_testing_in_shared(shared.as_ref());
let _ = catch_unwind(AssertUnwindSafe(|| { shared
let mut guard = shared
.middle_relay .middle_relay
.relay_idle_registry .relay_idle_registry
.ordered
.lock() .lock()
.expect("registry lock must be acquired before poison"); .insert((0, 999));
guard.by_conn_id.insert(
999,
RelayIdleCandidateMeta {
mark_order_seq: 1,
mark_pressure_seq: 0,
},
);
guard.ordered.insert((1, 999));
panic!("intentional poison for idle-registry recovery");
}));
// Helper lock must recover from poison, reset stale state, and continue.
assert!(mark_relay_idle_candidate_for_testing(shared.as_ref(), 42)); assert!(mark_relay_idle_candidate_for_testing(shared.as_ref(), 42));
assert_eq!( assert_eq!(
oldest_relay_idle_candidate_for_testing(shared.as_ref()), oldest_relay_idle_candidate_for_testing(shared.as_ref()),
Some(42) Some(999)
); );
let before = relay_pressure_event_seq_for_testing(shared.as_ref()); let before = relay_pressure_event_seq_for_testing(shared.as_ref());
@@ -35,25 +23,43 @@ fn blackhat_registry_poison_recovers_with_fail_closed_reset_and_pressure_account
let after = relay_pressure_event_seq_for_testing(shared.as_ref()); let after = relay_pressure_event_seq_for_testing(shared.as_ref());
assert!( assert!(
after > before, after > before,
"pressure accounting must still advance after poison" "pressure accounting must still advance with stale ordered entries"
);
let mut seen_pressure_seq = before;
assert!(maybe_evict_idle_candidate_on_pressure_for_testing(
shared.as_ref(),
42,
&mut seen_pressure_seq,
&Stats::new()
));
assert_eq!(
oldest_relay_idle_candidate_for_testing(shared.as_ref()),
None
); );
clear_relay_idle_pressure_state_for_testing_in_shared(shared.as_ref()); clear_relay_idle_pressure_state_for_testing_in_shared(shared.as_ref());
} }
#[test] #[test]
fn clear_state_helper_must_reset_poisoned_registry_for_deterministic_fifo_tests() { fn clear_state_helper_must_reset_split_registry_for_deterministic_fifo_tests() {
let shared = ProxySharedState::new(); let shared = ProxySharedState::new();
clear_relay_idle_pressure_state_for_testing_in_shared(shared.as_ref()); clear_relay_idle_pressure_state_for_testing_in_shared(shared.as_ref());
let _ = catch_unwind(AssertUnwindSafe(|| { shared.middle_relay.relay_idle_registry.by_conn_id.insert(
let _guard = shared 999,
RelayIdleCandidateMeta {
mark_order_seq: 1,
mark_pressure_seq: 0,
},
);
shared
.middle_relay .middle_relay
.relay_idle_registry .relay_idle_registry
.ordered
.lock() .lock()
.expect("registry lock must be acquired before poison"); .insert((1, 999));
panic!("intentional poison while lock held"); set_relay_pressure_state_for_testing(shared.as_ref(), 7, 6);
}));
clear_relay_idle_pressure_state_for_testing_in_shared(shared.as_ref()); clear_relay_idle_pressure_state_for_testing_in_shared(shared.as_ref());

View File

@@ -8,6 +8,7 @@ use std::time::{Duration, Instant};
use bytes::BytesMut; use bytes::BytesMut;
use rand::RngExt; use rand::RngExt;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio::sync::mpsc::error::TrySendError;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::{debug, info, warn}; use tracing::{debug, info, warn};
@@ -26,6 +27,7 @@ const ME_ACTIVE_PING_JITTER_SECS: i64 = 5;
const ME_IDLE_KEEPALIVE_MAX_SECS: u64 = 5; const ME_IDLE_KEEPALIVE_MAX_SECS: u64 = 5;
const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700; const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700;
const ME_PING_TRACKER_CLEANUP_EVERY: u32 = 32; const ME_PING_TRACKER_CLEANUP_EVERY: u32 = 32;
const ME_SERVICE_SIGNAL_SEND_TIMEOUT_MS: u64 = 50;
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
enum WriterTeardownMode { enum WriterTeardownMode {
@@ -45,6 +47,11 @@ enum WriterLifecycleExit {
Cancelled, Cancelled,
} }
enum ServiceWriterCommandSendError {
Closed,
TimedOut,
}
async fn writer_command_loop( async fn writer_command_loop(
mut rx: mpsc::Receiver<WriterCommand>, mut rx: mpsc::Receiver<WriterCommand>,
mut rpc_writer: RpcWriter, mut rpc_writer: RpcWriter,
@@ -52,6 +59,8 @@ async fn writer_command_loop(
) -> Result<()> { ) -> Result<()> {
loop { loop {
tokio::select! { tokio::select! {
biased;
_ = cancel.cancelled() => return Ok(()),
cmd = rx.recv() => { cmd = rx.recv() => {
match cmd { match cmd {
Some(WriterCommand::Data(payload)) => { Some(WriterCommand::Data(payload)) => {
@@ -69,7 +78,27 @@ async fn writer_command_loop(
Some(WriterCommand::Close) | None => return Ok(()), Some(WriterCommand::Close) | None => return Ok(()),
} }
} }
_ = cancel.cancelled() => return Ok(()), }
}
}
async fn send_service_writer_command(
tx: &mpsc::Sender<WriterCommand>,
cmd: WriterCommand,
) -> std::result::Result<(), ServiceWriterCommandSendError> {
match tx.try_send(cmd) {
Ok(()) => Ok(()),
Err(TrySendError::Closed(_)) => Err(ServiceWriterCommandSendError::Closed),
Err(TrySendError::Full(cmd)) => {
let wait = Duration::from_millis(ME_SERVICE_SIGNAL_SEND_TIMEOUT_MS);
match tokio::time::timeout(wait, tx.reserve()).await {
Ok(Ok(permit)) => {
permit.send(cmd);
Ok(())
}
Ok(Err(_)) => Err(ServiceWriterCommandSendError::Closed),
Err(_) => Err(ServiceWriterCommandSendError::TimedOut),
}
} }
} }
} }
@@ -108,6 +137,7 @@ async fn ping_loop(
Duration::from_secs(wait) Duration::from_secs(wait)
}; };
tokio::select! { tokio::select! {
biased;
_ = cancel_ping_token.cancelled() => return, _ = cancel_ping_token.cancelled() => return,
_ = tokio::time::sleep(startup_jitter) => {} _ = tokio::time::sleep(startup_jitter) => {}
} }
@@ -131,6 +161,7 @@ async fn ping_loop(
Duration::from_secs(secs) Duration::from_secs(secs)
}; };
tokio::select! { tokio::select! {
biased;
_ = cancel_ping_token.cancelled() => return, _ = cancel_ping_token.cancelled() => return,
_ = tokio::time::sleep(wait) => {} _ = tokio::time::sleep(wait) => {}
} }
@@ -151,15 +182,25 @@ async fn ping_loop(
} }
ping_id = ping_id.wrapping_add(1); ping_id = ping_id.wrapping_add(1);
stats_ping.increment_me_keepalive_sent(); stats_ping.increment_me_keepalive_sent();
if tx_ping if let Err(error) =
.send(WriterCommand::ControlAndFlush(payload)) send_service_writer_command(&tx_ping, WriterCommand::ControlAndFlush(payload)).await
.await
.is_err()
{ {
{
let mut tracker = ping_tracker_ping.lock().await;
tracker.remove(&sent_id);
}
stats_ping.increment_me_keepalive_failed(); stats_ping.increment_me_keepalive_failed();
match error {
ServiceWriterCommandSendError::Closed => {
debug!("ME ping failed, removing dead writer"); debug!("ME ping failed, removing dead writer");
return; return;
} }
ServiceWriterCommandSendError::TimedOut => {
debug!("ME ping skipped: writer command channel is full");
continue;
}
}
}
} }
} }
@@ -191,6 +232,7 @@ async fn rpc_proxy_req_signal_loop(
}; };
tokio::select! { tokio::select! {
biased;
_ = cancel_signal.cancelled() => return, _ = cancel_signal.cancelled() => return,
_ = tokio::time::sleep(Duration::from_millis(startup_jitter_ms)) => {} _ = tokio::time::sleep(Duration::from_millis(startup_jitter_ms)) => {}
} }
@@ -207,6 +249,7 @@ async fn rpc_proxy_req_signal_loop(
}; };
tokio::select! { tokio::select! {
biased;
_ = cancel_signal.cancelled() => return, _ = cancel_signal.cancelled() => return,
_ = tokio::time::sleep(wait) => {} _ = tokio::time::sleep(wait) => {}
} }
@@ -233,14 +276,15 @@ async fn rpc_proxy_req_signal_loop(
meta.proto_flags, meta.proto_flags,
); );
if tx_signal if let Err(error) =
.send(WriterCommand::DataAndFlush(payload)) send_service_writer_command(&tx_signal, WriterCommand::DataAndFlush(payload)).await
.await
.is_err()
{ {
stats_signal.increment_me_rpc_proxy_req_signal_failed_total(); stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
let _ = pool.registry.unregister(conn_id).await; let _ = pool.registry.unregister(conn_id).await;
return; match error {
ServiceWriterCommandSendError::Closed => return,
ServiceWriterCommandSendError::TimedOut => continue,
}
} }
stats_signal.increment_me_rpc_proxy_req_signal_sent_total(); stats_signal.increment_me_rpc_proxy_req_signal_sent_total();
@@ -258,14 +302,16 @@ async fn rpc_proxy_req_signal_loop(
let close_payload = build_control_payload(RPC_CLOSE_EXT_U32, conn_id); let close_payload = build_control_payload(RPC_CLOSE_EXT_U32, conn_id);
if tx_signal if let Err(error) =
.send(WriterCommand::ControlAndFlush(close_payload)) send_service_writer_command(&tx_signal, WriterCommand::ControlAndFlush(close_payload))
.await .await
.is_err()
{ {
stats_signal.increment_me_rpc_proxy_req_signal_failed_total(); stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
let _ = pool.registry.unregister(conn_id).await; let _ = pool.registry.unregister(conn_id).await;
return; match error {
ServiceWriterCommandSendError::Closed => return,
ServiceWriterCommandSendError::TimedOut => continue,
}
} }
stats_signal.increment_me_rpc_proxy_req_signal_close_sent_total(); stats_signal.increment_me_rpc_proxy_req_signal_close_sent_total();

View File

@@ -242,6 +242,7 @@ pub(crate) async fn reader_loop(
let mut raw = enc_leftover; let mut raw = enc_leftover;
let mut expected_seq: i32 = 0; let mut expected_seq: i32 = 0;
let mut data_route_queue_full_streak = HashMap::<u64, u8>::new(); let mut data_route_queue_full_streak = HashMap::<u64, u8>::new();
let mut tmp = [0u8; 65_536];
let mut fairness = WorkerFairnessState::new( let mut fairness = WorkerFairnessState::new(
WorkerFairnessConfig { WorkerFairnessConfig {
worker_id: (writer_id as u16).saturating_add(1), worker_id: (writer_id as u16).saturating_add(1),
@@ -263,18 +264,18 @@ pub(crate) async fn reader_loop(
let fairshare_enabled = route_fairshare_enabled.load(Ordering::Relaxed); let fairshare_enabled = route_fairshare_enabled.load(Ordering::Relaxed);
fairness.set_backpressure_enabled(backpressure_enabled); fairness.set_backpressure_enabled(backpressure_enabled);
let fairness_has_backlog = should_schedule_fairness_retry(&fairness_snapshot); let fairness_has_backlog = should_schedule_fairness_retry(&fairness_snapshot);
let mut tmp = [0u8; 65_536];
let backlog_retry_enabled = fairness_has_backlog; let backlog_retry_enabled = fairness_has_backlog;
let backlog_retry_delay = let backlog_retry_delay =
fairness_retry_delay(reader_route_data_wait_ms.load(Ordering::Relaxed)); fairness_retry_delay(reader_route_data_wait_ms.load(Ordering::Relaxed));
let mut retry_only = false; let mut retry_only = false;
let n = tokio::select! { let n = tokio::select! {
biased;
_ = cancel.cancelled() => return Ok(()),
res = rd.read(&mut tmp) => res.map_err(ProxyError::Io)?, res = rd.read(&mut tmp) => res.map_err(ProxyError::Io)?,
_ = tokio::time::sleep(backlog_retry_delay), if backlog_retry_enabled => { _ = tokio::time::sleep(backlog_retry_delay), if backlog_retry_enabled => {
retry_only = true; retry_only = true;
0usize 0usize
}, },
_ = cancel.cancelled() => return Ok(()),
}; };
if retry_only { if retry_only {
let route_wait_ms = reader_route_data_wait_ms.load(Ordering::Relaxed); let route_wait_ms = reader_route_data_wait_ms.load(Ordering::Relaxed);

View File

@@ -77,26 +77,24 @@ struct HotBindingTable {
struct BindingState { struct BindingState {
inner: Mutex<BindingInner>, inner: Mutex<BindingInner>,
writer_idle_since_epoch_secs: DashMap<u64, u64>,
bound_clients_by_writer: DashMap<u64, usize>,
active_sessions_by_target_dc: DashMap<i16, usize>,
last_meta_for_writer: DashMap<u64, ConnMeta>,
} }
struct BindingInner { struct BindingInner {
writers: HashMap<u64, mpsc::Sender<WriterCommand>>,
writer_for_conn: HashMap<u64, u64>, writer_for_conn: HashMap<u64, u64>,
conns_for_writer: HashMap<u64, HashSet<u64>>, conns_for_writer: HashMap<u64, HashSet<u64>>,
meta: HashMap<u64, ConnMeta>, meta: HashMap<u64, ConnMeta>,
last_meta_for_writer: HashMap<u64, ConnMeta>,
writer_idle_since_epoch_secs: HashMap<u64, u64>,
} }
impl BindingInner { impl BindingInner {
fn new() -> Self { fn new() -> Self {
Self { Self {
writers: HashMap::new(),
writer_for_conn: HashMap::new(), writer_for_conn: HashMap::new(),
conns_for_writer: HashMap::new(), conns_for_writer: HashMap::new(),
meta: HashMap::new(), meta: HashMap::new(),
last_meta_for_writer: HashMap::new(),
writer_idle_since_epoch_secs: HashMap::new(),
} }
} }
} }
@@ -149,6 +147,10 @@ impl ConnRegistry {
}, },
binding: BindingState { binding: BindingState {
inner: Mutex::new(BindingInner::new()), inner: Mutex::new(BindingInner::new()),
writer_idle_since_epoch_secs: DashMap::new(),
bound_clients_by_writer: DashMap::new(),
active_sessions_by_target_dc: DashMap::new(),
last_meta_for_writer: DashMap::new(),
}, },
next_id: AtomicU64::new(start), next_id: AtomicU64::new(start),
route_channel_capacity, route_channel_capacity,

View File

@@ -13,13 +13,63 @@ use super::{
}; };
impl ConnRegistry { impl ConnRegistry {
fn set_writer_bound_count(&self, writer_id: u64, count: usize) {
self.binding
.bound_clients_by_writer
.insert(writer_id, count);
if count == 0 {
self.binding
.writer_idle_since_epoch_secs
.entry(writer_id)
.or_insert_with(Self::now_epoch_secs);
} else {
self.binding.writer_idle_since_epoch_secs.remove(&writer_id);
}
}
fn adjust_active_target_dc(&self, target_dc: i16, delta: isize) {
if target_dc == 0 || delta == 0 {
return;
}
if delta > 0 {
self.binding
.active_sessions_by_target_dc
.entry(target_dc)
.and_modify(|count| *count = count.saturating_add(delta as usize))
.or_insert(delta as usize);
return;
}
let remove = if let Some(mut count) = self
.binding
.active_sessions_by_target_dc
.get_mut(&target_dc)
{
let decrement = delta.unsigned_abs();
*count = count.saturating_sub(decrement);
*count == 0
} else {
false
};
if remove {
self.binding.active_sessions_by_target_dc.remove(&target_dc);
}
}
pub async fn register_writer(&self, writer_id: u64, tx: mpsc::Sender<WriterCommand>) { pub async fn register_writer(&self, writer_id: u64, tx: mpsc::Sender<WriterCommand>) {
let mut binding = self.binding.inner.lock().await; let mut binding = self.binding.inner.lock().await;
binding.writers.insert(writer_id, tx.clone());
binding binding
.conns_for_writer .conns_for_writer
.entry(writer_id) .entry(writer_id)
.or_insert_with(HashSet::new); .or_insert_with(HashSet::new);
self.binding
.bound_clients_by_writer
.entry(writer_id)
.or_insert(0);
self.binding
.writer_idle_since_epoch_secs
.entry(writer_id)
.or_insert_with(Self::now_epoch_secs);
self.writers.map.insert(writer_id, tx); self.writers.map.insert(writer_id, tx);
} }
@@ -29,19 +79,18 @@ impl ConnRegistry {
self.routing.byte_budget.remove(&id); self.routing.byte_budget.remove(&id);
self.hot_binding.map.remove(&id); self.hot_binding.map.remove(&id);
let mut binding = self.binding.inner.lock().await; let mut binding = self.binding.inner.lock().await;
binding.meta.remove(&id); let previous_meta = binding.meta.remove(&id);
if let Some(writer_id) = binding.writer_for_conn.remove(&id) { if let Some(meta) = previous_meta.as_ref() {
let became_empty = if let Some(set) = binding.conns_for_writer.get_mut(&writer_id) { self.adjust_active_target_dc(meta.target_dc, -1);
set.remove(&id);
set.is_empty()
} else {
false
};
if became_empty {
binding
.writer_idle_since_epoch_secs
.insert(writer_id, Self::now_epoch_secs());
} }
if let Some(writer_id) = binding.writer_for_conn.remove(&id) {
let next_count = if let Some(set) = binding.conns_for_writer.get_mut(&writer_id) {
set.remove(&id);
set.len()
} else {
0
};
self.set_writer_bound_count(writer_id, next_count);
return Some(writer_id); return Some(writer_id);
} }
None None
@@ -248,7 +297,7 @@ impl ConnRegistry {
if !self.routing.map.contains_key(&conn_id) { if !self.routing.map.contains_key(&conn_id) {
return false; return false;
} }
if !binding.writers.contains_key(&writer_id) { if !self.writers.map.contains_key(&writer_id) {
return false; return false;
} }
@@ -256,28 +305,32 @@ impl ConnRegistry {
if let Some(previous_writer_id) = previous_writer_id if let Some(previous_writer_id) = previous_writer_id
&& previous_writer_id != writer_id && previous_writer_id != writer_id
{ {
let became_empty = let next_count =
if let Some(set) = binding.conns_for_writer.get_mut(&previous_writer_id) { if let Some(set) = binding.conns_for_writer.get_mut(&previous_writer_id) {
set.remove(&conn_id); set.remove(&conn_id);
set.is_empty() set.len()
} else { } else {
false 0
}; };
if became_empty { self.set_writer_bound_count(previous_writer_id, next_count);
binding
.writer_idle_since_epoch_secs
.insert(previous_writer_id, Self::now_epoch_secs());
}
} }
binding.meta.insert(conn_id, meta.clone()); if let Some(previous_meta) = binding.meta.insert(conn_id, meta.clone()) {
binding.last_meta_for_writer.insert(writer_id, meta.clone()); self.adjust_active_target_dc(previous_meta.target_dc, -1);
binding.writer_idle_since_epoch_secs.remove(&writer_id); }
binding self.adjust_active_target_dc(meta.target_dc, 1);
self.binding
.last_meta_for_writer
.insert(writer_id, meta.clone());
let next_count = {
let set = binding
.conns_for_writer .conns_for_writer
.entry(writer_id) .entry(writer_id)
.or_insert_with(HashSet::new) .or_insert_with(HashSet::new);
.insert(conn_id); set.insert(conn_id);
set.len()
};
self.set_writer_bound_count(writer_id, next_count);
self.hot_binding self.hot_binding
.map .map
.insert(conn_id, HotConnBinding { writer_id, meta }); .insert(conn_id, HotConnBinding { writer_id, meta });
@@ -290,27 +343,38 @@ impl ConnRegistry {
.conns_for_writer .conns_for_writer
.entry(writer_id) .entry(writer_id)
.or_insert_with(HashSet::new); .or_insert_with(HashSet::new);
binding let count = binding
.writer_idle_since_epoch_secs .conns_for_writer
.entry(writer_id) .get(&writer_id)
.or_insert(Self::now_epoch_secs()); .map(|set| set.len())
.unwrap_or(0);
self.set_writer_bound_count(writer_id, count);
} }
pub async fn get_last_writer_meta(&self, writer_id: u64) -> Option<ConnMeta> { pub async fn get_last_writer_meta(&self, writer_id: u64) -> Option<ConnMeta> {
let binding = self.binding.inner.lock().await; self.binding
binding.last_meta_for_writer.get(&writer_id).cloned() .last_meta_for_writer
.get(&writer_id)
.map(|entry| entry.value().clone())
} }
pub async fn writer_idle_since_snapshot(&self) -> HashMap<u64, u64> { pub async fn writer_idle_since_snapshot(&self) -> HashMap<u64, u64> {
let binding = self.binding.inner.lock().await; self.binding
binding.writer_idle_since_epoch_secs.clone() .writer_idle_since_epoch_secs
.iter()
.map(|entry| (*entry.key(), *entry.value()))
.collect()
} }
pub async fn writer_idle_since_for_writer_ids(&self, writer_ids: &[u64]) -> HashMap<u64, u64> { pub async fn writer_idle_since_for_writer_ids(&self, writer_ids: &[u64]) -> HashMap<u64, u64> {
let binding = self.binding.inner.lock().await;
let mut out = HashMap::<u64, u64>::with_capacity(writer_ids.len()); let mut out = HashMap::<u64, u64>::with_capacity(writer_ids.len());
for writer_id in writer_ids { for writer_id in writer_ids {
if let Some(idle_since) = binding.writer_idle_since_epoch_secs.get(writer_id).copied() { if let Some(idle_since) = self
.binding
.writer_idle_since_epoch_secs
.get(writer_id)
.map(|entry| *entry.value())
{
out.insert(*writer_id, idle_since); out.insert(*writer_id, idle_since);
} }
} }
@@ -320,25 +384,19 @@ impl ConnRegistry {
pub(in crate::transport::middle_proxy) async fn writer_activity_snapshot( pub(in crate::transport::middle_proxy) async fn writer_activity_snapshot(
&self, &self,
) -> WriterActivitySnapshot { ) -> WriterActivitySnapshot {
let binding = self.binding.inner.lock().await;
let mut bound_clients_by_writer = HashMap::<u64, usize>::new();
let mut active_sessions_by_target_dc = HashMap::<i16, usize>::new();
for (writer_id, conn_ids) in &binding.conns_for_writer {
bound_clients_by_writer.insert(*writer_id, conn_ids.len());
}
for conn_meta in binding.meta.values() {
if conn_meta.target_dc == 0 {
continue;
}
*active_sessions_by_target_dc
.entry(conn_meta.target_dc)
.or_insert(0) += 1;
}
WriterActivitySnapshot { WriterActivitySnapshot {
bound_clients_by_writer, bound_clients_by_writer: self
active_sessions_by_target_dc, .binding
.bound_clients_by_writer
.iter()
.map(|entry| (*entry.key(), *entry.value()))
.collect(),
active_sessions_by_target_dc: self
.binding
.active_sessions_by_target_dc
.iter()
.map(|entry| (*entry.key(), *entry.value()))
.collect(),
} }
} }
@@ -393,10 +451,10 @@ impl ConnRegistry {
pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> { pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> {
let mut binding = self.binding.inner.lock().await; let mut binding = self.binding.inner.lock().await;
binding.writers.remove(&writer_id);
self.writers.map.remove(&writer_id); self.writers.map.remove(&writer_id);
binding.last_meta_for_writer.remove(&writer_id); self.binding.last_meta_for_writer.remove(&writer_id);
binding.writer_idle_since_epoch_secs.remove(&writer_id); self.binding.writer_idle_since_epoch_secs.remove(&writer_id);
self.binding.bound_clients_by_writer.remove(&writer_id);
let conns = binding let conns = binding
.conns_for_writer .conns_for_writer
.remove(&writer_id) .remove(&writer_id)
@@ -410,6 +468,10 @@ impl ConnRegistry {
continue; continue;
} }
binding.writer_for_conn.remove(&conn_id); binding.writer_for_conn.remove(&conn_id);
let meta = binding.meta.remove(&conn_id);
if let Some(meta) = meta.as_ref() {
self.adjust_active_target_dc(meta.target_dc, -1);
}
let remove_hot = self let remove_hot = self
.hot_binding .hot_binding
.map .map
@@ -419,11 +481,8 @@ impl ConnRegistry {
if remove_hot { if remove_hot {
self.hot_binding.map.remove(&conn_id); self.hot_binding.map.remove(&conn_id);
} }
if let Some(m) = binding.meta.get(&conn_id) { if let Some(m) = meta {
out.push(BoundConn { out.push(BoundConn { conn_id, meta: m });
conn_id,
meta: m.clone(),
});
} }
} }
out out
@@ -438,11 +497,10 @@ impl ConnRegistry {
} }
pub async fn is_writer_empty(&self, writer_id: u64) -> bool { pub async fn is_writer_empty(&self, writer_id: u64) -> bool {
let binding = self.binding.inner.lock().await; self.binding
binding .bound_clients_by_writer
.conns_for_writer
.get(&writer_id) .get(&writer_id)
.map(|s| s.is_empty()) .map(|count| *count.value() == 0)
.unwrap_or(true) .unwrap_or(true)
} }
@@ -457,21 +515,20 @@ impl ConnRegistry {
return false; return false;
} }
binding.writers.remove(&writer_id);
self.writers.map.remove(&writer_id); self.writers.map.remove(&writer_id);
binding.last_meta_for_writer.remove(&writer_id); self.binding.last_meta_for_writer.remove(&writer_id);
binding.writer_idle_since_epoch_secs.remove(&writer_id); self.binding.writer_idle_since_epoch_secs.remove(&writer_id);
self.binding.bound_clients_by_writer.remove(&writer_id);
binding.conns_for_writer.remove(&writer_id); binding.conns_for_writer.remove(&writer_id);
true true
} }
#[allow(dead_code)] #[allow(dead_code)]
pub(super) async fn non_empty_writer_ids(&self, writer_ids: &[u64]) -> HashSet<u64> { pub(super) async fn non_empty_writer_ids(&self, writer_ids: &[u64]) -> HashSet<u64> {
let binding = self.binding.inner.lock().await;
let mut out = HashSet::<u64>::with_capacity(writer_ids.len()); let mut out = HashSet::<u64>::with_capacity(writer_ids.len());
for writer_id in writer_ids { for writer_id in writer_ids {
if let Some(conns) = binding.conns_for_writer.get(writer_id) if let Some(count) = self.binding.bound_clients_by_writer.get(writer_id)
&& !conns.is_empty() && *count.value() > 0
{ {
out.insert(*writer_id); out.insert(*writer_id);
} }

View File

@@ -6,6 +6,7 @@ use std::sync::Arc;
use std::sync::atomic::Ordering; use std::sync::atomic::Ordering;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use tokio::sync::mpsc;
use tokio::sync::mpsc::error::TrySendError; use tokio::sync::mpsc::error::TrySendError;
use tracing::{debug, warn}; use tracing::{debug, warn};
@@ -15,7 +16,6 @@ use super::registry::ConnMeta;
use super::wire::build_proxy_req_payload; use super::wire::build_proxy_req_payload;
use crate::config::{MeRouteNoWriterMode, MeWriterPickMode}; use crate::config::{MeRouteNoWriterMode, MeWriterPickMode};
use crate::error::{ProxyError, Result}; use crate::error::{ProxyError, Result};
use crate::network::IpFamily;
use crate::stream::PooledBuffer; use crate::stream::PooledBuffer;
use rand::seq::SliceRandom; use rand::seq::SliceRandom;
@@ -34,6 +34,11 @@ mod close;
mod recovery; mod recovery;
mod selection; mod selection;
enum WriterCommandReserveError {
Closed,
TimedOut,
}
fn proxy_tag_array(tag: Option<&[u8]>) -> Option<[u8; 16]> { fn proxy_tag_array(tag: Option<&[u8]>) -> Option<[u8; 16]> {
tag.and_then(|tag| <[u8; 16]>::try_from(tag).ok()) tag.and_then(|tag| <[u8; 16]>::try_from(tag).ok())
} }
@@ -45,6 +50,21 @@ fn proxy_req_payload_from_command(cmd: WriterCommand) -> Option<PooledBuffer> {
} }
} }
async fn reserve_writer_command_slot(
tx: &mpsc::Sender<WriterCommand>,
wait: Option<Duration>,
) -> std::result::Result<mpsc::OwnedPermit<WriterCommand>, WriterCommandReserveError> {
let reserve = tx.clone().reserve_owned();
match wait {
Some(wait) => match tokio::time::timeout(wait, reserve).await {
Ok(Ok(permit)) => Ok(permit),
Ok(Err(_)) => Err(WriterCommandReserveError::Closed),
Err(_) => Err(WriterCommandReserveError::TimedOut),
},
None => reserve.await.map_err(|_| WriterCommandReserveError::Closed),
}
}
impl MePool { impl MePool {
/// Send RPC_PROXY_REQ. `tag_override`: per-user ad_tag (from access.user_ad_tags); if None, uses pool default. /// Send RPC_PROXY_REQ. `tag_override`: per-user ad_tag (from access.user_ad_tags); if None, uses pool default.
pub async fn send_proxy_req( pub async fn send_proxy_req(
@@ -105,10 +125,26 @@ impl MePool {
return Ok(()); return Ok(());
} }
Err(TrySendError::Full(cmd)) => { Err(TrySendError::Full(cmd)) => {
if current.tx.send(cmd).await.is_ok() { match reserve_writer_command_slot(
&current.tx,
self.route_runtime.me_route_blocking_send_timeout,
)
.await
{
Ok(permit) => {
permit.send(cmd);
self.note_hybrid_route_success(); self.note_hybrid_route_success();
return Ok(()); return Ok(());
} }
Err(WriterCommandReserveError::TimedOut) => {
self.stats
.increment_me_writer_pick_full_total(self.writer_pick_mode());
return Err(ProxyError::Proxy(
"ME writer channel full within blocking send timeout".into(),
));
}
Err(WriterCommandReserveError::Closed) => {}
}
warn!(writer_id = current.writer_id, "ME writer channel closed"); warn!(writer_id = current.writer_id, "ME writer channel closed");
self.remove_writer_and_close_clients(current.writer_id) self.remove_writer_and_close_clients(current.writer_id)
.await; .await;
@@ -124,9 +160,8 @@ impl MePool {
} }
let mut writers_snapshot = { let mut writers_snapshot = {
let ws = self.writers.read().await; let ws = self.writers.snapshot();
if ws.is_empty() { if ws.is_empty() {
drop(ws);
match no_writer_mode { match no_writer_mode {
MeRouteNoWriterMode::AsyncRecoveryFailfast => { MeRouteNoWriterMode::AsyncRecoveryFailfast => {
let deadline = *no_writer_deadline.get_or_insert_with(|| { let deadline = *no_writer_deadline.get_or_insert_with(|| {
@@ -154,38 +189,28 @@ impl MePool {
for _ in for _ in
0..self.route_runtime.me_route_inline_recovery_attempts.max(1) 0..self.route_runtime.me_route_inline_recovery_attempts.max(1)
{ {
for family in self.family_order() { let preferred = self.preferred_endpoints_by_dc.load_full();
let map = match family { for (dc, addrs) in preferred.iter() {
IpFamily::V4 => self.proxy_map_v4.read().await.clone(), for addr in addrs {
IpFamily::V6 => self.proxy_map_v6.read().await.clone(),
};
for (dc, addrs) in &map {
for (ip, port) in addrs {
let addr = SocketAddr::new(*ip, *port);
let _ = self let _ = self
.connect_one_for_dc( .connect_one_for_dc(*addr, *dc, self.rng.as_ref())
addr,
*dc,
self.rng.as_ref(),
)
.await; .await;
} }
} }
} if !self.writers.snapshot().is_empty() {
if !self.writers.read().await.is_empty() {
break; break;
} }
} }
} }
if !self.writers.read().await.is_empty() { if !self.writers.snapshot().is_empty() {
continue; continue;
} }
let deadline = *no_writer_deadline.get_or_insert_with(|| { let deadline = *no_writer_deadline.get_or_insert_with(|| {
Instant::now() + self.route_runtime.me_route_inline_recovery_wait Instant::now() + self.route_runtime.me_route_inline_recovery_wait
}); });
if !self.wait_for_writer_until(deadline).await { if !self.wait_for_writer_until(deadline).await {
if !self.writers.read().await.is_empty() { if !self.writers.snapshot().is_empty() {
continue; continue;
} }
self.stats.increment_me_no_writer_failfast_total(); self.stats.increment_me_no_writer_failfast_total();
@@ -222,7 +247,7 @@ impl MePool {
} }
} }
} }
ws.clone() ws
}; };
let mut candidate_indices = self let mut candidate_indices = self
@@ -285,7 +310,12 @@ impl MePool {
)); ));
} }
emergency_attempts += 1; emergency_attempts += 1;
let mut endpoints = self.endpoint_candidates_for_target_dc(routed_dc).await; let mut endpoints = self
.preferred_endpoints_by_dc
.load()
.get(&routed_dc)
.cloned()
.unwrap_or_default();
endpoints.shuffle(&mut rand::rng()); endpoints.shuffle(&mut rand::rng());
for addr in endpoints { for addr in endpoints {
if self if self
@@ -298,9 +328,7 @@ impl MePool {
} }
tokio::time::sleep(Duration::from_millis(100 * emergency_attempts as u64)) tokio::time::sleep(Duration::from_millis(100 * emergency_attempts as u64))
.await; .await;
let ws2 = self.writers.read().await; writers_snapshot = self.writers.snapshot();
writers_snapshot = ws2.clone();
drop(ws2);
candidate_indices = self candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, routed_dc, false) .candidate_indices_for_dc(&writers_snapshot, routed_dc, false)
.await; .await;
@@ -563,13 +591,27 @@ impl MePool {
self.note_hybrid_route_success(); self.note_hybrid_route_success();
return Ok(()); return Ok(());
} }
Err(TrySendError::Full(cmd)) => match current.tx.send(cmd).await { Err(TrySendError::Full(cmd)) => {
Ok(()) => { match reserve_writer_command_slot(
&current.tx,
self.route_runtime.me_route_blocking_send_timeout,
)
.await
{
Ok(permit) => {
permit.send(cmd);
self.note_hybrid_route_success(); self.note_hybrid_route_success();
return Ok(()); return Ok(());
} }
Err(send_err) => { Err(WriterCommandReserveError::TimedOut) => {
let Some(payload) = proxy_req_payload_from_command(send_err.0) else { self.stats
.increment_me_writer_pick_full_total(self.writer_pick_mode());
return Err(ProxyError::Proxy(
"ME writer channel full within blocking send timeout".into(),
));
}
Err(WriterCommandReserveError::Closed) => {
let Some(payload) = proxy_req_payload_from_command(cmd) else {
return Err(ProxyError::Proxy( return Err(ProxyError::Proxy(
"ME writer rejected unexpected command type".into(), "ME writer rejected unexpected command type".into(),
)); ));
@@ -589,7 +631,8 @@ impl MePool {
) )
.await; .await;
} }
}, }
}
Err(TrySendError::Closed(cmd)) => { Err(TrySendError::Closed(cmd)) => {
let Some(payload) = proxy_req_payload_from_command(cmd) else { let Some(payload) = proxy_req_payload_from_command(cmd) else {
return Err(ProxyError::Proxy( return Err(ProxyError::Proxy(

View File

@@ -10,19 +10,44 @@ use crate::protocol::constants::{RPC_CLOSE_CONN_U32, RPC_CLOSE_EXT_U32};
use super::super::MePool; use super::super::MePool;
use super::super::codec::{WriterCommand, build_control_payload}; use super::super::codec::{WriterCommand, build_control_payload};
use super::{WriterCommandReserveError, reserve_writer_command_slot};
const ME_CLOSE_SIGNAL_SEND_TIMEOUT: Duration = Duration::from_millis(50);
impl MePool { impl MePool {
/// Sends an extended close signal for a client-bound ME connection.
pub async fn send_close(self: &Arc<Self>, conn_id: u64) -> Result<()> { pub async fn send_close(self: &Arc<Self>, conn_id: u64) -> Result<()> {
if let Some(w) = self.registry.get_writer(conn_id).await { if let Some(w) = self.registry.get_writer(conn_id).await {
let payload = build_control_payload(RPC_CLOSE_EXT_U32, conn_id); let payload = build_control_payload(RPC_CLOSE_EXT_U32, conn_id);
if w.tx match w.tx.try_send(WriterCommand::ControlAndFlush(payload)) {
.send(WriterCommand::ControlAndFlush(payload)) Ok(()) => {}
Err(TrySendError::Full(cmd)) => {
match reserve_writer_command_slot(&w.tx, Some(ME_CLOSE_SIGNAL_SEND_TIMEOUT))
.await .await
.is_err()
{ {
debug!("ME close write failed"); Ok(permit) => {
permit.send(cmd);
}
Err(WriterCommandReserveError::TimedOut) => {
debug!(conn_id, "ME close skipped: writer command channel is full");
}
Err(WriterCommandReserveError::Closed) => {
debug!(
conn_id,
"ME close skipped: writer command channel is closed"
);
self.remove_writer_and_close_clients(w.writer_id).await; self.remove_writer_and_close_clients(w.writer_id).await;
} }
}
}
Err(TrySendError::Closed(_)) => {
debug!(
conn_id,
"ME close skipped: writer command channel is closed"
);
self.remove_writer_and_close_clients(w.writer_id).await;
}
}
} else { } else {
debug!(conn_id, "ME close skipped (writer missing)"); debug!(conn_id, "ME close skipped (writer missing)");
} }
@@ -31,13 +56,16 @@ impl MePool {
Ok(()) Ok(())
} }
/// Sends the compact close signal used by ME-side forced connection teardown.
pub async fn send_close_conn(self: &Arc<Self>, conn_id: u64) -> Result<()> { pub async fn send_close_conn(self: &Arc<Self>, conn_id: u64) -> Result<()> {
if let Some(w) = self.registry.get_writer(conn_id).await { if let Some(w) = self.registry.get_writer(conn_id).await {
let payload = build_control_payload(RPC_CLOSE_CONN_U32, conn_id); let payload = build_control_payload(RPC_CLOSE_CONN_U32, conn_id);
match w.tx.try_send(WriterCommand::ControlAndFlush(payload)) { match w.tx.try_send(WriterCommand::ControlAndFlush(payload)) {
Ok(()) => {} Ok(()) => {}
Err(TrySendError::Full(cmd)) => { Err(TrySendError::Full(cmd)) => {
let _ = tokio::time::timeout(Duration::from_millis(50), w.tx.send(cmd)).await; let _ = reserve_writer_command_slot(&w.tx, Some(ME_CLOSE_SIGNAL_SEND_TIMEOUT))
.await
.map(|permit| permit.send(cmd));
} }
Err(TrySendError::Closed(_)) => { Err(TrySendError::Closed(_)) => {
debug!(conn_id, "ME close_conn skipped: writer channel closed"); debug!(conn_id, "ME close_conn skipped: writer channel closed");
@@ -51,6 +79,7 @@ impl MePool {
Ok(()) Ok(())
} }
/// Sends close signals for all currently registered ME-bound connections during shutdown.
pub async fn shutdown_send_close_conn_all(self: &Arc<Self>) -> usize { pub async fn shutdown_send_close_conn_all(self: &Arc<Self>) -> usize {
let conn_ids = self.registry.active_conn_ids().await; let conn_ids = self.registry.active_conn_ids().await;
let total = conn_ids.len(); let total = conn_ids.len();
@@ -60,6 +89,7 @@ impl MePool {
total total
} }
/// Returns the current number of active ME writers tracked by the pool.
pub fn connection_count(&self) -> usize { pub fn connection_count(&self) -> usize {
self.conn_count.load(Ordering::Relaxed) self.conn_count.load(Ordering::Relaxed)
} }

View File

@@ -1,13 +1,9 @@
use std::collections::HashSet;
use std::net::SocketAddr;
use std::sync::Arc; use std::sync::Arc;
use std::sync::atomic::Ordering; use std::sync::atomic::Ordering;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use tracing::warn; use tracing::warn;
use crate::network::IpFamily;
use super::super::MePool; use super::super::MePool;
use super::{ use super::{
HYBRID_GLOBAL_BURST_PERIOD_ROUNDS, HYBRID_RECENT_SUCCESS_WINDOW_MS, HYBRID_GLOBAL_BURST_PERIOD_ROUNDS, HYBRID_RECENT_SUCCESS_WINDOW_MS,
@@ -17,18 +13,18 @@ use super::{
impl MePool { impl MePool {
pub(super) async fn wait_for_writer_until(&self, deadline: Instant) -> bool { pub(super) async fn wait_for_writer_until(&self, deadline: Instant) -> bool {
let mut rx = self.writer_epoch.subscribe(); let mut rx = self.writer_epoch.subscribe();
if !self.writers.read().await.is_empty() { if !self.writers.snapshot().is_empty() {
return true; return true;
} }
let now = Instant::now(); let now = Instant::now();
if now >= deadline { if now >= deadline {
return !self.writers.read().await.is_empty(); return !self.writers.snapshot().is_empty();
} }
let timeout = deadline.saturating_duration_since(now); let timeout = deadline.saturating_duration_since(now);
if tokio::time::timeout(timeout, rx.changed()).await.is_ok() { if tokio::time::timeout(timeout, rx.changed()).await.is_ok() {
return !self.writers.read().await.is_empty(); return !self.writers.snapshot().is_empty();
} }
!self.writers.read().await.is_empty() !self.writers.snapshot().is_empty()
} }
pub(super) async fn wait_for_candidate_until(&self, routed_dc: i32, deadline: Instant) -> bool { pub(super) async fn wait_for_candidate_until(&self, routed_dc: i32, deadline: Instant) -> bool {
@@ -58,11 +54,11 @@ impl MePool {
pub(super) async fn has_candidate_for_target_dc(&self, routed_dc: i32) -> bool { pub(super) async fn has_candidate_for_target_dc(&self, routed_dc: i32) -> bool {
let writers_snapshot = { let writers_snapshot = {
let ws = self.writers.read().await; let ws = self.writers.snapshot();
if ws.is_empty() { if ws.is_empty() {
return false; return false;
} }
ws.clone() ws
}; };
let mut candidate_indices = self let mut candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, routed_dc, false) .candidate_indices_for_dc(&writers_snapshot, routed_dc, false)
@@ -79,7 +75,7 @@ impl MePool {
self: &Arc<Self>, self: &Arc<Self>,
routed_dc: i32, routed_dc: i32,
) -> bool { ) -> bool {
let endpoints = self.endpoint_candidates_for_target_dc(routed_dc).await; let endpoints = self.preferred_endpoints_for_dc(routed_dc).await;
if endpoints.is_empty() { if endpoints.is_empty() {
return false; return false;
} }
@@ -92,32 +88,18 @@ impl MePool {
pub(super) async fn trigger_async_recovery_global(self: &Arc<Self>) { pub(super) async fn trigger_async_recovery_global(self: &Arc<Self>) {
self.stats.increment_me_async_recovery_trigger_total(); self.stats.increment_me_async_recovery_trigger_total();
let mut seen = HashSet::<(i32, SocketAddr)>::new(); let preferred = self.preferred_endpoints_by_dc.load();
for family in self.family_order() { let mut triggered = 0usize;
let map_guard = match family { for (dc, addrs) in preferred.iter() {
IpFamily::V4 => self.proxy_map_v4.read().await, for addr in addrs {
IpFamily::V6 => self.proxy_map_v6.read().await, self.trigger_immediate_refill_for_dc(*addr, *dc);
}; triggered = triggered.saturating_add(1);
for (dc, addrs) in map_guard.iter() { if triggered >= 8 {
for (ip, port) in addrs {
let addr = SocketAddr::new(*ip, *port);
if seen.insert((*dc, addr)) {
self.trigger_immediate_refill_for_dc(addr, *dc);
}
if seen.len() >= 8 {
return; return;
} }
} }
} }
} }
}
pub(super) async fn endpoint_candidates_for_target_dc(
&self,
routed_dc: i32,
) -> Vec<SocketAddr> {
self.preferred_endpoints_for_dc(routed_dc).await
}
pub(super) async fn maybe_trigger_hybrid_recovery( pub(super) async fn maybe_trigger_hybrid_recovery(
self: &Arc<Self>, self: &Arc<Self>,

View File

@@ -15,7 +15,10 @@ impl MePool {
routed_dc: i32, routed_dc: i32,
include_warm: bool, include_warm: bool,
) -> Vec<usize> { ) -> Vec<usize> {
let preferred = self.preferred_endpoints_for_dc(routed_dc).await; let preferred_snapshot = self.preferred_endpoints_by_dc.load();
let Some(preferred) = preferred_snapshot.get(&routed_dc) else {
return Vec::new();
};
if preferred.is_empty() { if preferred.is_empty() {
return Vec::new(); return Vec::new();
} }
@@ -25,7 +28,7 @@ impl MePool {
if !self.writer_eligible_for_selection(w, include_warm) { if !self.writer_eligible_for_selection(w, include_warm) {
continue; continue;
} }
if w.writer_dc == routed_dc && preferred.contains(&w.addr) { if w.writer_dc == routed_dc && preferred.binary_search(&w.addr).is_ok() {
out.push(idx); out.push(idx);
} }
} }