mirror of https://github.com/telemt/telemt.git
ME Pool Refactoring
This commit is contained in:
parent
1f255d0aa4
commit
4eebb4feb2
43
src/main.rs
43
src/main.rs
|
|
@ -392,26 +392,33 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
|
|||
);
|
||||
|
||||
let pool_size = config.general.middle_proxy_pool_size.max(1);
|
||||
match pool.init(pool_size, &rng).await {
|
||||
Ok(()) => {
|
||||
info!("Middle-End pool initialized successfully");
|
||||
loop {
|
||||
match pool.init(pool_size, &rng).await {
|
||||
Ok(()) => {
|
||||
info!("Middle-End pool initialized successfully");
|
||||
|
||||
// Phase 4: Start health monitor
|
||||
let pool_clone = pool.clone();
|
||||
let rng_clone = rng.clone();
|
||||
let min_conns = pool_size;
|
||||
tokio::spawn(async move {
|
||||
crate::transport::middle_proxy::me_health_monitor(
|
||||
pool_clone, rng_clone, min_conns,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
// Phase 4: Start health monitor
|
||||
let pool_clone = pool.clone();
|
||||
let rng_clone = rng.clone();
|
||||
let min_conns = pool_size;
|
||||
tokio::spawn(async move {
|
||||
crate::transport::middle_proxy::me_health_monitor(
|
||||
pool_clone, rng_clone, min_conns,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
|
||||
Some(pool)
|
||||
}
|
||||
Err(e) => {
|
||||
error!(error = %e, "Failed to initialize ME pool. Falling back to direct mode.");
|
||||
None
|
||||
break Some(pool);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
error = %e,
|
||||
retry_in_secs = 2,
|
||||
"ME pool is not ready yet; retrying startup initialization"
|
||||
);
|
||||
pool.reset_stun_state();
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
|
|||
let mut inflight: HashMap<(i32, IpFamily), usize> = HashMap::new();
|
||||
loop {
|
||||
tokio::time::sleep(Duration::from_secs(HEALTH_INTERVAL_SECS)).await;
|
||||
pool.prune_closed_writers().await;
|
||||
check_family(
|
||||
IpFamily::V4,
|
||||
&pool,
|
||||
|
|
|
|||
|
|
@ -1,17 +1,22 @@
|
|||
//! Middle Proxy RPC transport.
|
||||
|
||||
mod codec;
|
||||
mod config_updater;
|
||||
mod handshake;
|
||||
mod health;
|
||||
mod pool;
|
||||
mod pool_config;
|
||||
mod pool_init;
|
||||
mod pool_nat;
|
||||
mod pool_refill;
|
||||
mod pool_reinit;
|
||||
mod pool_writer;
|
||||
mod ping;
|
||||
mod reader;
|
||||
mod registry;
|
||||
mod rotation;
|
||||
mod send;
|
||||
mod secret;
|
||||
mod rotation;
|
||||
mod config_updater;
|
||||
mod wire;
|
||||
|
||||
use bytes::Bytes;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,81 @@
|
|||
use std::collections::HashMap;
|
||||
use std::net::IpAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use tracing::warn;
|
||||
|
||||
use super::pool::MePool;
|
||||
|
||||
impl MePool {
|
||||
pub async fn update_proxy_maps(
|
||||
&self,
|
||||
new_v4: HashMap<i32, Vec<(IpAddr, u16)>>,
|
||||
new_v6: Option<HashMap<i32, Vec<(IpAddr, u16)>>>,
|
||||
) -> bool {
|
||||
let mut changed = false;
|
||||
{
|
||||
let mut guard = self.proxy_map_v4.write().await;
|
||||
if !new_v4.is_empty() && *guard != new_v4 {
|
||||
*guard = new_v4;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
if let Some(v6) = new_v6 {
|
||||
let mut guard = self.proxy_map_v6.write().await;
|
||||
if !v6.is_empty() && *guard != v6 {
|
||||
*guard = v6;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
// Ensure negative DC entries mirror positives when absent (Telegram convention).
|
||||
{
|
||||
let mut guard = self.proxy_map_v4.write().await;
|
||||
let keys: Vec<i32> = guard.keys().cloned().collect();
|
||||
for k in keys.iter().cloned().filter(|k| *k > 0) {
|
||||
if !guard.contains_key(&-k)
|
||||
&& let Some(addrs) = guard.get(&k).cloned()
|
||||
{
|
||||
guard.insert(-k, addrs);
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
let mut guard = self.proxy_map_v6.write().await;
|
||||
let keys: Vec<i32> = guard.keys().cloned().collect();
|
||||
for k in keys.iter().cloned().filter(|k| *k > 0) {
|
||||
if !guard.contains_key(&-k)
|
||||
&& let Some(addrs) = guard.get(&k).cloned()
|
||||
{
|
||||
guard.insert(-k, addrs);
|
||||
}
|
||||
}
|
||||
}
|
||||
changed
|
||||
}
|
||||
|
||||
pub async fn update_secret(self: &Arc<Self>, new_secret: Vec<u8>) -> bool {
|
||||
if new_secret.len() < 32 {
|
||||
warn!(len = new_secret.len(), "proxy-secret update ignored (too short)");
|
||||
return false;
|
||||
}
|
||||
let mut guard = self.proxy_secret.write().await;
|
||||
if *guard != new_secret {
|
||||
*guard = new_secret;
|
||||
drop(guard);
|
||||
self.reconnect_all().await;
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
pub async fn reconnect_all(self: &Arc<Self>) {
|
||||
let ws = self.writers.read().await.clone();
|
||||
for w in ws {
|
||||
if let Ok(()) = self.connect_one(w.addr, self.rng.as_ref()).await {
|
||||
self.mark_writer_draining(w.id).await;
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,201 @@
|
|||
use std::collections::{HashMap, HashSet};
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::sync::Arc;
|
||||
|
||||
use rand::Rng;
|
||||
use rand::seq::SliceRandom;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::error::{ProxyError, Result};
|
||||
|
||||
use super::pool::MePool;
|
||||
|
||||
impl MePool {
|
||||
pub async fn init(self: &Arc<Self>, pool_size: usize, rng: &Arc<SecureRandom>) -> Result<()> {
|
||||
let family_order = self.family_order();
|
||||
let ks = self.key_selector().await;
|
||||
info!(
|
||||
me_servers = self.proxy_map_v4.read().await.len(),
|
||||
pool_size,
|
||||
key_selector = format_args!("0x{ks:08x}"),
|
||||
secret_len = self.proxy_secret.read().await.len(),
|
||||
"Initializing ME pool"
|
||||
);
|
||||
|
||||
for family in family_order {
|
||||
let map = self.proxy_map_for_family(family).await;
|
||||
let mut grouped_dc_addrs: HashMap<i32, Vec<(IpAddr, u16)>> = HashMap::new();
|
||||
for (dc, addrs) in map {
|
||||
if addrs.is_empty() {
|
||||
continue;
|
||||
}
|
||||
grouped_dc_addrs.entry(dc.abs()).or_default().extend(addrs);
|
||||
}
|
||||
let mut dc_addrs: Vec<(i32, Vec<(IpAddr, u16)>)> = grouped_dc_addrs
|
||||
.into_iter()
|
||||
.map(|(dc, mut addrs)| {
|
||||
addrs.sort_unstable();
|
||||
addrs.dedup();
|
||||
(dc, addrs)
|
||||
})
|
||||
.collect();
|
||||
dc_addrs.sort_unstable_by_key(|(dc, _)| *dc);
|
||||
|
||||
// Ensure at least one live writer per DC group; run missing DCs in parallel.
|
||||
let mut join = tokio::task::JoinSet::new();
|
||||
for (dc, addrs) in dc_addrs.iter().cloned() {
|
||||
if addrs.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let endpoints: HashSet<SocketAddr> = addrs
|
||||
.iter()
|
||||
.map(|(ip, port)| SocketAddr::new(*ip, *port))
|
||||
.collect();
|
||||
if self.active_writer_count_for_endpoints(&endpoints).await > 0 {
|
||||
continue;
|
||||
}
|
||||
let pool = Arc::clone(self);
|
||||
let rng_clone = Arc::clone(rng);
|
||||
join.spawn(async move { pool.connect_primary_for_dc(dc, addrs, rng_clone).await });
|
||||
}
|
||||
while join.join_next().await.is_some() {}
|
||||
|
||||
let mut missing_dcs = Vec::new();
|
||||
for (dc, addrs) in &dc_addrs {
|
||||
let endpoints: HashSet<SocketAddr> = addrs
|
||||
.iter()
|
||||
.map(|(ip, port)| SocketAddr::new(*ip, *port))
|
||||
.collect();
|
||||
if self.active_writer_count_for_endpoints(&endpoints).await == 0 {
|
||||
missing_dcs.push(*dc);
|
||||
}
|
||||
}
|
||||
if !missing_dcs.is_empty() {
|
||||
return Err(ProxyError::Proxy(format!(
|
||||
"ME init incomplete: no live writers for DC groups {missing_dcs:?}"
|
||||
)));
|
||||
}
|
||||
|
||||
// Warm reserve writers asynchronously so startup does not block after first working pool is ready.
|
||||
let pool = Arc::clone(self);
|
||||
let rng_clone = Arc::clone(rng);
|
||||
let dc_addrs_bg = dc_addrs.clone();
|
||||
tokio::spawn(async move {
|
||||
if pool.me_warmup_stagger_enabled {
|
||||
for (dc, addrs) in &dc_addrs_bg {
|
||||
for (ip, port) in addrs {
|
||||
if pool.connection_count() >= pool_size {
|
||||
break;
|
||||
}
|
||||
let addr = SocketAddr::new(*ip, *port);
|
||||
let jitter = rand::rng()
|
||||
.random_range(0..=pool.me_warmup_step_jitter.as_millis() as u64);
|
||||
let delay_ms = pool.me_warmup_step_delay.as_millis() as u64 + jitter;
|
||||
tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
|
||||
if let Err(e) = pool.connect_one(addr, rng_clone.as_ref()).await {
|
||||
debug!(%addr, dc = %dc, error = %e, "Extra ME connect failed (staggered)");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (dc, addrs) in &dc_addrs_bg {
|
||||
for (ip, port) in addrs {
|
||||
if pool.connection_count() >= pool_size {
|
||||
break;
|
||||
}
|
||||
let addr = SocketAddr::new(*ip, *port);
|
||||
if let Err(e) = pool.connect_one(addr, rng_clone.as_ref()).await {
|
||||
debug!(%addr, dc = %dc, error = %e, "Extra ME connect failed");
|
||||
}
|
||||
}
|
||||
if pool.connection_count() >= pool_size {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
debug!(
|
||||
target_pool_size = pool_size,
|
||||
current_pool_size = pool.connection_count(),
|
||||
"Background ME reserve warmup finished"
|
||||
);
|
||||
});
|
||||
|
||||
if !self.decision.effective_multipath && self.connection_count() > 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if self.writers.read().await.is_empty() {
|
||||
return Err(ProxyError::Proxy("No ME connections".into()));
|
||||
}
|
||||
info!(
|
||||
active_writers = self.connection_count(),
|
||||
"ME primary pool ready; reserve warmup continues in background"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn connect_primary_for_dc(
|
||||
self: Arc<Self>,
|
||||
dc: i32,
|
||||
mut addrs: Vec<(IpAddr, u16)>,
|
||||
rng: Arc<SecureRandom>,
|
||||
) -> bool {
|
||||
if addrs.is_empty() {
|
||||
return false;
|
||||
}
|
||||
addrs.shuffle(&mut rand::rng());
|
||||
if addrs.len() > 1 {
|
||||
let concurrency = 2usize;
|
||||
let mut join = tokio::task::JoinSet::new();
|
||||
let mut next_idx = 0usize;
|
||||
|
||||
while next_idx < addrs.len() || !join.is_empty() {
|
||||
while next_idx < addrs.len() && join.len() < concurrency {
|
||||
let (ip, port) = addrs[next_idx];
|
||||
next_idx += 1;
|
||||
let addr = SocketAddr::new(ip, port);
|
||||
let pool = Arc::clone(&self);
|
||||
let rng_clone = Arc::clone(&rng);
|
||||
join.spawn(async move {
|
||||
(addr, pool.connect_one(addr, rng_clone.as_ref()).await)
|
||||
});
|
||||
}
|
||||
|
||||
let Some(res) = join.join_next().await else {
|
||||
break;
|
||||
};
|
||||
match res {
|
||||
Ok((addr, Ok(()))) => {
|
||||
info!(%addr, dc = %dc, "ME connected");
|
||||
join.abort_all();
|
||||
while join.join_next().await.is_some() {}
|
||||
return true;
|
||||
}
|
||||
Ok((addr, Err(e))) => {
|
||||
warn!(%addr, dc = %dc, error = %e, "ME connect failed, trying next");
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(dc = %dc, error = %e, "ME connect task failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
warn!(dc = %dc, "All ME servers for DC failed at init");
|
||||
return false;
|
||||
}
|
||||
|
||||
for (ip, port) in addrs {
|
||||
let addr = SocketAddr::new(ip, port);
|
||||
match self.connect_one(addr, rng.as_ref()).await {
|
||||
Ok(()) => {
|
||||
info!(%addr, dc = %dc, "ME connected");
|
||||
return true;
|
||||
}
|
||||
Err(e) => warn!(%addr, dc = %dc, error = %e, "ME connect failed, trying next"),
|
||||
}
|
||||
}
|
||||
warn!(dc = %dc, "All ME servers for DC failed at init");
|
||||
false
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
use std::collections::HashSet;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::crypto::SecureRandom;
|
||||
|
||||
use super::pool::MePool;
|
||||
|
||||
impl MePool {
|
||||
pub(super) async fn connect_endpoints_round_robin(
|
||||
self: &Arc<Self>,
|
||||
endpoints: &[SocketAddr],
|
||||
rng: &SecureRandom,
|
||||
) -> bool {
|
||||
if endpoints.is_empty() {
|
||||
return false;
|
||||
}
|
||||
let start = (self.rr.fetch_add(1, Ordering::Relaxed) as usize) % endpoints.len();
|
||||
for offset in 0..endpoints.len() {
|
||||
let idx = (start + offset) % endpoints.len();
|
||||
let addr = endpoints[idx];
|
||||
match self.connect_one(addr, rng).await {
|
||||
Ok(()) => return true,
|
||||
Err(e) => debug!(%addr, error = %e, "ME connect failed during round-robin warmup"),
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
async fn endpoints_for_same_dc(&self, addr: SocketAddr) -> Vec<SocketAddr> {
|
||||
let mut target_dc = HashSet::<i32>::new();
|
||||
let mut endpoints = HashSet::<SocketAddr>::new();
|
||||
|
||||
if self.decision.ipv4_me {
|
||||
let map = self.proxy_map_v4.read().await.clone();
|
||||
for (dc, addrs) in &map {
|
||||
if addrs
|
||||
.iter()
|
||||
.any(|(ip, port)| SocketAddr::new(*ip, *port) == addr)
|
||||
{
|
||||
target_dc.insert(dc.abs());
|
||||
}
|
||||
}
|
||||
for dc in &target_dc {
|
||||
for key in [*dc, -*dc] {
|
||||
if let Some(addrs) = map.get(&key) {
|
||||
for (ip, port) in addrs {
|
||||
endpoints.insert(SocketAddr::new(*ip, *port));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.decision.ipv6_me {
|
||||
let map = self.proxy_map_v6.read().await.clone();
|
||||
for (dc, addrs) in &map {
|
||||
if addrs
|
||||
.iter()
|
||||
.any(|(ip, port)| SocketAddr::new(*ip, *port) == addr)
|
||||
{
|
||||
target_dc.insert(dc.abs());
|
||||
}
|
||||
}
|
||||
for dc in &target_dc {
|
||||
for key in [*dc, -*dc] {
|
||||
if let Some(addrs) = map.get(&key) {
|
||||
for (ip, port) in addrs {
|
||||
endpoints.insert(SocketAddr::new(*ip, *port));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut sorted: Vec<SocketAddr> = endpoints.into_iter().collect();
|
||||
sorted.sort_unstable();
|
||||
sorted
|
||||
}
|
||||
|
||||
async fn refill_writer_after_loss(self: &Arc<Self>, addr: SocketAddr) -> bool {
|
||||
let fast_retries = self.me_reconnect_fast_retry_count.max(1);
|
||||
|
||||
for attempt in 0..fast_retries {
|
||||
self.stats.increment_me_reconnect_attempt();
|
||||
match self.connect_one(addr, self.rng.as_ref()).await {
|
||||
Ok(()) => {
|
||||
self.stats.increment_me_reconnect_success();
|
||||
self.stats.increment_me_writer_restored_same_endpoint_total();
|
||||
info!(
|
||||
%addr,
|
||||
attempt = attempt + 1,
|
||||
"ME writer restored on the same endpoint"
|
||||
);
|
||||
return true;
|
||||
}
|
||||
Err(e) => {
|
||||
debug!(
|
||||
%addr,
|
||||
attempt = attempt + 1,
|
||||
error = %e,
|
||||
"ME immediate same-endpoint reconnect failed"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let dc_endpoints = self.endpoints_for_same_dc(addr).await;
|
||||
if dc_endpoints.is_empty() {
|
||||
self.stats.increment_me_refill_failed_total();
|
||||
return false;
|
||||
}
|
||||
|
||||
for attempt in 0..fast_retries {
|
||||
self.stats.increment_me_reconnect_attempt();
|
||||
if self
|
||||
.connect_endpoints_round_robin(&dc_endpoints, self.rng.as_ref())
|
||||
.await
|
||||
{
|
||||
self.stats.increment_me_reconnect_success();
|
||||
self.stats.increment_me_writer_restored_fallback_total();
|
||||
info!(
|
||||
%addr,
|
||||
attempt = attempt + 1,
|
||||
"ME writer restored via DC fallback endpoint"
|
||||
);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
self.stats.increment_me_refill_failed_total();
|
||||
false
|
||||
}
|
||||
|
||||
pub(crate) fn trigger_immediate_refill(self: &Arc<Self>, addr: SocketAddr) {
|
||||
let pool = Arc::clone(self);
|
||||
tokio::spawn(async move {
|
||||
{
|
||||
let mut guard = pool.refill_inflight.lock().await;
|
||||
if !guard.insert(addr) {
|
||||
pool.stats.increment_me_refill_skipped_inflight_total();
|
||||
return;
|
||||
}
|
||||
}
|
||||
pool.stats.increment_me_refill_triggered_total();
|
||||
|
||||
let restored = pool.refill_writer_after_loss(addr).await;
|
||||
if !restored {
|
||||
warn!(%addr, "ME immediate refill failed");
|
||||
}
|
||||
|
||||
let mut guard = pool.refill_inflight.lock().await;
|
||||
guard.remove(&addr);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,383 @@
|
|||
use std::collections::{HashMap, HashSet};
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::Duration;
|
||||
|
||||
use rand::Rng;
|
||||
use rand::seq::SliceRandom;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::crypto::SecureRandom;
|
||||
|
||||
use super::pool::MePool;
|
||||
|
||||
impl MePool {
|
||||
fn coverage_ratio(
|
||||
desired_by_dc: &HashMap<i32, HashSet<SocketAddr>>,
|
||||
active_writer_addrs: &HashSet<SocketAddr>,
|
||||
) -> (f32, Vec<i32>) {
|
||||
if desired_by_dc.is_empty() {
|
||||
return (1.0, Vec::new());
|
||||
}
|
||||
|
||||
let mut missing_dc = Vec::<i32>::new();
|
||||
let mut covered = 0usize;
|
||||
for (dc, endpoints) in desired_by_dc {
|
||||
if endpoints.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if endpoints
|
||||
.iter()
|
||||
.any(|addr| active_writer_addrs.contains(addr))
|
||||
{
|
||||
covered += 1;
|
||||
} else {
|
||||
missing_dc.push(*dc);
|
||||
}
|
||||
}
|
||||
|
||||
missing_dc.sort_unstable();
|
||||
let total = desired_by_dc.len().max(1);
|
||||
let ratio = (covered as f32) / (total as f32);
|
||||
(ratio, missing_dc)
|
||||
}
|
||||
|
||||
pub async fn reconcile_connections(self: &Arc<Self>, rng: &SecureRandom) {
|
||||
let writers = self.writers.read().await;
|
||||
let current: HashSet<SocketAddr> = writers
|
||||
.iter()
|
||||
.filter(|w| !w.draining.load(Ordering::Relaxed))
|
||||
.map(|w| w.addr)
|
||||
.collect();
|
||||
drop(writers);
|
||||
|
||||
for family in self.family_order() {
|
||||
let map = self.proxy_map_for_family(family).await;
|
||||
for (_dc, addrs) in &map {
|
||||
let dc_addrs: Vec<SocketAddr> = addrs
|
||||
.iter()
|
||||
.map(|(ip, port)| SocketAddr::new(*ip, *port))
|
||||
.collect();
|
||||
if !dc_addrs.iter().any(|a| current.contains(a)) {
|
||||
let mut shuffled = dc_addrs.clone();
|
||||
shuffled.shuffle(&mut rand::rng());
|
||||
for addr in shuffled {
|
||||
if self.connect_one(addr, rng).await.is_ok() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !self.decision.effective_multipath && !current.is_empty() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn desired_dc_endpoints(&self) -> HashMap<i32, HashSet<SocketAddr>> {
|
||||
let mut out: HashMap<i32, HashSet<SocketAddr>> = HashMap::new();
|
||||
|
||||
if self.decision.ipv4_me {
|
||||
let map_v4 = self.proxy_map_v4.read().await.clone();
|
||||
for (dc, addrs) in map_v4 {
|
||||
let entry = out.entry(dc.abs()).or_default();
|
||||
for (ip, port) in addrs {
|
||||
entry.insert(SocketAddr::new(ip, port));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.decision.ipv6_me {
|
||||
let map_v6 = self.proxy_map_v6.read().await.clone();
|
||||
for (dc, addrs) in map_v6 {
|
||||
let entry = out.entry(dc.abs()).or_default();
|
||||
for (ip, port) in addrs {
|
||||
entry.insert(SocketAddr::new(ip, port));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
pub(super) fn required_writers_for_dc(endpoint_count: usize) -> usize {
|
||||
endpoint_count.max(3)
|
||||
}
|
||||
|
||||
fn hardswap_warmup_connect_delay_ms(&self) -> u64 {
|
||||
let min_ms = self.me_hardswap_warmup_delay_min_ms.load(Ordering::Relaxed);
|
||||
let max_ms = self.me_hardswap_warmup_delay_max_ms.load(Ordering::Relaxed);
|
||||
let (min_ms, max_ms) = if min_ms <= max_ms {
|
||||
(min_ms, max_ms)
|
||||
} else {
|
||||
(max_ms, min_ms)
|
||||
};
|
||||
if min_ms == max_ms {
|
||||
return min_ms;
|
||||
}
|
||||
rand::rng().random_range(min_ms..=max_ms)
|
||||
}
|
||||
|
||||
fn hardswap_warmup_backoff_ms(&self, pass_idx: usize) -> u64 {
|
||||
let base_ms = self
|
||||
.me_hardswap_warmup_pass_backoff_base_ms
|
||||
.load(Ordering::Relaxed);
|
||||
let cap_ms = (self.me_reconnect_backoff_cap.as_millis() as u64).max(base_ms);
|
||||
let shift = (pass_idx as u32).min(20);
|
||||
let scaled = base_ms.saturating_mul(1u64 << shift);
|
||||
let core = scaled.min(cap_ms);
|
||||
let jitter = (core / 2).max(1);
|
||||
core.saturating_add(rand::rng().random_range(0..=jitter))
|
||||
}
|
||||
|
||||
async fn fresh_writer_count_for_endpoints(
|
||||
&self,
|
||||
generation: u64,
|
||||
endpoints: &HashSet<SocketAddr>,
|
||||
) -> usize {
|
||||
let ws = self.writers.read().await;
|
||||
ws.iter()
|
||||
.filter(|w| !w.draining.load(Ordering::Relaxed))
|
||||
.filter(|w| w.generation == generation)
|
||||
.filter(|w| endpoints.contains(&w.addr))
|
||||
.count()
|
||||
}
|
||||
|
||||
pub(super) async fn active_writer_count_for_endpoints(
|
||||
&self,
|
||||
endpoints: &HashSet<SocketAddr>,
|
||||
) -> usize {
|
||||
let ws = self.writers.read().await;
|
||||
ws.iter()
|
||||
.filter(|w| !w.draining.load(Ordering::Relaxed))
|
||||
.filter(|w| endpoints.contains(&w.addr))
|
||||
.count()
|
||||
}
|
||||
|
||||
async fn warmup_generation_for_all_dcs(
|
||||
self: &Arc<Self>,
|
||||
rng: &SecureRandom,
|
||||
generation: u64,
|
||||
desired_by_dc: &HashMap<i32, HashSet<SocketAddr>>,
|
||||
) {
|
||||
let extra_passes = self
|
||||
.me_hardswap_warmup_extra_passes
|
||||
.load(Ordering::Relaxed)
|
||||
.min(10) as usize;
|
||||
let total_passes = 1 + extra_passes;
|
||||
|
||||
for (dc, endpoints) in desired_by_dc {
|
||||
if endpoints.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut endpoint_list: Vec<SocketAddr> = endpoints.iter().copied().collect();
|
||||
endpoint_list.sort_unstable();
|
||||
let required = Self::required_writers_for_dc(endpoint_list.len());
|
||||
let mut completed = false;
|
||||
let mut last_fresh_count = self
|
||||
.fresh_writer_count_for_endpoints(generation, endpoints)
|
||||
.await;
|
||||
|
||||
for pass_idx in 0..total_passes {
|
||||
if last_fresh_count >= required {
|
||||
completed = true;
|
||||
break;
|
||||
}
|
||||
|
||||
let missing = required.saturating_sub(last_fresh_count);
|
||||
debug!(
|
||||
dc = *dc,
|
||||
pass = pass_idx + 1,
|
||||
total_passes,
|
||||
fresh_count = last_fresh_count,
|
||||
required,
|
||||
missing,
|
||||
endpoint_count = endpoint_list.len(),
|
||||
"ME hardswap warmup pass started"
|
||||
);
|
||||
|
||||
for attempt_idx in 0..missing {
|
||||
let delay_ms = self.hardswap_warmup_connect_delay_ms();
|
||||
tokio::time::sleep(Duration::from_millis(delay_ms)).await;
|
||||
|
||||
let connected = self.connect_endpoints_round_robin(&endpoint_list, rng).await;
|
||||
debug!(
|
||||
dc = *dc,
|
||||
pass = pass_idx + 1,
|
||||
total_passes,
|
||||
attempt = attempt_idx + 1,
|
||||
delay_ms,
|
||||
connected,
|
||||
"ME hardswap warmup connect attempt finished"
|
||||
);
|
||||
}
|
||||
|
||||
last_fresh_count = self
|
||||
.fresh_writer_count_for_endpoints(generation, endpoints)
|
||||
.await;
|
||||
if last_fresh_count >= required {
|
||||
completed = true;
|
||||
info!(
|
||||
dc = *dc,
|
||||
pass = pass_idx + 1,
|
||||
total_passes,
|
||||
fresh_count = last_fresh_count,
|
||||
required,
|
||||
"ME hardswap warmup floor reached for DC"
|
||||
);
|
||||
break;
|
||||
}
|
||||
|
||||
if pass_idx + 1 < total_passes {
|
||||
let backoff_ms = self.hardswap_warmup_backoff_ms(pass_idx);
|
||||
debug!(
|
||||
dc = *dc,
|
||||
pass = pass_idx + 1,
|
||||
total_passes,
|
||||
fresh_count = last_fresh_count,
|
||||
required,
|
||||
backoff_ms,
|
||||
"ME hardswap warmup pass incomplete, delaying next pass"
|
||||
);
|
||||
tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
|
||||
}
|
||||
}
|
||||
|
||||
if !completed {
|
||||
warn!(
|
||||
dc = *dc,
|
||||
fresh_count = last_fresh_count,
|
||||
required,
|
||||
endpoint_count = endpoint_list.len(),
|
||||
total_passes,
|
||||
"ME warmup stopped: unable to reach required writer floor for DC"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn zero_downtime_reinit_after_map_change(self: &Arc<Self>, rng: &SecureRandom) {
|
||||
let desired_by_dc = self.desired_dc_endpoints().await;
|
||||
if desired_by_dc.is_empty() {
|
||||
warn!("ME endpoint map is empty; skipping stale writer drain");
|
||||
return;
|
||||
}
|
||||
|
||||
let previous_generation = self.current_generation();
|
||||
let generation = self.generation.fetch_add(1, Ordering::Relaxed) + 1;
|
||||
let hardswap = self.hardswap.load(Ordering::Relaxed);
|
||||
|
||||
if hardswap {
|
||||
self.warmup_generation_for_all_dcs(rng, generation, &desired_by_dc)
|
||||
.await;
|
||||
} else {
|
||||
self.reconcile_connections(rng).await;
|
||||
}
|
||||
|
||||
let writers = self.writers.read().await;
|
||||
let active_writer_addrs: HashSet<SocketAddr> = writers
|
||||
.iter()
|
||||
.filter(|w| !w.draining.load(Ordering::Relaxed))
|
||||
.map(|w| w.addr)
|
||||
.collect();
|
||||
let min_ratio = Self::permille_to_ratio(
|
||||
self.me_pool_min_fresh_ratio_permille
|
||||
.load(Ordering::Relaxed),
|
||||
);
|
||||
let (coverage_ratio, missing_dc) = Self::coverage_ratio(&desired_by_dc, &active_writer_addrs);
|
||||
if !hardswap && coverage_ratio < min_ratio {
|
||||
warn!(
|
||||
previous_generation,
|
||||
generation,
|
||||
coverage_ratio = format_args!("{coverage_ratio:.3}"),
|
||||
min_ratio = format_args!("{min_ratio:.3}"),
|
||||
missing_dc = ?missing_dc,
|
||||
"ME reinit coverage below threshold; keeping stale writers"
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
if hardswap {
|
||||
let mut fresh_missing_dc = Vec::<(i32, usize, usize)>::new();
|
||||
for (dc, endpoints) in &desired_by_dc {
|
||||
if endpoints.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let required = Self::required_writers_for_dc(endpoints.len());
|
||||
let fresh_count = writers
|
||||
.iter()
|
||||
.filter(|w| !w.draining.load(Ordering::Relaxed))
|
||||
.filter(|w| w.generation == generation)
|
||||
.filter(|w| endpoints.contains(&w.addr))
|
||||
.count();
|
||||
if fresh_count < required {
|
||||
fresh_missing_dc.push((*dc, fresh_count, required));
|
||||
}
|
||||
}
|
||||
if !fresh_missing_dc.is_empty() {
|
||||
warn!(
|
||||
previous_generation,
|
||||
generation,
|
||||
missing_dc = ?fresh_missing_dc,
|
||||
"ME hardswap pending: fresh generation coverage incomplete"
|
||||
);
|
||||
return;
|
||||
}
|
||||
} else if !missing_dc.is_empty() {
|
||||
warn!(
|
||||
missing_dc = ?missing_dc,
|
||||
// Keep stale writers alive when fresh coverage is incomplete.
|
||||
"ME reinit coverage incomplete; keeping stale writers"
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let desired_addrs: HashSet<SocketAddr> = desired_by_dc
|
||||
.values()
|
||||
.flat_map(|set| set.iter().copied())
|
||||
.collect();
|
||||
|
||||
let stale_writer_ids: Vec<u64> = writers
|
||||
.iter()
|
||||
.filter(|w| !w.draining.load(Ordering::Relaxed))
|
||||
.filter(|w| {
|
||||
if hardswap {
|
||||
w.generation < generation
|
||||
} else {
|
||||
!desired_addrs.contains(&w.addr)
|
||||
}
|
||||
})
|
||||
.map(|w| w.id)
|
||||
.collect();
|
||||
drop(writers);
|
||||
|
||||
if stale_writer_ids.is_empty() {
|
||||
debug!("ME reinit cycle completed with no stale writers");
|
||||
return;
|
||||
}
|
||||
|
||||
let drain_timeout = self.force_close_timeout();
|
||||
let drain_timeout_secs = drain_timeout.map(|d| d.as_secs()).unwrap_or(0);
|
||||
info!(
|
||||
stale_writers = stale_writer_ids.len(),
|
||||
previous_generation,
|
||||
generation,
|
||||
hardswap,
|
||||
coverage_ratio = format_args!("{coverage_ratio:.3}"),
|
||||
min_ratio = format_args!("{min_ratio:.3}"),
|
||||
drain_timeout_secs,
|
||||
"ME reinit cycle covered; draining stale writers"
|
||||
);
|
||||
self.stats.increment_pool_swap_total();
|
||||
for writer_id in stale_writer_ids {
|
||||
self.mark_writer_draining_with_timeout(writer_id, drain_timeout, !hardswap)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn zero_downtime_reinit_periodic(self: &Arc<Self>, rng: &SecureRandom) {
|
||||
self.zero_downtime_reinit_after_map_change(rng).await;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,348 @@
|
|||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use bytes::BytesMut;
|
||||
use rand::Rng;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::error::{ProxyError, Result};
|
||||
use crate::protocol::constants::RPC_PING_U32;
|
||||
|
||||
use super::codec::{RpcWriter, WriterCommand};
|
||||
use super::pool::{MePool, MeWriter};
|
||||
use super::reader::reader_loop;
|
||||
use super::registry::BoundConn;
|
||||
|
||||
const ME_ACTIVE_PING_SECS: u64 = 25;
|
||||
const ME_ACTIVE_PING_JITTER_SECS: i64 = 5;
|
||||
|
||||
impl MePool {
|
||||
pub(crate) async fn prune_closed_writers(self: &Arc<Self>) {
|
||||
let closed_writer_ids: Vec<u64> = {
|
||||
let ws = self.writers.read().await;
|
||||
ws.iter().filter(|w| w.tx.is_closed()).map(|w| w.id).collect()
|
||||
};
|
||||
if closed_writer_ids.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
for writer_id in closed_writer_ids {
|
||||
if self.registry.is_writer_empty(writer_id).await {
|
||||
let _ = self.remove_writer_only(writer_id).await;
|
||||
} else {
|
||||
let _ = self.remove_writer_and_close_clients(writer_id).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn connect_one(self: &Arc<Self>, addr: SocketAddr, rng: &SecureRandom) -> Result<()> {
|
||||
let secret_len = self.proxy_secret.read().await.len();
|
||||
if secret_len < 32 {
|
||||
return Err(ProxyError::Proxy("proxy-secret too short for ME auth".into()));
|
||||
}
|
||||
|
||||
let (stream, _connect_ms) = self.connect_tcp(addr).await?;
|
||||
let hs = self.handshake_only(stream, addr, rng).await?;
|
||||
|
||||
let writer_id = self.next_writer_id.fetch_add(1, Ordering::Relaxed);
|
||||
let generation = self.current_generation();
|
||||
let cancel = CancellationToken::new();
|
||||
let degraded = Arc::new(AtomicBool::new(false));
|
||||
let draining = Arc::new(AtomicBool::new(false));
|
||||
let draining_started_at_epoch_secs = Arc::new(AtomicU64::new(0));
|
||||
let allow_drain_fallback = Arc::new(AtomicBool::new(false));
|
||||
let (tx, mut rx) = mpsc::channel::<WriterCommand>(4096);
|
||||
let mut rpc_writer = RpcWriter {
|
||||
writer: hs.wr,
|
||||
key: hs.write_key,
|
||||
iv: hs.write_iv,
|
||||
seq_no: 0,
|
||||
crc_mode: hs.crc_mode,
|
||||
};
|
||||
let cancel_wr = cancel.clone();
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
tokio::select! {
|
||||
cmd = rx.recv() => {
|
||||
match cmd {
|
||||
Some(WriterCommand::Data(payload)) => {
|
||||
if rpc_writer.send(&payload).await.is_err() { break; }
|
||||
}
|
||||
Some(WriterCommand::DataAndFlush(payload)) => {
|
||||
if rpc_writer.send_and_flush(&payload).await.is_err() { break; }
|
||||
}
|
||||
Some(WriterCommand::Close) | None => break,
|
||||
}
|
||||
}
|
||||
_ = cancel_wr.cancelled() => break,
|
||||
}
|
||||
}
|
||||
});
|
||||
let writer = MeWriter {
|
||||
id: writer_id,
|
||||
addr,
|
||||
generation,
|
||||
tx: tx.clone(),
|
||||
cancel: cancel.clone(),
|
||||
degraded: degraded.clone(),
|
||||
draining: draining.clone(),
|
||||
draining_started_at_epoch_secs: draining_started_at_epoch_secs.clone(),
|
||||
allow_drain_fallback: allow_drain_fallback.clone(),
|
||||
};
|
||||
self.writers.write().await.push(writer.clone());
|
||||
self.conn_count.fetch_add(1, Ordering::Relaxed);
|
||||
self.writer_available.notify_one();
|
||||
|
||||
let reg = self.registry.clone();
|
||||
let writers_arc = self.writers_arc();
|
||||
let ping_tracker = self.ping_tracker.clone();
|
||||
let ping_tracker_reader = ping_tracker.clone();
|
||||
let rtt_stats = self.rtt_stats.clone();
|
||||
let stats_reader = self.stats.clone();
|
||||
let stats_ping = self.stats.clone();
|
||||
let pool = Arc::downgrade(self);
|
||||
let cancel_ping = cancel.clone();
|
||||
let tx_ping = tx.clone();
|
||||
let ping_tracker_ping = ping_tracker.clone();
|
||||
let cleanup_done = Arc::new(AtomicBool::new(false));
|
||||
let cleanup_for_reader = cleanup_done.clone();
|
||||
let cleanup_for_ping = cleanup_done.clone();
|
||||
let keepalive_enabled = self.me_keepalive_enabled;
|
||||
let keepalive_interval = self.me_keepalive_interval;
|
||||
let keepalive_jitter = self.me_keepalive_jitter;
|
||||
let cancel_reader_token = cancel.clone();
|
||||
let cancel_ping_token = cancel_ping.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let res = reader_loop(
|
||||
hs.rd,
|
||||
hs.read_key,
|
||||
hs.read_iv,
|
||||
hs.crc_mode,
|
||||
reg.clone(),
|
||||
BytesMut::new(),
|
||||
BytesMut::new(),
|
||||
tx.clone(),
|
||||
ping_tracker_reader,
|
||||
rtt_stats.clone(),
|
||||
stats_reader,
|
||||
writer_id,
|
||||
degraded.clone(),
|
||||
cancel_reader_token.clone(),
|
||||
)
|
||||
.await;
|
||||
if let Some(pool) = pool.upgrade()
|
||||
&& cleanup_for_reader
|
||||
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
|
||||
.is_ok()
|
||||
{
|
||||
pool.remove_writer_and_close_clients(writer_id).await;
|
||||
}
|
||||
if let Err(e) = res {
|
||||
warn!(error = %e, "ME reader ended");
|
||||
}
|
||||
let mut ws = writers_arc.write().await;
|
||||
ws.retain(|w| w.id != writer_id);
|
||||
info!(remaining = ws.len(), "Dead ME writer removed from pool");
|
||||
});
|
||||
|
||||
let pool_ping = Arc::downgrade(self);
|
||||
tokio::spawn(async move {
|
||||
let mut ping_id: i64 = rand::random::<i64>();
|
||||
// Per-writer jittered start to avoid phase sync.
|
||||
let startup_jitter = if keepalive_enabled {
|
||||
let jitter_cap_ms = keepalive_interval.as_millis() / 2;
|
||||
let effective_jitter_ms = keepalive_jitter.as_millis().min(jitter_cap_ms).max(1);
|
||||
Duration::from_millis(rand::rng().random_range(0..=effective_jitter_ms as u64))
|
||||
} else {
|
||||
let jitter = rand::rng().random_range(-ME_ACTIVE_PING_JITTER_SECS..=ME_ACTIVE_PING_JITTER_SECS);
|
||||
let wait = (ME_ACTIVE_PING_SECS as i64 + jitter).max(5) as u64;
|
||||
Duration::from_secs(wait)
|
||||
};
|
||||
tokio::select! {
|
||||
_ = cancel_ping_token.cancelled() => return,
|
||||
_ = tokio::time::sleep(startup_jitter) => {}
|
||||
}
|
||||
loop {
|
||||
let wait = if keepalive_enabled {
|
||||
let jitter_cap_ms = keepalive_interval.as_millis() / 2;
|
||||
let effective_jitter_ms = keepalive_jitter.as_millis().min(jitter_cap_ms).max(1);
|
||||
keepalive_interval + Duration::from_millis(rand::rng().random_range(0..=effective_jitter_ms as u64))
|
||||
} else {
|
||||
let jitter = rand::rng().random_range(-ME_ACTIVE_PING_JITTER_SECS..=ME_ACTIVE_PING_JITTER_SECS);
|
||||
let secs = (ME_ACTIVE_PING_SECS as i64 + jitter).max(5) as u64;
|
||||
Duration::from_secs(secs)
|
||||
};
|
||||
tokio::select! {
|
||||
_ = cancel_ping_token.cancelled() => {
|
||||
break;
|
||||
}
|
||||
_ = tokio::time::sleep(wait) => {}
|
||||
}
|
||||
let sent_id = ping_id;
|
||||
let mut p = Vec::with_capacity(12);
|
||||
p.extend_from_slice(&RPC_PING_U32.to_le_bytes());
|
||||
p.extend_from_slice(&sent_id.to_le_bytes());
|
||||
{
|
||||
let mut tracker = ping_tracker_ping.lock().await;
|
||||
let before = tracker.len();
|
||||
tracker.retain(|_, (ts, _)| ts.elapsed() < Duration::from_secs(120));
|
||||
let expired = before.saturating_sub(tracker.len());
|
||||
if expired > 0 {
|
||||
stats_ping.increment_me_keepalive_timeout_by(expired as u64);
|
||||
}
|
||||
tracker.insert(sent_id, (std::time::Instant::now(), writer_id));
|
||||
}
|
||||
ping_id = ping_id.wrapping_add(1);
|
||||
stats_ping.increment_me_keepalive_sent();
|
||||
if tx_ping.send(WriterCommand::DataAndFlush(p)).await.is_err() {
|
||||
stats_ping.increment_me_keepalive_failed();
|
||||
debug!("ME ping failed, removing dead writer");
|
||||
cancel_ping.cancel();
|
||||
if let Some(pool) = pool_ping.upgrade()
|
||||
&& cleanup_for_ping
|
||||
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
|
||||
.is_ok()
|
||||
{
|
||||
pool.remove_writer_and_close_clients(writer_id).await;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn remove_writer_and_close_clients(self: &Arc<Self>, writer_id: u64) {
|
||||
let conns = self.remove_writer_only(writer_id).await;
|
||||
for bound in conns {
|
||||
let _ = self.registry.route(bound.conn_id, super::MeResponse::Close).await;
|
||||
let _ = self.registry.unregister(bound.conn_id).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn remove_writer_only(self: &Arc<Self>, writer_id: u64) -> Vec<BoundConn> {
|
||||
let mut close_tx: Option<mpsc::Sender<WriterCommand>> = None;
|
||||
let mut removed_addr: Option<SocketAddr> = None;
|
||||
let mut trigger_refill = false;
|
||||
{
|
||||
let mut ws = self.writers.write().await;
|
||||
if let Some(pos) = ws.iter().position(|w| w.id == writer_id) {
|
||||
let w = ws.remove(pos);
|
||||
let was_draining = w.draining.load(Ordering::Relaxed);
|
||||
if was_draining {
|
||||
self.stats.decrement_pool_drain_active();
|
||||
}
|
||||
self.stats.increment_me_writer_removed_total();
|
||||
w.cancel.cancel();
|
||||
removed_addr = Some(w.addr);
|
||||
trigger_refill = !was_draining;
|
||||
if trigger_refill {
|
||||
self.stats.increment_me_writer_removed_unexpected_total();
|
||||
}
|
||||
close_tx = Some(w.tx.clone());
|
||||
self.conn_count.fetch_sub(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
if let Some(tx) = close_tx {
|
||||
let _ = tx.send(WriterCommand::Close).await;
|
||||
}
|
||||
if trigger_refill
|
||||
&& let Some(addr) = removed_addr
|
||||
{
|
||||
self.trigger_immediate_refill(addr);
|
||||
}
|
||||
self.rtt_stats.lock().await.remove(&writer_id);
|
||||
self.registry.writer_lost(writer_id).await
|
||||
}
|
||||
|
||||
pub(crate) async fn mark_writer_draining_with_timeout(
|
||||
self: &Arc<Self>,
|
||||
writer_id: u64,
|
||||
timeout: Option<Duration>,
|
||||
allow_drain_fallback: bool,
|
||||
) {
|
||||
let timeout = timeout.filter(|d| !d.is_zero());
|
||||
let found = {
|
||||
let mut ws = self.writers.write().await;
|
||||
if let Some(w) = ws.iter_mut().find(|w| w.id == writer_id) {
|
||||
let already_draining = w.draining.swap(true, Ordering::Relaxed);
|
||||
w.allow_drain_fallback
|
||||
.store(allow_drain_fallback, Ordering::Relaxed);
|
||||
w.draining_started_at_epoch_secs
|
||||
.store(Self::now_epoch_secs(), Ordering::Relaxed);
|
||||
if !already_draining {
|
||||
self.stats.increment_pool_drain_active();
|
||||
}
|
||||
w.draining.store(true, Ordering::Relaxed);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
if !found {
|
||||
return;
|
||||
}
|
||||
|
||||
let timeout_secs = timeout.map(|d| d.as_secs()).unwrap_or(0);
|
||||
debug!(
|
||||
writer_id,
|
||||
timeout_secs,
|
||||
allow_drain_fallback,
|
||||
"ME writer marked draining"
|
||||
);
|
||||
|
||||
let pool = Arc::downgrade(self);
|
||||
tokio::spawn(async move {
|
||||
let deadline = timeout.map(|t| Instant::now() + t);
|
||||
while let Some(p) = pool.upgrade() {
|
||||
if let Some(deadline_at) = deadline
|
||||
&& Instant::now() >= deadline_at
|
||||
{
|
||||
warn!(writer_id, "Drain timeout, force-closing");
|
||||
p.stats.increment_pool_force_close_total();
|
||||
let _ = p.remove_writer_and_close_clients(writer_id).await;
|
||||
break;
|
||||
}
|
||||
if p.registry.is_writer_empty(writer_id).await {
|
||||
let _ = p.remove_writer_only(writer_id).await;
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
pub(crate) async fn mark_writer_draining(self: &Arc<Self>, writer_id: u64) {
|
||||
self.mark_writer_draining_with_timeout(writer_id, Some(Duration::from_secs(300)), false)
|
||||
.await;
|
||||
}
|
||||
|
||||
pub(super) fn writer_accepts_new_binding(&self, writer: &MeWriter) -> bool {
|
||||
if !writer.draining.load(Ordering::Relaxed) {
|
||||
return true;
|
||||
}
|
||||
if !writer.allow_drain_fallback.load(Ordering::Relaxed) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let ttl_secs = self.me_pool_drain_ttl_secs.load(Ordering::Relaxed);
|
||||
if ttl_secs == 0 {
|
||||
return true;
|
||||
}
|
||||
|
||||
let started = writer.draining_started_at_epoch_secs.load(Ordering::Relaxed);
|
||||
if started == 0 {
|
||||
return false;
|
||||
}
|
||||
|
||||
Self::now_epoch_secs().saturating_sub(started) <= ttl_secs
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue