ME Concurrency

Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
This commit is contained in:
Alexey 2026-02-19 16:02:50 +03:00
parent 820ed8d346
commit 2926b9f5c8
No known key found for this signature in database
2 changed files with 15 additions and 0 deletions

View File

@ -14,10 +14,12 @@ use super::MePool;
const HEALTH_INTERVAL_SECS: u64 = 1; const HEALTH_INTERVAL_SECS: u64 = 1;
const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff
const MAX_CONCURRENT_PER_DC_DEFAULT: usize = 1;
pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_connections: usize) { pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_connections: usize) {
let mut backoff: HashMap<(i32, IpFamily), u64> = HashMap::new(); let mut backoff: HashMap<(i32, IpFamily), u64> = HashMap::new();
let mut next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new(); let mut next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new();
let mut inflight: HashMap<(i32, IpFamily), usize> = HashMap::new();
loop { loop {
tokio::time::sleep(Duration::from_secs(HEALTH_INTERVAL_SECS)).await; tokio::time::sleep(Duration::from_secs(HEALTH_INTERVAL_SECS)).await;
check_family( check_family(
@ -26,6 +28,7 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
&rng, &rng,
&mut backoff, &mut backoff,
&mut next_attempt, &mut next_attempt,
&mut inflight,
) )
.await; .await;
check_family( check_family(
@ -34,6 +37,7 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
&rng, &rng,
&mut backoff, &mut backoff,
&mut next_attempt, &mut next_attempt,
&mut inflight,
) )
.await; .await;
} }
@ -45,6 +49,7 @@ async fn check_family(
rng: &Arc<SecureRandom>, rng: &Arc<SecureRandom>,
backoff: &mut HashMap<(i32, IpFamily), u64>, backoff: &mut HashMap<(i32, IpFamily), u64>,
next_attempt: &mut HashMap<(i32, IpFamily), Instant>, next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
inflight: &mut HashMap<(i32, IpFamily), usize>,
) { ) {
let enabled = match family { let enabled = match family {
IpFamily::V4 => pool.decision.ipv4_me, IpFamily::V4 => pool.decision.ipv4_me,
@ -91,6 +96,12 @@ async fn check_family(
} }
} }
let max_concurrent = pool.me_reconnect_max_concurrent_per_dc.max(1) as usize;
if *inflight.get(&key).unwrap_or(&0) >= max_concurrent {
return;
}
*inflight.entry(key).or_insert(0) += 1;
let mut shuffled = dc_addrs.clone(); let mut shuffled = dc_addrs.clone();
shuffled.shuffle(&mut rand::rng()); shuffled.shuffle(&mut rand::rng());
let mut success = false; let mut success = false;
@ -126,5 +137,8 @@ async fn check_family(
next_attempt.insert(key, now + wait); next_attempt.insert(key, now + wait);
warn!(dc = %dc, backoff_ms = next_ms, ?family, "DC has no ME coverage, scheduled reconnect"); warn!(dc = %dc, backoff_ms = next_ms, ?family, "DC has no ME coverage, scheduled reconnect");
} }
if let Some(v) = inflight.get_mut(&key) {
*v = v.saturating_sub(1);
}
} }
} }

View File

@ -420,6 +420,7 @@ impl MePool {
let (tx, mut rx) = mpsc::channel::<WriterCommand>(4096); let (tx, mut rx) = mpsc::channel::<WriterCommand>(4096);
let tx_for_keepalive = tx.clone(); let tx_for_keepalive = tx.clone();
let keepalive_random = self.me_keepalive_payload_random; let keepalive_random = self.me_keepalive_payload_random;
let stats = self.stats.clone();
let mut rpc_writer = RpcWriter { let mut rpc_writer = RpcWriter {
writer: hs.wr, writer: hs.wr,
key: hs.write_key, key: hs.write_key,