mirror of
https://github.com/telemt/telemt.git
synced 2026-04-15 09:34:10 +03:00
Routed DC + Strict ME Writers
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
This commit is contained in:
@@ -147,7 +147,7 @@ async fn check_family(
|
||||
IpFamily::V6 => pool.proxy_map_v6.read().await,
|
||||
};
|
||||
for (dc, addrs) in map_guard.iter() {
|
||||
let entry = dc_endpoints.entry(dc.abs()).or_default();
|
||||
let entry = dc_endpoints.entry(*dc).or_default();
|
||||
for (ip, port) in addrs.iter().copied() {
|
||||
entry.push(SocketAddr::new(ip, port));
|
||||
}
|
||||
@@ -164,14 +164,15 @@ async fn check_family(
|
||||
adaptive_recover_until.clear();
|
||||
}
|
||||
|
||||
let mut live_addr_counts = HashMap::<SocketAddr, usize>::new();
|
||||
let mut live_writer_ids_by_addr = HashMap::<SocketAddr, Vec<u64>>::new();
|
||||
let mut live_addr_counts = HashMap::<(i32, SocketAddr), usize>::new();
|
||||
let mut live_writer_ids_by_addr = HashMap::<(i32, SocketAddr), Vec<u64>>::new();
|
||||
for writer in pool.writers.read().await.iter().filter(|w| {
|
||||
!w.draining.load(std::sync::atomic::Ordering::Relaxed)
|
||||
}) {
|
||||
*live_addr_counts.entry(writer.addr).or_insert(0) += 1;
|
||||
let key = (writer.writer_dc, writer.addr);
|
||||
*live_addr_counts.entry(key).or_insert(0) += 1;
|
||||
live_writer_ids_by_addr
|
||||
.entry(writer.addr)
|
||||
.entry(key)
|
||||
.or_default()
|
||||
.push(writer.id);
|
||||
}
|
||||
@@ -211,7 +212,7 @@ async fn check_family(
|
||||
});
|
||||
let alive = endpoints
|
||||
.iter()
|
||||
.map(|addr| *live_addr_counts.get(addr).unwrap_or(&0))
|
||||
.map(|addr| *live_addr_counts.get(&(dc, *addr)).unwrap_or(&0))
|
||||
.sum::<usize>();
|
||||
|
||||
if endpoints.len() == 1 && pool.single_endpoint_outage_mode_enabled() && alive == 0 {
|
||||
@@ -321,7 +322,10 @@ async fn check_family(
|
||||
if *inflight.get(&key).unwrap_or(&0) >= max_concurrent {
|
||||
continue;
|
||||
}
|
||||
if pool.has_refill_inflight_for_endpoints(&endpoints).await {
|
||||
if pool
|
||||
.has_refill_inflight_for_dc_key(super::pool::RefillDcKey { dc, family })
|
||||
.await
|
||||
{
|
||||
debug!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
@@ -373,7 +377,7 @@ async fn check_family(
|
||||
}
|
||||
let res = tokio::time::timeout(
|
||||
pool.me_one_timeout,
|
||||
pool.connect_endpoints_round_robin(&endpoints, rng.as_ref()),
|
||||
pool.connect_endpoints_round_robin(dc, &endpoints, rng.as_ref()),
|
||||
)
|
||||
.await;
|
||||
match res {
|
||||
@@ -484,12 +488,13 @@ fn adaptive_floor_class_max(
|
||||
}
|
||||
|
||||
fn list_writer_ids_for_endpoints(
|
||||
dc: i32,
|
||||
endpoints: &[SocketAddr],
|
||||
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
) -> Vec<u64> {
|
||||
let mut out = Vec::<u64>::new();
|
||||
for endpoint in endpoints {
|
||||
if let Some(ids) = live_writer_ids_by_addr.get(endpoint) {
|
||||
if let Some(ids) = live_writer_ids_by_addr.get(&(dc, *endpoint)) {
|
||||
out.extend(ids.iter().copied());
|
||||
}
|
||||
}
|
||||
@@ -500,8 +505,8 @@ async fn build_family_floor_plan(
|
||||
pool: &Arc<MePool>,
|
||||
family: IpFamily,
|
||||
dc_endpoints: &HashMap<i32, Vec<SocketAddr>>,
|
||||
live_addr_counts: &HashMap<SocketAddr, usize>,
|
||||
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
|
||||
live_addr_counts: &HashMap<(i32, SocketAddr), usize>,
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
@@ -522,6 +527,7 @@ async fn build_family_floor_plan(
|
||||
let reduce_for_idle = should_reduce_floor_for_idle(
|
||||
pool,
|
||||
key,
|
||||
*dc,
|
||||
endpoints,
|
||||
live_writer_ids_by_addr,
|
||||
bound_clients_by_writer,
|
||||
@@ -551,10 +557,10 @@ async fn build_family_floor_plan(
|
||||
let target_required = desired_raw.clamp(min_required, max_required);
|
||||
let alive = endpoints
|
||||
.iter()
|
||||
.map(|endpoint| live_addr_counts.get(endpoint).copied().unwrap_or(0))
|
||||
.map(|endpoint| live_addr_counts.get(&(*dc, *endpoint)).copied().unwrap_or(0))
|
||||
.sum::<usize>();
|
||||
family_active_total = family_active_total.saturating_add(alive);
|
||||
let writer_ids = list_writer_ids_for_endpoints(endpoints, live_writer_ids_by_addr);
|
||||
let writer_ids = list_writer_ids_for_endpoints(*dc, endpoints, live_writer_ids_by_addr);
|
||||
let has_bound_clients = has_bound_clients_on_endpoint(&writer_ids, bound_clients_by_writer);
|
||||
|
||||
entries.push(DcFloorPlanEntry {
|
||||
@@ -654,14 +660,14 @@ async fn maybe_swap_idle_writer_for_cap(
|
||||
dc: i32,
|
||||
family: IpFamily,
|
||||
endpoints: &[SocketAddr],
|
||||
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
writer_idle_since: &HashMap<u64, u64>,
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
) -> bool {
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let mut candidate: Option<(u64, SocketAddr, u64)> = None;
|
||||
for endpoint in endpoints {
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(endpoint) else {
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(&(dc, *endpoint)) else {
|
||||
continue;
|
||||
};
|
||||
for writer_id in writer_ids {
|
||||
@@ -686,7 +692,12 @@ async fn maybe_swap_idle_writer_for_cap(
|
||||
return false;
|
||||
};
|
||||
|
||||
let connected = match tokio::time::timeout(pool.me_one_timeout, pool.connect_one(endpoint, rng.as_ref())).await {
|
||||
let connected = match tokio::time::timeout(
|
||||
pool.me_one_timeout,
|
||||
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(())) => true,
|
||||
Ok(Err(error)) => {
|
||||
debug!(
|
||||
@@ -738,7 +749,7 @@ async fn maybe_refresh_idle_writer_for_dc(
|
||||
endpoints: &[SocketAddr],
|
||||
alive: usize,
|
||||
required: usize,
|
||||
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
writer_idle_since: &HashMap<u64, u64>,
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
idle_refresh_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
@@ -757,7 +768,7 @@ async fn maybe_refresh_idle_writer_for_dc(
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let mut candidate: Option<(u64, SocketAddr, u64, u64)> = None;
|
||||
for endpoint in endpoints {
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(endpoint) else {
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(&(dc, *endpoint)) else {
|
||||
continue;
|
||||
};
|
||||
for writer_id in writer_ids {
|
||||
@@ -787,7 +798,12 @@ async fn maybe_refresh_idle_writer_for_dc(
|
||||
return;
|
||||
};
|
||||
|
||||
let rotate_ok = match tokio::time::timeout(pool.me_one_timeout, pool.connect_one(endpoint, rng.as_ref())).await {
|
||||
let rotate_ok = match tokio::time::timeout(
|
||||
pool.me_one_timeout,
|
||||
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(())) => true,
|
||||
Ok(Err(error)) => {
|
||||
debug!(
|
||||
@@ -843,8 +859,9 @@ async fn maybe_refresh_idle_writer_for_dc(
|
||||
async fn should_reduce_floor_for_idle(
|
||||
pool: &Arc<MePool>,
|
||||
key: (i32, IpFamily),
|
||||
dc: i32,
|
||||
endpoints: &[SocketAddr],
|
||||
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
@@ -856,7 +873,7 @@ async fn should_reduce_floor_for_idle(
|
||||
}
|
||||
|
||||
let now = Instant::now();
|
||||
let writer_ids = list_writer_ids_for_endpoints(endpoints, live_writer_ids_by_addr);
|
||||
let writer_ids = list_writer_ids_for_endpoints(dc, endpoints, live_writer_ids_by_addr);
|
||||
let has_bound_clients = has_bound_clients_on_endpoint(&writer_ids, bound_clients_by_writer);
|
||||
if has_bound_clients {
|
||||
adaptive_idle_since.remove(&key);
|
||||
@@ -922,7 +939,12 @@ async fn recover_single_endpoint_outage(
|
||||
let attempt_ok = if bypass_quarantine {
|
||||
pool.stats
|
||||
.increment_me_single_endpoint_quarantine_bypass_total();
|
||||
match tokio::time::timeout(pool.me_one_timeout, pool.connect_one(endpoint, rng.as_ref())).await {
|
||||
match tokio::time::timeout(
|
||||
pool.me_one_timeout,
|
||||
pool.connect_one_for_dc(endpoint, key.0, rng.as_ref()),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(())) => true,
|
||||
Ok(Err(e)) => {
|
||||
debug!(
|
||||
@@ -948,7 +970,7 @@ async fn recover_single_endpoint_outage(
|
||||
let one_endpoint = [endpoint];
|
||||
match tokio::time::timeout(
|
||||
pool.me_one_timeout,
|
||||
pool.connect_endpoints_round_robin(&one_endpoint, rng.as_ref()),
|
||||
pool.connect_endpoints_round_robin(key.0, &one_endpoint, rng.as_ref()),
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -1012,7 +1034,7 @@ async fn maybe_rotate_single_endpoint_shadow(
|
||||
endpoints: &[SocketAddr],
|
||||
alive: usize,
|
||||
required: usize,
|
||||
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
shadow_rotate_deadline: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
) {
|
||||
@@ -1045,7 +1067,7 @@ async fn maybe_rotate_single_endpoint_shadow(
|
||||
return;
|
||||
}
|
||||
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(&endpoint) else {
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(&(dc, endpoint)) else {
|
||||
shadow_rotate_deadline.insert(key, now + Duration::from_secs(SHADOW_ROTATE_RETRY_SECS));
|
||||
return;
|
||||
};
|
||||
@@ -1071,7 +1093,12 @@ async fn maybe_rotate_single_endpoint_shadow(
|
||||
return;
|
||||
};
|
||||
|
||||
let rotate_ok = match tokio::time::timeout(pool.me_one_timeout, pool.connect_one(endpoint, rng.as_ref())).await {
|
||||
let rotate_ok = match tokio::time::timeout(
|
||||
pool.me_one_timeout,
|
||||
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(())) => true,
|
||||
Ok(Err(e)) => {
|
||||
debug!(
|
||||
|
||||
Reference in New Issue
Block a user