mirror of
https://github.com/telemt/telemt.git
synced 2026-04-18 02:54:10 +03:00
ME Probe parallelized
This commit is contained in:
@@ -50,6 +50,8 @@ pub struct MePool {
|
||||
pub(super) nat_probe: bool,
|
||||
pub(super) nat_stun: Option<String>,
|
||||
pub(super) nat_stun_servers: Vec<String>,
|
||||
pub(super) nat_stun_live_servers: Arc<RwLock<Vec<String>>>,
|
||||
pub(super) nat_probe_concurrency: usize,
|
||||
pub(super) detected_ipv6: Option<Ipv6Addr>,
|
||||
pub(super) nat_probe_attempts: std::sync::atomic::AtomicU8,
|
||||
pub(super) nat_probe_disabled: std::sync::atomic::AtomicBool,
|
||||
@@ -120,6 +122,7 @@ impl MePool {
|
||||
nat_probe: bool,
|
||||
nat_stun: Option<String>,
|
||||
nat_stun_servers: Vec<String>,
|
||||
nat_probe_concurrency: usize,
|
||||
detected_ipv6: Option<Ipv6Addr>,
|
||||
me_one_retry: u8,
|
||||
me_one_timeout_ms: u64,
|
||||
@@ -162,6 +165,8 @@ impl MePool {
|
||||
nat_probe,
|
||||
nat_stun,
|
||||
nat_stun_servers,
|
||||
nat_stun_live_servers: Arc::new(RwLock::new(Vec::new())),
|
||||
nat_probe_concurrency: nat_probe_concurrency.max(1),
|
||||
detected_ipv6,
|
||||
nat_probe_attempts: std::sync::atomic::AtomicU8::new(0),
|
||||
nat_probe_disabled: std::sync::atomic::AtomicBool::new(false),
|
||||
@@ -241,6 +246,9 @@ impl MePool {
|
||||
pub fn reset_stun_state(&self) {
|
||||
self.nat_probe_attempts.store(0, Ordering::Relaxed);
|
||||
self.nat_probe_disabled.store(false, Ordering::Relaxed);
|
||||
if let Ok(mut live) = self.nat_stun_live_servers.try_write() {
|
||||
live.clear();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn translate_our_addr(&self, addr: SocketAddr) -> SocketAddr {
|
||||
@@ -896,10 +904,25 @@ impl MePool {
|
||||
|
||||
for family in family_order {
|
||||
let map = self.proxy_map_for_family(family).await;
|
||||
let dc_addrs: Vec<(i32, Vec<(IpAddr, u16)>)> = map
|
||||
.iter()
|
||||
.map(|(dc, addrs)| (*dc, addrs.clone()))
|
||||
let mut grouped_dc_addrs: HashMap<i32, Vec<(IpAddr, u16)>> = HashMap::new();
|
||||
for (dc, addrs) in map {
|
||||
if addrs.is_empty() {
|
||||
continue;
|
||||
}
|
||||
grouped_dc_addrs
|
||||
.entry(dc.abs())
|
||||
.or_default()
|
||||
.extend(addrs);
|
||||
}
|
||||
let mut dc_addrs: Vec<(i32, Vec<(IpAddr, u16)>)> = grouped_dc_addrs
|
||||
.into_iter()
|
||||
.map(|(dc, mut addrs)| {
|
||||
addrs.sort_unstable();
|
||||
addrs.dedup();
|
||||
(dc, addrs)
|
||||
})
|
||||
.collect();
|
||||
dc_addrs.sort_unstable_by_key(|(dc, _)| *dc);
|
||||
|
||||
// Ensure at least one connection per DC; run DCs in parallel.
|
||||
let mut join = tokio::task::JoinSet::new();
|
||||
@@ -923,38 +946,49 @@ impl MePool {
|
||||
return Err(ProxyError::Proxy("Too many ME DC init failures, falling back to direct".into()));
|
||||
}
|
||||
|
||||
// Additional connections up to pool_size total (round-robin across DCs), staggered to de-phase lifecycles.
|
||||
if self.me_warmup_stagger_enabled {
|
||||
for (dc, addrs) in dc_addrs.iter() {
|
||||
for (ip, port) in addrs {
|
||||
if self.connection_count() >= pool_size {
|
||||
break;
|
||||
// Warm reserve writers asynchronously so startup does not block after first working pool is ready.
|
||||
let pool = Arc::clone(self);
|
||||
let rng_clone = Arc::clone(rng);
|
||||
let dc_addrs_bg = dc_addrs.clone();
|
||||
tokio::spawn(async move {
|
||||
if pool.me_warmup_stagger_enabled {
|
||||
for (dc, addrs) in dc_addrs_bg.iter() {
|
||||
for (ip, port) in addrs {
|
||||
if pool.connection_count() >= pool_size {
|
||||
break;
|
||||
}
|
||||
let addr = SocketAddr::new(*ip, *port);
|
||||
let jitter = rand::rng()
|
||||
.random_range(0..=pool.me_warmup_step_jitter.as_millis() as u64);
|
||||
let delay_ms = pool.me_warmup_step_delay.as_millis() as u64 + jitter;
|
||||
tokio::time::sleep(Duration::from_millis(delay_ms)).await;
|
||||
if let Err(e) = pool.connect_one(addr, rng_clone.as_ref()).await {
|
||||
debug!(%addr, dc = %dc, error = %e, "Extra ME connect failed (staggered)");
|
||||
}
|
||||
}
|
||||
}
|
||||
let addr = SocketAddr::new(*ip, *port);
|
||||
let jitter = rand::rng().random_range(0..=self.me_warmup_step_jitter.as_millis() as u64);
|
||||
let delay_ms = self.me_warmup_step_delay.as_millis() as u64 + jitter;
|
||||
tokio::time::sleep(Duration::from_millis(delay_ms)).await;
|
||||
if let Err(e) = self.connect_one(addr, rng.as_ref()).await {
|
||||
debug!(%addr, dc = %dc, error = %e, "Extra ME connect failed (staggered)");
|
||||
} else {
|
||||
for (dc, addrs) in dc_addrs_bg.iter() {
|
||||
for (ip, port) in addrs {
|
||||
if pool.connection_count() >= pool_size {
|
||||
break;
|
||||
}
|
||||
let addr = SocketAddr::new(*ip, *port);
|
||||
if let Err(e) = pool.connect_one(addr, rng_clone.as_ref()).await {
|
||||
debug!(%addr, dc = %dc, error = %e, "Extra ME connect failed");
|
||||
}
|
||||
}
|
||||
if pool.connection_count() >= pool_size {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (dc, addrs) in dc_addrs.iter() {
|
||||
for (ip, port) in addrs {
|
||||
if self.connection_count() >= pool_size {
|
||||
break;
|
||||
}
|
||||
let addr = SocketAddr::new(*ip, *port);
|
||||
if let Err(e) = self.connect_one(addr, rng.as_ref()).await {
|
||||
debug!(%addr, dc = %dc, error = %e, "Extra ME connect failed");
|
||||
}
|
||||
}
|
||||
if self.connection_count() >= pool_size {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
debug!(
|
||||
target_pool_size = pool_size,
|
||||
current_pool_size = pool.connection_count(),
|
||||
"Background ME reserve warmup finished"
|
||||
);
|
||||
});
|
||||
|
||||
if !self.decision.effective_multipath && self.connection_count() > 0 {
|
||||
break;
|
||||
@@ -964,6 +998,10 @@ impl MePool {
|
||||
if self.writers.read().await.is_empty() {
|
||||
return Err(ProxyError::Proxy("No ME connections".into()));
|
||||
}
|
||||
info!(
|
||||
active_writers = self.connection_count(),
|
||||
"ME primary pool ready; reserve warmup continues in background"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
use std::collections::HashMap;
|
||||
use std::net::{IpAddr, Ipv4Addr};
|
||||
use std::time::Duration;
|
||||
|
||||
use tracing::{info, warn};
|
||||
use tokio::task::JoinSet;
|
||||
use tokio::time::timeout;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::error::{ProxyError, Result};
|
||||
use crate::network::probe::is_bogon;
|
||||
@@ -10,6 +13,8 @@ use crate::network::stun::{stun_probe_dual, IpFamily, StunProbeResult};
|
||||
use super::MePool;
|
||||
use std::time::Instant;
|
||||
|
||||
const STUN_BATCH_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub async fn stun_probe(stun_addr: Option<String>) -> Result<crate::network::stun::DualStunResult> {
|
||||
let stun_addr = stun_addr.unwrap_or_else(|| "stun.l.google.com:19302".to_string());
|
||||
@@ -22,6 +27,99 @@ pub async fn detect_public_ip() -> Option<IpAddr> {
|
||||
}
|
||||
|
||||
impl MePool {
|
||||
fn configured_stun_servers(&self) -> Vec<String> {
|
||||
if !self.nat_stun_servers.is_empty() {
|
||||
return self.nat_stun_servers.clone();
|
||||
}
|
||||
if let Some(s) = &self.nat_stun {
|
||||
return vec![s.clone()];
|
||||
}
|
||||
vec!["stun.l.google.com:19302".to_string()]
|
||||
}
|
||||
|
||||
async fn probe_stun_batch_for_family(
|
||||
&self,
|
||||
servers: &[String],
|
||||
family: IpFamily,
|
||||
attempt: u8,
|
||||
) -> (Vec<String>, Option<std::net::SocketAddr>) {
|
||||
let mut join_set = JoinSet::new();
|
||||
let mut next_idx = 0usize;
|
||||
let mut live_servers = Vec::new();
|
||||
let mut best_by_ip: HashMap<IpAddr, (usize, std::net::SocketAddr)> = HashMap::new();
|
||||
let concurrency = self.nat_probe_concurrency.max(1);
|
||||
|
||||
while next_idx < servers.len() || !join_set.is_empty() {
|
||||
while next_idx < servers.len() && join_set.len() < concurrency {
|
||||
let stun_addr = servers[next_idx].clone();
|
||||
next_idx += 1;
|
||||
join_set.spawn(async move {
|
||||
let res = timeout(STUN_BATCH_TIMEOUT, stun_probe_dual(&stun_addr)).await;
|
||||
(stun_addr, res)
|
||||
});
|
||||
}
|
||||
|
||||
let Some(task) = join_set.join_next().await else {
|
||||
break;
|
||||
};
|
||||
|
||||
match task {
|
||||
Ok((stun_addr, Ok(Ok(res)))) => {
|
||||
let picked: Option<StunProbeResult> = match family {
|
||||
IpFamily::V4 => res.v4,
|
||||
IpFamily::V6 => res.v6,
|
||||
};
|
||||
|
||||
if let Some(result) = picked {
|
||||
live_servers.push(stun_addr.clone());
|
||||
let entry = best_by_ip
|
||||
.entry(result.reflected_addr.ip())
|
||||
.or_insert((0, result.reflected_addr));
|
||||
entry.0 += 1;
|
||||
debug!(
|
||||
local = %result.local_addr,
|
||||
reflected = %result.reflected_addr,
|
||||
family = ?family,
|
||||
stun = %stun_addr,
|
||||
"NAT probe: reflected address"
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok((stun_addr, Ok(Err(e)))) => {
|
||||
debug!(
|
||||
error = %e,
|
||||
stun = %stun_addr,
|
||||
attempt = attempt + 1,
|
||||
"NAT probe failed, trying next server"
|
||||
);
|
||||
}
|
||||
Ok((stun_addr, Err(_))) => {
|
||||
debug!(
|
||||
stun = %stun_addr,
|
||||
attempt = attempt + 1,
|
||||
"NAT probe timeout, trying next server"
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
debug!(
|
||||
error = %e,
|
||||
attempt = attempt + 1,
|
||||
"NAT probe task join failed"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
live_servers.sort_unstable();
|
||||
live_servers.dedup();
|
||||
let best_reflected = best_by_ip
|
||||
.into_values()
|
||||
.max_by_key(|(count, _)| *count)
|
||||
.map(|(_, addr)| addr);
|
||||
|
||||
(live_servers, best_reflected)
|
||||
}
|
||||
|
||||
pub(super) fn translate_ip_for_nat(&self, ip: IpAddr) -> IpAddr {
|
||||
let nat_ip = self
|
||||
.nat_ip_cfg
|
||||
@@ -128,39 +226,51 @@ impl MePool {
|
||||
}
|
||||
|
||||
let attempt = self.nat_probe_attempts.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
let servers = if !self.nat_stun_servers.is_empty() {
|
||||
self.nat_stun_servers.clone()
|
||||
} else if let Some(s) = &self.nat_stun {
|
||||
vec![s.clone()]
|
||||
let configured_servers = self.configured_stun_servers();
|
||||
let live_snapshot = self.nat_stun_live_servers.read().await.clone();
|
||||
let primary_servers = if live_snapshot.is_empty() {
|
||||
configured_servers.clone()
|
||||
} else {
|
||||
vec!["stun.l.google.com:19302".to_string()]
|
||||
live_snapshot
|
||||
};
|
||||
|
||||
for stun_addr in servers {
|
||||
match stun_probe_dual(&stun_addr).await {
|
||||
Ok(res) => {
|
||||
let picked: Option<StunProbeResult> = match family {
|
||||
IpFamily::V4 => res.v4,
|
||||
IpFamily::V6 => res.v6,
|
||||
};
|
||||
if let Some(result) = picked {
|
||||
info!(local = %result.local_addr, reflected = %result.reflected_addr, family = ?family, stun = %stun_addr, "NAT probe: reflected address");
|
||||
self.nat_probe_attempts.store(0, std::sync::atomic::Ordering::Relaxed);
|
||||
if let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
|
||||
let slot = match family {
|
||||
IpFamily::V4 => &mut cache.v4,
|
||||
IpFamily::V6 => &mut cache.v6,
|
||||
};
|
||||
*slot = Some((Instant::now(), result.reflected_addr));
|
||||
}
|
||||
return Some(result.reflected_addr);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error = %e, stun = %stun_addr, attempt = attempt + 1, "NAT probe failed, trying next server");
|
||||
}
|
||||
}
|
||||
let (mut live_servers, mut selected_reflected) = self
|
||||
.probe_stun_batch_for_family(&primary_servers, family, attempt)
|
||||
.await;
|
||||
|
||||
if selected_reflected.is_none() && !configured_servers.is_empty() && primary_servers != configured_servers {
|
||||
let (rediscovered_live, rediscovered_reflected) = self
|
||||
.probe_stun_batch_for_family(&configured_servers, family, attempt)
|
||||
.await;
|
||||
live_servers = rediscovered_live;
|
||||
selected_reflected = rediscovered_reflected;
|
||||
}
|
||||
|
||||
let live_server_count = live_servers.len();
|
||||
if !live_servers.is_empty() {
|
||||
*self.nat_stun_live_servers.write().await = live_servers;
|
||||
} else {
|
||||
self.nat_stun_live_servers.write().await.clear();
|
||||
}
|
||||
|
||||
if let Some(reflected_addr) = selected_reflected {
|
||||
self.nat_probe_attempts.store(0, std::sync::atomic::Ordering::Relaxed);
|
||||
info!(
|
||||
family = ?family,
|
||||
live_servers = live_server_count,
|
||||
"STUN-Quorum reached, IP: {}",
|
||||
reflected_addr.ip()
|
||||
);
|
||||
if let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
|
||||
let slot = match family {
|
||||
IpFamily::V4 => &mut cache.v4,
|
||||
IpFamily::V6 => &mut cache.v6,
|
||||
};
|
||||
*slot = Some((Instant::now(), reflected_addr));
|
||||
}
|
||||
return Some(reflected_addr);
|
||||
}
|
||||
|
||||
let backoff = Duration::from_secs(60 * 2u64.pow((attempt as u32).min(6)));
|
||||
*self.stun_backoff_until.write().await = Some(Instant::now() + backoff);
|
||||
None
|
||||
|
||||
Reference in New Issue
Block a user