Merge pull request #206 from telemt/flow

Flush on Response + Hotpath tunings + Reuseport Checker
This commit is contained in:
Alexey 2026-02-23 01:03:15 +03:00 committed by GitHub
commit 69be44b2b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 440 additions and 143 deletions

View File

@ -213,6 +213,7 @@ listen_addr_ipv6 = "::"
[[server.listeners]] [[server.listeners]]
ip = "0.0.0.0" ip = "0.0.0.0"
# reuse_allow = false # Set true only when intentionally running multiple telemt instances on same port
[[server.listeners]] [[server.listeners]]
ip = "::" ip = "::"

View File

@ -227,6 +227,7 @@ impl ProxyConfig {
announce: None, announce: None,
announce_ip: None, announce_ip: None,
proxy_protocol: None, proxy_protocol: None,
reuse_allow: false,
}); });
} }
if let Some(ipv6_str) = &config.server.listen_addr_ipv6 { if let Some(ipv6_str) = &config.server.listen_addr_ipv6 {
@ -236,6 +237,7 @@ impl ProxyConfig {
announce: None, announce: None,
announce_ip: None, announce_ip: None,
proxy_protocol: None, proxy_protocol: None,
reuse_allow: false,
}); });
} }
} }

View File

@ -603,6 +603,10 @@ pub struct ListenerConfig {
/// Per-listener PROXY protocol override. When set, overrides global server.proxy_protocol. /// Per-listener PROXY protocol override. When set, overrides global server.proxy_protocol.
#[serde(default)] #[serde(default)]
pub proxy_protocol: Option<bool>, pub proxy_protocol: Option<bool>,
/// Allow multiple telemt instances to listen on the same IP:port (SO_REUSEPORT).
/// Default is false for safety.
#[serde(default)]
pub reuse_allow: bool,
} }
// ============= ShowLink ============= // ============= ShowLink =============

View File

@ -38,7 +38,7 @@ use crate::stream::BufferPool;
use crate::transport::middle_proxy::{ use crate::transport::middle_proxy::{
MePool, fetch_proxy_config, run_me_ping, MePingFamily, MePingSample, format_sample_line, MePool, fetch_proxy_config, run_me_ping, MePingFamily, MePingSample, format_sample_line,
}; };
use crate::transport::{ListenOptions, UpstreamManager, create_listener}; use crate::transport::{ListenOptions, UpstreamManager, create_listener, find_listener_processes};
use crate::tls_front::TlsFrontCache; use crate::tls_front::TlsFrontCache;
fn parse_cli() -> (String, bool, Option<String>) { fn parse_cli() -> (String, bool, Option<String>) {
@ -715,6 +715,7 @@ match crate::transport::middle_proxy::fetch_proxy_secret(proxy_secret_path).awai
continue; continue;
} }
let options = ListenOptions { let options = ListenOptions {
reuse_port: listener_conf.reuse_allow,
ipv6_only: listener_conf.ip.is_ipv6(), ipv6_only: listener_conf.ip.is_ipv6(),
..Default::default() ..Default::default()
}; };
@ -753,7 +754,33 @@ match crate::transport::middle_proxy::fetch_proxy_secret(proxy_secret_path).awai
listeners.push((listener, listener_proxy_protocol)); listeners.push((listener, listener_proxy_protocol));
} }
Err(e) => { Err(e) => {
error!("Failed to bind to {}: {}", addr, e); if e.kind() == std::io::ErrorKind::AddrInUse {
let owners = find_listener_processes(addr);
if owners.is_empty() {
error!(
%addr,
"Failed to bind: address already in use (owner process unresolved)"
);
} else {
for owner in owners {
error!(
%addr,
pid = owner.pid,
process = %owner.process,
"Failed to bind: address already in use"
);
}
}
if !listener_conf.reuse_allow {
error!(
%addr,
"reuse_allow=false; set [[server.listeners]].reuse_allow=true to allow multi-instance listening"
);
}
} else {
error!("Failed to bind to {}: {}", addr, e);
}
} }
} }
} }

View File

@ -159,10 +159,13 @@ pub const MAX_TLS_CHUNK_SIZE: usize = 16384 + 256;
/// Generate padding length for Secure Intermediate protocol. /// Generate padding length for Secure Intermediate protocol.
/// Total (data + padding) must not be divisible by 4 per MTProto spec. /// Total (data + padding) must not be divisible by 4 per MTProto spec.
pub fn secure_padding_len(data_len: usize, rng: &SecureRandom) -> usize { pub fn secure_padding_len(data_len: usize, rng: &SecureRandom) -> usize {
if data_len % 4 == 0 { let rem = data_len % 4;
(rng.range(3) + 1) as usize // 1-3 match rem {
} else { 0 => (rng.range(3) + 1) as usize, // {1, 2, 3}
rng.range(4) as usize // 0-3 1 => rng.range(3) as usize, // {0, 1, 2}
2 => [0usize, 1, 3][rng.range(3) as usize], // {0, 1, 3}
3 => [0usize, 2, 3][rng.range(3) as usize], // {0, 2, 3}
_ => unreachable!(),
} }
} }
@ -332,4 +335,24 @@ mod tests {
assert_eq!(TG_DATACENTERS_V4.len(), 5); assert_eq!(TG_DATACENTERS_V4.len(), 5);
assert_eq!(TG_DATACENTERS_V6.len(), 5); assert_eq!(TG_DATACENTERS_V6.len(), 5);
} }
#[test]
fn secure_padding_never_produces_aligned_total() {
    // Sample every payload length in 0..1000 one hundred times each: the
    // padding is random, so a single draw per length would not exercise
    // every branch of the generator.
    let rng = SecureRandom::new();
    let samples = (0..1000usize).flat_map(|data_len| (0..100).map(move |_| data_len));

    for data_len in samples {
        let padding = secure_padding_len(data_len, &rng);
        let total = data_len + padding;

        // Padding is at most three bytes.
        assert!(
            padding <= 3,
            "padding out of range: data_len={data_len}, padding={padding}"
        );
        // The padded frame length must never be a multiple of four.
        assert_ne!(
            total % 4,
            0,
            "invariant violated: data_len={data_len}, padding={padding}, total={}",
            total
        );
    }
}
} }

View File

@ -2,7 +2,7 @@ use std::net::SocketAddr;
use std::sync::Arc; use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
use tokio::sync::oneshot; use tokio::sync::{mpsc, oneshot};
use tracing::{debug, info, trace, warn}; use tracing::{debug, info, trace, warn};
use crate::config::ProxyConfig; use crate::config::ProxyConfig;
@ -14,6 +14,11 @@ use crate::stats::Stats;
use crate::stream::{BufferPool, CryptoReader, CryptoWriter}; use crate::stream::{BufferPool, CryptoReader, CryptoWriter};
use crate::transport::middle_proxy::{MePool, MeResponse, proto_flags_for_tag}; use crate::transport::middle_proxy::{MePool, MeResponse, proto_flags_for_tag};
/// Commands passed from the client read loop to the dedicated client->ME sender task.
enum C2MeCommand {
    /// One client frame to forward through `send_proxy_req`, together with its RPC flags.
    Data { payload: Vec<u8>, flags: u32 },
    /// Ask the sender task to issue `send_close` for the connection and then stop.
    Close,
}
pub(crate) async fn handle_via_middle_proxy<R, W>( pub(crate) async fn handle_via_middle_proxy<R, W>(
mut crypto_reader: CryptoReader<R>, mut crypto_reader: CryptoReader<R>,
crypto_writer: CryptoWriter<W>, crypto_writer: CryptoWriter<W>,
@ -59,6 +64,30 @@ where
let frame_limit = config.general.max_client_frame; let frame_limit = config.general.max_client_frame;
let (c2me_tx, mut c2me_rx) = mpsc::channel::<C2MeCommand>(1024);
let me_pool_c2me = me_pool.clone();
let c2me_sender = tokio::spawn(async move {
while let Some(cmd) = c2me_rx.recv().await {
match cmd {
C2MeCommand::Data { payload, flags } => {
me_pool_c2me.send_proxy_req(
conn_id,
success.dc_idx,
peer,
translated_local_addr,
&payload,
flags,
).await?;
}
C2MeCommand::Close => {
let _ = me_pool_c2me.send_close(conn_id).await;
return Ok(());
}
}
}
Ok(())
});
let (stop_tx, mut stop_rx) = oneshot::channel::<()>(); let (stop_tx, mut stop_rx) = oneshot::channel::<()>();
let mut me_rx_task = me_rx; let mut me_rx_task = me_rx;
let stats_clone = stats.clone(); let stats_clone = stats.clone();
@ -74,6 +103,34 @@ where
trace!(conn_id, bytes = data.len(), flags, "ME->C data"); trace!(conn_id, bytes = data.len(), flags, "ME->C data");
stats_clone.add_user_octets_to(&user_clone, data.len() as u64); stats_clone.add_user_octets_to(&user_clone, data.len() as u64);
write_client_payload(&mut writer, proto_tag, flags, &data, rng_clone.as_ref()).await?; write_client_payload(&mut writer, proto_tag, flags, &data, rng_clone.as_ref()).await?;
// Drain all immediately queued ME responses and flush once.
while let Ok(next) = me_rx_task.try_recv() {
match next {
MeResponse::Data { flags, data } => {
trace!(conn_id, bytes = data.len(), flags, "ME->C data (batched)");
stats_clone.add_user_octets_to(&user_clone, data.len() as u64);
write_client_payload(
&mut writer,
proto_tag,
flags,
&data,
rng_clone.as_ref(),
).await?;
}
MeResponse::Ack(confirm) => {
trace!(conn_id, confirm, "ME->C quickack (batched)");
write_client_ack(&mut writer, proto_tag, confirm).await?;
}
MeResponse::Close => {
debug!(conn_id, "ME sent close (batched)");
let _ = writer.flush().await;
return Ok(());
}
}
}
writer.flush().await.map_err(ProxyError::Io)?;
} }
Some(MeResponse::Ack(confirm)) => { Some(MeResponse::Ack(confirm)) => {
trace!(conn_id, confirm, "ME->C quickack"); trace!(conn_id, confirm, "ME->C quickack");
@ -81,6 +138,7 @@ where
} }
Some(MeResponse::Close) => { Some(MeResponse::Close) => {
debug!(conn_id, "ME sent close"); debug!(conn_id, "ME sent close");
let _ = writer.flush().await;
return Ok(()); return Ok(());
} }
None => { None => {
@ -99,8 +157,15 @@ where
let mut main_result: Result<()> = Ok(()); let mut main_result: Result<()> = Ok(());
let mut client_closed = false; let mut client_closed = false;
let mut frame_counter: u64 = 0;
loop { loop {
match read_client_payload(&mut crypto_reader, proto_tag, frame_limit, &user).await { match read_client_payload(
&mut crypto_reader,
proto_tag,
frame_limit,
&user,
&mut frame_counter,
).await {
Ok(Some((payload, quickack))) => { Ok(Some((payload, quickack))) => {
trace!(conn_id, bytes = payload.len(), "C->ME frame"); trace!(conn_id, bytes = payload.len(), "C->ME frame");
stats.add_user_octets_from(&user, payload.len() as u64); stats.add_user_octets_from(&user, payload.len() as u64);
@ -111,22 +176,20 @@ where
if payload.len() >= 8 && payload[..8].iter().all(|b| *b == 0) { if payload.len() >= 8 && payload[..8].iter().all(|b| *b == 0) {
flags |= RPC_FLAG_NOT_ENCRYPTED; flags |= RPC_FLAG_NOT_ENCRYPTED;
} }
if let Err(e) = me_pool.send_proxy_req( // Keep client read loop lightweight: route heavy ME send path via a dedicated task.
conn_id, if c2me_tx
success.dc_idx, .send(C2MeCommand::Data { payload, flags })
peer, .await
translated_local_addr, .is_err()
&payload, {
flags, main_result = Err(ProxyError::Proxy("ME sender channel closed".into()));
).await {
main_result = Err(e);
break; break;
} }
} }
Ok(None) => { Ok(None) => {
debug!(conn_id, "Client EOF"); debug!(conn_id, "Client EOF");
client_closed = true; client_closed = true;
let _ = me_pool.send_close(conn_id).await; let _ = c2me_tx.send(C2MeCommand::Close).await;
break; break;
} }
Err(e) => { Err(e) => {
@ -136,6 +199,11 @@ where
} }
} }
drop(c2me_tx);
let c2me_result = c2me_sender
.await
.unwrap_or_else(|e| Err(ProxyError::Proxy(format!("ME sender join error: {e}"))));
let _ = stop_tx.send(()); let _ = stop_tx.send(());
let mut writer_result = me_writer let mut writer_result = me_writer
.await .await
@ -151,10 +219,11 @@ where
} }
} }
let result = match (main_result, writer_result) { let result = match (main_result, c2me_result, writer_result) {
(Ok(()), Ok(())) => Ok(()), (Ok(()), Ok(()), Ok(())) => Ok(()),
(Err(e), _) => Err(e), (Err(e), _, _) => Err(e),
(_, Err(e)) => Err(e), (_, Err(e), _) => Err(e),
(_, _, Err(e)) => Err(e),
}; };
debug!(user = %user, conn_id, "ME relay cleanup"); debug!(user = %user, conn_id, "ME relay cleanup");
@ -168,73 +237,111 @@ async fn read_client_payload<R>(
proto_tag: ProtoTag, proto_tag: ProtoTag,
max_frame: usize, max_frame: usize,
user: &str, user: &str,
frame_counter: &mut u64,
) -> Result<Option<(Vec<u8>, bool)>> ) -> Result<Option<(Vec<u8>, bool)>>
where where
R: AsyncRead + Unpin + Send + 'static, R: AsyncRead + Unpin + Send + 'static,
{ {
let (len, quickack) = match proto_tag { loop {
ProtoTag::Abridged => { let (len, quickack, raw_len_bytes) = match proto_tag {
let mut first = [0u8; 1]; ProtoTag::Abridged => {
match client_reader.read_exact(&mut first).await { let mut first = [0u8; 1];
Ok(_) => {} match client_reader.read_exact(&mut first).await {
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(None), Ok(_) => {}
Err(e) => return Err(ProxyError::Io(e)), Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(None),
Err(e) => return Err(ProxyError::Io(e)),
}
let quickack = (first[0] & 0x80) != 0;
let len_words = if (first[0] & 0x7f) == 0x7f {
let mut ext = [0u8; 3];
client_reader
.read_exact(&mut ext)
.await
.map_err(ProxyError::Io)?;
u32::from_le_bytes([ext[0], ext[1], ext[2], 0]) as usize
} else {
(first[0] & 0x7f) as usize
};
let len = len_words
.checked_mul(4)
.ok_or_else(|| ProxyError::Proxy("Abridged frame length overflow".into()))?;
(len, quickack, None)
} }
ProtoTag::Intermediate | ProtoTag::Secure => {
let quickack = (first[0] & 0x80) != 0; let mut len_buf = [0u8; 4];
let len_words = if (first[0] & 0x7f) == 0x7f { match client_reader.read_exact(&mut len_buf).await {
let mut ext = [0u8; 3]; Ok(_) => {}
client_reader Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(None),
.read_exact(&mut ext) Err(e) => return Err(ProxyError::Io(e)),
.await }
.map_err(ProxyError::Io)?; let quickack = (len_buf[3] & 0x80) != 0;
u32::from_le_bytes([ext[0], ext[1], ext[2], 0]) as usize (
} else { (u32::from_le_bytes(len_buf) & 0x7fff_ffff) as usize,
(first[0] & 0x7f) as usize quickack,
}; Some(len_buf),
)
let len = len_words
.checked_mul(4)
.ok_or_else(|| ProxyError::Proxy("Abridged frame length overflow".into()))?;
(len, quickack)
}
ProtoTag::Intermediate | ProtoTag::Secure => {
let mut len_buf = [0u8; 4];
match client_reader.read_exact(&mut len_buf).await {
Ok(_) => {}
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(None),
Err(e) => return Err(ProxyError::Io(e)),
} }
let quickack = (len_buf[3] & 0x80) != 0; };
((u32::from_le_bytes(len_buf) & 0x7fff_ffff) as usize, quickack)
if len == 0 {
continue;
} }
}; if len < 4 && proto_tag != ProtoTag::Abridged {
warn!(
if len > max_frame { user = %user,
warn!( len,
user = %user, proto = ?proto_tag,
raw_len = len, "Frame too small — corrupt or probe"
raw_len_hex = format_args!("0x{:08x}", len), );
proto = ?proto_tag, return Err(ProxyError::Proxy(format!("Frame too small: {len}")));
"Frame too large — possible crypto desync or TLS record error"
);
return Err(ProxyError::Proxy(format!("Frame too large: {len} (max {max_frame})")));
}
let mut payload = vec![0u8; len];
client_reader
.read_exact(&mut payload)
.await
.map_err(ProxyError::Io)?;
// Secure Intermediate: remove random padding (last len%4 bytes)
if proto_tag == ProtoTag::Secure {
let rem = len % 4;
if rem != 0 && payload.len() >= rem {
payload.truncate(len - rem);
} }
if len > max_frame {
let len_buf = raw_len_bytes.unwrap_or((len as u32).to_le_bytes());
let looks_like_tls = raw_len_bytes
.map(|b| b[0] == 0x16 && b[1] == 0x03)
.unwrap_or(false);
let looks_like_http = raw_len_bytes
.map(|b| matches!(b[0], b'G' | b'P' | b'H' | b'C' | b'D'))
.unwrap_or(false);
warn!(
user = %user,
raw_len = len,
raw_len_hex = format_args!("0x{:08x}", len),
raw_bytes = format_args!(
"{:02x} {:02x} {:02x} {:02x}",
len_buf[0], len_buf[1], len_buf[2], len_buf[3]
),
proto = ?proto_tag,
tls_like = looks_like_tls,
http_like = looks_like_http,
frames_ok = *frame_counter,
"Frame too large — crypto desync forensics"
);
return Err(ProxyError::Proxy(format!(
"Frame too large: {len} (max {max_frame}), frames_ok={}",
*frame_counter
)));
}
let mut payload = vec![0u8; len];
client_reader
.read_exact(&mut payload)
.await
.map_err(ProxyError::Io)?;
// Secure Intermediate: remove random padding (last len%4 bytes)
if proto_tag == ProtoTag::Secure {
let rem = len % 4;
if rem != 0 && payload.len() >= rem {
payload.truncate(len - rem);
}
}
*frame_counter += 1;
return Ok(Some((payload, quickack)));
} }
Ok(Some((payload, quickack)))
} }
async fn write_client_payload<W>( async fn write_client_payload<W>(
@ -264,8 +371,11 @@ where
if quickack { if quickack {
first |= 0x80; first |= 0x80;
} }
let mut frame_buf = Vec::with_capacity(1 + data.len());
frame_buf.push(first);
frame_buf.extend_from_slice(data);
client_writer client_writer
.write_all(&[first]) .write_all(&frame_buf)
.await .await
.map_err(ProxyError::Io)?; .map_err(ProxyError::Io)?;
} else if len_words < (1 << 24) { } else if len_words < (1 << 24) {
@ -274,8 +384,11 @@ where
first |= 0x80; first |= 0x80;
} }
let lw = (len_words as u32).to_le_bytes(); let lw = (len_words as u32).to_le_bytes();
let mut frame_buf = Vec::with_capacity(4 + data.len());
frame_buf.extend_from_slice(&[first, lw[0], lw[1], lw[2]]);
frame_buf.extend_from_slice(data);
client_writer client_writer
.write_all(&[first, lw[0], lw[1], lw[2]]) .write_all(&frame_buf)
.await .await
.map_err(ProxyError::Io)?; .map_err(ProxyError::Io)?;
} else { } else {
@ -284,11 +397,6 @@ where
data.len() data.len()
))); )));
} }
client_writer
.write_all(data)
.await
.map_err(ProxyError::Io)?;
} }
ProtoTag::Intermediate | ProtoTag::Secure => { ProtoTag::Intermediate | ProtoTag::Secure => {
let padding_len = if proto_tag == ProtoTag::Secure { let padding_len = if proto_tag == ProtoTag::Secure {
@ -296,35 +404,24 @@ where
} else { } else {
0 0
}; };
let mut len = (data.len() + padding_len) as u32; let mut len_val = (data.len() + padding_len) as u32;
if quickack { if quickack {
len |= 0x8000_0000; len_val |= 0x8000_0000;
} }
client_writer let total = 4 + data.len() + padding_len;
.write_all(&len.to_le_bytes()) let mut frame_buf = Vec::with_capacity(total);
.await frame_buf.extend_from_slice(&len_val.to_le_bytes());
.map_err(ProxyError::Io)?; frame_buf.extend_from_slice(data);
client_writer
.write_all(data)
.await
.map_err(ProxyError::Io)?;
if padding_len > 0 { if padding_len > 0 {
let pad = rng.bytes(padding_len); frame_buf.extend_from_slice(&rng.bytes(padding_len));
client_writer
.write_all(&pad)
.await
.map_err(ProxyError::Io)?;
} }
client_writer
.write_all(&frame_buf)
.await
.map_err(ProxyError::Io)?;
} }
} }
// Avoid unconditional per-frame flush (throughput killer on large downloads).
// Flush only when low-latency ack semantics are requested or when
// CryptoWriter has buffered pending ciphertext that must be drained.
if quickack || client_writer.has_pending() {
client_writer.flush().await.map_err(ProxyError::Io)?;
}
Ok(()) Ok(())
} }

View File

@ -8,7 +8,7 @@ use std::io::{self, Error, ErrorKind};
use std::sync::Arc; use std::sync::Arc;
use tokio_util::codec::{Decoder, Encoder}; use tokio_util::codec::{Decoder, Encoder};
use crate::protocol::constants::ProtoTag; use crate::protocol::constants::{ProtoTag, secure_padding_len};
use crate::crypto::SecureRandom; use crate::crypto::SecureRandom;
use super::frame::{Frame, FrameMeta, FrameCodec as FrameCodecTrait}; use super::frame::{Frame, FrameMeta, FrameCodec as FrameCodecTrait};
@ -303,14 +303,8 @@ fn encode_secure(frame: &Frame, dst: &mut BytesMut, rng: &SecureRandom) -> io::R
return Ok(()); return Ok(());
} }
// Generate padding to make length not divisible by 4 // Generate padding that keeps total length non-divisible by 4.
let padding_len = if data.len() % 4 == 0 { let padding_len = secure_padding_len(data.len(), rng);
// Add 1-3 bytes to make it non-aligned
(rng.range(3) + 1) as usize
} else {
// Already non-aligned, can add 0-3
rng.range(4) as usize
};
let total_len = data.len() + padding_len; let total_len = data.len() + padding_len;
dst.reserve(4 + total_len); dst.reserve(4 + total_len);
@ -625,4 +619,4 @@ mod tests {
let result = codec.decode(&mut buf); let result = codec.decode(&mut buf);
assert!(result.is_err()); assert!(result.is_err());
} }
} }

View File

@ -190,11 +190,26 @@ impl RpcWriter {
self.writer.flush().await.map_err(ProxyError::Io) self.writer.flush().await.map_err(ProxyError::Io)
} }
pub(crate) async fn send_keepalive(&mut self, payload: [u8; 4]) -> Result<()> { /// Sends a 4-byte keepalive marker directly into the CBC stream.
// Keepalive is a frame with fl == 4 and 4 bytes payload. /// This is not an RPC frame and must not consume sequence numbers.
let mut frame = Vec::with_capacity(8); pub(crate) async fn send_keepalive(&mut self) -> Result<()> {
frame.extend_from_slice(&4u32.to_le_bytes()); let mut buf = [0u8; 16];
frame.extend_from_slice(&payload); for i in 0..4 {
self.send(&frame).await let start = i * 4;
let end = start + 4;
buf[start..end].copy_from_slice(&PADDING_FILLER);
}
let cipher = AesCbc::new(self.key, self.iv);
let mut v = buf.to_vec();
cipher
.encrypt_in_place(&mut v)
.map_err(|e| ProxyError::Crypto(format!("{e}")))?;
if v.len() >= 16 {
self.iv.copy_from_slice(&v[v.len() - 16..]);
}
self.writer.write_all(&v).await.map_err(ProxyError::Io)?;
self.writer.flush().await.map_err(ProxyError::Io)
} }
} }

View File

@ -23,7 +23,6 @@ use super::reader::reader_loop;
use super::MeResponse; use super::MeResponse;
const ME_ACTIVE_PING_SECS: u64 = 25; const ME_ACTIVE_PING_SECS: u64 = 25;
const ME_ACTIVE_PING_JITTER_SECS: i64 = 5; const ME_ACTIVE_PING_JITTER_SECS: i64 = 5;
const ME_KEEPALIVE_PAYLOAD_LEN: usize = 4;
#[derive(Clone)] #[derive(Clone)]
pub struct MeWriter { pub struct MeWriter {
@ -361,7 +360,6 @@ impl MePool {
// Additional connections up to pool_size total (round-robin across DCs), staggered to de-phase lifecycles. // Additional connections up to pool_size total (round-robin across DCs), staggered to de-phase lifecycles.
if self.me_warmup_stagger_enabled { if self.me_warmup_stagger_enabled {
let mut delay_ms = 0u64;
for (dc, addrs) in dc_addrs.iter() { for (dc, addrs) in dc_addrs.iter() {
for (ip, port) in addrs { for (ip, port) in addrs {
if self.connection_count() >= pool_size { if self.connection_count() >= pool_size {
@ -369,7 +367,7 @@ impl MePool {
} }
let addr = SocketAddr::new(*ip, *port); let addr = SocketAddr::new(*ip, *port);
let jitter = rand::rng().random_range(0..=self.me_warmup_step_jitter.as_millis() as u64); let jitter = rand::rng().random_range(0..=self.me_warmup_step_jitter.as_millis() as u64);
delay_ms = delay_ms.saturating_add(self.me_warmup_step_delay.as_millis() as u64 + jitter); let delay_ms = self.me_warmup_step_delay.as_millis() as u64 + jitter;
tokio::time::sleep(Duration::from_millis(delay_ms)).await; tokio::time::sleep(Duration::from_millis(delay_ms)).await;
if let Err(e) = self.connect_one(addr, rng.as_ref()).await { if let Err(e) = self.connect_one(addr, rng.as_ref()).await {
debug!(%addr, dc = %dc, error = %e, "Extra ME connect failed (staggered)"); debug!(%addr, dc = %dc, error = %e, "Extra ME connect failed (staggered)");
@ -419,7 +417,6 @@ impl MePool {
let draining = Arc::new(AtomicBool::new(false)); let draining = Arc::new(AtomicBool::new(false));
let (tx, mut rx) = mpsc::channel::<WriterCommand>(4096); let (tx, mut rx) = mpsc::channel::<WriterCommand>(4096);
let tx_for_keepalive = tx.clone(); let tx_for_keepalive = tx.clone();
let keepalive_random = self.me_keepalive_payload_random;
let stats = self.stats.clone(); let stats = self.stats.clone();
let mut rpc_writer = RpcWriter { let mut rpc_writer = RpcWriter {
writer: hs.wr, writer: hs.wr,
@ -440,11 +437,7 @@ impl MePool {
if rpc_writer.send_and_flush(&payload).await.is_err() { break; } if rpc_writer.send_and_flush(&payload).await.is_err() { break; }
} }
Some(WriterCommand::Keepalive) => { Some(WriterCommand::Keepalive) => {
let mut payload = [0u8; ME_KEEPALIVE_PAYLOAD_LEN]; match rpc_writer.send_keepalive().await {
if keepalive_random {
rand::rng().fill(&mut payload);
}
match rpc_writer.send_keepalive(payload).await {
Ok(()) => { Ok(()) => {
stats.increment_me_keepalive_sent(); stats.increment_me_keepalive_sent();
} }

View File

@ -33,7 +33,6 @@ pub(crate) async fn reader_loop(
) -> Result<()> { ) -> Result<()> {
let mut raw = enc_leftover; let mut raw = enc_leftover;
let mut expected_seq: i32 = 0; let mut expected_seq: i32 = 0;
let mut crc_errors = 0u32;
let mut seq_mismatch = 0u32; let mut seq_mismatch = 0u32;
loop { loop {
@ -80,13 +79,15 @@ pub(crate) async fn reader_loop(
let frame = dec.split_to(fl); let frame = dec.split_to(fl);
let pe = fl - 4; let pe = fl - 4;
let ec = u32::from_le_bytes(frame[pe..pe + 4].try_into().unwrap()); let ec = u32::from_le_bytes(frame[pe..pe + 4].try_into().unwrap());
if crc32(&frame[..pe]) != ec { let actual_crc = crc32(&frame[..pe]);
warn!("CRC mismatch in data frame"); if actual_crc != ec {
crc_errors += 1; warn!(
if crc_errors > 3 { frame_len = fl,
return Err(ProxyError::Proxy("Too many CRC mismatches".into())); expected_crc = format_args!("0x{ec:08x}"),
} actual_crc = format_args!("0x{actual_crc:08x}"),
continue; "CRC mismatch — CBC crypto desync, aborting ME connection"
);
return Err(ProxyError::Proxy("CRC mismatch (crypto desync)".into()));
} }
let seq_no = i32::from_le_bytes(frame[4..8].try_into().unwrap()); let seq_no = i32::from_le_bytes(frame[4..8].try_into().unwrap());

View File

@ -1,5 +1,7 @@
//! TCP Socket Configuration //! TCP Socket Configuration
use std::collections::HashSet;
use std::fs;
use std::io::Result; use std::io::Result;
use std::net::{SocketAddr, IpAddr}; use std::net::{SocketAddr, IpAddr};
use std::time::Duration; use std::time::Duration;
@ -234,6 +236,133 @@ pub fn create_listener(addr: SocketAddr, options: &ListenOptions) -> Result<Sock
Ok(socket) Ok(socket)
} }
/// One process found (best effort) holding a listening socket on the queried local TCP port.
#[derive(Debug, Clone)]
pub struct ListenerProcessInfo {
    /// Process ID, taken from the numeric `/proc/<pid>` directory name.
    pub pid: u32,
    /// Process name read from `/proc/<pid>/comm`; `"unknown"` when it is unreadable or empty.
    pub process: String,
}
/// Look up the processes that currently hold a listening TCP socket on the
/// port of `addr`.
///
/// Only Linux has a resolver; every other target yields an empty list. An
/// empty list is also returned when no owning process could be identified.
pub fn find_listener_processes(addr: SocketAddr) -> Vec<ListenerProcessInfo> {
    #[cfg(target_os = "linux")]
    let owners = find_listener_processes_linux(addr);

    #[cfg(not(target_os = "linux"))]
    let owners = {
        // No resolver on this platform; consume `addr` to avoid an unused warning.
        let _ = addr;
        Vec::new()
    };

    owners
}
/// Linux resolver: map the listening socket inodes for `addr`'s port back to
/// the processes that have them open.
///
/// Walks the numeric `/proc/<pid>` directories and inspects every
/// `/proc/<pid>/fd/*` symlink for a `socket:[<inode>]` target. Processes whose
/// fd directory cannot be read are skipped silently. The result is sorted by
/// PID and contains each PID at most once.
#[cfg(target_os = "linux")]
fn find_listener_processes_linux(addr: SocketAddr) -> Vec<ListenerProcessInfo> {
    /// Extract `<inode>` from a `socket:[<inode>]` symlink target.
    fn socket_inode(link: &str) -> Option<u64> {
        link.strip_prefix("socket:[")?
            .strip_suffix(']')?
            .parse::<u64>()
            .ok()
    }

    let wanted = listening_inodes_for_port(addr);
    if wanted.is_empty() {
        return Vec::new();
    }
    let Ok(proc_dir) = fs::read_dir("/proc") else {
        return Vec::new();
    };

    // True when the fd entry is a symlink to one of the wanted socket inodes.
    let holds_listener = |fd: &fs::DirEntry| -> bool {
        fs::read_link(fd.path())
            .ok()
            .and_then(|target| socket_inode(&target.to_string_lossy()))
            .map_or(false, |inode| wanted.contains(&inode))
    };

    let mut owners: Vec<ListenerProcessInfo> = proc_dir
        .flatten()
        .filter_map(|entry| {
            // Only numeric directory names are processes.
            let pid = entry.file_name().to_string_lossy().parse::<u32>().ok()?;
            let fds = fs::read_dir(entry.path().join("fd")).ok()?;
            // `any` stops at the first matching descriptor.
            if !fds.flatten().any(|fd| holds_listener(&fd)) {
                return None;
            }

            let process = match fs::read_to_string(entry.path().join("comm")) {
                Ok(raw) if !raw.trim().is_empty() => raw.trim().to_string(),
                _ => "unknown".to_string(),
            };
            Some(ListenerProcessInfo { pid, process })
        })
        .collect();

    owners.sort_by_key(|owner| owner.pid);
    owners.dedup_by_key(|owner| owner.pid);
    owners
}
/// Collect the socket inodes of all TCP sockets in LISTEN state that are bound
/// to the port of `addr`.
///
/// The match is port-only; the local IP of the listener is ignored, so this is
/// a best-effort diagnostic rather than an exact conflict check.
///
/// For an IPv4 `addr` both `/proc/net/tcp` and `/proc/net/tcp6` are scanned: a
/// dual-stack IPv6 listener (e.g. `[::]:port` without `IPV6_V6ONLY`) also
/// occupies the IPv4 port and appears only in the IPv6 table. For an IPv6
/// `addr` only `/proc/net/tcp6` is scanned.
///
/// Unreadable tables and malformed rows are skipped; the result may be empty.
#[cfg(target_os = "linux")]
fn listening_inodes_for_port(addr: SocketAddr) -> HashSet<u64> {
    let tables: &[&str] = match addr {
        SocketAddr::V4(_) => &["/proc/net/tcp", "/proc/net/tcp6"],
        SocketAddr::V6(_) => &["/proc/net/tcp6"],
    };
    let port = addr.port();

    let mut inodes = HashSet::new();
    for table in tables {
        let Ok(data) = fs::read_to_string(table) else {
            continue;
        };
        // The first line of each table is the column header.
        inodes.extend(
            data.lines()
                .skip(1)
                .filter_map(|row| parse_listen_inode(row, port)),
        );
    }
    inodes
}

/// Parse one row of `/proc/net/tcp*` and return its inode when the row is a
/// LISTEN socket on `port`; `None` for any other or malformed row.
#[cfg(target_os = "linux")]
fn parse_listen_inode(row: &str, port: u16) -> Option<u64> {
    let mut cols = row.split_whitespace();
    let local = cols.nth(1)?; // column 1: local address as HEXADDR:HEXPORT
    let state = cols.nth(1)?; // column 3: socket state
    let inode = cols.nth(5)?; // column 9: inode

    // 0A is the LISTEN state in /proc/net/tcp*.
    if state != "0A" {
        return None;
    }
    let port_hex = local.split(':').nth(1)?;
    if u16::from_str_radix(port_hex, 16).ok()? != port {
        return None;
    }
    inode.parse::<u64>().ok()
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View File

@ -24,6 +24,8 @@ const NUM_DCS: usize = 5;
/// Timeout for individual DC ping attempt /// Timeout for individual DC ping attempt
const DC_PING_TIMEOUT_SECS: u64 = 5; const DC_PING_TIMEOUT_SECS: u64 = 5;
/// Timeout for direct TG DC TCP connect readiness.
const DIRECT_CONNECT_TIMEOUT_SECS: u64 = 10;
// ============= RTT Tracking ============= // ============= RTT Tracking =============
@ -375,7 +377,16 @@ impl UpstreamManager {
let std_stream: std::net::TcpStream = socket.into(); let std_stream: std::net::TcpStream = socket.into();
let stream = TcpStream::from_std(std_stream)?; let stream = TcpStream::from_std(std_stream)?;
stream.writable().await?; let connect_timeout = Duration::from_secs(DIRECT_CONNECT_TIMEOUT_SECS);
match tokio::time::timeout(connect_timeout, stream.writable()).await {
Ok(Ok(())) => {}
Ok(Err(e)) => return Err(ProxyError::Io(e)),
Err(_) => {
return Err(ProxyError::ConnectionTimeout {
addr: target.to_string(),
});
}
}
if let Some(e) = stream.take_error()? { if let Some(e) = stream.take_error()? {
return Err(ProxyError::Io(e)); return Err(ProxyError::Io(e));
} }