Compare commits

..

21 Commits

Author SHA1 Message Date
Alexey
1b25bada29 ServerHello fixes + Docker Health-Check + Conntrack Control for Docker: merge pull request #717 from telemt/flow
ServerHello fixes + Docker Health-Check + Conntrack Control for Docker
2026-04-17 19:43:59 +03:00
Alexey
bde30eaf05 Update emulator.rs 2026-04-17 19:20:06 +03:00
Alexey
b447f60a72 Rustfmt + Bump 2026-04-17 19:08:57 +03:00
Alexey
093faed0c2 Conntrack Control for Docker 2026-04-17 19:06:18 +03:00
Alexey
3ca3e8ff0e Docker Health-Check 2026-04-17 16:36:15 +03:00
Alexey
6e3b4a1ce5 ServerHello fixes 2026-04-17 15:11:36 +03:00
Alexey
cd0771eee4 Merge pull request #715 from telemt/flow
Fixes in TLS-F
2026-04-17 13:00:30 +03:00
Alexey
a858dd799e Bump 2026-04-17 12:43:41 +03:00
Alexey
947ef2beb7 Fixes in TLS-F 2026-04-17 12:38:22 +03:00
Alexey
376f9b42fb Traffic Control + Fairness + Evaluating hard-idle timeout + Improve FakeTLS server-flight fidelity + PROXY Protocol V2 UNKNOWN/LOCAL misuse fixes: merge pull request #714 from telemt/flow
Traffic Control + Fairness + Evaluating hard-idle timeout + Improve FakeTLS server-flight fidelity + PROXY Protocol V2 UNKNOWN/LOCAL misuse fixes
2026-04-17 11:54:18 +03:00
Alexey
191ca35076 Update scheduler.rs 2026-04-17 11:20:58 +03:00
Alexey
44485a545e Fixes for unused imports 2026-04-17 11:06:42 +03:00
Alexey
17a966b822 Rustfmt 2026-04-17 10:48:01 +03:00
Alexey
073eacbb37 PROXY Protocol V2 UNKNOWN/LOCAL misuse fixes for TLS-Fetcher by #713
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-04-17 10:43:49 +03:00
Alexey
5c99cd8eb7 Backpressure-driven Fairness
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-04-17 10:33:37 +03:00
Alexey
7494cb3092 Merge pull request #692 from ne4sp/patch-1
FIx XRAY_DOUBLE_HOP.md files.
2026-04-16 18:39:36 +03:00
Alexey
d25aa5a1e9 Merge pull request #709 from groozchique/main
[docs] add hyperlinks to README
2026-04-16 16:12:48 +03:00
Nick Parfyonov
f1b7b9aa08 [docs] add hyperlinks to README 2026-04-16 09:40:55 +03:00
ne4sp
3f69b54f5d Update XRAY_DOUBLE_HOP.ru.md 2026-04-12 16:15:52 +03:00
ne4sp
62a90e05a0 Update XRAY_DOUBLE_HOP.en.md 2026-04-12 15:59:59 +03:00
ne4sp
1b3d2d8bc5 Update XRAY_DOUBLE_HOP.ru.md
При инициализации xray на сервере не запускался из-за длинного shortID.
2026-04-12 15:45:01 +03:00
38 changed files with 2311 additions and 178 deletions

2
Cargo.lock generated
View File

@@ -2780,7 +2780,7 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
[[package]]
name = "telemt"
version = "3.4.0"
version = "3.4.3"
dependencies = [
"aes",
"anyhow",

View File

@@ -1,6 +1,6 @@
[package]
name = "telemt"
version = "3.4.0"
version = "3.4.3"
edition = "2024"
[features]

View File

@@ -77,6 +77,34 @@ COPY config.toml /app/config.toml
EXPOSE 443 9090 9091
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 CMD ["/app/telemt", "healthcheck", "/app/config.toml", "--mode", "liveness"]
ENTRYPOINT ["/app/telemt"]
CMD ["config.toml"]
# ==========================
# Production Netfilter Profile
# ==========================
FROM debian:12-slim AS prod-netfilter
RUN set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends \
ca-certificates \
conntrack \
nftables \
iptables; \
rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY --from=minimal /telemt /app/telemt
COPY config.toml /app/config.toml
EXPOSE 443 9090 9091
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 CMD ["/app/telemt", "healthcheck", "/app/config.toml", "--mode", "liveness"]
ENTRYPOINT ["/app/telemt"]
CMD ["config.toml"]
@@ -94,5 +122,7 @@ USER nonroot:nonroot
EXPOSE 443 9090 9091
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 CMD ["/app/telemt", "healthcheck", "/app/config.toml", "--mode", "liveness"]
ENTRYPOINT ["/app/telemt"]
CMD ["config.toml"]

View File

@@ -1,6 +1,6 @@
# Telemt - MTProxy on Rust + Tokio
![Latest Release](https://img.shields.io/github/v/release/telemt/telemt?color=neon) ![Stars](https://img.shields.io/github/stars/telemt/telemt?style=social) ![Forks](https://img.shields.io/github/forks/telemt/telemt?style=social) [![Telegram](https://img.shields.io/badge/Telegram-Chat-24a1de?logo=telegram&logoColor=24a1de)](https://t.me/telemtrs)
[![Latest Release](https://img.shields.io/github/v/release/telemt/telemt?color=neon)](https://github.com/telemt/telemt/releases/latest) [![Stars](https://img.shields.io/github/stars/telemt/telemt?style=social)](https://github.com/telemt/telemt/stargazers) [![Forks](https://img.shields.io/github/forks/telemt/telemt?style=social)](https://github.com/telemt/telemt/network/members) [![Telegram](https://img.shields.io/badge/Telegram-Chat-24a1de?logo=telegram&logoColor=24a1de)](https://t.me/telemtrs)
[🇷🇺 README на русском](https://github.com/telemt/telemt/blob/main/README.ru.md)

View File

@@ -1,6 +1,6 @@
# Telemt — MTProxy на Rust + Tokio
![Latest Release](https://img.shields.io/github/v/release/telemt/telemt?color=neon) ![Stars](https://img.shields.io/github/stars/telemt/telemt?style=social) ![Forks](https://img.shields.io/github/forks/telemt/telemt?style=social) [![Telegram](https://img.shields.io/badge/Telegram-Chat-24a1de?logo=telegram&logoColor=24a1de)](https://t.me/telemtrs)
[![Latest Release](https://img.shields.io/github/v/release/telemt/telemt?color=neon)](https://github.com/telemt/telemt/releases/latest) [![Stars](https://img.shields.io/github/stars/telemt/telemt?style=social)](https://github.com/telemt/telemt/stargazers) [![Forks](https://img.shields.io/github/forks/telemt/telemt?style=social)](https://github.com/telemt/telemt/network/members) [![Telegram](https://img.shields.io/badge/Telegram-Chat-24a1de?logo=telegram&logoColor=24a1de)](https://t.me/telemtrs)
***Решает проблемы раньше, чем другие узнают об их существовании***

View File

@@ -0,0 +1,10 @@
services:
telemt:
build:
context: .
target: prod-netfilter
network_mode: host
ports: []
cap_add:
- NET_BIND_SERVICE
- NET_ADMIN

View File

@@ -0,0 +1,8 @@
services:
telemt:
build:
context: .
target: prod-netfilter
cap_add:
- NET_BIND_SERVICE
- NET_ADMIN

View File

@@ -1,7 +1,9 @@
services:
telemt:
image: ghcr.io/telemt/telemt:latest
build: .
build:
context: .
target: prod
container_name: telemt
restart: unless-stopped
ports:
@@ -16,13 +18,18 @@ services:
- /etc/telemt:rw,mode=1777,size=4m
environment:
- RUST_LOG=info
healthcheck:
test: [ "CMD", "/app/telemt", "healthcheck", "/etc/telemt/config.toml", "--mode", "liveness" ]
interval: 30s
timeout: 5s
retries: 3
start_period: 20s
# Uncomment this line if you want to use host network for IPv6, but bridge is default and usually better
# network_mode: host
cap_drop:
- ALL
cap_add:
- NET_BIND_SERVICE
- NET_ADMIN
read_only: true
security_opt:
- no-new-privileges:true

View File

@@ -37,13 +37,13 @@ xray x25519
```
3. **Short ID (Reality identifier):**
```bash
openssl rand -hex 16
# Save the output (e.g.: 0123456789abcdef0123456789abcdef) — this is <SHORT_ID>
openssl rand -hex 8
# Save the output (e.g.: 0123456789abcdef) — this is <SHORT_ID>
```
4. **Random Path (for xhttp):**
```bash
openssl rand -hex 8
# Save the output (e.g., abc123def456) to replace <YOUR_RANDOM_PATH> in configs
openssl rand -hex 16
# Save the output (e.g., 0123456789abcdef0123456789abcdef) to replace <YOUR_RANDOM_PATH> in configs
```
---

View File

@@ -37,13 +37,13 @@ xray x25519
```
3. **Short ID (идентификатор Reality):**
```bash
openssl rand -hex 16
# Сохраните вывод (например: 0123456789abcdef0123456789abcdef) — это <SHORT_ID>
openssl rand -hex 8
# Сохраните вывод (например: 0123456789abcdef) — это <SHORT_ID>
```
4. **Random Path (путь для xhttp):**
```bash
openssl rand -hex 8
# Сохраните вывод (например, abc123def456), чтобы заменить <YOUR_RANDOM_PATH> в конфигах
openssl rand -hex 16
# Сохраните вывод (например, 0123456789abcdef0123456789abcdef), чтобы заменить <YOUR_RANDOM_PATH> в конфигах
```
---

View File

@@ -41,8 +41,8 @@ use config_store::{current_revision, load_config_from_disk, parse_if_match};
use events::ApiEventStore;
use http_utils::{error_response, read_json, read_optional_json, success_response};
use model::{
ApiFailure, CreateUserRequest, DeleteUserResponse, HealthData, PatchUserRequest,
RotateSecretRequest, SummaryData, UserActiveIps,
ApiFailure, CreateUserRequest, DeleteUserResponse, HealthData, HealthReadyData,
PatchUserRequest, RotateSecretRequest, SummaryData, UserActiveIps,
};
use runtime_edge::{
EdgeConnectionsCacheEntry, build_runtime_connections_summary_data,
@@ -275,6 +275,33 @@ async fn handle(
};
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/health/ready") => {
let revision = current_revision(&shared.config_path).await?;
let admission_open = shared.runtime_state.admission_open.load(Ordering::Relaxed);
let upstream_health = shared.upstream_manager.api_health_summary().await;
let ready = admission_open && upstream_health.healthy_total > 0;
let reason = if ready {
None
} else if !admission_open {
Some("admission_closed")
} else {
Some("no_healthy_upstreams")
};
let data = HealthReadyData {
ready,
status: if ready { "ready" } else { "not_ready" },
reason,
admission_open,
healthy_upstreams: upstream_health.healthy_total,
total_upstreams: upstream_health.configured_total,
};
let status_code = if ready {
StatusCode::OK
} else {
StatusCode::SERVICE_UNAVAILABLE
};
Ok(success_response(status_code, data, revision))
}
("GET", "/v1/system/info") => {
let revision = current_revision(&shared.config_path).await?;
let data = build_system_info_data(shared.as_ref(), cfg.as_ref(), &revision);

View File

@@ -60,6 +60,17 @@ pub(super) struct HealthData {
pub(super) read_only: bool,
}
/// Serializable payload describing readiness, as populated by the
/// `GET /v1/health/ready` API handler.
#[derive(Serialize)]
pub(super) struct HealthReadyData {
/// Overall readiness verdict.
pub(super) ready: bool,
/// Textual form of the verdict; the handler emits `"ready"` or `"not_ready"`.
pub(super) status: &'static str,
/// Why the service is not ready; omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) reason: Option<&'static str>,
/// Value of the runtime admission flag at the time of the probe.
pub(super) admission_open: bool,
/// Number of upstreams reported healthy by the upstream manager summary.
pub(super) healthy_upstreams: usize,
/// Number of configured upstreams reported by the upstream manager summary.
pub(super) total_upstreams: usize,
}
#[derive(Serialize)]
pub(super) struct SummaryData {
pub(super) uptime_seconds: f64,

View File

@@ -6,12 +6,15 @@
//! - `reload [--pid-file PATH]` - Reload configuration (SIGHUP)
//! - `status [--pid-file PATH]` - Check daemon status
//! - `run [OPTIONS] [config.toml]` - Run in foreground (default behavior)
//! - `healthcheck [OPTIONS] [config.toml]` - Run control-plane health probe
use rand::RngExt;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
use crate::healthcheck::{self, HealthcheckMode};
#[cfg(unix)]
use crate::daemon::{self, DEFAULT_PID_FILE, DaemonOptions};
@@ -28,6 +31,8 @@ pub enum Subcommand {
Reload,
/// Check daemon status (`status` subcommand).
Status,
/// Run health probe and exit with status code.
Healthcheck,
/// Fire-and-forget setup (`--init`).
Init,
}
@@ -38,6 +43,8 @@ pub struct ParsedCommand {
pub subcommand: Subcommand,
pub pid_file: PathBuf,
pub config_path: String,
pub healthcheck_mode: HealthcheckMode,
pub healthcheck_mode_invalid: Option<String>,
#[cfg(unix)]
pub daemon_opts: DaemonOptions,
pub init_opts: Option<InitOptions>,
@@ -52,6 +59,8 @@ impl Default for ParsedCommand {
#[cfg(not(unix))]
pid_file: PathBuf::from("/var/run/telemt.pid"),
config_path: "config.toml".to_string(),
healthcheck_mode: HealthcheckMode::Liveness,
healthcheck_mode_invalid: None,
#[cfg(unix)]
daemon_opts: DaemonOptions::default(),
init_opts: None,
@@ -91,6 +100,9 @@ pub fn parse_command(args: &[String]) -> ParsedCommand {
"status" => {
cmd.subcommand = Subcommand::Status;
}
"healthcheck" => {
cmd.subcommand = Subcommand::Healthcheck;
}
"run" => {
cmd.subcommand = Subcommand::Run;
#[cfg(unix)]
@@ -113,7 +125,35 @@ pub fn parse_command(args: &[String]) -> ParsedCommand {
while i < args.len() {
match args[i].as_str() {
// Skip subcommand names
"start" | "stop" | "reload" | "status" | "run" => {}
"start" | "stop" | "reload" | "status" | "run" | "healthcheck" => {}
"--mode" => {
i += 1;
if i < args.len() {
match HealthcheckMode::from_cli_arg(&args[i]) {
Some(mode) => {
cmd.healthcheck_mode = mode;
cmd.healthcheck_mode_invalid = None;
}
None => {
cmd.healthcheck_mode_invalid = Some(args[i].clone());
}
}
} else {
cmd.healthcheck_mode_invalid = Some(String::new());
}
}
s if s.starts_with("--mode=") => {
let raw = s.trim_start_matches("--mode=");
match HealthcheckMode::from_cli_arg(raw) {
Some(mode) => {
cmd.healthcheck_mode = mode;
cmd.healthcheck_mode_invalid = None;
}
None => {
cmd.healthcheck_mode_invalid = Some(raw.to_string());
}
}
}
// PID file option (for stop/reload/status)
"--pid-file" => {
i += 1;
@@ -152,6 +192,20 @@ pub fn execute_subcommand(cmd: &ParsedCommand) -> Option<i32> {
Subcommand::Stop => Some(cmd_stop(&cmd.pid_file)),
Subcommand::Reload => Some(cmd_reload(&cmd.pid_file)),
Subcommand::Status => Some(cmd_status(&cmd.pid_file)),
Subcommand::Healthcheck => {
if let Some(invalid_mode) = cmd.healthcheck_mode_invalid.as_ref() {
if invalid_mode.is_empty() {
eprintln!("[telemt] Missing value for --mode (supported: liveness, ready)");
} else {
eprintln!(
"[telemt] Invalid --mode value '{invalid_mode}' (supported: liveness, ready)"
);
}
Some(2)
} else {
Some(healthcheck::run(&cmd.config_path, cmd.healthcheck_mode))
}
}
Subcommand::Init => {
if let Some(opts) = cmd.init_opts.clone() {
match run_init(opts) {
@@ -177,6 +231,20 @@ pub fn execute_subcommand(cmd: &ParsedCommand) -> Option<i32> {
eprintln!("[telemt] Subcommand not supported on this platform");
Some(1)
}
Subcommand::Healthcheck => {
if let Some(invalid_mode) = cmd.healthcheck_mode_invalid.as_ref() {
if invalid_mode.is_empty() {
eprintln!("[telemt] Missing value for --mode (supported: liveness, ready)");
} else {
eprintln!(
"[telemt] Invalid --mode value '{invalid_mode}' (supported: liveness, ready)"
);
}
Some(2)
} else {
Some(healthcheck::run(&cmd.config_path, cmd.healthcheck_mode))
}
}
Subcommand::Init => {
if let Some(opts) = cmd.init_opts.clone() {
match run_init(opts) {

View File

@@ -122,7 +122,8 @@ pub struct HotFields {
pub user_expirations: std::collections::HashMap<String, chrono::DateTime<chrono::Utc>>,
pub user_data_quota: std::collections::HashMap<String, u64>,
pub user_rate_limits: std::collections::HashMap<String, crate::config::RateLimitBps>,
pub cidr_rate_limits: std::collections::HashMap<ipnetwork::IpNetwork, crate::config::RateLimitBps>,
pub cidr_rate_limits:
std::collections::HashMap<ipnetwork::IpNetwork, crate::config::RateLimitBps>,
pub user_max_unique_ips: std::collections::HashMap<String, usize>,
pub user_max_unique_ips_global_each: usize,
pub user_max_unique_ips_mode: crate::config::UserMaxUniqueIpsMode,

View File

@@ -343,6 +343,10 @@ impl ProxyConfig {
let network_table = parsed_toml
.get("network")
.and_then(|value| value.as_table());
let server_table = parsed_toml.get("server").and_then(|value| value.as_table());
let conntrack_control_table = server_table
.and_then(|table| table.get("conntrack_control"))
.and_then(|value| value.as_table());
let update_every_is_explicit = general_table
.map(|table| table.contains_key("update_every"))
.unwrap_or(false);
@@ -372,10 +376,17 @@ impl ProxyConfig {
let stun_servers_is_explicit = network_table
.map(|table| table.contains_key("stun_servers"))
.unwrap_or(false);
let inline_conntrack_control_is_explicit = conntrack_control_table
.map(|table| table.contains_key("inline_conntrack_control"))
.unwrap_or(false);
let mut config: ProxyConfig = parsed_toml
.try_into()
.map_err(|e| ProxyError::Config(e.to_string()))?;
config
.server
.conntrack_control
.inline_conntrack_control_explicit = inline_conntrack_control_is_explicit;
if !update_every_is_explicit && (legacy_secret_is_explicit || legacy_config_is_explicit) {
config.general.update_every = None;
@@ -1881,6 +1892,43 @@ mod tests {
);
}
// With `inline_conntrack_control` absent from `[server.conntrack_control]`,
// the loader must leave the "explicitly set" marker cleared.
#[test]
fn conntrack_inline_explicit_flag_is_false_when_omitted() {
    let toml_source = r#"
[general]
[network]
[server]
[server.conntrack_control]
[access]
"#;
    let cfg = load_config_from_temp_toml(toml_source);
    let explicit = cfg
        .server
        .conntrack_control
        .inline_conntrack_control_explicit;
    assert!(!explicit);
}
// With `inline_conntrack_control` written out in `[server.conntrack_control]`,
// the loader must set the "explicitly set" marker.
#[test]
fn conntrack_inline_explicit_flag_is_true_when_present() {
    let toml_source = r#"
[general]
[network]
[server]
[server.conntrack_control]
inline_conntrack_control = true
[access]
"#;
    let cfg = load_config_from_temp_toml(toml_source);
    let explicit = cfg
        .server
        .conntrack_control
        .inline_conntrack_control_explicit;
    assert!(explicit);
}
#[test]
fn unknown_sni_action_parses_and_defaults_to_drop() {
let cfg_default: ProxyConfig = toml::from_str(

View File

@@ -1329,6 +1329,10 @@ pub struct ConntrackControlConfig {
#[serde(default = "default_conntrack_control_enabled")]
pub inline_conntrack_control: bool,
/// Tracks whether inline_conntrack_control was explicitly set in config.
#[serde(skip)]
pub inline_conntrack_control_explicit: bool,
/// Conntrack mode for listener ingress traffic.
#[serde(default)]
pub mode: ConntrackMode,
@@ -1363,6 +1367,7 @@ impl Default for ConntrackControlConfig {
fn default() -> Self {
Self {
inline_conntrack_control: default_conntrack_control_enabled(),
inline_conntrack_control_explicit: false,
mode: ConntrackMode::default(),
backend: ConntrackBackend::default(),
profile: ConntrackPressureProfile::default(),

View File

@@ -24,6 +24,13 @@ enum NetfilterBackend {
Iptables,
}
/// Snapshot of the host capabilities that conntrack control depends on.
#[derive(Clone, Copy)]
struct ConntrackRuntimeSupport {
/// Netfilter backend selected for the configured backend, if any is usable.
netfilter_backend: Option<NetfilterBackend>,
/// Whether the process was found to hold CAP_NET_ADMIN.
has_cap_net_admin: bool,
/// Whether the `conntrack` command was found.
has_conntrack_binary: bool,
}
#[derive(Clone, Copy)]
struct PressureSample {
conn_pct: Option<u8>,
@@ -56,11 +63,8 @@ pub(crate) fn spawn_conntrack_controller(
shared: Arc<ProxySharedState>,
) {
if !cfg!(target_os = "linux") {
let enabled = config_rx
.borrow()
.server
.conntrack_control
.inline_conntrack_control;
let cfg = config_rx.borrow();
let enabled = cfg.server.conntrack_control.inline_conntrack_control;
stats.set_conntrack_control_enabled(enabled);
stats.set_conntrack_control_available(false);
stats.set_conntrack_pressure_active(false);
@@ -68,9 +72,14 @@ pub(crate) fn spawn_conntrack_controller(
stats.set_conntrack_rule_apply_ok(false);
shared.disable_conntrack_close_sender();
shared.set_conntrack_pressure_active(false);
if enabled {
if enabled
&& cfg
.server
.conntrack_control
.inline_conntrack_control_explicit
{
warn!(
"conntrack control is configured but unsupported on this OS; disabling runtime worker"
"conntrack control explicitly enabled but unsupported on this OS; disabling runtime worker"
);
}
return;
@@ -92,16 +101,17 @@ async fn run_conntrack_controller(
let mut cfg = config_rx.borrow().clone();
let mut pressure_state = PressureState::new(stats.as_ref());
let mut delete_budget_tokens = cfg.server.conntrack_control.delete_budget_per_sec;
let mut backend = pick_backend(cfg.server.conntrack_control.backend);
let mut runtime_support = probe_runtime_support(cfg.server.conntrack_control.backend);
let mut effective_enabled = effective_conntrack_enabled(&cfg, runtime_support);
apply_runtime_state(
stats.as_ref(),
shared.as_ref(),
&cfg,
backend.is_some(),
runtime_support,
false,
);
reconcile_rules(&cfg, backend, stats.as_ref()).await;
reconcile_rules(&cfg, runtime_support, stats.as_ref()).await;
loop {
tokio::select! {
@@ -110,17 +120,18 @@ async fn run_conntrack_controller(
break;
}
cfg = config_rx.borrow_and_update().clone();
backend = pick_backend(cfg.server.conntrack_control.backend);
runtime_support = probe_runtime_support(cfg.server.conntrack_control.backend);
effective_enabled = effective_conntrack_enabled(&cfg, runtime_support);
delete_budget_tokens = cfg.server.conntrack_control.delete_budget_per_sec;
apply_runtime_state(stats.as_ref(), shared.as_ref(), &cfg, backend.is_some(), pressure_state.active);
reconcile_rules(&cfg, backend, stats.as_ref()).await;
apply_runtime_state(stats.as_ref(), shared.as_ref(), &cfg, runtime_support, pressure_state.active);
reconcile_rules(&cfg, runtime_support, stats.as_ref()).await;
}
event = close_rx.recv() => {
let Some(event) = event else {
break;
};
stats.set_conntrack_event_queue_depth(close_rx.len() as u64);
if !cfg.server.conntrack_control.inline_conntrack_control {
if !effective_enabled {
continue;
}
if !pressure_state.active {
@@ -156,6 +167,7 @@ async fn run_conntrack_controller(
stats.as_ref(),
shared.as_ref(),
&cfg,
effective_enabled,
&sample,
&mut pressure_state,
);
@@ -175,20 +187,30 @@ fn apply_runtime_state(
stats: &Stats,
shared: &ProxySharedState,
cfg: &ProxyConfig,
backend_available: bool,
runtime_support: ConntrackRuntimeSupport,
pressure_active: bool,
) {
let enabled = cfg.server.conntrack_control.inline_conntrack_control;
let available = enabled && backend_available && has_cap_net_admin();
if enabled && !available {
let available = effective_conntrack_enabled(cfg, runtime_support);
if enabled
&& !available
&& cfg
.server
.conntrack_control
.inline_conntrack_control_explicit
{
warn!(
"conntrack control enabled but unavailable (missing CAP_NET_ADMIN or backend binaries)"
has_cap_net_admin = runtime_support.has_cap_net_admin,
backend_available = runtime_support.netfilter_backend.is_some(),
conntrack_binary_available = runtime_support.has_conntrack_binary,
configured_backend = ?cfg.server.conntrack_control.backend,
"conntrack control explicitly enabled but unavailable; disabling runtime features"
);
}
stats.set_conntrack_control_enabled(enabled);
stats.set_conntrack_control_available(available);
shared.set_conntrack_pressure_active(enabled && pressure_active);
stats.set_conntrack_pressure_active(enabled && pressure_active);
shared.set_conntrack_pressure_active(available && pressure_active);
stats.set_conntrack_pressure_active(available && pressure_active);
}
fn collect_pressure_sample(
@@ -228,10 +250,11 @@ fn update_pressure_state(
stats: &Stats,
shared: &ProxySharedState,
cfg: &ProxyConfig,
effective_enabled: bool,
sample: &PressureSample,
state: &mut PressureState,
) {
if !cfg.server.conntrack_control.inline_conntrack_control {
if !effective_enabled {
if state.active {
state.active = false;
state.low_streak = 0;
@@ -285,22 +308,26 @@ fn update_pressure_state(
state.low_streak = 0;
}
async fn reconcile_rules(cfg: &ProxyConfig, backend: Option<NetfilterBackend>, stats: &Stats) {
async fn reconcile_rules(
cfg: &ProxyConfig,
runtime_support: ConntrackRuntimeSupport,
stats: &Stats,
) {
if !cfg.server.conntrack_control.inline_conntrack_control {
clear_notrack_rules_all_backends().await;
stats.set_conntrack_rule_apply_ok(true);
return;
}
if !has_cap_net_admin() {
if !effective_conntrack_enabled(cfg, runtime_support) {
clear_notrack_rules_all_backends().await;
stats.set_conntrack_rule_apply_ok(false);
return;
}
let Some(backend) = backend else {
stats.set_conntrack_rule_apply_ok(false);
return;
};
let backend = runtime_support
.netfilter_backend
.expect("netfilter backend must be available for effective conntrack control");
let apply_result = match backend {
NetfilterBackend::Nftables => apply_nft_rules(cfg).await,
@@ -315,6 +342,24 @@ async fn reconcile_rules(cfg: &ProxyConfig, backend: Option<NetfilterBackend>, s
}
}
/// Probes the host once and records what conntrack control can rely on:
/// the selected netfilter backend, CAP_NET_ADMIN, and the `conntrack` command.
fn probe_runtime_support(configured_backend: ConntrackBackend) -> ConntrackRuntimeSupport {
    // Probes run in the same order as the fields are declared.
    let selected_backend = pick_backend(configured_backend);
    let cap_net_admin = has_cap_net_admin();
    let conntrack_binary = command_exists("conntrack");

    ConntrackRuntimeSupport {
        netfilter_backend: selected_backend,
        has_cap_net_admin: cap_net_admin,
        has_conntrack_binary: conntrack_binary,
    }
}
fn effective_conntrack_enabled(
cfg: &ProxyConfig,
runtime_support: ConntrackRuntimeSupport,
) -> bool {
cfg.server.conntrack_control.inline_conntrack_control
&& runtime_support.has_cap_net_admin
&& runtime_support.netfilter_backend.is_some()
&& runtime_support.has_conntrack_binary
}
fn pick_backend(configured: ConntrackBackend) -> Option<NetfilterBackend> {
match configured {
ConntrackBackend::Auto => {
@@ -710,7 +755,7 @@ mod tests {
me_queue_pressure_delta: 0,
};
update_pressure_state(&stats, shared.as_ref(), &cfg, &sample, &mut state);
update_pressure_state(&stats, shared.as_ref(), &cfg, true, &sample, &mut state);
assert!(state.active);
assert!(shared.conntrack_pressure_active());
@@ -731,7 +776,14 @@ mod tests {
accept_timeout_delta: 0,
me_queue_pressure_delta: 0,
};
update_pressure_state(&stats, shared.as_ref(), &cfg, &high_sample, &mut state);
update_pressure_state(
&stats,
shared.as_ref(),
&cfg,
true,
&high_sample,
&mut state,
);
assert!(state.active);
let low_sample = PressureSample {
@@ -740,11 +792,11 @@ mod tests {
accept_timeout_delta: 0,
me_queue_pressure_delta: 0,
};
update_pressure_state(&stats, shared.as_ref(), &cfg, &low_sample, &mut state);
update_pressure_state(&stats, shared.as_ref(), &cfg, true, &low_sample, &mut state);
assert!(state.active);
update_pressure_state(&stats, shared.as_ref(), &cfg, &low_sample, &mut state);
update_pressure_state(&stats, shared.as_ref(), &cfg, true, &low_sample, &mut state);
assert!(state.active);
update_pressure_state(&stats, shared.as_ref(), &cfg, &low_sample, &mut state);
update_pressure_state(&stats, shared.as_ref(), &cfg, true, &low_sample, &mut state);
assert!(!state.active);
assert!(!shared.conntrack_pressure_active());
@@ -765,7 +817,7 @@ mod tests {
me_queue_pressure_delta: 10,
};
update_pressure_state(&stats, shared.as_ref(), &cfg, &sample, &mut state);
update_pressure_state(&stats, shared.as_ref(), &cfg, false, &sample, &mut state);
assert!(!state.active);
assert!(!shared.conntrack_pressure_active());

View File

@@ -8,8 +8,8 @@ use std::io::{self, Read, Write};
use std::os::unix::fs::OpenOptionsExt;
use std::path::{Path, PathBuf};
use nix::fcntl::{Flock, FlockArg};
use nix::errno::Errno;
use nix::fcntl::{Flock, FlockArg};
use nix::unistd::{self, ForkResult, Gid, Pid, Uid, chdir, close, fork, getpid, setsid};
use tracing::{debug, info, warn};
@@ -158,15 +158,15 @@ fn redirect_stdio_to_devnull() -> Result<(), DaemonError> {
unsafe {
// Redirect stdin (fd 0)
if libc::dup2(devnull_fd, 0) < 0 {
return Err(DaemonError::RedirectFailed(nix::errno::Errno::last()));
return Err(DaemonError::RedirectFailed(Errno::last()));
}
// Redirect stdout (fd 1)
if libc::dup2(devnull_fd, 1) < 0 {
return Err(DaemonError::RedirectFailed(nix::errno::Errno::last()));
return Err(DaemonError::RedirectFailed(Errno::last()));
}
// Redirect stderr (fd 2)
if libc::dup2(devnull_fd, 2) < 0 {
return Err(DaemonError::RedirectFailed(nix::errno::Errno::last()));
return Err(DaemonError::RedirectFailed(Errno::last()));
}
}
@@ -350,11 +350,7 @@ fn set_supplementary_groups(gid: Gid) -> Result<(), nix::Error> {
groups.as_ptr(),
)
};
if rc == 0 {
Ok(())
} else {
Err(Errno::last())
}
if rc == 0 { Ok(()) } else { Err(Errno::last()) }
}
#[cfg(not(target_os = "macos"))]

211
src/healthcheck.rs Normal file
View File

@@ -0,0 +1,211 @@
use std::io::{Read, Write};
use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, TcpStream};
use std::time::Duration;
use serde_json::Value;
use crate::config::ProxyConfig;
/// Which control-plane probe the `healthcheck` subcommand performs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum HealthcheckMode {
    /// Probe `/v1/health`.
    Liveness,
    /// Probe `/v1/health/ready`.
    Ready,
}

impl HealthcheckMode {
    /// Maps a command-line mode name to a mode.
    ///
    /// Only the exact lowercase names `liveness` and `ready` are accepted;
    /// anything else yields `None`.
    pub(crate) fn from_cli_arg(value: &str) -> Option<Self> {
        let known_modes = [("liveness", Self::Liveness), ("ready", Self::Ready)];
        known_modes
            .iter()
            .find(|(name, _)| *name == value)
            .map(|&(_, mode)| mode)
    }

    /// API path requested for this mode.
    fn request_path(self) -> &'static str {
        match self {
            Self::Ready => "/v1/health/ready",
            Self::Liveness => "/v1/health",
        }
    }
}
pub(crate) fn run(config_path: &str, mode: HealthcheckMode) -> i32 {
match run_inner(config_path, mode) {
Ok(()) => 0,
Err(error) => {
eprintln!("[telemt] healthcheck failed: {error}");
1
}
}
}
/// Performs one probe against the local API described by the config file.
///
/// Steps: load the config, resolve the API listen address into a connectable
/// target, send a single `GET` over a plain TCP connection, then check the
/// HTTP status and JSON payload. When the API is disabled in the config the
/// probe is skipped and reported as success.
///
/// # Errors
/// Returns a human-readable description of the first step that failed.
fn run_inner(config_path: &str, mode: HealthcheckMode) -> Result<(), String> {
    // One budget shared by the connect, read and write phases.
    let io_timeout = Duration::from_secs(2);

    let config = match ProxyConfig::load(config_path) {
        Ok(loaded) => loaded,
        Err(error) => return Err(format!("config load failed: {error}")),
    };
    let api = &config.server.api;
    if !api.enabled {
        return Ok(());
    }

    let Ok(listen) = api.listen.parse::<SocketAddr>() else {
        return Err(format!("invalid API listen address: {}", api.listen));
    };
    if listen.port() == 0 {
        return Err("API listen port is 0".to_string());
    }

    let target = probe_target(listen);
    let mut stream = TcpStream::connect_timeout(&target, io_timeout)
        .map_err(|error| format!("connect {target} failed: {error}"))?;
    stream
        .set_read_timeout(Some(io_timeout))
        .map_err(|error| format!("set read timeout failed: {error}"))?;
    stream
        .set_write_timeout(Some(io_timeout))
        .map_err(|error| format!("set write timeout failed: {error}"))?;

    let request = build_request(target, mode.request_path(), &api.auth_header);
    stream
        .write_all(request.as_bytes())
        .map_err(|error| format!("request write failed: {error}"))?;
    stream
        .flush()
        .map_err(|error| format!("request flush failed: {error}"))?;

    // The request asks for `Connection: close`, so the response is read until EOF.
    let mut response_bytes = Vec::new();
    stream
        .read_to_end(&mut response_bytes)
        .map_err(|error| format!("response read failed: {error}"))?;
    let Ok(response) = String::from_utf8(response_bytes) else {
        return Err("response is not valid UTF-8".to_string());
    };

    let (status_code, body) = split_response(&response)?;
    if status_code == 200 {
        validate_payload(mode, body)
    } else {
        Err(format!("HTTP status {status_code}"))
    }
}
/// Turns the configured API listen address into an address the probe can
/// actually connect to.
///
/// A wildcard bind address (`0.0.0.0` or `::`) is not connectable, so it is
/// replaced by the loopback address of the same family with the same port.
/// Any other address is returned unchanged. Returning it as-is (rather than
/// rebuilding it from IP and port) keeps the IPv6 scope id and flow info,
/// which link-local addresses need in order to be reachable.
fn probe_target(listen: SocketAddr) -> SocketAddr {
    match listen {
        SocketAddr::V4(addr) if addr.ip().is_unspecified() => {
            SocketAddr::from((Ipv4Addr::LOCALHOST, addr.port()))
        }
        SocketAddr::V6(addr) if addr.ip().is_unspecified() => {
            SocketAddr::from((Ipv6Addr::LOCALHOST, addr.port()))
        }
        concrete => concrete,
    }
}
/// Renders the raw HTTP/1.1 `GET` request sent by the probe.
///
/// `target` becomes the `Host` header. An `Authorization` header is included
/// only when `auth_header` is non-empty. `Connection: close` is always sent.
fn build_request(target: SocketAddr, path: &str, auth_header: &str) -> String {
    let authorization = if auth_header.is_empty() {
        String::new()
    } else {
        format!("Authorization: {auth_header}\r\n")
    };
    format!("GET {path} HTTP/1.1\r\nHost: {target}\r\nConnection: close\r\n{authorization}\r\n")
}
/// Splits a raw HTTP response into its status code and body.
///
/// The header section ends at the first blank line (`\r\n\r\n`); everything
/// after it is returned as the body, untouched.
///
/// # Errors
/// Fails when no header terminator is present, when the header section has
/// no first line, or when that line is not a parsable status line.
fn split_response(response: &str) -> Result<(u16, &str), String> {
    let Some((header, body)) = response.split_once("\r\n\r\n") else {
        return Err("invalid HTTP response headers".to_string());
    };
    let Some(status_line) = header.lines().next() else {
        return Err("missing HTTP status line".to_string());
    };
    parse_status_code(status_line).map(|status_code| (status_code, body))
}
/// Extracts the numeric status code from an HTTP status line such as
/// `HTTP/1.1 200 OK`.
///
/// # Errors
/// Fails when the line is empty, when its first token does not start with
/// `HTTP/`, when no second token exists, or when that token is not a `u16`.
fn parse_status_code(status_line: &str) -> Result<u16, String> {
    let mut tokens = status_line.split_whitespace();

    let Some(protocol) = tokens.next() else {
        return Err("missing HTTP version".to_string());
    };
    if !protocol.starts_with("HTTP/") {
        return Err(format!("invalid HTTP status line: {status_line}"));
    }

    match tokens.next() {
        None => Err("missing HTTP status code".to_string()),
        Some(raw_code) => raw_code
            .parse::<u16>()
            .map_err(|_| format!("invalid HTTP status code: {raw_code}")),
    }
}
/// Checks that a response body satisfies the contract for the given mode.
///
/// The body must be JSON with `ok == true` and a `data` field. Liveness
/// additionally requires `data.status == "ok"`; readiness requires
/// `data.ready == true`. A missing or wrongly typed field counts as a failure.
///
/// # Errors
/// Returns a short description of the first check that did not hold.
fn validate_payload(mode: HealthcheckMode, body: &str) -> Result<(), String> {
    let Ok(payload) = serde_json::from_str::<Value>(body) else {
        return Err("response body is not valid JSON".to_string());
    };

    let ok_flag = payload.get("ok").and_then(Value::as_bool);
    if ok_flag != Some(true) {
        return Err("response JSON has ok=false".to_string());
    }

    let Some(data) = payload.get("data") else {
        return Err("response JSON has no data field".to_string());
    };

    // Each mode contributes one predicate and the message used when it fails.
    let (passed, failure) = match mode {
        HealthcheckMode::Liveness => (
            data.get("status").and_then(Value::as_str) == Some("ok"),
            "liveness status is not ok",
        ),
        HealthcheckMode::Ready => (
            data.get("ready").and_then(Value::as_bool) == Some(true),
            "readiness flag is false",
        ),
    };

    if passed {
        Ok(())
    } else {
        Err(failure.to_string())
    }
}
#[cfg(test)]
mod tests {
    use super::{HealthcheckMode, parse_status_code, split_response, validate_payload};

    // A well-formed status line yields its numeric code.
    #[test]
    fn parse_status_code_reads_http_200() {
        let parsed = parse_status_code("HTTP/1.1 200 OK");
        assert_eq!(parsed.expect("must parse status"), 200);
    }

    // Headers and body are separated at the first blank line.
    #[test]
    fn split_response_extracts_status_and_body() {
        let raw = "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\n\r\n{\"ok\":true}";
        let parts = split_response(raw).expect("must split response");
        assert_eq!(parts, (200, r#"{"ok":true}"#));
    }

    // `ok: true` together with `data.status: "ok"` satisfies liveness.
    #[test]
    fn validate_payload_accepts_liveness_contract() {
        let body = r#"{"ok":true,"data":{"status":"ok"}}"#;
        let outcome = validate_payload(HealthcheckMode::Liveness, body);
        outcome.expect("liveness payload must pass");
    }

    // `data.ready: false` must fail the readiness probe.
    #[test]
    fn validate_payload_rejects_not_ready() {
        let body = r#"{"ok":true,"data":{"ready":false}}"#;
        assert!(validate_payload(HealthcheckMode::Ready, body).is_err());
    }
}

View File

@@ -190,8 +190,16 @@ pub(crate) async fn spawn_runtime_tasks(
);
let mut config_rx_rate_limits = config_rx.clone();
tokio::spawn(async move {
let mut prev_user_limits = config_rx_rate_limits.borrow().access.user_rate_limits.clone();
let mut prev_cidr_limits = config_rx_rate_limits.borrow().access.cidr_rate_limits.clone();
let mut prev_user_limits = config_rx_rate_limits
.borrow()
.access
.user_rate_limits
.clone();
let mut prev_cidr_limits = config_rx_rate_limits
.borrow()
.access
.cidr_rate_limits
.clone();
loop {
if config_rx_rate_limits.changed().await.is_err() {
break;

View File

@@ -8,6 +8,7 @@ mod crypto;
#[cfg(unix)]
mod daemon;
mod error;
mod healthcheck;
mod ip_tracker;
#[cfg(test)]
#[path = "tests/ip_tracker_encapsulation_adversarial_tests.rs"]

View File

@@ -1310,6 +1310,143 @@ async fn render_metrics(
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_fair_pressure_state Worker-local fairness pressure state"
);
let _ = writeln!(out, "# TYPE telemt_me_fair_pressure_state gauge");
let _ = writeln!(
out,
"telemt_me_fair_pressure_state {}",
if me_allows_normal {
stats.get_me_fair_pressure_state_gauge()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_fair_active_flows Fair-scheduler active flow count"
);
let _ = writeln!(out, "# TYPE telemt_me_fair_active_flows gauge");
let _ = writeln!(
out,
"telemt_me_fair_active_flows {}",
if me_allows_normal {
stats.get_me_fair_active_flows_gauge()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_fair_queued_bytes Fair-scheduler queued bytes"
);
let _ = writeln!(out, "# TYPE telemt_me_fair_queued_bytes gauge");
let _ = writeln!(
out,
"telemt_me_fair_queued_bytes {}",
if me_allows_normal {
stats.get_me_fair_queued_bytes_gauge()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_fair_flow_state_gauge Fair-scheduler flow health classes"
);
let _ = writeln!(out, "# TYPE telemt_me_fair_flow_state_gauge gauge");
let _ = writeln!(
out,
"telemt_me_fair_flow_state_gauge{{class=\"standing\"}} {}",
if me_allows_normal {
stats.get_me_fair_standing_flows_gauge()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_me_fair_flow_state_gauge{{class=\"backpressured\"}} {}",
if me_allows_normal {
stats.get_me_fair_backpressured_flows_gauge()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_fair_events_total Fair-scheduler event counters"
);
let _ = writeln!(out, "# TYPE telemt_me_fair_events_total counter");
let _ = writeln!(
out,
"telemt_me_fair_events_total{{event=\"scheduler_round\"}} {}",
if me_allows_normal {
stats.get_me_fair_scheduler_rounds_total()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_me_fair_events_total{{event=\"deficit_grant\"}} {}",
if me_allows_normal {
stats.get_me_fair_deficit_grants_total()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_me_fair_events_total{{event=\"deficit_skip\"}} {}",
if me_allows_normal {
stats.get_me_fair_deficit_skips_total()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_me_fair_events_total{{event=\"enqueue_reject\"}} {}",
if me_allows_normal {
stats.get_me_fair_enqueue_rejects_total()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_me_fair_events_total{{event=\"shed_drop\"}} {}",
if me_allows_normal {
stats.get_me_fair_shed_drops_total()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_me_fair_events_total{{event=\"penalty\"}} {}",
if me_allows_normal {
stats.get_me_fair_penalties_total()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_me_fair_events_total{{event=\"downstream_stall\"}} {}",
if me_allows_normal {
stats.get_me_fair_downstream_stalls_total()
} else {
0
}
);
let _ = writeln!(
out,

View File

@@ -316,7 +316,9 @@ where
stats.increment_user_connects(user);
let _direct_connection_lease = stats.acquire_direct_connection_lease();
let traffic_lease = shared.traffic_limiter.acquire_lease(user, success.peer.ip());
let traffic_lease = shared
.traffic_limiter
.acquire_lease(user, success.peer.ip());
let buffer_pool_trim = Arc::clone(&buffer_pool);
let relay_activity_timeout = if shared.conntrack_pressure_active() {

View File

@@ -289,17 +289,9 @@ impl<S> StatsIo<S> {
let Some(started_at) = wait.started_at.take() else {
return;
};
let wait_ms = started_at
.elapsed()
.as_millis()
.min(u128::from(u64::MAX)) as u64;
let wait_ms = started_at.elapsed().as_millis().min(u128::from(u64::MAX)) as u64;
if let Some(lease) = lease {
lease.observe_wait_ms(
direction,
wait.blocked_user,
wait.blocked_cidr,
wait_ms,
);
lease.observe_wait_ms(direction, wait.blocked_user, wait.blocked_cidr, wait_ms);
}
wait.blocked_user = false;
wait.blocked_cidr = false;
@@ -340,8 +332,7 @@ impl<S> StatsIo<S> {
while self.c2s_rate_debt_bytes > 0 {
let consume = lease.try_consume(RateDirection::Up, self.c2s_rate_debt_bytes);
if consume.granted > 0 {
self.c2s_rate_debt_bytes =
self.c2s_rate_debt_bytes.saturating_sub(consume.granted);
self.c2s_rate_debt_bytes = self.c2s_rate_debt_bytes.saturating_sub(consume.granted);
continue;
}
Self::arm_wait(
@@ -647,7 +638,10 @@ impl<S: AsyncWrite + Unpin> AsyncWrite for StatsIo<S> {
match Pin::new(&mut this.inner).poll_write(cx, write_buf) {
Poll::Ready(Ok(n)) => {
if reserved_bytes > n as u64 {
refund_reserved_quota_bytes(this.user_stats.as_ref(), reserved_bytes - n as u64);
refund_reserved_quota_bytes(
this.user_stats.as_ref(),
reserved_bytes - n as u64,
);
}
if shaper_reserved_bytes > n as u64
&& let Some(lease) = this.traffic_lease.as_ref()

View File

@@ -74,7 +74,8 @@ impl ScopeMetrics {
self.wait_up_ms_total.fetch_add(wait_ms, Ordering::Relaxed);
}
RateDirection::Down => {
self.wait_down_ms_total.fetch_add(wait_ms, Ordering::Relaxed);
self.wait_down_ms_total
.fetch_add(wait_ms, Ordering::Relaxed);
}
}
}
@@ -254,9 +255,7 @@ impl CidrDirectionBucket {
let grant = if guaranteed_remaining > 0 {
requested.min(guaranteed_remaining).min(total_remaining)
} else {
requested
.min(total_remaining)
.min(MAX_BORROW_CHUNK_BYTES)
requested.min(total_remaining).min(MAX_BORROW_CHUNK_BYTES)
};
if grant == 0 {
@@ -266,12 +265,7 @@ impl CidrDirectionBucket {
let next_total = total_used.saturating_add(grant);
if self
.used
.compare_exchange_weak(
total_used,
next_total,
Ordering::Relaxed,
Ordering::Relaxed,
)
.compare_exchange_weak(total_used, next_total, Ordering::Relaxed, Ordering::Relaxed)
.is_ok()
{
user_state.used.fetch_add(grant, Ordering::Relaxed);
@@ -430,8 +424,14 @@ struct PolicySnapshot {
impl PolicySnapshot {
fn match_cidr(&self, ip: IpAddr) -> Option<&CidrRule> {
match ip {
IpAddr::V4(_) => self.cidr_rules_v4.iter().find(|rule| rule.cidr.contains(ip)),
IpAddr::V6(_) => self.cidr_rules_v6.iter().find(|rule| rule.cidr.contains(ip)),
IpAddr::V4(_) => self
.cidr_rules_v4
.iter()
.find(|rule| rule.cidr.contains(ip)),
IpAddr::V6(_) => self
.cidr_rules_v6
.iter()
.find(|rule| rule.cidr.contains(ip)),
}
}
}
@@ -535,7 +535,8 @@ impl TrafficLease {
if let (Some(cidr_bucket), Some(cidr_user_share)) =
(self.cidr_bucket.as_ref(), self.cidr_user_share.as_ref())
{
let cidr_granted = cidr_bucket.try_consume_for_user(direction, cidr_user_share, granted);
let cidr_granted =
cidr_bucket.try_consume_for_user(direction, cidr_user_share, granted);
if cidr_granted < granted
&& let Some(user_bucket) = self.user_bucket.as_ref()
{
@@ -693,7 +694,9 @@ impl TrafficLimiter {
.get_or_insert_with(user, || UserBucket::new(limit));
bucket.set_rates(limit);
bucket.active_leases.fetch_add(1, Ordering::Relaxed);
self.user_scope.active_leases.fetch_add(1, Ordering::Relaxed);
self.user_scope
.active_leases
.fetch_add(1, Ordering::Relaxed);
user_bucket = Some(bucket);
}
@@ -706,7 +709,9 @@ impl TrafficLimiter {
.get_or_insert_with(rule.key.as_str(), || CidrBucket::new(rule.limits));
bucket.set_rates(rule.limits);
bucket.active_leases.fetch_add(1, Ordering::Relaxed);
self.cidr_scope.active_leases.fetch_add(1, Ordering::Relaxed);
self.cidr_scope
.active_leases
.fetch_add(1, Ordering::Relaxed);
let share = bucket.acquire_user_share(user);
cidr_user_key = Some(user.to_string());
cidr_user_share = Some(share);
@@ -784,7 +789,8 @@ impl TrafficLimiter {
let policy = self.policy.load_full();
self.user_buckets.retain(|user, bucket| {
bucket.active_leases.load(Ordering::Relaxed) > 0 || policy.user_limits.contains_key(user)
bucket.active_leases.load(Ordering::Relaxed) > 0
|| policy.user_limits.contains_key(user)
});
self.cidr_buckets.retain(|cidr_key, bucket| {
bucket.cleanup_idle_users();

View File

@@ -175,6 +175,18 @@ pub struct Stats {
me_route_drop_queue_full: AtomicU64,
me_route_drop_queue_full_base: AtomicU64,
me_route_drop_queue_full_high: AtomicU64,
me_fair_pressure_state_gauge: AtomicU64,
me_fair_active_flows_gauge: AtomicU64,
me_fair_queued_bytes_gauge: AtomicU64,
me_fair_standing_flows_gauge: AtomicU64,
me_fair_backpressured_flows_gauge: AtomicU64,
me_fair_scheduler_rounds_total: AtomicU64,
me_fair_deficit_grants_total: AtomicU64,
me_fair_deficit_skips_total: AtomicU64,
me_fair_enqueue_rejects_total: AtomicU64,
me_fair_shed_drops_total: AtomicU64,
me_fair_penalties_total: AtomicU64,
me_fair_downstream_stalls_total: AtomicU64,
me_d2c_batches_total: AtomicU64,
me_d2c_batch_frames_total: AtomicU64,
me_d2c_batch_bytes_total: AtomicU64,
@@ -856,6 +868,78 @@ impl Stats {
.fetch_add(1, Ordering::Relaxed);
}
}
/// Overwrites the `me_fair_pressure_state_gauge` atomic with `value`.
///
/// The store is skipped unless `telemetry_me_allows_normal()` returns true.
pub fn set_me_fair_pressure_state_gauge(&self, value: u64) {
    if !self.telemetry_me_allows_normal() {
        return;
    }
    let gauge = &self.me_fair_pressure_state_gauge;
    gauge.store(value, Ordering::Relaxed);
}
/// Overwrites the `me_fair_active_flows_gauge` atomic with `value`.
///
/// The store is skipped unless `telemetry_me_allows_normal()` returns true.
pub fn set_me_fair_active_flows_gauge(&self, value: u64) {
    if !self.telemetry_me_allows_normal() {
        return;
    }
    let gauge = &self.me_fair_active_flows_gauge;
    gauge.store(value, Ordering::Relaxed);
}
/// Overwrites the `me_fair_queued_bytes_gauge` atomic with `value`.
///
/// The store is skipped unless `telemetry_me_allows_normal()` returns true.
pub fn set_me_fair_queued_bytes_gauge(&self, value: u64) {
    if !self.telemetry_me_allows_normal() {
        return;
    }
    let gauge = &self.me_fair_queued_bytes_gauge;
    gauge.store(value, Ordering::Relaxed);
}
/// Overwrites the `me_fair_standing_flows_gauge` atomic with `value`.
///
/// The store is skipped unless `telemetry_me_allows_normal()` returns true.
pub fn set_me_fair_standing_flows_gauge(&self, value: u64) {
    if !self.telemetry_me_allows_normal() {
        return;
    }
    let gauge = &self.me_fair_standing_flows_gauge;
    gauge.store(value, Ordering::Relaxed);
}
/// Overwrites the `me_fair_backpressured_flows_gauge` atomic with `value`.
///
/// The store is skipped unless `telemetry_me_allows_normal()` returns true.
pub fn set_me_fair_backpressured_flows_gauge(&self, value: u64) {
    if !self.telemetry_me_allows_normal() {
        return;
    }
    let gauge = &self.me_fair_backpressured_flows_gauge;
    gauge.store(value, Ordering::Relaxed);
}
/// Adds `value` to the `me_fair_scheduler_rounds_total` counter.
///
/// Nothing is written when `telemetry_me_allows_normal()` is false or when
/// `value` is zero.
pub fn add_me_fair_scheduler_rounds_total(&self, value: u64) {
    if !self.telemetry_me_allows_normal() || value == 0 {
        return;
    }
    let counter = &self.me_fair_scheduler_rounds_total;
    counter.fetch_add(value, Ordering::Relaxed);
}
/// Adds `value` to the `me_fair_deficit_grants_total` counter.
///
/// Nothing is written when `telemetry_me_allows_normal()` is false or when
/// `value` is zero.
pub fn add_me_fair_deficit_grants_total(&self, value: u64) {
    if !self.telemetry_me_allows_normal() || value == 0 {
        return;
    }
    let counter = &self.me_fair_deficit_grants_total;
    counter.fetch_add(value, Ordering::Relaxed);
}
/// Adds `value` to the `me_fair_deficit_skips_total` counter.
///
/// Nothing is written when `telemetry_me_allows_normal()` is false or when
/// `value` is zero.
pub fn add_me_fair_deficit_skips_total(&self, value: u64) {
    if !self.telemetry_me_allows_normal() || value == 0 {
        return;
    }
    let counter = &self.me_fair_deficit_skips_total;
    counter.fetch_add(value, Ordering::Relaxed);
}
/// Adds `value` to the `me_fair_enqueue_rejects_total` counter.
///
/// Nothing is written when `telemetry_me_allows_normal()` is false or when
/// `value` is zero.
pub fn add_me_fair_enqueue_rejects_total(&self, value: u64) {
    if !self.telemetry_me_allows_normal() || value == 0 {
        return;
    }
    let counter = &self.me_fair_enqueue_rejects_total;
    counter.fetch_add(value, Ordering::Relaxed);
}
/// Adds `value` to the `me_fair_shed_drops_total` counter.
///
/// Nothing is written when `telemetry_me_allows_normal()` is false or when
/// `value` is zero.
pub fn add_me_fair_shed_drops_total(&self, value: u64) {
    if !self.telemetry_me_allows_normal() || value == 0 {
        return;
    }
    let counter = &self.me_fair_shed_drops_total;
    counter.fetch_add(value, Ordering::Relaxed);
}
/// Adds `value` to the `me_fair_penalties_total` counter.
///
/// Nothing is written when `telemetry_me_allows_normal()` is false or when
/// `value` is zero.
pub fn add_me_fair_penalties_total(&self, value: u64) {
    if !self.telemetry_me_allows_normal() || value == 0 {
        return;
    }
    let counter = &self.me_fair_penalties_total;
    counter.fetch_add(value, Ordering::Relaxed);
}
/// Adds `value` to the `me_fair_downstream_stalls_total` counter.
///
/// Nothing is written when `telemetry_me_allows_normal()` is false or when
/// `value` is zero.
pub fn add_me_fair_downstream_stalls_total(&self, value: u64) {
    if !self.telemetry_me_allows_normal() || value == 0 {
        return;
    }
    let counter = &self.me_fair_downstream_stalls_total;
    counter.fetch_add(value, Ordering::Relaxed);
}
pub fn increment_me_d2c_batches_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_d2c_batches_total.fetch_add(1, Ordering::Relaxed);
@@ -1806,6 +1890,43 @@ impl Stats {
/// Returns the current `me_route_drop_queue_full_high` value (relaxed atomic load).
pub fn get_me_route_drop_queue_full_high(&self) -> u64 {
self.me_route_drop_queue_full_high.load(Ordering::Relaxed)
}
/// Returns the current `me_fair_pressure_state_gauge` value (relaxed atomic load).
pub fn get_me_fair_pressure_state_gauge(&self) -> u64 {
self.me_fair_pressure_state_gauge.load(Ordering::Relaxed)
}
/// Returns the current `me_fair_active_flows_gauge` value (relaxed atomic load).
pub fn get_me_fair_active_flows_gauge(&self) -> u64 {
self.me_fair_active_flows_gauge.load(Ordering::Relaxed)
}
/// Returns the current `me_fair_queued_bytes_gauge` value (relaxed atomic load).
pub fn get_me_fair_queued_bytes_gauge(&self) -> u64 {
self.me_fair_queued_bytes_gauge.load(Ordering::Relaxed)
}
/// Returns the current `me_fair_standing_flows_gauge` value (relaxed atomic load).
pub fn get_me_fair_standing_flows_gauge(&self) -> u64 {
self.me_fair_standing_flows_gauge.load(Ordering::Relaxed)
}
/// Returns the current `me_fair_backpressured_flows_gauge` value (relaxed atomic load).
pub fn get_me_fair_backpressured_flows_gauge(&self) -> u64 {
self.me_fair_backpressured_flows_gauge
.load(Ordering::Relaxed)
}
/// Returns the current `me_fair_scheduler_rounds_total` value (relaxed atomic load).
pub fn get_me_fair_scheduler_rounds_total(&self) -> u64 {
self.me_fair_scheduler_rounds_total.load(Ordering::Relaxed)
}
/// Returns the current `me_fair_deficit_grants_total` value (relaxed atomic load).
pub fn get_me_fair_deficit_grants_total(&self) -> u64 {
self.me_fair_deficit_grants_total.load(Ordering::Relaxed)
}
/// Returns the current `me_fair_deficit_skips_total` value (relaxed atomic load).
pub fn get_me_fair_deficit_skips_total(&self) -> u64 {
self.me_fair_deficit_skips_total.load(Ordering::Relaxed)
}
/// Returns the current `me_fair_enqueue_rejects_total` value (relaxed atomic load).
pub fn get_me_fair_enqueue_rejects_total(&self) -> u64 {
self.me_fair_enqueue_rejects_total.load(Ordering::Relaxed)
}
/// Returns the current `me_fair_shed_drops_total` value (relaxed atomic load).
pub fn get_me_fair_shed_drops_total(&self) -> u64 {
self.me_fair_shed_drops_total.load(Ordering::Relaxed)
}
/// Returns the current `me_fair_penalties_total` value (relaxed atomic load).
pub fn get_me_fair_penalties_total(&self) -> u64 {
self.me_fair_penalties_total.load(Ordering::Relaxed)
}
/// Returns the current `me_fair_downstream_stalls_total` value (relaxed atomic load).
pub fn get_me_fair_downstream_stalls_total(&self) -> u64 {
self.me_fair_downstream_stalls_total.load(Ordering::Relaxed)
}
/// Returns the current `me_d2c_batches_total` value (relaxed atomic load).
pub fn get_me_d2c_batches_total(&self) -> u64 {
self.me_d2c_batches_total.load(Ordering::Relaxed)
}

View File

@@ -66,9 +66,19 @@ fn ensure_payload_capacity(mut sizes: Vec<usize>, payload_len: usize) -> Vec<usi
fn emulated_app_data_sizes(cached: &CachedTlsData) -> Vec<usize> {
match cached.behavior_profile.source {
TlsProfileSource::Raw | TlsProfileSource::Merged => {
if !cached.behavior_profile.app_data_record_sizes.is_empty() {
return cached.behavior_profile.app_data_record_sizes.clone();
}
return cached
.app_data_records_sizes
.first()
.copied()
.or_else(|| {
cached
.behavior_profile
.app_data_record_sizes
.first()
.copied()
})
.map(|size| vec![size])
.unwrap_or_else(|| vec![cached.total_app_data_len.max(1024)]);
}
TlsProfileSource::Default | TlsProfileSource::Rustls => {}
}
@@ -80,8 +90,8 @@ fn emulated_app_data_sizes(cached: &CachedTlsData) -> Vec<usize> {
sizes
}
fn emulated_change_cipher_spec_count(cached: &CachedTlsData) -> usize {
usize::from(cached.behavior_profile.change_cipher_spec_count.max(1))
fn emulated_change_cipher_spec_count(_cached: &CachedTlsData) -> usize {
1
}
fn emulated_ticket_record_sizes(
@@ -89,17 +99,20 @@ fn emulated_ticket_record_sizes(
new_session_tickets: u8,
rng: &SecureRandom,
) -> Vec<usize> {
let mut sizes = match cached.behavior_profile.source {
let target_count = usize::from(new_session_tickets.min(MAX_TICKET_RECORDS as u8));
if target_count == 0 {
return Vec::new();
}
let profiled_sizes = match cached.behavior_profile.source {
TlsProfileSource::Raw | TlsProfileSource::Merged => {
cached.behavior_profile.ticket_record_sizes.clone()
cached.behavior_profile.ticket_record_sizes.as_slice()
}
TlsProfileSource::Default | TlsProfileSource::Rustls => Vec::new(),
TlsProfileSource::Default | TlsProfileSource::Rustls => &[],
};
let target_count = sizes
.len()
.max(usize::from(new_session_tickets.min(MAX_TICKET_RECORDS as u8)))
.min(MAX_TICKET_RECORDS);
let mut sizes = Vec::with_capacity(target_count);
sizes.extend(profiled_sizes.iter().copied().take(target_count));
while sizes.len() < target_count {
sizes.push(rng.range(48) + 48);
@@ -240,7 +253,18 @@ pub fn build_emulated_server_hello(
}
// --- ApplicationData (fake encrypted records) ---
let mut sizes = jitter_and_clamp_sizes(&emulated_app_data_sizes(cached), rng);
let mut sizes = {
let base_sizes = emulated_app_data_sizes(cached);
match cached.behavior_profile.source {
TlsProfileSource::Raw | TlsProfileSource::Merged => base_sizes
.into_iter()
.map(|size| size.clamp(MIN_APP_DATA, MAX_APP_DATA))
.collect(),
TlsProfileSource::Default | TlsProfileSource::Rustls => {
jitter_and_clamp_sizes(&base_sizes, rng)
}
}
};
let compact_payload = cached
.cert_info
.as_ref()
@@ -329,11 +353,11 @@ pub fn build_emulated_server_hello(
let mut tickets = Vec::new();
for ticket_len in emulated_ticket_record_sizes(cached, new_session_tickets, rng) {
let mut rec = Vec::with_capacity(5 + ticket_len);
rec.push(TLS_RECORD_APPLICATION);
rec.extend_from_slice(&TLS_VERSION);
rec.extend_from_slice(&(ticket_len as u16).to_be_bytes());
rec.extend_from_slice(&rng.bytes(ticket_len));
tickets.extend_from_slice(&rec);
rec.push(TLS_RECORD_APPLICATION);
rec.extend_from_slice(&TLS_VERSION);
rec.extend_from_slice(&(ticket_len as u16).to_be_bytes());
rec.extend_from_slice(&rng.bytes(ticket_len));
tickets.extend_from_slice(&rec);
}
let mut response = Vec::with_capacity(
@@ -506,7 +530,7 @@ mod tests {
}
#[test]
fn test_build_emulated_server_hello_replays_tail_records_for_profiled_tls() {
fn test_build_emulated_server_hello_ignores_tail_records_for_profiled_tls() {
let mut cached = make_cached(None);
cached.app_data_records_sizes = vec![27, 3905, 537, 69];
cached.total_app_data_len = 4538;
@@ -528,19 +552,11 @@ mod tests {
let hello_len = u16::from_be_bytes([response[3], response[4]]) as usize;
let ccs_start = 5 + hello_len;
let mut pos = ccs_start + 6;
let mut app_lengths = Vec::new();
while pos + 5 <= response.len() {
assert_eq!(response[pos], TLS_RECORD_APPLICATION);
let record_len = u16::from_be_bytes([response[pos + 3], response[pos + 4]]) as usize;
app_lengths.push(record_len);
pos += 5 + record_len;
}
assert_eq!(app_lengths.len(), 4);
assert_eq!(app_lengths[0], 64);
assert_eq!(app_lengths[3], 69);
assert!(app_lengths[1] >= 64);
assert!(app_lengths[2] >= 64);
let app_start = ccs_start + 6;
let app_len =
u16::from_be_bytes([response[app_start + 3], response[app_start + 4]]) as usize;
assert_eq!(response[app_start], TLS_RECORD_APPLICATION);
assert_eq!(app_len, 64);
assert_eq!(app_start + 5 + app_len, response.len());
}
}

View File

@@ -1,6 +1,7 @@
#![allow(clippy::too_many_arguments)]
use dashmap::DashMap;
use std::net::SocketAddr;
use std::sync::Arc;
use std::sync::OnceLock;
use std::time::{Duration, Instant};
@@ -793,6 +794,51 @@ async fn connect_tcp_with_upstream(
))
}
/// Returns the `(local, peer)` socket addresses of an upstream stream.
///
/// For the `Tcp` variant each address is `None` if the corresponding lookup
/// on the socket fails. The `Shadowsocks` variant always yields `(None, None)`.
fn socket_addrs_from_upstream_stream(
    stream: &UpstreamStream,
) -> (Option<SocketAddr>, Option<SocketAddr>) {
    match stream {
        UpstreamStream::Shadowsocks(_) => (None, None),
        UpstreamStream::Tcp(tcp) => {
            let local = tcp.local_addr().ok();
            let peer = tcp.peer_addr().ok();
            (local, peer)
        }
    }
}
/// Builds the optional PROXY protocol header sent ahead of a TLS fetch.
///
/// * `proxy_protocol == 0` → `None` (no header is sent).
/// * `proxy_protocol == 2` → a v2 header. Addresses are embedded only when
///   both are known and share an address family; otherwise the builder's
///   address-less header is used.
/// * any other value → a v1 header, using `tcp4`/`tcp6` when both addresses
///   are known and share a family, otherwise the builder's address-less header.
fn build_tls_fetch_proxy_header(
    proxy_protocol: u8,
    src_addr: Option<SocketAddr>,
    dst_addr: Option<SocketAddr>,
) -> Option<Vec<u8>> {
    if proxy_protocol == 0 {
        return None;
    }

    // `Some` only when both endpoints are known.
    let endpoints = src_addr.zip(dst_addr);

    let header = if proxy_protocol == 2 {
        match endpoints {
            // SocketAddr is either V4 or V6, so equal `is_ipv4()` means same family.
            Some((src, dst)) if src.is_ipv4() == dst.is_ipv4() => {
                ProxyProtocolV2Builder::new().with_addrs(src, dst).build()
            }
            _ => ProxyProtocolV2Builder::new().build(),
        }
    } else {
        match endpoints {
            Some((SocketAddr::V4(src), SocketAddr::V4(dst))) => ProxyProtocolV1Builder::new()
                .tcp4(src.into(), dst.into())
                .build(),
            Some((SocketAddr::V6(src), SocketAddr::V6(dst))) => ProxyProtocolV1Builder::new()
                .tcp6(src.into(), dst.into())
                .build(),
            _ => ProxyProtocolV1Builder::new().build(),
        }
    };

    Some(header)
}
fn encode_tls13_certificate_message(cert_chain_der: &[Vec<u8>]) -> Option<Vec<u8>> {
if cert_chain_der.is_empty() {
return None;
@@ -824,7 +870,7 @@ async fn fetch_via_raw_tls_stream<S>(
mut stream: S,
sni: &str,
connect_timeout: Duration,
proxy_protocol: u8,
proxy_header: Option<Vec<u8>>,
profile: TlsFetchProfile,
grease_enabled: bool,
deterministic: bool,
@@ -835,11 +881,7 @@ where
let rng = SecureRandom::new();
let client_hello = build_client_hello(sni, &rng, profile, grease_enabled, deterministic);
timeout(connect_timeout, async {
if proxy_protocol > 0 {
let header = match proxy_protocol {
2 => ProxyProtocolV2Builder::new().build(),
_ => ProxyProtocolV1Builder::new().build(),
};
if let Some(header) = proxy_header.as_ref() {
stream.write_all(&header).await?;
}
stream.write_all(&client_hello).await?;
@@ -921,11 +963,12 @@ async fn fetch_via_raw_tls(
sock = %sock_path,
"Raw TLS fetch using mask unix socket"
);
let proxy_header = build_tls_fetch_proxy_header(proxy_protocol, None, None);
return fetch_via_raw_tls_stream(
stream,
sni,
connect_timeout,
proxy_protocol,
proxy_header,
profile,
grease_enabled,
deterministic,
@@ -956,11 +999,13 @@ async fn fetch_via_raw_tls(
let stream =
connect_tcp_with_upstream(host, port, connect_timeout, upstream, scope, strict_route)
.await?;
let (src_addr, dst_addr) = socket_addrs_from_upstream_stream(&stream);
let proxy_header = build_tls_fetch_proxy_header(proxy_protocol, src_addr, dst_addr);
fetch_via_raw_tls_stream(
stream,
sni,
connect_timeout,
proxy_protocol,
proxy_header,
profile,
grease_enabled,
deterministic,
@@ -972,17 +1017,13 @@ async fn fetch_via_rustls_stream<S>(
mut stream: S,
host: &str,
sni: &str,
proxy_protocol: u8,
proxy_header: Option<Vec<u8>>,
) -> Result<TlsFetchResult>
where
S: AsyncRead + AsyncWrite + Unpin,
{
// rustls handshake path for certificate and basic negotiated metadata.
if proxy_protocol > 0 {
let header = match proxy_protocol {
2 => ProxyProtocolV2Builder::new().build(),
_ => ProxyProtocolV1Builder::new().build(),
};
if let Some(header) = proxy_header.as_ref() {
stream.write_all(&header).await?;
stream.flush().await?;
}
@@ -1082,7 +1123,8 @@ async fn fetch_via_rustls(
sock = %sock_path,
"Rustls fetch using mask unix socket"
);
return fetch_via_rustls_stream(stream, host, sni, proxy_protocol).await;
let proxy_header = build_tls_fetch_proxy_header(proxy_protocol, None, None);
return fetch_via_rustls_stream(stream, host, sni, proxy_header).await;
}
Ok(Err(e)) => {
warn!(
@@ -1108,7 +1150,9 @@ async fn fetch_via_rustls(
let stream =
connect_tcp_with_upstream(host, port, connect_timeout, upstream, scope, strict_route)
.await?;
fetch_via_rustls_stream(stream, host, sni, proxy_protocol).await
let (src_addr, dst_addr) = socket_addrs_from_upstream_stream(&stream);
let proxy_header = build_tls_fetch_proxy_header(proxy_protocol, src_addr, dst_addr);
fetch_via_rustls_stream(stream, host, sni, proxy_header).await
}
/// Fetch real TLS metadata with an adaptive multi-profile strategy.
@@ -1278,11 +1322,13 @@ pub async fn fetch_real_tls(
#[cfg(test)]
mod tests {
use std::net::SocketAddr;
use std::time::{Duration, Instant};
use super::{
ProfileCacheValue, TlsFetchStrategy, build_client_hello, derive_behavior_profile,
encode_tls13_certificate_message, order_profiles, profile_cache, profile_cache_key,
ProfileCacheValue, TlsFetchStrategy, build_client_hello, build_tls_fetch_proxy_header,
derive_behavior_profile, encode_tls13_certificate_message, order_profiles, profile_cache,
profile_cache_key,
};
use crate::config::TlsFetchProfile;
use crate::crypto::SecureRandom;
@@ -1423,4 +1469,48 @@ mod tests {
assert_eq!(first, second);
}
// With two IPv4 endpoints, protocol 2 must produce a v2 header that embeds them.
#[test]
fn test_build_tls_fetch_proxy_header_v2_with_tcp_addrs() {
let src: SocketAddr = "198.51.100.10:42000".parse().expect("valid src");
let dst: SocketAddr = "203.0.113.20:443".parse().expect("valid dst");
let header = build_tls_fetch_proxy_header(2, Some(src), Some(dst)).expect("header");
// Leading 12 bytes: the fixed PROXY protocol v2 signature.
assert_eq!(
&header[..12],
&[
0x0d, 0x0a, 0x0d, 0x0a, 0x00, 0x0d, 0x0a, 0x51, 0x55, 0x49, 0x54, 0x0a
]
);
// Per the PROXY v2 layout: 0x21 = version 2 + PROXY command, 0x11 = TCP over IPv4.
assert_eq!(header[12], 0x21);
assert_eq!(header[13], 0x11);
// Address block length: 4 + 4 address bytes plus 2 + 2 port bytes.
assert_eq!(u16::from_be_bytes([header[14], header[15]]), 12);
// Source address, destination address, then source and destination ports.
assert_eq!(&header[16..20], &[198, 51, 100, 10]);
assert_eq!(&header[20..24], &[203, 0, 113, 20]);
assert_eq!(u16::from_be_bytes([header[24], header[25]]), 42000);
assert_eq!(u16::from_be_bytes([header[26], header[27]]), 443);
}
// An IPv4 source paired with an IPv6 destination must not be encoded as addresses.
#[test]
fn test_build_tls_fetch_proxy_header_v2_mixed_family_falls_back_to_local_command() {
let src: SocketAddr = "198.51.100.10:42000".parse().expect("valid src");
let dst: SocketAddr = "[2001:db8::20]:443".parse().expect("valid dst");
let header = build_tls_fetch_proxy_header(2, Some(src), Some(dst)).expect("header");
// Per the PROXY v2 layout: 0x20 = version 2 + LOCAL command, 0x00 = unspecified
// family/transport, and a zero-length address block.
assert_eq!(header[12], 0x20);
assert_eq!(header[13], 0x00);
assert_eq!(u16::from_be_bytes([header[14], header[15]]), 0);
}
// With two IPv4 endpoints, protocol 1 must produce the textual v1 `PROXY TCP4` line.
#[test]
fn test_build_tls_fetch_proxy_header_v1_with_tcp_addrs() {
let src: SocketAddr = "198.51.100.10:42000".parse().expect("valid src");
let dst: SocketAddr = "203.0.113.20:443".parse().expect("valid dst");
let header = build_tls_fetch_proxy_header(1, Some(src), Some(dst)).expect("header");
// Field order in the line: source IP, destination IP, source port, destination port.
assert_eq!(
header,
b"PROXY TCP4 198.51.100.10 203.0.113.20 42000 443\r\n"
);
}
}

View File

@@ -52,7 +52,7 @@ fn record_lengths_by_type(response: &[u8], wanted_type: u8) -> Vec<usize> {
}
#[test]
fn emulated_server_hello_replays_profile_change_cipher_spec_count() {
fn emulated_server_hello_keeps_single_change_cipher_spec_for_client_compatibility() {
let cached = make_cached();
let rng = SecureRandom::new();
@@ -69,12 +69,12 @@ fn emulated_server_hello_replays_profile_change_cipher_spec_count() {
assert_eq!(response[0], TLS_RECORD_HANDSHAKE);
let ccs_records = record_lengths_by_type(&response, TLS_RECORD_CHANGE_CIPHER);
assert_eq!(ccs_records.len(), 2);
assert_eq!(ccs_records.len(), 1);
assert!(ccs_records.iter().all(|len| *len == 1));
}
#[test]
fn emulated_server_hello_replays_profile_ticket_tail_lengths() {
fn emulated_server_hello_does_not_emit_profile_ticket_tail_when_disabled() {
let cached = make_cached();
let rng = SecureRandom::new();
@@ -90,6 +90,25 @@ fn emulated_server_hello_replays_profile_ticket_tail_lengths() {
);
let app_records = record_lengths_by_type(&response, TLS_RECORD_APPLICATION);
assert!(app_records.len() >= 4);
assert_eq!(&app_records[app_records.len() - 2..], &[220, 180]);
assert_eq!(app_records, vec![1200]);
}
// Checks the application-data record lengths emitted when the final argument is 2.
// NOTE(review): the trailing `2` is presumably the `new_session_tickets` count, and
// 1200 / 220 / 180 presumably come from the profile built by `make_cached()` — confirm.
#[test]
fn emulated_server_hello_uses_profile_ticket_lengths_when_enabled() {
let cached = make_cached();
let rng = SecureRandom::new();
let response = build_emulated_server_hello(
b"secret",
&[0x91; 32],
&[0x92; 16],
&cached,
false,
&rng,
None,
2,
);
// Exactly three application-data records are expected, in this order.
let app_records = record_lengths_by_type(&response, TLS_RECORD_APPLICATION);
assert_eq!(app_records, vec![1200, 220, 180]);
}

View File

@@ -0,0 +1,13 @@
//! Backpressure-driven fairness control for ME reader routing.
//!
//! This module keeps fairness decisions worker-local:
//! each reader loop owns one scheduler instance and mutates it without locks.
mod model;
mod pressure;
mod scheduler;
#[cfg(test)]
pub(crate) use model::PressureState;
pub(crate) use model::{AdmissionDecision, DispatchAction, DispatchFeedback, SchedulerDecision};
pub(crate) use scheduler::{WorkerFairnessConfig, WorkerFairnessSnapshot, WorkerFairnessState};

View File

@@ -0,0 +1,140 @@
use std::time::Instant;
use bytes::Bytes;
/// Pressure level tracked by `PressureEvaluator`.
///
/// Discriminants are explicit (`0..=3`) and exposed through [`PressureState::as_u8`].
/// The derived ordering follows them, so
/// `Normal < Pressured < Shedding < Saturated`.
/// The default value is [`PressureState::Normal`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord)]
#[repr(u8)]
pub(crate) enum PressureState {
    #[default]
    Normal = 0,
    Pressured = 1,
    Shedding = 2,
    Saturated = 3,
}

impl PressureState {
    /// Returns the numeric discriminant of this state.
    #[inline]
    pub(crate) const fn as_u8(self) -> u8 {
        self as u8
    }
}
/// Pressure classification attached to a single flow.
///
/// Stored in `FlowFairnessState::pressure_class` (initially `Healthy`) and
/// reported alongside a dispatched frame in `DispatchCandidate::flow_class`.
// NOTE(review): the rules that move a flow between classes are not visible in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum FlowPressureClass {
Healthy,
Bursty,
Backpressured,
Standing,
}
/// Two-valued queue marker kept per flow in `FlowFairnessState::standing_state`.
///
/// New flow state starts as `Transient`.
// NOTE(review): presumably `Standing` marks a queue that has stayed non-empty for too long — confirm against the scheduler.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum StandingQueueState {
Transient,
Standing,
}
/// Scheduler-side state label kept per flow in `FlowFairnessState::scheduler_state`.
///
/// New flow state starts as `Idle`.
// NOTE(review): transitions between these states are driven by code outside this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum FlowSchedulerState {
Idle,
Active,
Backpressured,
Penalized,
SheddingCandidate,
}
/// A frame held in a queue together with its routing metadata.
#[derive(Debug, Clone)]
pub(crate) struct QueuedFrame {
// Connection the frame belongs to.
pub(crate) conn_id: u64,
// Flag bits carried with the frame; their meaning is defined by the caller.
pub(crate) flags: u32,
// Frame payload.
pub(crate) data: Bytes,
// Timestamp associated with enqueueing — presumably set by the enqueuer; confirm.
pub(crate) enqueued_at: Instant,
}
impl QueuedFrame {
/// Returns the payload length in bytes (`data.len()`) as `u64`.
#[inline]
pub(crate) fn queued_bytes(&self) -> u64 {
self.data.len() as u64
}
}
/// Per-flow bookkeeping record for the fairness scheduler.
///
/// Field semantics beyond their types and initial values are defined by the
/// scheduler code, which is not part of this file.
#[derive(Debug, Clone)]
pub(crate) struct FlowFairnessState {
// Identity fields; the leading underscore suggests they are currently stored but not read.
pub(crate) _flow_id: u64,
pub(crate) _worker_id: u16,
pub(crate) pending_bytes: u64,
// Signed, so the deficit can go negative.
pub(crate) deficit_bytes: i64,
pub(crate) queue_started_at: Option<Instant>,
pub(crate) last_drain_at: Option<Instant>,
pub(crate) recent_drain_bytes: u64,
pub(crate) consecutive_stalls: u8,
pub(crate) consecutive_skips: u8,
pub(crate) penalty_score: u16,
pub(crate) pressure_class: FlowPressureClass,
pub(crate) standing_state: StandingQueueState,
pub(crate) scheduler_state: FlowSchedulerState,
pub(crate) bucket_id: usize,
pub(crate) in_active_ring: bool,
}
impl FlowFairnessState {
/// Creates the initial state for a flow.
///
/// All counters are zero, both timestamps are `None`, the classification is
/// `Healthy` / `Transient` / `Idle`, and the flow is not in the active ring.
/// Only the two identity values and `bucket_id` come from the arguments.
pub(crate) fn new(flow_id: u64, worker_id: u16, bucket_id: usize) -> Self {
Self {
_flow_id: flow_id,
_worker_id: worker_id,
pending_bytes: 0,
deficit_bytes: 0,
queue_started_at: None,
last_drain_at: None,
recent_drain_bytes: 0,
consecutive_stalls: 0,
consecutive_skips: 0,
penalty_score: 0,
pressure_class: FlowPressureClass::Healthy,
standing_state: StandingQueueState::Transient,
scheduler_state: FlowSchedulerState::Idle,
bucket_id,
in_active_ring: false,
}
}
}
/// Result of an admission check: `Admit`, or one of several distinct rejection reasons.
// NOTE(review): each `Reject*` variant presumably names the limit or condition that
// caused the rejection (worker cap, flow cap, bucket cap, saturation, standing flow) — confirm in the scheduler.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum AdmissionDecision {
Admit,
RejectWorkerCap,
RejectFlowCap,
RejectBucketCap,
RejectSaturated,
RejectStandingFlow,
}
/// Scheduler output: either nothing to do (`Idle`) or a frame to dispatch.
#[derive(Debug, Clone)]
pub(crate) enum SchedulerDecision {
Idle,
// Carries the frame to dispatch plus the pressure context it was selected under.
Dispatch(DispatchCandidate),
}
/// Payload of `SchedulerDecision::Dispatch`: a queued frame with its pressure context.
#[derive(Debug, Clone)]
pub(crate) struct DispatchCandidate {
// The frame selected for dispatch.
pub(crate) frame: QueuedFrame,
// Pressure state attached to this candidate.
pub(crate) pressure_state: PressureState,
// Pressure class attached to this candidate's flow.
pub(crate) flow_class: FlowPressureClass,
}
/// Outcome labels for a dispatch attempt.
// NOTE(review): presumably reported back to the scheduler after routing a
// `DispatchCandidate` (routed, destination queue full, channel closed, no such connection) — confirm at the call site.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DispatchFeedback {
Routed,
QueueFull,
ChannelClosed,
NoConn,
}
/// Follow-up action for a flow: keep going (`Continue`) or close it (`CloseFlow`).
// NOTE(review): presumably returned in response to a `DispatchFeedback` — confirm at the call site.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DispatchAction {
Continue,
CloseFlow,
}

View File

@@ -0,0 +1,214 @@
use std::time::{Duration, Instant};
use super::model::PressureState;
/// Point-in-time measurements passed to `PressureEvaluator::maybe_evaluate`.
#[derive(Debug, Clone, Copy)]
pub(crate) struct PressureSignals {
// Denominator of the standing-flow ratio; zero yields a ratio of 0.
pub(crate) active_flows: usize,
// Numerator of the queue-fill ratio against the configured byte budget.
pub(crate) total_queued_bytes: u64,
// Numerator of the standing-flow ratio.
pub(crate) standing_flows: usize,
pub(crate) backpressured_flows: usize,
}
/// Thresholds and cadence settings used by `PressureEvaluator`.
///
/// Each `*_pressured` / `*_shedding` / `*_saturated` triple is compared with `>=`
/// against the same measurement and raises the pressure score to at least
/// 1 / 2 / 3 respectively.
#[derive(Debug, Clone)]
pub(crate) struct PressureConfig {
// Re-derive the target state once every this many `maybe_evaluate` calls (treated as at least 1).
pub(crate) evaluate_every_rounds: u32,
// Consecutive evaluations that must agree on a new target before the state switches (treated as at least 1).
pub(crate) transition_hysteresis_rounds: u8,
// Thresholds on standing_flows / active_flows, in percent.
pub(crate) standing_ratio_pressured_pct: u8,
pub(crate) standing_ratio_shedding_pct: u8,
pub(crate) standing_ratio_saturated_pct: u8,
// Thresholds on total_queued_bytes / max_total_queued_bytes, in percent.
pub(crate) queue_ratio_pressured_pct: u8,
pub(crate) queue_ratio_shedding_pct: u8,
pub(crate) queue_ratio_saturated_pct: u8,
// Window length — presumably the period over which rejects and stalls are counted; confirm in rotate_window_if_needed.
pub(crate) reject_window: Duration,
// Thresholds on the admission-reject count of the current window.
pub(crate) rejects_pressured: u32,
pub(crate) rejects_shedding: u32,
pub(crate) rejects_saturated: u32,
// Thresholds on the route-stall count of the current window.
pub(crate) stalls_pressured: u32,
pub(crate) stalls_shedding: u32,
pub(crate) stalls_saturated: u32,
}
impl Default for PressureConfig {
/// Built-in defaults: evaluate every 8 rounds with 3 rounds of hysteresis,
/// a 2-second window, and the threshold triples listed below.
fn default() -> Self {
Self {
evaluate_every_rounds: 8,
transition_hysteresis_rounds: 3,
standing_ratio_pressured_pct: 20,
standing_ratio_shedding_pct: 35,
standing_ratio_saturated_pct: 50,
queue_ratio_pressured_pct: 65,
queue_ratio_shedding_pct: 82,
queue_ratio_saturated_pct: 94,
reject_window: Duration::from_secs(2),
rejects_pressured: 32,
rejects_shedding: 96,
rejects_saturated: 256,
stalls_pressured: 32,
stalls_shedding: 96,
stalls_saturated: 256,
}
}
}
/// Tracks the current `PressureState` and the bookkeeping needed to change it.
#[derive(Debug)]
pub(crate) struct PressureEvaluator {
// Currently committed state, returned by `state()`.
state: PressureState,
// Most recently derived target that differs from `state`, awaiting confirmation.
candidate_state: PressureState,
// Number of consecutive evaluations that produced `candidate_state`.
candidate_hits: u8,
// `maybe_evaluate` calls since the last actual evaluation.
rounds_since_eval: u32,
// Start of the current counting window.
window_started_at: Instant,
// Admission rejects noted in the current window.
admission_rejects_window: u32,
// Route stalls noted in the current window.
route_stalls_window: u32,
}
impl PressureEvaluator {
/// Creates an evaluator in the `Normal` state with all counters at zero
/// and the counting window starting at `now`.
pub(crate) fn new(now: Instant) -> Self {
Self {
state: PressureState::Normal,
candidate_state: PressureState::Normal,
candidate_hits: 0,
rounds_since_eval: 0,
window_started_at: now,
admission_rejects_window: 0,
route_stalls_window: 0,
}
}
/// Returns the currently committed pressure state.
#[inline]
pub(crate) fn state(&self) -> PressureState {
self.state
}
/// Records one admission reject in the current window (saturating at `u32::MAX`).
pub(crate) fn note_admission_reject(&mut self, now: Instant, cfg: &PressureConfig) {
// Called before counting — presumably starts a fresh window when the old one has expired; confirm.
self.rotate_window_if_needed(now, cfg);
self.admission_rejects_window = self.admission_rejects_window.saturating_add(1);
}
/// Records one route stall in the current window (saturating at `u32::MAX`).
pub(crate) fn note_route_stall(&mut self, now: Instant, cfg: &PressureConfig) {
// Called before counting — presumably starts a fresh window when the old one has expired; confirm.
self.rotate_window_if_needed(now, cfg);
self.route_stalls_window = self.route_stalls_window.saturating_add(1);
}
/// Advances the round counter and, when an evaluation is due, re-derives the
/// target pressure state and applies it subject to hysteresis.
///
/// An evaluation is due when `force` is set or when at least
/// `cfg.evaluate_every_rounds` (minimum 1) calls have accumulated. A target that
/// differs from the current state is only committed after it has been derived
/// in `cfg.transition_hysteresis_rounds` (minimum 1) consecutive evaluations.
///
/// Returns the committed state after this call.
pub(crate) fn maybe_evaluate(
    &mut self,
    now: Instant,
    cfg: &PressureConfig,
    max_total_queued_bytes: u64,
    signals: PressureSignals,
    force: bool,
) -> PressureState {
    self.rotate_window_if_needed(now, cfg);
    self.rounds_since_eval = self.rounds_since_eval.saturating_add(1);

    let eval_interval = cfg.evaluate_every_rounds.max(1);
    let evaluation_due = force || self.rounds_since_eval >= eval_interval;
    if !evaluation_due {
        return self.state;
    }
    self.rounds_since_eval = 0;

    let next = self.derive_target_state(cfg, max_total_queued_bytes, signals);
    if next == self.state {
        // Already where we should be: drop any pending candidate streak.
        self.candidate_state = next;
        self.candidate_hits = 0;
        return self.state;
    }

    // Extend the streak for a repeated candidate, or start a new one.
    self.candidate_hits = if self.candidate_state == next {
        self.candidate_hits.saturating_add(1)
    } else {
        self.candidate_state = next;
        1
    };

    let required_hits = cfg.transition_hysteresis_rounds.max(1);
    if self.candidate_hits >= required_hits {
        self.state = next;
        self.candidate_hits = 0;
    }
    self.state
}
fn derive_target_state(
&self,
cfg: &PressureConfig,
max_total_queued_bytes: u64,
signals: PressureSignals,
) -> PressureState {
let queue_ratio_pct = if max_total_queued_bytes == 0 {
100
} else {
((signals.total_queued_bytes.saturating_mul(100)) / max_total_queued_bytes).min(100)
as u8
};
let standing_ratio_pct = if signals.active_flows == 0 {
0
} else {
((signals.standing_flows.saturating_mul(100)) / signals.active_flows).min(100) as u8
};
let mut pressure_score = 0u8;
if queue_ratio_pct >= cfg.queue_ratio_pressured_pct {
pressure_score = pressure_score.max(1);
}
if queue_ratio_pct >= cfg.queue_ratio_shedding_pct {
pressure_score = pressure_score.max(2);
}
if queue_ratio_pct >= cfg.queue_ratio_saturated_pct {
pressure_score = pressure_score.max(3);
}
if standing_ratio_pct >= cfg.standing_ratio_pressured_pct {
pressure_score = pressure_score.max(1);
}
if standing_ratio_pct >= cfg.standing_ratio_shedding_pct {
pressure_score = pressure_score.max(2);
}
if standing_ratio_pct >= cfg.standing_ratio_saturated_pct {
pressure_score = pressure_score.max(3);
}
if self.admission_rejects_window >= cfg.rejects_pressured {
pressure_score = pressure_score.max(1);
}
if self.admission_rejects_window >= cfg.rejects_shedding {
pressure_score = pressure_score.max(2);
}
if self.admission_rejects_window >= cfg.rejects_saturated {
pressure_score = pressure_score.max(3);
}
if self.route_stalls_window >= cfg.stalls_pressured {
pressure_score = pressure_score.max(1);
}
if self.route_stalls_window >= cfg.stalls_shedding {
pressure_score = pressure_score.max(2);
}
if self.route_stalls_window >= cfg.stalls_saturated {
pressure_score = pressure_score.max(3);
}
if signals.backpressured_flows > signals.active_flows.saturating_div(2)
&& signals.active_flows > 0
{
pressure_score = pressure_score.max(2);
}
match pressure_score {
0 => PressureState::Normal,
1 => PressureState::Pressured,
2 => PressureState::Shedding,
_ => PressureState::Saturated,
}
}
fn rotate_window_if_needed(&mut self, now: Instant, cfg: &PressureConfig) {
if now.saturating_duration_since(self.window_started_at) < cfg.reject_window {
return;
}
self.window_started_at = now;
self.admission_rejects_window = 0;
self.route_stalls_window = 0;
}
}

View File

@@ -0,0 +1,556 @@
use std::collections::{HashMap, VecDeque};
use std::time::{Duration, Instant};
use bytes::Bytes;
use super::model::{
AdmissionDecision, DispatchAction, DispatchCandidate, DispatchFeedback, FlowFairnessState,
FlowPressureClass, FlowSchedulerState, PressureState, QueuedFrame, SchedulerDecision,
StandingQueueState,
};
use super::pressure::{PressureConfig, PressureEvaluator, PressureSignals};
/// Tuning knobs for one worker's fairness scheduler (`WorkerFairnessState`).
#[derive(Debug, Clone)]
pub(crate) struct WorkerFairnessConfig {
/// Identifier passed to every `FlowFairnessState` created by this worker.
pub(crate) worker_id: u16,
/// New flows are rejected once this many flows are tracked.
pub(crate) max_active_flows: usize,
/// Upper bound on bytes queued across all flows of the worker.
pub(crate) max_total_queued_bytes: u64,
/// Upper bound on bytes queued for a single flow.
pub(crate) max_flow_queued_bytes: u64,
/// Deficit added per scheduler visit while pressure is `Normal`.
pub(crate) base_quantum_bytes: u32,
/// Deficit added per visit while pressure is `Pressured` or `Shedding`.
pub(crate) pressured_quantum_bytes: u32,
/// Deficit added per visit for penalized flows, and for all flows while
/// pressure is `Saturated`.
pub(crate) penalized_quantum_bytes: u32,
/// Minimum queue age (and minimum time without a drain) before a flow can
/// be classified as standing.
pub(crate) standing_queue_min_age: Duration,
/// Minimum backlog before a flow can be classified as standing; also the
/// backlog at which an unstalled flow is classified as bursty.
pub(crate) standing_queue_min_backlog_bytes: u64,
/// Consecutive stalls that satisfy the stall condition of the standing
/// classification.
pub(crate) standing_stall_threshold: u8,
/// Consecutive stalls after which a standing flow's frame is dropped
/// while pressure is `Shedding`.
pub(crate) max_consecutive_stalls_before_shed: u8,
/// Consecutive stalls after which a flow is closed while pressure is
/// `Saturated`.
pub(crate) max_consecutive_stalls_before_close: u8,
/// Number of soft buckets flows are spread over (treated as at least 1).
pub(crate) soft_bucket_count: usize,
/// Percentage of `max_total_queued_bytes` a single bucket may hold
/// (treated as at least 1; the cap never drops below
/// `max_flow_queued_bytes`).
pub(crate) soft_bucket_share_pct: u8,
/// Configuration of the embedded pressure evaluator.
pub(crate) pressure: PressureConfig,
}
impl Default for WorkerFairnessConfig {
    /// Baseline limits: 4096 flows, 16 MiB per worker, 512 KiB per flow,
    /// 64 soft buckets at a 25% share each, and default pressure tuning.
    fn default() -> Self {
        const KIB: u64 = 1024;
        const MIB: u64 = 1024 * KIB;
        // Quanta are `u32`, so they get their own unit constant.
        const QUANTUM_KIB: u32 = 1024;

        Self {
            worker_id: 0,
            max_active_flows: 4096,
            max_total_queued_bytes: 16 * MIB,
            max_flow_queued_bytes: 512 * KIB,
            base_quantum_bytes: 32 * QUANTUM_KIB,
            pressured_quantum_bytes: 16 * QUANTUM_KIB,
            penalized_quantum_bytes: 8 * QUANTUM_KIB,
            standing_queue_min_age: Duration::from_millis(250),
            standing_queue_min_backlog_bytes: 64 * KIB,
            standing_stall_threshold: 3,
            max_consecutive_stalls_before_shed: 4,
            max_consecutive_stalls_before_close: 16,
            soft_bucket_count: 64,
            soft_bucket_share_pct: 25,
            pressure: PressureConfig::default(),
        }
    }
}
/// Per-flow record kept by the worker: fairness bookkeeping plus the frames
/// still waiting to be dispatched.
struct FlowEntry {
/// Scheduling and classification state of the flow.
fairness: FlowFairnessState,
/// Frames waiting for dispatch, oldest at the front.
queue: VecDeque<QueuedFrame>,
}
impl FlowEntry {
    /// Builds the record for a new flow: fresh fairness state for the given
    /// ids and an empty frame queue.
    fn new(flow_id: u64, worker_id: u16, bucket_id: usize) -> Self {
        let fairness = FlowFairnessState::new(flow_id, worker_id, bucket_id);
        let queue = VecDeque::new();
        Self { fairness, queue }
    }
}
/// Point-in-time copy of a worker's fairness gauges and cumulative counters,
/// as produced by `WorkerFairnessState::snapshot`.
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct WorkerFairnessSnapshot {
/// Committed pressure state at snapshot time.
pub(crate) pressure_state: PressureState,
/// Number of flows currently tracked by the worker.
pub(crate) active_flows: usize,
/// Bytes currently queued across all flows.
pub(crate) total_queued_bytes: u64,
/// Flows classified as standing at the last pressure evaluation.
pub(crate) standing_flows: usize,
/// Flows counted as backpressured at the last pressure evaluation.
pub(crate) backpressured_flows: usize,
/// Cumulative calls to `next_decision`.
pub(crate) scheduler_rounds: u64,
/// Cumulative quantum grants handed to flows.
pub(crate) deficit_grants: u64,
/// Cumulative visits skipped because the deficit did not cover the front frame.
pub(crate) deficit_skips: u64,
/// Cumulative rejected enqueue attempts.
pub(crate) enqueue_rejects: u64,
/// Cumulative frames dropped by shedding.
pub(crate) shed_drops: u64,
/// Cumulative fairness penalties (incremented together with `shed_drops`).
pub(crate) fairness_penalties: u64,
/// Cumulative `QueueFull` dispatch feedbacks.
pub(crate) downstream_stalls: u64,
}
/// Per-worker fairness scheduler: bounded per-flow queues, a round-robin ring
/// of flows with pending frames, soft-bucket accounting and a pressure
/// evaluator.
pub(crate) struct WorkerFairnessState {
/// Limits and tuning for this worker.
config: WorkerFairnessConfig,
/// Worker-wide pressure state machine.
pressure: PressureEvaluator,
/// All tracked flows, keyed by connection id.
flows: HashMap<u64, FlowEntry>,
/// Connection ids waiting for a scheduler visit, in service order.
active_ring: VecDeque<u64>,
/// Bytes currently queued across all flows.
total_queued_bytes: u64,
/// Bytes currently queued per soft bucket.
bucket_queued_bytes: Vec<u64>,
/// Number of tracked flows per soft bucket.
bucket_active_flows: Vec<usize>,
/// Standing flows found by the last pressure evaluation.
standing_flow_count: usize,
/// Backpressured flows found by the last pressure evaluation.
backpressured_flow_count: usize,
/// Cumulative calls to `next_decision`.
scheduler_rounds: u64,
/// Cumulative quantum grants.
deficit_grants: u64,
/// Cumulative deficit skips.
deficit_skips: u64,
/// Cumulative rejected enqueue attempts.
enqueue_rejects: u64,
/// Cumulative frames dropped by shedding.
shed_drops: u64,
/// Cumulative fairness penalties.
fairness_penalties: u64,
/// Cumulative `QueueFull` dispatch feedbacks.
downstream_stalls: u64,
}
impl WorkerFairnessState {
    /// Creates an empty scheduler for one worker.
    ///
    /// The bucket tables are sized from `config.soft_bucket_count`, with a
    /// minimum of one bucket so `bucket_for` always has a valid modulus.
    pub(crate) fn new(config: WorkerFairnessConfig, now: Instant) -> Self {
        let bucket_count = config.soft_bucket_count.max(1);
        Self {
            config,
            pressure: PressureEvaluator::new(now),
            flows: HashMap::new(),
            active_ring: VecDeque::new(),
            total_queued_bytes: 0,
            bucket_queued_bytes: vec![0; bucket_count],
            bucket_active_flows: vec![0; bucket_count],
            standing_flow_count: 0,
            backpressured_flow_count: 0,
            scheduler_rounds: 0,
            deficit_grants: 0,
            deficit_skips: 0,
            enqueue_rejects: 0,
            shed_drops: 0,
            fairness_penalties: 0,
            downstream_stalls: 0,
        }
    }

    /// Returns the currently committed worker pressure state.
    pub(crate) fn pressure_state(&self) -> PressureState {
        self.pressure.state()
    }

    /// Copies the current gauges and cumulative counters.
    pub(crate) fn snapshot(&self) -> WorkerFairnessSnapshot {
        WorkerFairnessSnapshot {
            pressure_state: self.pressure.state(),
            active_flows: self.flows.len(),
            total_queued_bytes: self.total_queued_bytes,
            standing_flows: self.standing_flow_count,
            backpressured_flows: self.backpressured_flow_count,
            scheduler_rounds: self.scheduler_rounds,
            deficit_grants: self.deficit_grants,
            deficit_skips: self.deficit_skips,
            enqueue_rejects: self.enqueue_rejects,
            shed_drops: self.shed_drops,
            fairness_penalties: self.fairness_penalties,
            downstream_stalls: self.downstream_stalls,
        }
    }

    /// Tries to queue one data frame for `conn_id`.
    ///
    /// Checks run in this order and the first failure decides the result:
    /// worker saturated, worker byte budget, worker flow limit (new flows
    /// only), soft-bucket byte cap, per-flow byte cap, and finally the
    /// standing-flow rule (a standing flow is refused while pressure is
    /// `Shedding` or worse). Every rejection is counted and reported to the
    /// pressure evaluator.
    ///
    /// Returns `AdmissionDecision::Admit` when the frame was queued.
    pub(crate) fn enqueue_data(
        &mut self,
        conn_id: u64,
        flags: u32,
        data: Bytes,
        now: Instant,
    ) -> AdmissionDecision {
        let frame = QueuedFrame {
            conn_id,
            flags,
            data,
            enqueued_at: now,
        };
        let frame_bytes = frame.queued_bytes();

        if self.pressure.state() == PressureState::Saturated {
            // Saturated rejects do not force a re-evaluation.
            return self.reject_admission(now, AdmissionDecision::RejectSaturated, false);
        }
        if self.total_queued_bytes.saturating_add(frame_bytes) > self.config.max_total_queued_bytes
        {
            return self.reject_admission(now, AdmissionDecision::RejectWorkerCap, true);
        }
        let is_new_flow = !self.flows.contains_key(&conn_id);
        if is_new_flow && self.flows.len() >= self.config.max_active_flows {
            return self.reject_admission(now, AdmissionDecision::RejectWorkerCap, true);
        }

        let bucket_id = self.bucket_for(conn_id);
        // A bucket may hold its percentage share of the worker budget, but
        // never less than what a single flow is allowed to queue.
        let bucket_cap = self
            .config
            .max_total_queued_bytes
            .saturating_mul(self.config.soft_bucket_share_pct.max(1) as u64)
            .saturating_div(100)
            .max(self.config.max_flow_queued_bytes);
        if self.bucket_queued_bytes[bucket_id].saturating_add(frame_bytes) > bucket_cap {
            return self.reject_admission(now, AdmissionDecision::RejectBucketCap, true);
        }

        if is_new_flow {
            self.bucket_active_flows[bucket_id] =
                self.bucket_active_flows[bucket_id].saturating_add(1);
        }
        let worker_id = self.config.worker_id;
        let entry = self
            .flows
            .entry(conn_id)
            .or_insert_with(|| FlowEntry::new(conn_id, worker_id, bucket_id));

        let flow_rejection = if entry.fairness.pending_bytes.saturating_add(frame_bytes)
            > self.config.max_flow_queued_bytes
        {
            Some(AdmissionDecision::RejectFlowCap)
        } else if self.pressure.state() >= PressureState::Shedding
            && entry.fairness.standing_state == StandingQueueState::Standing
        {
            Some(AdmissionDecision::RejectStandingFlow)
        } else {
            None
        };
        if let Some(decision) = flow_rejection {
            // A flow created only for this rejected frame must not linger as
            // an empty entry: it would keep counting against
            // `max_active_flows` and the bucket's flow count.
            if is_new_flow {
                self.remove_flow(conn_id);
            }
            return self.reject_admission(now, decision, true);
        }

        entry.fairness.pending_bytes = entry.fairness.pending_bytes.saturating_add(frame_bytes);
        if entry.fairness.queue_started_at.is_none() {
            entry.fairness.queue_started_at = Some(now);
        }
        entry.queue.push_back(frame);
        if !entry.fairness.in_active_ring {
            entry.fairness.in_active_ring = true;
            self.active_ring.push_back(conn_id);
        }
        self.total_queued_bytes = self.total_queued_bytes.saturating_add(frame_bytes);
        self.bucket_queued_bytes[bucket_id] =
            self.bucket_queued_bytes[bucket_id].saturating_add(frame_bytes);

        self.evaluate_pressure(now, true);
        AdmissionDecision::Admit
    }

    /// Records one rejected enqueue: notifies the pressure evaluator, bumps
    /// the reject counter and, when `reevaluate` is set, forces a pressure
    /// evaluation. Returns `decision` unchanged so callers can `return` it.
    fn reject_admission(
        &mut self,
        now: Instant,
        decision: AdmissionDecision,
        reevaluate: bool,
    ) -> AdmissionDecision {
        self.pressure
            .note_admission_reject(now, &self.config.pressure);
        self.enqueue_rejects = self.enqueue_rejects.saturating_add(1);
        if reevaluate {
            self.evaluate_pressure(now, true);
        }
        decision
    }

    /// Runs one scheduler round and returns the next frame to dispatch, or
    /// `SchedulerDecision::Idle` when no flow in the ring can send.
    ///
    /// Each flow in the ring is visited at most once per call. A visit grants
    /// the flow its quantum; the front frame is released only if the
    /// accumulated deficit covers it, otherwise the flow is skipped and
    /// re-queued. At most one frame is released per call.
    pub(crate) fn next_decision(&mut self, now: Instant) -> SchedulerDecision {
        self.scheduler_rounds = self.scheduler_rounds.saturating_add(1);
        self.evaluate_pressure(now, false);

        let active_len = self.active_ring.len();
        for _ in 0..active_len {
            let Some(conn_id) = self.active_ring.pop_front() else {
                break;
            };
            let mut candidate = None;
            let mut requeue_active = false;
            let mut drained_bytes = 0u64;
            let mut bucket_id = 0usize;
            let pressure_state = self.pressure.state();

            // Ids whose flow no longer exists simply fall out of the ring.
            if let Some(flow) = self.flows.get_mut(&conn_id) {
                bucket_id = flow.fairness.bucket_id;
                if flow.queue.is_empty() {
                    flow.fairness.in_active_ring = false;
                    flow.fairness.scheduler_state = FlowSchedulerState::Idle;
                    flow.fairness.pending_bytes = 0;
                    flow.fairness.queue_started_at = None;
                    continue;
                }

                Self::classify_flow(&self.config, pressure_state, now, &mut flow.fairness);
                let quantum =
                    Self::effective_quantum_bytes(&self.config, pressure_state, &flow.fairness);
                flow.fairness.deficit_bytes = flow
                    .fairness
                    .deficit_bytes
                    .saturating_add(i64::from(quantum));
                self.deficit_grants = self.deficit_grants.saturating_add(1);

                let front_len = flow.queue.front().map_or(0, |front| front.queued_bytes());
                if flow.fairness.deficit_bytes < front_len as i64 {
                    // Not enough credit yet: keep the flow queued and move on.
                    flow.fairness.consecutive_skips =
                        flow.fairness.consecutive_skips.saturating_add(1);
                    self.deficit_skips = self.deficit_skips.saturating_add(1);
                    requeue_active = true;
                } else if let Some(frame) = flow.queue.pop_front() {
                    drained_bytes = frame.queued_bytes();
                    flow.fairness.pending_bytes =
                        flow.fairness.pending_bytes.saturating_sub(drained_bytes);
                    flow.fairness.deficit_bytes = flow
                        .fairness
                        .deficit_bytes
                        .saturating_sub(drained_bytes as i64);
                    flow.fairness.consecutive_skips = 0;
                    // Queue age is now measured from the new front frame.
                    flow.fairness.queue_started_at =
                        flow.queue.front().map(|front| front.enqueued_at);
                    requeue_active = !flow.queue.is_empty();
                    if !requeue_active {
                        flow.fairness.scheduler_state = FlowSchedulerState::Idle;
                        flow.fairness.in_active_ring = false;
                    }
                    candidate = Some(DispatchCandidate {
                        pressure_state,
                        flow_class: flow.fairness.pressure_class,
                        frame,
                    });
                }
            }

            if drained_bytes > 0 {
                self.total_queued_bytes = self.total_queued_bytes.saturating_sub(drained_bytes);
                self.bucket_queued_bytes[bucket_id] =
                    self.bucket_queued_bytes[bucket_id].saturating_sub(drained_bytes);
            }
            if requeue_active {
                if let Some(flow) = self.flows.get_mut(&conn_id) {
                    flow.fairness.in_active_ring = true;
                }
                self.active_ring.push_back(conn_id);
            }
            if let Some(candidate) = candidate {
                return SchedulerDecision::Dispatch(candidate);
            }
        }
        SchedulerDecision::Idle
    }

    /// Feeds the outcome of dispatching `candidate` back into the scheduler.
    ///
    /// * `Routed` - records the drain and clears the flow's stall streak.
    /// * `QueueFull` - counts a stall; the frame is either dropped (shedding)
    ///   or put back at the front of its flow. A flow that keeps stalling
    ///   while the worker is saturated is removed.
    /// * `ChannelClosed` / `NoConn` - the flow is removed.
    ///
    /// Returns `DispatchAction::CloseFlow` when the flow was removed and the
    /// caller should close the connection, `DispatchAction::Continue`
    /// otherwise.
    pub(crate) fn apply_dispatch_feedback(
        &mut self,
        conn_id: u64,
        candidate: DispatchCandidate,
        feedback: DispatchFeedback,
        now: Instant,
    ) -> DispatchAction {
        match feedback {
            DispatchFeedback::Routed => {
                if let Some(flow) = self.flows.get_mut(&conn_id) {
                    flow.fairness.last_drain_at = Some(now);
                    flow.fairness.recent_drain_bytes = flow
                        .fairness
                        .recent_drain_bytes
                        .saturating_add(candidate.frame.queued_bytes());
                    flow.fairness.consecutive_stalls = 0;
                    if flow.fairness.scheduler_state != FlowSchedulerState::Idle {
                        flow.fairness.scheduler_state = FlowSchedulerState::Active;
                    }
                }
                self.evaluate_pressure(now, false);
                DispatchAction::Continue
            }
            DispatchFeedback::QueueFull => {
                self.pressure.note_route_stall(now, &self.config.pressure);
                self.downstream_stalls = self.downstream_stalls.saturating_add(1);
                let Some(flow) = self.flows.get_mut(&conn_id) else {
                    self.evaluate_pressure(now, true);
                    return DispatchAction::Continue;
                };
                flow.fairness.consecutive_stalls =
                    flow.fairness.consecutive_stalls.saturating_add(1);
                flow.fairness.scheduler_state = FlowSchedulerState::Backpressured;
                flow.fairness.pressure_class = FlowPressureClass::Backpressured;

                let state = self.pressure.state();
                // Saturated: always drop. Shedding: drop only for standing
                // flows that have stalled often enough.
                let should_shed_frame = matches!(state, PressureState::Saturated)
                    || (matches!(state, PressureState::Shedding)
                        && flow.fairness.standing_state == StandingQueueState::Standing
                        && flow.fairness.consecutive_stalls
                            >= self.config.max_consecutive_stalls_before_shed);
                if should_shed_frame {
                    // The frame's bytes were already released when it was
                    // dequeued, so dropping it needs no byte accounting.
                    self.shed_drops = self.shed_drops.saturating_add(1);
                    self.fairness_penalties = self.fairness_penalties.saturating_add(1);
                } else {
                    // Put the frame back where it came from and restore the
                    // byte accounting undone by the dequeue.
                    let frame_bytes = candidate.frame.queued_bytes();
                    let bucket_id = flow.fairness.bucket_id;
                    flow.queue.push_front(candidate.frame);
                    flow.fairness.pending_bytes =
                        flow.fairness.pending_bytes.saturating_add(frame_bytes);
                    flow.fairness.queue_started_at =
                        flow.queue.front().map(|front| front.enqueued_at);
                    self.total_queued_bytes = self.total_queued_bytes.saturating_add(frame_bytes);
                    self.bucket_queued_bytes[bucket_id] =
                        self.bucket_queued_bytes[bucket_id].saturating_add(frame_bytes);
                    if !flow.fairness.in_active_ring {
                        flow.fairness.in_active_ring = true;
                        self.active_ring.push_back(conn_id);
                    }
                }

                if flow.fairness.consecutive_stalls
                    >= self.config.max_consecutive_stalls_before_close
                    && self.pressure.state() == PressureState::Saturated
                {
                    self.remove_flow(conn_id);
                    self.evaluate_pressure(now, true);
                    return DispatchAction::CloseFlow;
                }
                self.evaluate_pressure(now, true);
                DispatchAction::Continue
            }
            DispatchFeedback::ChannelClosed | DispatchFeedback::NoConn => {
                self.remove_flow(conn_id);
                self.evaluate_pressure(now, true);
                DispatchAction::CloseFlow
            }
        }
    }

    /// Forgets `conn_id`: drops its queued frames, releases their bytes from
    /// the worker and bucket totals, and removes its slot from the active
    /// ring. Unknown ids are ignored.
    pub(crate) fn remove_flow(&mut self, conn_id: u64) {
        let Some(entry) = self.flows.remove(&conn_id) else {
            return;
        };
        let bucket_id = entry.fairness.bucket_id;
        self.bucket_active_flows[bucket_id] = self.bucket_active_flows[bucket_id].saturating_sub(1);

        // Leaving the id in the ring would let a later flow with the same id
        // be queued a second time and receive two visits per round.
        if entry.fairness.in_active_ring {
            self.active_ring.retain(|queued_id| *queued_id != conn_id);
        }

        let reclaimed = entry
            .queue
            .iter()
            .fold(0u64, |sum, frame| sum.saturating_add(frame.queued_bytes()));
        self.total_queued_bytes = self.total_queued_bytes.saturating_sub(reclaimed);
        self.bucket_queued_bytes[bucket_id] =
            self.bucket_queued_bytes[bucket_id].saturating_sub(reclaimed);
    }

    /// Reclassifies every flow, refreshes the standing/backpressured gauges
    /// and hands the resulting signals to the pressure evaluator.
    ///
    /// Note: this walks all tracked flows on every call.
    fn evaluate_pressure(&mut self, now: Instant, force: bool) {
        let mut standing = 0usize;
        let mut backpressured = 0usize;
        for flow in self.flows.values_mut() {
            Self::classify_flow(&self.config, self.pressure.state(), now, &mut flow.fairness);
            if flow.fairness.standing_state == StandingQueueState::Standing {
                standing = standing.saturating_add(1);
            }
            if matches!(
                flow.fairness.scheduler_state,
                FlowSchedulerState::Backpressured
                    | FlowSchedulerState::Penalized
                    | FlowSchedulerState::SheddingCandidate
            ) {
                backpressured = backpressured.saturating_add(1);
            }
        }
        self.standing_flow_count = standing;
        self.backpressured_flow_count = backpressured;
        let _ = self.pressure.maybe_evaluate(
            now,
            &self.config.pressure,
            self.config.max_total_queued_bytes,
            PressureSignals {
                active_flows: self.flows.len(),
                total_queued_bytes: self.total_queued_bytes,
                standing_flows: standing,
                backpressured_flows: backpressured,
            },
            force,
        );
    }

    /// Updates one flow's pressure class, standing state, scheduler state and
    /// penalty score from its backlog, queue age and stall history.
    ///
    /// A flow is standing when its backlog and queue age both reach the
    /// configured minimums and it has either stalled often enough or not
    /// drained for at least `standing_queue_min_age` (or never drained).
    fn classify_flow(
        config: &WorkerFairnessConfig,
        pressure_state: PressureState,
        now: Instant,
        fairness: &mut FlowFairnessState,
    ) {
        if fairness.pending_bytes == 0 {
            fairness.pressure_class = FlowPressureClass::Healthy;
            fairness.standing_state = StandingQueueState::Transient;
            fairness.scheduler_state = FlowSchedulerState::Idle;
            fairness.penalty_score = fairness.penalty_score.saturating_sub(1);
            return;
        }

        let queue_age = fairness
            .queue_started_at
            .map(|ts| now.saturating_duration_since(ts))
            .unwrap_or_default();
        let drain_stalled = fairness
            .last_drain_at
            .map(|ts| now.saturating_duration_since(ts) >= config.standing_queue_min_age)
            .unwrap_or(true);
        let standing = fairness.pending_bytes >= config.standing_queue_min_backlog_bytes
            && queue_age >= config.standing_queue_min_age
            && (fairness.consecutive_stalls >= config.standing_stall_threshold || drain_stalled);

        if standing {
            fairness.standing_state = StandingQueueState::Standing;
            fairness.pressure_class = FlowPressureClass::Standing;
            fairness.penalty_score = fairness.penalty_score.saturating_add(1);
            fairness.scheduler_state = if pressure_state >= PressureState::Shedding {
                FlowSchedulerState::SheddingCandidate
            } else {
                FlowSchedulerState::Penalized
            };
            return;
        }

        fairness.standing_state = StandingQueueState::Transient;
        if fairness.consecutive_stalls > 0 {
            fairness.pressure_class = FlowPressureClass::Backpressured;
            fairness.scheduler_state = FlowSchedulerState::Backpressured;
        } else if fairness.pending_bytes >= config.standing_queue_min_backlog_bytes {
            fairness.pressure_class = FlowPressureClass::Bursty;
            fairness.scheduler_state = FlowSchedulerState::Active;
        } else {
            fairness.pressure_class = FlowPressureClass::Healthy;
            fairness.scheduler_state = FlowSchedulerState::Active;
        }
        fairness.penalty_score = fairness.penalty_score.saturating_sub(1);
    }

    /// Quantum (never zero) granted to a flow on one scheduler visit.
    ///
    /// Penalized flows and shedding candidates always get the penalized
    /// quantum; other flows get a quantum chosen by the worker pressure
    /// state.
    fn effective_quantum_bytes(
        config: &WorkerFairnessConfig,
        pressure_state: PressureState,
        fairness: &FlowFairnessState,
    ) -> u32 {
        let penalized = matches!(
            fairness.scheduler_state,
            FlowSchedulerState::Penalized | FlowSchedulerState::SheddingCandidate
        );
        if penalized {
            return config.penalized_quantum_bytes.max(1);
        }
        match pressure_state {
            PressureState::Normal => config.base_quantum_bytes.max(1),
            PressureState::Pressured => config.pressured_quantum_bytes.max(1),
            PressureState::Shedding => config.pressured_quantum_bytes.max(1),
            PressureState::Saturated => config.penalized_quantum_bytes.max(1),
        }
    }

    /// Maps a connection id to its soft bucket by taking the id modulo the
    /// number of buckets.
    fn bucket_for(&self, conn_id: u64) -> usize {
        (conn_id as usize) % self.bucket_queued_bytes.len().max(1)
    }
}

View File

@@ -2,6 +2,10 @@
mod codec;
mod config_updater;
mod fairness;
#[cfg(test)]
#[path = "tests/fairness_security_tests.rs"]
mod fairness_security_tests;
mod handshake;
mod health;
#[cfg(test)]

View File

@@ -20,11 +20,15 @@ use crate::protocol::constants::*;
use crate::stats::Stats;
use super::codec::{RpcChecksumMode, WriterCommand, rpc_crc};
use super::fairness::{
AdmissionDecision, DispatchAction, DispatchFeedback, SchedulerDecision, WorkerFairnessConfig,
WorkerFairnessSnapshot, WorkerFairnessState,
};
use super::registry::RouteResult;
use super::{ConnRegistry, MeResponse};
const DATA_ROUTE_MAX_ATTEMPTS: usize = 3;
const DATA_ROUTE_QUEUE_FULL_STARVATION_THRESHOLD: u8 = 3;
const FAIRNESS_DRAIN_BUDGET_PER_LOOP: usize = 128;
fn should_close_on_route_result_for_data(result: RouteResult) -> bool {
matches!(result, RouteResult::NoConn | RouteResult::ChannelClosed)
@@ -77,6 +81,118 @@ async fn route_data_with_retry(
}
}
/// Translates a registry routing outcome into the feedback value understood
/// by the fairness scheduler. Both queue-full variants collapse into
/// `DispatchFeedback::QueueFull`.
#[inline]
fn route_feedback(result: RouteResult) -> DispatchFeedback {
    match result {
        RouteResult::QueueFullBase | RouteResult::QueueFullHigh => DispatchFeedback::QueueFull,
        RouteResult::ChannelClosed => DispatchFeedback::ChannelClosed,
        RouteResult::NoConn => DispatchFeedback::NoConn,
        RouteResult::Routed => DispatchFeedback::Routed,
    }
}
/// Bumps the drop counters that correspond to a failed routing outcome.
///
/// Queue-full outcomes increment the shared queue-full counter followed by
/// the base/high specific one; `Routed` touches nothing.
fn report_route_drop(result: RouteResult, stats: &Stats) {
    match result {
        RouteResult::Routed => {}
        RouteResult::NoConn => stats.increment_me_route_drop_no_conn(),
        RouteResult::ChannelClosed => stats.increment_me_route_drop_channel_closed(),
        RouteResult::QueueFullBase | RouteResult::QueueFullHigh => {
            stats.increment_me_route_drop_queue_full();
            if matches!(result, RouteResult::QueueFullBase) {
                stats.increment_me_route_drop_queue_full_base();
            } else {
                stats.increment_me_route_drop_queue_full_high();
            }
        }
    }
}
/// Publishes a fairness snapshot to `stats`.
///
/// Gauges are set to the values in `current`; cumulative counters are
/// advanced by the (saturating) difference between `current` and `prev`.
/// Afterwards `prev` is replaced with `current` so the next call reports only
/// new activity.
fn apply_fairness_metrics_delta(
    stats: &Stats,
    prev: &mut WorkerFairnessSnapshot,
    current: WorkerFairnessSnapshot,
) {
    // Growth of a cumulative counter since the previous snapshot.
    let grown = |now: u64, before: u64| now.saturating_sub(before);

    let rounds = grown(current.scheduler_rounds, prev.scheduler_rounds);
    let grants = grown(current.deficit_grants, prev.deficit_grants);
    let skips = grown(current.deficit_skips, prev.deficit_skips);
    let rejects = grown(current.enqueue_rejects, prev.enqueue_rejects);
    let sheds = grown(current.shed_drops, prev.shed_drops);
    let penalties = grown(current.fairness_penalties, prev.fairness_penalties);
    let stalls = grown(current.downstream_stalls, prev.downstream_stalls);

    stats.set_me_fair_active_flows_gauge(current.active_flows as u64);
    stats.set_me_fair_queued_bytes_gauge(current.total_queued_bytes);
    stats.set_me_fair_standing_flows_gauge(current.standing_flows as u64);
    stats.set_me_fair_backpressured_flows_gauge(current.backpressured_flows as u64);
    stats.set_me_fair_pressure_state_gauge(current.pressure_state.as_u8() as u64);

    stats.add_me_fair_scheduler_rounds_total(rounds);
    stats.add_me_fair_deficit_grants_total(grants);
    stats.add_me_fair_deficit_skips_total(skips);
    stats.add_me_fair_enqueue_rejects_total(rejects);
    stats.add_me_fair_shed_drops_total(sheds);
    stats.add_me_fair_penalties_total(penalties);
    stats.add_me_fair_downstream_stalls_total(stalls);

    *prev = current;
}
/// Dispatches up to `FAIRNESS_DRAIN_BUDGET_PER_LOOP` frames chosen by the
/// fairness scheduler, stopping early when the scheduler has nothing to send.
///
/// For every frame the routing outcome is reported to `stats` and fed back to
/// the scheduler. The flow is closed (removed from the scheduler and the
/// registry, close command sent through `tx`) when its queue-full streak is
/// exhausted, the scheduler asks for it, or the routing outcome itself calls
/// for closing.
async fn drain_fairness_scheduler(
    fairness: &mut WorkerFairnessState,
    reg: &ConnRegistry,
    tx: &mpsc::Sender<WriterCommand>,
    data_route_queue_full_streak: &mut HashMap<u64, u8>,
    route_wait_ms: u64,
    stats: &Stats,
) {
    for _ in 0..FAIRNESS_DRAIN_BUDGET_PER_LOOP {
        let now = Instant::now();
        let candidate = match fairness.next_decision(now) {
            SchedulerDecision::Dispatch(candidate) => candidate,
            _ => break,
        };
        let cid = candidate.frame.conn_id;
        // Read but otherwise unused; presumably kept so these candidate
        // fields count as used - confirm before removing.
        let _pressure_state = candidate.pressure_state;
        let _flow_class = candidate.flow_class;

        let routed = route_data_with_retry(
            reg,
            cid,
            candidate.frame.flags,
            candidate.frame.data.clone(),
            route_wait_ms,
        )
        .await;
        if matches!(routed, RouteResult::Routed) {
            data_route_queue_full_streak.remove(&cid);
        } else {
            report_route_drop(routed, stats);
        }

        let action = fairness.apply_dispatch_feedback(cid, candidate, route_feedback(routed), now);

        // A queue-full outcome always extends the streak, even when the flow
        // ends up being closed for another reason.
        let streak_exhausted = if is_data_route_queue_full(routed) {
            let streak = data_route_queue_full_streak.entry(cid).or_insert(0);
            *streak = streak.saturating_add(1);
            should_close_on_queue_full_streak(*streak)
        } else {
            false
        };

        let must_close = streak_exhausted
            || action == DispatchAction::CloseFlow
            || should_close_on_route_result_for_data(routed);
        if must_close {
            fairness.remove_flow(cid);
            data_route_queue_full_streak.remove(&cid);
            reg.unregister(cid).await;
            send_close_conn(tx, cid).await;
        }
    }
}
pub(crate) async fn reader_loop(
mut rd: tokio::io::ReadHalf<TcpStream>,
dk: [u8; 32],
@@ -98,7 +214,21 @@ pub(crate) async fn reader_loop(
let mut raw = enc_leftover;
let mut expected_seq: i32 = 0;
let mut data_route_queue_full_streak = HashMap::<u64, u8>::new();
let mut fairness = WorkerFairnessState::new(
WorkerFairnessConfig {
worker_id: (writer_id as u16).saturating_add(1),
max_active_flows: reg.route_channel_capacity().saturating_mul(4).max(256),
max_total_queued_bytes: (reg.route_channel_capacity() as u64)
.saturating_mul(16 * 1024)
.max(4 * 1024 * 1024),
max_flow_queued_bytes: (reg.route_channel_capacity() as u64)
.saturating_mul(2 * 1024)
.clamp(64 * 1024, 2 * 1024 * 1024),
..WorkerFairnessConfig::default()
},
Instant::now(),
);
let mut fairness_snapshot = fairness.snapshot();
loop {
let mut tmp = [0u8; 65_536];
let n = tokio::select! {
@@ -181,36 +311,20 @@ pub(crate) async fn reader_loop(
let data = body.slice(12..);
trace!(cid, flags, len = data.len(), "RPC_PROXY_ANS");
let route_wait_ms = reader_route_data_wait_ms.load(Ordering::Relaxed);
let routed =
route_data_with_retry(reg.as_ref(), cid, flags, data, route_wait_ms).await;
if matches!(routed, RouteResult::Routed) {
data_route_queue_full_streak.remove(&cid);
continue;
}
match routed {
RouteResult::NoConn => stats.increment_me_route_drop_no_conn(),
RouteResult::ChannelClosed => stats.increment_me_route_drop_channel_closed(),
RouteResult::QueueFullBase => {
stats.increment_me_route_drop_queue_full();
stats.increment_me_route_drop_queue_full_base();
}
RouteResult::QueueFullHigh => {
stats.increment_me_route_drop_queue_full();
stats.increment_me_route_drop_queue_full_high();
}
RouteResult::Routed => {}
}
if should_close_on_route_result_for_data(routed) {
data_route_queue_full_streak.remove(&cid);
reg.unregister(cid).await;
send_close_conn(&tx, cid).await;
continue;
}
if is_data_route_queue_full(routed) {
let admission = fairness.enqueue_data(cid, flags, data, Instant::now());
if !matches!(admission, AdmissionDecision::Admit) {
stats.increment_me_route_drop_queue_full();
stats.increment_me_route_drop_queue_full_high();
let streak = data_route_queue_full_streak.entry(cid).or_insert(0);
*streak = streak.saturating_add(1);
if should_close_on_queue_full_streak(*streak) {
if should_close_on_queue_full_streak(*streak)
|| matches!(
admission,
AdmissionDecision::RejectSaturated
| AdmissionDecision::RejectStandingFlow
)
{
fairness.remove_flow(cid);
data_route_queue_full_streak.remove(&cid);
reg.unregister(cid).await;
send_close_conn(&tx, cid).await;
@@ -249,12 +363,14 @@ pub(crate) async fn reader_loop(
let _ = reg.route_nowait(cid, MeResponse::Close).await;
reg.unregister(cid).await;
data_route_queue_full_streak.remove(&cid);
fairness.remove_flow(cid);
} else if pt == RPC_CLOSE_CONN_U32 && body.len() >= 8 {
let cid = u64::from_le_bytes(body[0..8].try_into().unwrap());
debug!(cid, "RPC_CLOSE_CONN from ME");
let _ = reg.route_nowait(cid, MeResponse::Close).await;
reg.unregister(cid).await;
data_route_queue_full_streak.remove(&cid);
fairness.remove_flow(cid);
} else if pt == RPC_PING_U32 && body.len() >= 8 {
let ping_id = i64::from_le_bytes(body[0..8].try_into().unwrap());
trace!(ping_id, "RPC_PING -> RPC_PONG");
@@ -310,6 +426,19 @@ pub(crate) async fn reader_loop(
"Unknown RPC"
);
}
let route_wait_ms = reader_route_data_wait_ms.load(Ordering::Relaxed);
drain_fairness_scheduler(
&mut fairness,
reg.as_ref(),
&tx,
&mut data_route_queue_full_streak,
route_wait_ms,
stats.as_ref(),
)
.await;
let current_snapshot = fairness.snapshot();
apply_fairness_metrics_delta(stats.as_ref(), &mut fairness_snapshot, current_snapshot);
}
}
}

View File

@@ -140,6 +140,10 @@ impl ConnRegistry {
}
}
/// Returns the route channel capacity stored in this registry.
pub fn route_channel_capacity(&self) -> usize {
self.route_channel_capacity
}
#[cfg(test)]
pub fn new() -> Self {
Self::with_route_channel_capacity(4096)

View File

@@ -0,0 +1,185 @@
use std::time::{Duration, Instant};
use bytes::Bytes;
use crate::transport::middle_proxy::fairness::{
AdmissionDecision, DispatchAction, DispatchFeedback, PressureState, SchedulerDecision,
WorkerFairnessConfig, WorkerFairnessState,
};
/// Builds a test payload of `size` bytes, every byte set to `0xAB`.
fn enqueue_payload(size: usize) -> Bytes {
    let filler: Vec<u8> = vec![0xAB; size];
    Bytes::from(filler)
}
/// A second flow whose frame would push the worker past its byte budget must
/// be rejected with `RejectWorkerCap`, and the reject must be counted once.
#[test]
fn fairness_rejects_when_worker_budget_is_exhausted() {
    let start = Instant::now();
    let config = WorkerFairnessConfig {
        max_total_queued_bytes: 1024,
        max_flow_queued_bytes: 1024,
        ..WorkerFairnessConfig::default()
    };
    let mut fairness = WorkerFairnessState::new(config, start);

    let first = fairness.enqueue_data(1, 0, enqueue_payload(700), start);
    assert_eq!(first, AdmissionDecision::Admit);

    // 700 + 400 payload bytes no longer fit into the 1024-byte budget.
    let second = fairness.enqueue_data(2, 0, enqueue_payload(400), start);
    assert_eq!(second, AdmissionDecision::RejectWorkerCap);

    let snapshot = fairness.snapshot();
    assert!(snapshot.total_queued_bytes <= 1024);
    assert_eq!(snapshot.enqueue_rejects, 1);
}
/// A flow with enough backlog that is older than the minimum age and has
/// stalled once must show up as standing and backpressured in the snapshot.
#[test]
fn fairness_marks_standing_queue_after_stall_and_age_threshold() {
    let start = Instant::now();
    let config = WorkerFairnessConfig {
        standing_queue_min_age: Duration::from_millis(50),
        standing_queue_min_backlog_bytes: 256,
        standing_stall_threshold: 1,
        max_flow_queued_bytes: 4096,
        max_total_queued_bytes: 4096,
        ..WorkerFairnessConfig::default()
    };
    let mut fairness = WorkerFairnessState::new(config, start);

    let admission = fairness.enqueue_data(11, 0, enqueue_payload(512), start);
    assert_eq!(admission, AdmissionDecision::Admit);

    // Let the queue age past the 50 ms threshold before the first dispatch.
    let later = start + Duration::from_millis(100);
    let candidate = match fairness.next_decision(later) {
        SchedulerDecision::Dispatch(candidate) => candidate,
        _ => panic!("expected dispatch candidate"),
    };
    let action =
        fairness.apply_dispatch_feedback(11, candidate, DispatchFeedback::QueueFull, later);
    assert!(matches!(action, DispatchAction::Continue));

    let snapshot = fairness.snapshot();
    assert_eq!(snapshot.standing_flows, 1);
    assert!(snapshot.backpressured_flows >= 1);
}
/// With flow 2 permanently reporting `QueueFull`, flow 1 must still get
/// frames routed and the worker byte budget must hold.
#[test]
fn fairness_keeps_fast_flow_progress_under_slow_neighbor() {
    const SLOW_FLOW: u64 = 2;

    let mut now = Instant::now();
    let config = WorkerFairnessConfig {
        max_total_queued_bytes: 64 * 1024,
        max_flow_queued_bytes: 32 * 1024,
        ..WorkerFairnessConfig::default()
    };
    let mut fairness = WorkerFairnessState::new(config, now);

    // Interleave sixteen frames for each of the two flows.
    for _ in 0..16 {
        for flow_id in [1_u64, SLOW_FLOW] {
            assert_eq!(
                fairness.enqueue_data(flow_id, 0, enqueue_payload(512), now),
                AdmissionDecision::Admit
            );
        }
    }

    let mut fast_routed = 0u64;
    for _ in 0..128 {
        now += Duration::from_millis(5);
        let candidate = match fairness.next_decision(now) {
            SchedulerDecision::Dispatch(candidate) => candidate,
            _ => break,
        };
        let cid = candidate.frame.conn_id;
        let feedback = match cid {
            SLOW_FLOW => DispatchFeedback::QueueFull,
            _ => {
                fast_routed = fast_routed.saturating_add(1);
                DispatchFeedback::Routed
            }
        };
        let _ = fairness.apply_dispatch_feedback(cid, candidate, feedback, now);
    }

    let snapshot = fairness.snapshot();
    assert!(fast_routed > 0, "fast flow must continue making progress");
    assert!(snapshot.total_queued_bytes <= 64 * 1024);
}
/// Filling the queue past the pressure thresholds must not change the
/// committed state until the hysteresis confirmations have accumulated.
#[test]
fn fairness_pressure_hysteresis_prevents_instant_flapping() {
    let mut now = Instant::now();
    let cfg = {
        let mut cfg = WorkerFairnessConfig::default();
        cfg.max_total_queued_bytes = 4096;
        cfg.max_flow_queued_bytes = 4096;
        cfg.pressure.evaluate_every_rounds = 1;
        cfg.pressure.transition_hysteresis_rounds = 3;
        cfg.pressure.queue_ratio_pressured_pct = 40;
        cfg.pressure.queue_ratio_shedding_pct = 60;
        cfg.pressure.queue_ratio_saturated_pct = 80;
        cfg
    };
    let mut fairness = WorkerFairnessState::new(cfg, now);

    for _ in 0..4 {
        let admission = fairness.enqueue_data(9, 0, enqueue_payload(900), now);
        assert_eq!(admission, AdmissionDecision::Admit);
    }

    for _ in 0..2 {
        now += Duration::from_millis(1);
        let _ = fairness.next_decision(now);
    }

    assert_eq!(
        fairness.pressure_state(),
        PressureState::Normal,
        "state must not flip before hysteresis confirmations"
    );
}
/// Drives the scheduler with a deterministic pseudo-random mix of enqueues
/// and dispatch outcomes and checks after every step that the queued bytes
/// never exceed the worker budget.
#[test]
fn fairness_randomized_sequence_preserves_memory_bounds() {
    const WORKER_BUDGET: u64 = 32 * 1024;

    let mut now = Instant::now();
    let config = WorkerFairnessConfig {
        max_total_queued_bytes: WORKER_BUDGET,
        max_flow_queued_bytes: 4 * 1024,
        ..WorkerFairnessConfig::default()
    };
    let mut fairness = WorkerFairnessState::new(config, now);

    let mut seed = 0xC0FFEE_u64;
    for _ in 0..4096 {
        // Xorshift-style scramble; fixed seed keeps the sequence repeatable.
        seed ^= seed << 7;
        seed ^= seed >> 9;
        seed ^= seed << 8;

        let flow = (seed % 32) + 1;
        let size = ((seed >> 8) % 512 + 64) as usize;
        let _ = fairness.enqueue_data(flow, 0, enqueue_payload(size), now);

        now += Duration::from_millis(1);
        if let SchedulerDecision::Dispatch(candidate) = fairness.next_decision(now) {
            let routed_ok = seed & 0x1 == 0;
            let feedback = if routed_ok {
                DispatchFeedback::Routed
            } else {
                DispatchFeedback::QueueFull
            };
            let cid = candidate.frame.conn_id;
            let _ = fairness.apply_dispatch_feedback(cid, candidate, feedback, now);
        }

        let snapshot = fairness.snapshot();
        assert!(snapshot.total_queued_bytes <= WORKER_BUDGET);
    }
}

View File

@@ -279,6 +279,12 @@ pub struct UpstreamApiSummarySnapshot {
pub shadowsocks_total: usize,
}
/// Compact health overview of the configured upstreams, as returned by
/// `UpstreamManager::api_health_summary`.
#[derive(Debug, Clone, Copy, Default)]
pub struct UpstreamApiHealthSummary {
/// Number of upstreams held by the manager.
pub configured_total: usize,
/// Number of those upstreams whose `healthy` flag is set.
pub healthy_total: usize,
}
#[derive(Debug, Clone)]
pub struct UpstreamApiSnapshot {
pub summary: UpstreamApiSummarySnapshot,
@@ -444,6 +450,20 @@ impl UpstreamManager {
Some(UpstreamApiSnapshot { summary, upstreams })
}
/// Counts the configured upstreams and how many of them are currently
/// flagged healthy, under a single read lock.
pub async fn api_health_summary(&self) -> UpstreamApiHealthSummary {
    let upstreams = self.upstreams.read().await;
    let healthy_total = upstreams
        .iter()
        .filter(|upstream| upstream.healthy)
        .count();
    UpstreamApiHealthSummary {
        configured_total: upstreams.len(),
        healthy_total,
    }
}
fn describe_upstream(upstream_type: &UpstreamType) -> (UpstreamRouteKind, String) {
match upstream_type {
UpstreamType::Direct { .. } => (UpstreamRouteKind::Direct, "direct".to_string()),