mirror of
https://github.com/telemt/telemt.git
synced 2026-05-22 19:51:43 +03:00
Compare commits
6 Commits
3ca3e8ff0e
...
37b6f7b985
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
37b6f7b985 | ||
|
|
50e9e5cf32 | ||
|
|
1b25bada29 | ||
|
|
bde30eaf05 | ||
|
|
b447f60a72 | ||
|
|
093faed0c2 |
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -2780,7 +2780,7 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
|
||||
|
||||
[[package]]
|
||||
name = "telemt"
|
||||
version = "3.4.2"
|
||||
version = "3.4.3"
|
||||
dependencies = [
|
||||
"aes",
|
||||
"anyhow",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "telemt"
|
||||
version = "3.4.2"
|
||||
version = "3.4.3"
|
||||
edition = "2024"
|
||||
|
||||
[features]
|
||||
|
||||
26
Dockerfile
26
Dockerfile
@@ -82,6 +82,32 @@ HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 CMD ["/ap
|
||||
ENTRYPOINT ["/app/telemt"]
|
||||
CMD ["config.toml"]
|
||||
|
||||
# ==========================
|
||||
# Production Netfilter Profile
|
||||
# ==========================
|
||||
FROM debian:12-slim AS prod-netfilter
|
||||
|
||||
RUN set -eux; \
|
||||
apt-get update; \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
conntrack \
|
||||
nftables \
|
||||
iptables; \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY --from=minimal /telemt /app/telemt
|
||||
COPY config.toml /app/config.toml
|
||||
|
||||
EXPOSE 443 9090 9091
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 CMD ["/app/telemt", "healthcheck", "/app/config.toml", "--mode", "liveness"]
|
||||
|
||||
ENTRYPOINT ["/app/telemt"]
|
||||
CMD ["config.toml"]
|
||||
|
||||
# ==========================
|
||||
# Production Distroless on MUSL
|
||||
# ==========================
|
||||
|
||||
10
docker-compose.host-netfilter.yml
Normal file
10
docker-compose.host-netfilter.yml
Normal file
@@ -0,0 +1,10 @@
|
||||
services:
|
||||
telemt:
|
||||
build:
|
||||
context: .
|
||||
target: prod-netfilter
|
||||
network_mode: host
|
||||
ports: []
|
||||
cap_add:
|
||||
- NET_BIND_SERVICE
|
||||
- NET_ADMIN
|
||||
8
docker-compose.netfilter.yml
Normal file
8
docker-compose.netfilter.yml
Normal file
@@ -0,0 +1,8 @@
|
||||
services:
|
||||
telemt:
|
||||
build:
|
||||
context: .
|
||||
target: prod-netfilter
|
||||
cap_add:
|
||||
- NET_BIND_SERVICE
|
||||
- NET_ADMIN
|
||||
@@ -1,7 +1,9 @@
|
||||
services:
|
||||
telemt:
|
||||
image: ghcr.io/telemt/telemt:latest
|
||||
build: .
|
||||
build:
|
||||
context: .
|
||||
target: prod
|
||||
container_name: telemt
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
@@ -28,7 +30,6 @@ services:
|
||||
- ALL
|
||||
cap_add:
|
||||
- NET_BIND_SERVICE
|
||||
- NET_ADMIN
|
||||
read_only: true
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
|
||||
@@ -41,8 +41,8 @@ use config_store::{current_revision, load_config_from_disk, parse_if_match};
|
||||
use events::ApiEventStore;
|
||||
use http_utils::{error_response, read_json, read_optional_json, success_response};
|
||||
use model::{
|
||||
ApiFailure, CreateUserRequest, DeleteUserResponse, HealthData, HealthReadyData, PatchUserRequest,
|
||||
RotateSecretRequest, SummaryData, UserActiveIps,
|
||||
ApiFailure, CreateUserRequest, DeleteUserResponse, HealthData, HealthReadyData,
|
||||
PatchUserRequest, RotateSecretRequest, SummaryData, UserActiveIps,
|
||||
};
|
||||
use runtime_edge::{
|
||||
EdgeConnectionsCacheEntry, build_runtime_connections_summary_data,
|
||||
|
||||
@@ -343,6 +343,10 @@ impl ProxyConfig {
|
||||
let network_table = parsed_toml
|
||||
.get("network")
|
||||
.and_then(|value| value.as_table());
|
||||
let server_table = parsed_toml.get("server").and_then(|value| value.as_table());
|
||||
let conntrack_control_table = server_table
|
||||
.and_then(|table| table.get("conntrack_control"))
|
||||
.and_then(|value| value.as_table());
|
||||
let update_every_is_explicit = general_table
|
||||
.map(|table| table.contains_key("update_every"))
|
||||
.unwrap_or(false);
|
||||
@@ -372,10 +376,17 @@ impl ProxyConfig {
|
||||
let stun_servers_is_explicit = network_table
|
||||
.map(|table| table.contains_key("stun_servers"))
|
||||
.unwrap_or(false);
|
||||
let inline_conntrack_control_is_explicit = conntrack_control_table
|
||||
.map(|table| table.contains_key("inline_conntrack_control"))
|
||||
.unwrap_or(false);
|
||||
|
||||
let mut config: ProxyConfig = parsed_toml
|
||||
.try_into()
|
||||
.map_err(|e| ProxyError::Config(e.to_string()))?;
|
||||
config
|
||||
.server
|
||||
.conntrack_control
|
||||
.inline_conntrack_control_explicit = inline_conntrack_control_is_explicit;
|
||||
|
||||
if !update_every_is_explicit && (legacy_secret_is_explicit || legacy_config_is_explicit) {
|
||||
config.general.update_every = None;
|
||||
@@ -1881,6 +1892,43 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn conntrack_inline_explicit_flag_is_false_when_omitted() {
|
||||
let cfg = load_config_from_temp_toml(
|
||||
r#"
|
||||
[general]
|
||||
[network]
|
||||
[server]
|
||||
[server.conntrack_control]
|
||||
[access]
|
||||
"#,
|
||||
);
|
||||
assert!(
|
||||
!cfg.server
|
||||
.conntrack_control
|
||||
.inline_conntrack_control_explicit
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn conntrack_inline_explicit_flag_is_true_when_present() {
|
||||
let cfg = load_config_from_temp_toml(
|
||||
r#"
|
||||
[general]
|
||||
[network]
|
||||
[server]
|
||||
[server.conntrack_control]
|
||||
inline_conntrack_control = true
|
||||
[access]
|
||||
"#,
|
||||
);
|
||||
assert!(
|
||||
cfg.server
|
||||
.conntrack_control
|
||||
.inline_conntrack_control_explicit
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unknown_sni_action_parses_and_defaults_to_drop() {
|
||||
let cfg_default: ProxyConfig = toml::from_str(
|
||||
|
||||
@@ -1329,6 +1329,10 @@ pub struct ConntrackControlConfig {
|
||||
#[serde(default = "default_conntrack_control_enabled")]
|
||||
pub inline_conntrack_control: bool,
|
||||
|
||||
/// Tracks whether inline_conntrack_control was explicitly set in config.
|
||||
#[serde(skip)]
|
||||
pub inline_conntrack_control_explicit: bool,
|
||||
|
||||
/// Conntrack mode for listener ingress traffic.
|
||||
#[serde(default)]
|
||||
pub mode: ConntrackMode,
|
||||
@@ -1363,6 +1367,7 @@ impl Default for ConntrackControlConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
inline_conntrack_control: default_conntrack_control_enabled(),
|
||||
inline_conntrack_control_explicit: false,
|
||||
mode: ConntrackMode::default(),
|
||||
backend: ConntrackBackend::default(),
|
||||
profile: ConntrackPressureProfile::default(),
|
||||
|
||||
@@ -24,6 +24,13 @@ enum NetfilterBackend {
|
||||
Iptables,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
struct ConntrackRuntimeSupport {
|
||||
netfilter_backend: Option<NetfilterBackend>,
|
||||
has_cap_net_admin: bool,
|
||||
has_conntrack_binary: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
struct PressureSample {
|
||||
conn_pct: Option<u8>,
|
||||
@@ -56,11 +63,8 @@ pub(crate) fn spawn_conntrack_controller(
|
||||
shared: Arc<ProxySharedState>,
|
||||
) {
|
||||
if !cfg!(target_os = "linux") {
|
||||
let enabled = config_rx
|
||||
.borrow()
|
||||
.server
|
||||
.conntrack_control
|
||||
.inline_conntrack_control;
|
||||
let cfg = config_rx.borrow();
|
||||
let enabled = cfg.server.conntrack_control.inline_conntrack_control;
|
||||
stats.set_conntrack_control_enabled(enabled);
|
||||
stats.set_conntrack_control_available(false);
|
||||
stats.set_conntrack_pressure_active(false);
|
||||
@@ -68,9 +72,14 @@ pub(crate) fn spawn_conntrack_controller(
|
||||
stats.set_conntrack_rule_apply_ok(false);
|
||||
shared.disable_conntrack_close_sender();
|
||||
shared.set_conntrack_pressure_active(false);
|
||||
if enabled {
|
||||
if enabled
|
||||
&& cfg
|
||||
.server
|
||||
.conntrack_control
|
||||
.inline_conntrack_control_explicit
|
||||
{
|
||||
warn!(
|
||||
"conntrack control is configured but unsupported on this OS; disabling runtime worker"
|
||||
"conntrack control explicitly enabled but unsupported on this OS; disabling runtime worker"
|
||||
);
|
||||
}
|
||||
return;
|
||||
@@ -92,16 +101,17 @@ async fn run_conntrack_controller(
|
||||
let mut cfg = config_rx.borrow().clone();
|
||||
let mut pressure_state = PressureState::new(stats.as_ref());
|
||||
let mut delete_budget_tokens = cfg.server.conntrack_control.delete_budget_per_sec;
|
||||
let mut backend = pick_backend(cfg.server.conntrack_control.backend);
|
||||
let mut runtime_support = probe_runtime_support(cfg.server.conntrack_control.backend);
|
||||
let mut effective_enabled = effective_conntrack_enabled(&cfg, runtime_support);
|
||||
|
||||
apply_runtime_state(
|
||||
stats.as_ref(),
|
||||
shared.as_ref(),
|
||||
&cfg,
|
||||
backend.is_some(),
|
||||
runtime_support,
|
||||
false,
|
||||
);
|
||||
reconcile_rules(&cfg, backend, stats.as_ref()).await;
|
||||
reconcile_rules(&cfg, runtime_support, stats.as_ref()).await;
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
@@ -110,17 +120,18 @@ async fn run_conntrack_controller(
|
||||
break;
|
||||
}
|
||||
cfg = config_rx.borrow_and_update().clone();
|
||||
backend = pick_backend(cfg.server.conntrack_control.backend);
|
||||
runtime_support = probe_runtime_support(cfg.server.conntrack_control.backend);
|
||||
effective_enabled = effective_conntrack_enabled(&cfg, runtime_support);
|
||||
delete_budget_tokens = cfg.server.conntrack_control.delete_budget_per_sec;
|
||||
apply_runtime_state(stats.as_ref(), shared.as_ref(), &cfg, backend.is_some(), pressure_state.active);
|
||||
reconcile_rules(&cfg, backend, stats.as_ref()).await;
|
||||
apply_runtime_state(stats.as_ref(), shared.as_ref(), &cfg, runtime_support, pressure_state.active);
|
||||
reconcile_rules(&cfg, runtime_support, stats.as_ref()).await;
|
||||
}
|
||||
event = close_rx.recv() => {
|
||||
let Some(event) = event else {
|
||||
break;
|
||||
};
|
||||
stats.set_conntrack_event_queue_depth(close_rx.len() as u64);
|
||||
if !cfg.server.conntrack_control.inline_conntrack_control {
|
||||
if !effective_enabled {
|
||||
continue;
|
||||
}
|
||||
if !pressure_state.active {
|
||||
@@ -156,6 +167,7 @@ async fn run_conntrack_controller(
|
||||
stats.as_ref(),
|
||||
shared.as_ref(),
|
||||
&cfg,
|
||||
effective_enabled,
|
||||
&sample,
|
||||
&mut pressure_state,
|
||||
);
|
||||
@@ -175,20 +187,30 @@ fn apply_runtime_state(
|
||||
stats: &Stats,
|
||||
shared: &ProxySharedState,
|
||||
cfg: &ProxyConfig,
|
||||
backend_available: bool,
|
||||
runtime_support: ConntrackRuntimeSupport,
|
||||
pressure_active: bool,
|
||||
) {
|
||||
let enabled = cfg.server.conntrack_control.inline_conntrack_control;
|
||||
let available = enabled && backend_available && has_cap_net_admin();
|
||||
if enabled && !available {
|
||||
let available = effective_conntrack_enabled(cfg, runtime_support);
|
||||
if enabled
|
||||
&& !available
|
||||
&& cfg
|
||||
.server
|
||||
.conntrack_control
|
||||
.inline_conntrack_control_explicit
|
||||
{
|
||||
warn!(
|
||||
"conntrack control enabled but unavailable (missing CAP_NET_ADMIN or backend binaries)"
|
||||
has_cap_net_admin = runtime_support.has_cap_net_admin,
|
||||
backend_available = runtime_support.netfilter_backend.is_some(),
|
||||
conntrack_binary_available = runtime_support.has_conntrack_binary,
|
||||
configured_backend = ?cfg.server.conntrack_control.backend,
|
||||
"conntrack control explicitly enabled but unavailable; disabling runtime features"
|
||||
);
|
||||
}
|
||||
stats.set_conntrack_control_enabled(enabled);
|
||||
stats.set_conntrack_control_available(available);
|
||||
shared.set_conntrack_pressure_active(enabled && pressure_active);
|
||||
stats.set_conntrack_pressure_active(enabled && pressure_active);
|
||||
shared.set_conntrack_pressure_active(available && pressure_active);
|
||||
stats.set_conntrack_pressure_active(available && pressure_active);
|
||||
}
|
||||
|
||||
fn collect_pressure_sample(
|
||||
@@ -228,10 +250,11 @@ fn update_pressure_state(
|
||||
stats: &Stats,
|
||||
shared: &ProxySharedState,
|
||||
cfg: &ProxyConfig,
|
||||
effective_enabled: bool,
|
||||
sample: &PressureSample,
|
||||
state: &mut PressureState,
|
||||
) {
|
||||
if !cfg.server.conntrack_control.inline_conntrack_control {
|
||||
if !effective_enabled {
|
||||
if state.active {
|
||||
state.active = false;
|
||||
state.low_streak = 0;
|
||||
@@ -285,22 +308,26 @@ fn update_pressure_state(
|
||||
state.low_streak = 0;
|
||||
}
|
||||
|
||||
async fn reconcile_rules(cfg: &ProxyConfig, backend: Option<NetfilterBackend>, stats: &Stats) {
|
||||
async fn reconcile_rules(
|
||||
cfg: &ProxyConfig,
|
||||
runtime_support: ConntrackRuntimeSupport,
|
||||
stats: &Stats,
|
||||
) {
|
||||
if !cfg.server.conntrack_control.inline_conntrack_control {
|
||||
clear_notrack_rules_all_backends().await;
|
||||
stats.set_conntrack_rule_apply_ok(true);
|
||||
return;
|
||||
}
|
||||
|
||||
if !has_cap_net_admin() {
|
||||
if !effective_conntrack_enabled(cfg, runtime_support) {
|
||||
clear_notrack_rules_all_backends().await;
|
||||
stats.set_conntrack_rule_apply_ok(false);
|
||||
return;
|
||||
}
|
||||
|
||||
let Some(backend) = backend else {
|
||||
stats.set_conntrack_rule_apply_ok(false);
|
||||
return;
|
||||
};
|
||||
let backend = runtime_support
|
||||
.netfilter_backend
|
||||
.expect("netfilter backend must be available for effective conntrack control");
|
||||
|
||||
let apply_result = match backend {
|
||||
NetfilterBackend::Nftables => apply_nft_rules(cfg).await,
|
||||
@@ -315,6 +342,24 @@ async fn reconcile_rules(cfg: &ProxyConfig, backend: Option<NetfilterBackend>, s
|
||||
}
|
||||
}
|
||||
|
||||
fn probe_runtime_support(configured_backend: ConntrackBackend) -> ConntrackRuntimeSupport {
|
||||
ConntrackRuntimeSupport {
|
||||
netfilter_backend: pick_backend(configured_backend),
|
||||
has_cap_net_admin: has_cap_net_admin(),
|
||||
has_conntrack_binary: command_exists("conntrack"),
|
||||
}
|
||||
}
|
||||
|
||||
fn effective_conntrack_enabled(
|
||||
cfg: &ProxyConfig,
|
||||
runtime_support: ConntrackRuntimeSupport,
|
||||
) -> bool {
|
||||
cfg.server.conntrack_control.inline_conntrack_control
|
||||
&& runtime_support.has_cap_net_admin
|
||||
&& runtime_support.netfilter_backend.is_some()
|
||||
&& runtime_support.has_conntrack_binary
|
||||
}
|
||||
|
||||
fn pick_backend(configured: ConntrackBackend) -> Option<NetfilterBackend> {
|
||||
match configured {
|
||||
ConntrackBackend::Auto => {
|
||||
@@ -710,7 +755,7 @@ mod tests {
|
||||
me_queue_pressure_delta: 0,
|
||||
};
|
||||
|
||||
update_pressure_state(&stats, shared.as_ref(), &cfg, &sample, &mut state);
|
||||
update_pressure_state(&stats, shared.as_ref(), &cfg, true, &sample, &mut state);
|
||||
|
||||
assert!(state.active);
|
||||
assert!(shared.conntrack_pressure_active());
|
||||
@@ -731,7 +776,14 @@ mod tests {
|
||||
accept_timeout_delta: 0,
|
||||
me_queue_pressure_delta: 0,
|
||||
};
|
||||
update_pressure_state(&stats, shared.as_ref(), &cfg, &high_sample, &mut state);
|
||||
update_pressure_state(
|
||||
&stats,
|
||||
shared.as_ref(),
|
||||
&cfg,
|
||||
true,
|
||||
&high_sample,
|
||||
&mut state,
|
||||
);
|
||||
assert!(state.active);
|
||||
|
||||
let low_sample = PressureSample {
|
||||
@@ -740,11 +792,11 @@ mod tests {
|
||||
accept_timeout_delta: 0,
|
||||
me_queue_pressure_delta: 0,
|
||||
};
|
||||
update_pressure_state(&stats, shared.as_ref(), &cfg, &low_sample, &mut state);
|
||||
update_pressure_state(&stats, shared.as_ref(), &cfg, true, &low_sample, &mut state);
|
||||
assert!(state.active);
|
||||
update_pressure_state(&stats, shared.as_ref(), &cfg, &low_sample, &mut state);
|
||||
update_pressure_state(&stats, shared.as_ref(), &cfg, true, &low_sample, &mut state);
|
||||
assert!(state.active);
|
||||
update_pressure_state(&stats, shared.as_ref(), &cfg, &low_sample, &mut state);
|
||||
update_pressure_state(&stats, shared.as_ref(), &cfg, true, &low_sample, &mut state);
|
||||
|
||||
assert!(!state.active);
|
||||
assert!(!shared.conntrack_pressure_active());
|
||||
@@ -765,7 +817,7 @@ mod tests {
|
||||
me_queue_pressure_delta: 10,
|
||||
};
|
||||
|
||||
update_pressure_state(&stats, shared.as_ref(), &cfg, &sample, &mut state);
|
||||
update_pressure_state(&stats, shared.as_ref(), &cfg, false, &sample, &mut state);
|
||||
|
||||
assert!(!state.active);
|
||||
assert!(!shared.conntrack_pressure_active());
|
||||
|
||||
@@ -111,7 +111,10 @@ fn probe_target(listen: SocketAddr) -> SocketAddr {
|
||||
}
|
||||
|
||||
fn build_request(target: SocketAddr, path: &str, auth_header: &str) -> String {
|
||||
let mut request = format!("GET {path} HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n", target);
|
||||
let mut request = format!(
|
||||
"GET {path} HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n",
|
||||
target
|
||||
);
|
||||
if !auth_header.is_empty() {
|
||||
request.push_str("Authorization: ");
|
||||
request.push_str(auth_header);
|
||||
|
||||
@@ -253,7 +253,18 @@ pub fn build_emulated_server_hello(
|
||||
}
|
||||
|
||||
// --- ApplicationData (fake encrypted records) ---
|
||||
let mut sizes = jitter_and_clamp_sizes(&emulated_app_data_sizes(cached), rng);
|
||||
let mut sizes = {
|
||||
let base_sizes = emulated_app_data_sizes(cached);
|
||||
match cached.behavior_profile.source {
|
||||
TlsProfileSource::Raw | TlsProfileSource::Merged => base_sizes
|
||||
.into_iter()
|
||||
.map(|size| size.clamp(MIN_APP_DATA, MAX_APP_DATA))
|
||||
.collect(),
|
||||
TlsProfileSource::Default | TlsProfileSource::Rustls => {
|
||||
jitter_and_clamp_sizes(&base_sizes, rng)
|
||||
}
|
||||
}
|
||||
};
|
||||
let compact_payload = cached
|
||||
.cert_info
|
||||
.as_ref()
|
||||
|
||||
@@ -77,11 +77,12 @@ pub(crate) struct FlowFairnessState {
|
||||
pub(crate) standing_state: StandingQueueState,
|
||||
pub(crate) scheduler_state: FlowSchedulerState,
|
||||
pub(crate) bucket_id: usize,
|
||||
pub(crate) weight_quanta: u8,
|
||||
pub(crate) in_active_ring: bool,
|
||||
}
|
||||
|
||||
impl FlowFairnessState {
|
||||
pub(crate) fn new(flow_id: u64, worker_id: u16, bucket_id: usize) -> Self {
|
||||
pub(crate) fn new(flow_id: u64, worker_id: u16, bucket_id: usize, weight_quanta: u8) -> Self {
|
||||
Self {
|
||||
_flow_id: flow_id,
|
||||
_worker_id: worker_id,
|
||||
@@ -97,6 +98,7 @@ impl FlowFairnessState {
|
||||
standing_state: StandingQueueState::Transient,
|
||||
scheduler_state: FlowSchedulerState::Idle,
|
||||
bucket_id,
|
||||
weight_quanta: weight_quanta.max(1),
|
||||
in_active_ring: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -146,59 +146,55 @@ impl PressureEvaluator {
|
||||
((signals.standing_flows.saturating_mul(100)) / signals.active_flows).min(100) as u8
|
||||
};
|
||||
|
||||
let mut pressure_score = 0u8;
|
||||
let mut pressured = false;
|
||||
let mut saturated = false;
|
||||
|
||||
let queue_saturated_pct = cfg.queue_ratio_shedding_pct.min(cfg.queue_ratio_saturated_pct);
|
||||
if queue_ratio_pct >= cfg.queue_ratio_pressured_pct {
|
||||
pressure_score = pressure_score.max(1);
|
||||
pressured = true;
|
||||
}
|
||||
if queue_ratio_pct >= cfg.queue_ratio_shedding_pct {
|
||||
pressure_score = pressure_score.max(2);
|
||||
}
|
||||
if queue_ratio_pct >= cfg.queue_ratio_saturated_pct {
|
||||
pressure_score = pressure_score.max(3);
|
||||
if queue_ratio_pct >= queue_saturated_pct {
|
||||
saturated = true;
|
||||
}
|
||||
|
||||
let standing_saturated_pct = cfg
|
||||
.standing_ratio_shedding_pct
|
||||
.min(cfg.standing_ratio_saturated_pct);
|
||||
if standing_ratio_pct >= cfg.standing_ratio_pressured_pct {
|
||||
pressure_score = pressure_score.max(1);
|
||||
pressured = true;
|
||||
}
|
||||
if standing_ratio_pct >= cfg.standing_ratio_shedding_pct {
|
||||
pressure_score = pressure_score.max(2);
|
||||
}
|
||||
if standing_ratio_pct >= cfg.standing_ratio_saturated_pct {
|
||||
pressure_score = pressure_score.max(3);
|
||||
if standing_ratio_pct >= standing_saturated_pct {
|
||||
saturated = true;
|
||||
}
|
||||
|
||||
let rejects_saturated = cfg.rejects_shedding.min(cfg.rejects_saturated);
|
||||
if self.admission_rejects_window >= cfg.rejects_pressured {
|
||||
pressure_score = pressure_score.max(1);
|
||||
pressured = true;
|
||||
}
|
||||
if self.admission_rejects_window >= cfg.rejects_shedding {
|
||||
pressure_score = pressure_score.max(2);
|
||||
}
|
||||
if self.admission_rejects_window >= cfg.rejects_saturated {
|
||||
pressure_score = pressure_score.max(3);
|
||||
if self.admission_rejects_window >= rejects_saturated {
|
||||
saturated = true;
|
||||
}
|
||||
|
||||
let stalls_saturated = cfg.stalls_shedding.min(cfg.stalls_saturated);
|
||||
if self.route_stalls_window >= cfg.stalls_pressured {
|
||||
pressure_score = pressure_score.max(1);
|
||||
pressured = true;
|
||||
}
|
||||
if self.route_stalls_window >= cfg.stalls_shedding {
|
||||
pressure_score = pressure_score.max(2);
|
||||
}
|
||||
if self.route_stalls_window >= cfg.stalls_saturated {
|
||||
pressure_score = pressure_score.max(3);
|
||||
if self.route_stalls_window >= stalls_saturated {
|
||||
saturated = true;
|
||||
}
|
||||
|
||||
if signals.backpressured_flows > signals.active_flows.saturating_div(2)
|
||||
&& signals.active_flows > 0
|
||||
{
|
||||
pressure_score = pressure_score.max(2);
|
||||
pressured = true;
|
||||
}
|
||||
|
||||
match pressure_score {
|
||||
0 => PressureState::Normal,
|
||||
1 => PressureState::Pressured,
|
||||
2 => PressureState::Shedding,
|
||||
_ => PressureState::Saturated,
|
||||
if saturated {
|
||||
PressureState::Saturated
|
||||
} else if pressured {
|
||||
PressureState::Pressured
|
||||
} else {
|
||||
PressureState::Normal
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use bytes::Bytes;
|
||||
use crate::protocol::constants::RPC_FLAG_QUICKACK;
|
||||
|
||||
use super::model::{
|
||||
AdmissionDecision, DispatchAction, DispatchCandidate, DispatchFeedback, FlowFairnessState,
|
||||
@@ -26,6 +27,8 @@ pub(crate) struct WorkerFairnessConfig {
|
||||
pub(crate) max_consecutive_stalls_before_close: u8,
|
||||
pub(crate) soft_bucket_count: usize,
|
||||
pub(crate) soft_bucket_share_pct: u8,
|
||||
pub(crate) default_flow_weight: u8,
|
||||
pub(crate) quickack_flow_weight: u8,
|
||||
pub(crate) pressure: PressureConfig,
|
||||
}
|
||||
|
||||
@@ -46,6 +49,8 @@ impl Default for WorkerFairnessConfig {
|
||||
max_consecutive_stalls_before_close: 16,
|
||||
soft_bucket_count: 64,
|
||||
soft_bucket_share_pct: 25,
|
||||
default_flow_weight: 1,
|
||||
quickack_flow_weight: 4,
|
||||
pressure: PressureConfig::default(),
|
||||
}
|
||||
}
|
||||
@@ -57,9 +62,9 @@ struct FlowEntry {
|
||||
}
|
||||
|
||||
impl FlowEntry {
|
||||
fn new(flow_id: u64, worker_id: u16, bucket_id: usize) -> Self {
|
||||
fn new(flow_id: u64, worker_id: u16, bucket_id: usize, weight_quanta: u8) -> Self {
|
||||
Self {
|
||||
fairness: FlowFairnessState::new(flow_id, worker_id, bucket_id),
|
||||
fairness: FlowFairnessState::new(flow_id, worker_id, bucket_id, weight_quanta),
|
||||
queue: VecDeque::new(),
|
||||
}
|
||||
}
|
||||
@@ -86,6 +91,7 @@ pub(crate) struct WorkerFairnessState {
|
||||
pressure: PressureEvaluator,
|
||||
flows: HashMap<u64, FlowEntry>,
|
||||
active_ring: VecDeque<u64>,
|
||||
active_ring_members: HashSet<u64>,
|
||||
total_queued_bytes: u64,
|
||||
bucket_queued_bytes: Vec<u64>,
|
||||
bucket_active_flows: Vec<usize>,
|
||||
@@ -108,6 +114,7 @@ impl WorkerFairnessState {
|
||||
pressure: PressureEvaluator::new(now),
|
||||
flows: HashMap::new(),
|
||||
active_ring: VecDeque::new(),
|
||||
active_ring_members: HashSet::new(),
|
||||
total_queued_bytes: 0,
|
||||
bucket_queued_bytes: vec![0; bucket_count],
|
||||
bucket_active_flows: vec![0; bucket_count],
|
||||
@@ -184,6 +191,7 @@ impl WorkerFairnessState {
|
||||
}
|
||||
|
||||
let bucket_id = self.bucket_for(conn_id);
|
||||
let frame_weight = Self::weight_for_flags(&self.config, flags);
|
||||
let bucket_cap = self
|
||||
.config
|
||||
.max_total_queued_bytes
|
||||
@@ -205,12 +213,13 @@ impl WorkerFairnessState {
|
||||
self.bucket_active_flows[bucket_id].saturating_add(1);
|
||||
self.flows.insert(
|
||||
conn_id,
|
||||
FlowEntry::new(conn_id, self.config.worker_id, bucket_id),
|
||||
FlowEntry::new(conn_id, self.config.worker_id, bucket_id, frame_weight),
|
||||
);
|
||||
self.flows
|
||||
.get_mut(&conn_id)
|
||||
.expect("flow inserted must be retrievable")
|
||||
};
|
||||
entry.fairness.weight_quanta = entry.fairness.weight_quanta.max(frame_weight);
|
||||
|
||||
if entry.fairness.pending_bytes.saturating_add(frame_bytes)
|
||||
> self.config.max_flow_queued_bytes
|
||||
@@ -242,11 +251,24 @@ impl WorkerFairnessState {
|
||||
self.bucket_queued_bytes[bucket_id] =
|
||||
self.bucket_queued_bytes[bucket_id].saturating_add(frame_bytes);
|
||||
|
||||
let mut enqueue_active = false;
|
||||
if !entry.fairness.in_active_ring {
|
||||
entry.fairness.in_active_ring = true;
|
||||
self.active_ring.push_back(conn_id);
|
||||
enqueue_active = true;
|
||||
}
|
||||
|
||||
let pressure_state = self.pressure.state();
|
||||
let (before_membership, after_membership) = {
|
||||
let before = Self::flow_membership(&entry.fairness);
|
||||
Self::classify_flow(&self.config, pressure_state, now, &mut entry.fairness);
|
||||
let after = Self::flow_membership(&entry.fairness);
|
||||
(before, after)
|
||||
};
|
||||
if enqueue_active {
|
||||
self.enqueue_active_conn(conn_id);
|
||||
}
|
||||
self.apply_flow_membership_delta(before_membership, after_membership);
|
||||
|
||||
self.evaluate_pressure(now, true);
|
||||
AdmissionDecision::Admit
|
||||
}
|
||||
@@ -260,32 +282,44 @@ impl WorkerFairnessState {
|
||||
let Some(conn_id) = self.active_ring.pop_front() else {
|
||||
break;
|
||||
};
|
||||
if !self.active_ring_members.remove(&conn_id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut candidate = None;
|
||||
let mut requeue_active = false;
|
||||
let mut drained_bytes = 0u64;
|
||||
let mut bucket_id = 0usize;
|
||||
let mut should_continue = false;
|
||||
let mut enqueue_active = false;
|
||||
let mut membership_delta = None;
|
||||
let pressure_state = self.pressure.state();
|
||||
|
||||
if let Some(flow) = self.flows.get_mut(&conn_id) {
|
||||
bucket_id = flow.fairness.bucket_id;
|
||||
flow.fairness.in_active_ring = false;
|
||||
let before_membership = Self::flow_membership(&flow.fairness);
|
||||
|
||||
if flow.queue.is_empty() {
|
||||
flow.fairness.in_active_ring = false;
|
||||
flow.fairness.scheduler_state = FlowSchedulerState::Idle;
|
||||
flow.fairness.pending_bytes = 0;
|
||||
flow.fairness.deficit_bytes = 0;
|
||||
flow.fairness.queue_started_at = None;
|
||||
continue;
|
||||
}
|
||||
|
||||
should_continue = true;
|
||||
} else {
|
||||
Self::classify_flow(&self.config, pressure_state, now, &mut flow.fairness);
|
||||
|
||||
let quantum =
|
||||
Self::effective_quantum_bytes(&self.config, pressure_state, &flow.fairness);
|
||||
let quantum = Self::effective_quantum_bytes(
|
||||
&self.config,
|
||||
pressure_state,
|
||||
&flow.fairness,
|
||||
);
|
||||
flow.fairness.deficit_bytes = flow
|
||||
.fairness
|
||||
.deficit_bytes
|
||||
.saturating_add(i64::from(quantum));
|
||||
Self::clamp_deficit_bytes(&self.config, &mut flow.fairness);
|
||||
self.deficit_grants = self.deficit_grants.saturating_add(1);
|
||||
|
||||
let front_len = flow.queue.front().map_or(0, |front| front.queued_bytes());
|
||||
@@ -294,6 +328,8 @@ impl WorkerFairnessState {
|
||||
flow.fairness.consecutive_skips.saturating_add(1);
|
||||
self.deficit_skips = self.deficit_skips.saturating_add(1);
|
||||
requeue_active = true;
|
||||
flow.fairness.in_active_ring = true;
|
||||
enqueue_active = true;
|
||||
} else if let Some(frame) = flow.queue.pop_front() {
|
||||
drained_bytes = frame.queued_bytes();
|
||||
flow.fairness.pending_bytes =
|
||||
@@ -302,6 +338,7 @@ impl WorkerFairnessState {
|
||||
.fairness
|
||||
.deficit_bytes
|
||||
.saturating_sub(drained_bytes as i64);
|
||||
Self::clamp_deficit_bytes(&self.config, &mut flow.fairness);
|
||||
flow.fairness.consecutive_skips = 0;
|
||||
flow.fairness.queue_started_at =
|
||||
flow.queue.front().map(|front| front.enqueued_at);
|
||||
@@ -309,6 +346,10 @@ impl WorkerFairnessState {
|
||||
if !requeue_active {
|
||||
flow.fairness.scheduler_state = FlowSchedulerState::Idle;
|
||||
flow.fairness.in_active_ring = false;
|
||||
flow.fairness.deficit_bytes = 0;
|
||||
} else {
|
||||
flow.fairness.in_active_ring = true;
|
||||
enqueue_active = true;
|
||||
}
|
||||
candidate = Some(DispatchCandidate {
|
||||
pressure_state,
|
||||
@@ -318,17 +359,25 @@ impl WorkerFairnessState {
|
||||
}
|
||||
}
|
||||
|
||||
membership_delta = Some((before_membership, Self::flow_membership(&flow.fairness)));
|
||||
}
|
||||
|
||||
if let Some((before_membership, after_membership)) = membership_delta {
|
||||
self.apply_flow_membership_delta(before_membership, after_membership);
|
||||
}
|
||||
|
||||
if should_continue {
|
||||
continue;
|
||||
}
|
||||
|
||||
if drained_bytes > 0 {
|
||||
self.total_queued_bytes = self.total_queued_bytes.saturating_sub(drained_bytes);
|
||||
self.bucket_queued_bytes[bucket_id] =
|
||||
self.bucket_queued_bytes[bucket_id].saturating_sub(drained_bytes);
|
||||
}
|
||||
|
||||
if requeue_active {
|
||||
if let Some(flow) = self.flows.get_mut(&conn_id) {
|
||||
flow.fairness.in_active_ring = true;
|
||||
}
|
||||
self.active_ring.push_back(conn_id);
|
||||
if requeue_active && enqueue_active {
|
||||
self.enqueue_active_conn(conn_id);
|
||||
}
|
||||
|
||||
if let Some(candidate) = candidate {
|
||||
@@ -348,7 +397,9 @@ impl WorkerFairnessState {
|
||||
) -> DispatchAction {
|
||||
match feedback {
|
||||
DispatchFeedback::Routed => {
|
||||
let mut membership_delta = None;
|
||||
if let Some(flow) = self.flows.get_mut(&conn_id) {
|
||||
let before_membership = Self::flow_membership(&flow.fairness);
|
||||
flow.fairness.last_drain_at = Some(now);
|
||||
flow.fairness.recent_drain_bytes = flow
|
||||
.fairness
|
||||
@@ -358,6 +409,17 @@ impl WorkerFairnessState {
|
||||
if flow.fairness.scheduler_state != FlowSchedulerState::Idle {
|
||||
flow.fairness.scheduler_state = FlowSchedulerState::Active;
|
||||
}
|
||||
Self::classify_flow(
|
||||
&self.config,
|
||||
self.pressure.state(),
|
||||
now,
|
||||
&mut flow.fairness,
|
||||
);
|
||||
membership_delta =
|
||||
Some((before_membership, Self::flow_membership(&flow.fairness)));
|
||||
}
|
||||
if let Some((before_membership, after_membership)) = membership_delta {
|
||||
self.apply_flow_membership_delta(before_membership, after_membership);
|
||||
}
|
||||
self.evaluate_pressure(now, false);
|
||||
DispatchAction::Continue
|
||||
@@ -365,17 +427,20 @@ impl WorkerFairnessState {
|
||||
DispatchFeedback::QueueFull => {
|
||||
self.pressure.note_route_stall(now, &self.config.pressure);
|
||||
self.downstream_stalls = self.downstream_stalls.saturating_add(1);
|
||||
let state = self.pressure.state();
|
||||
let Some(flow) = self.flows.get_mut(&conn_id) else {
|
||||
self.evaluate_pressure(now, true);
|
||||
return DispatchAction::Continue;
|
||||
};
|
||||
let (before_membership, after_membership, should_close_flow, enqueue_active) = {
|
||||
let before_membership = Self::flow_membership(&flow.fairness);
|
||||
let mut enqueue_active = false;
|
||||
|
||||
flow.fairness.consecutive_stalls =
|
||||
flow.fairness.consecutive_stalls.saturating_add(1);
|
||||
flow.fairness.scheduler_state = FlowSchedulerState::Backpressured;
|
||||
flow.fairness.pressure_class = FlowPressureClass::Backpressured;
|
||||
|
||||
let state = self.pressure.state();
|
||||
let should_shed_frame = matches!(state, PressureState::Saturated)
|
||||
|| (matches!(state, PressureState::Shedding)
|
||||
&& flow.fairness.standing_state == StandingQueueState::Standing
|
||||
@@ -392,20 +457,35 @@ impl WorkerFairnessState {
|
||||
flow.fairness.pending_bytes.saturating_add(frame_bytes);
|
||||
flow.fairness.queue_started_at =
|
||||
flow.queue.front().map(|front| front.enqueued_at);
|
||||
self.total_queued_bytes = self.total_queued_bytes.saturating_add(frame_bytes);
|
||||
self.bucket_queued_bytes[flow.fairness.bucket_id] = self.bucket_queued_bytes
|
||||
[flow.fairness.bucket_id]
|
||||
self.total_queued_bytes =
|
||||
self.total_queued_bytes.saturating_add(frame_bytes);
|
||||
self.bucket_queued_bytes[flow.fairness.bucket_id] = self
|
||||
.bucket_queued_bytes[flow.fairness.bucket_id]
|
||||
.saturating_add(frame_bytes);
|
||||
if !flow.fairness.in_active_ring {
|
||||
flow.fairness.in_active_ring = true;
|
||||
self.active_ring.push_back(conn_id);
|
||||
enqueue_active = true;
|
||||
}
|
||||
}
|
||||
|
||||
if flow.fairness.consecutive_stalls
|
||||
Self::classify_flow(&self.config, state, now, &mut flow.fairness);
|
||||
let after_membership = Self::flow_membership(&flow.fairness);
|
||||
let should_close_flow = flow.fairness.consecutive_stalls
|
||||
>= self.config.max_consecutive_stalls_before_close
|
||||
&& self.pressure.state() == PressureState::Saturated
|
||||
{
|
||||
&& self.pressure.state() == PressureState::Saturated;
|
||||
(
|
||||
before_membership,
|
||||
after_membership,
|
||||
should_close_flow,
|
||||
enqueue_active,
|
||||
)
|
||||
};
|
||||
if enqueue_active {
|
||||
self.enqueue_active_conn(conn_id);
|
||||
}
|
||||
self.apply_flow_membership_delta(before_membership, after_membership);
|
||||
|
||||
if should_close_flow {
|
||||
self.remove_flow(conn_id);
|
||||
self.evaluate_pressure(now, true);
|
||||
return DispatchAction::CloseFlow;
|
||||
@@ -426,6 +506,15 @@ impl WorkerFairnessState {
|
||||
let Some(entry) = self.flows.remove(&conn_id) else {
|
||||
return;
|
||||
};
|
||||
self.active_ring_members.remove(&conn_id);
|
||||
self.active_ring.retain(|queued_conn_id| *queued_conn_id != conn_id);
|
||||
let (was_standing, was_backpressured) = Self::flow_membership(&entry.fairness);
|
||||
if was_standing {
|
||||
self.standing_flow_count = self.standing_flow_count.saturating_sub(1);
|
||||
}
|
||||
if was_backpressured {
|
||||
self.backpressured_flow_count = self.backpressured_flow_count.saturating_sub(1);
|
||||
}
|
||||
|
||||
self.bucket_active_flows[entry.fairness.bucket_id] =
|
||||
self.bucket_active_flows[entry.fairness.bucket_id].saturating_sub(1);
|
||||
@@ -440,27 +529,6 @@ impl WorkerFairnessState {
|
||||
}
|
||||
|
||||
fn evaluate_pressure(&mut self, now: Instant, force: bool) {
|
||||
let mut standing = 0usize;
|
||||
let mut backpressured = 0usize;
|
||||
|
||||
for flow in self.flows.values_mut() {
|
||||
Self::classify_flow(&self.config, self.pressure.state(), now, &mut flow.fairness);
|
||||
if flow.fairness.standing_state == StandingQueueState::Standing {
|
||||
standing = standing.saturating_add(1);
|
||||
}
|
||||
if matches!(
|
||||
flow.fairness.scheduler_state,
|
||||
FlowSchedulerState::Backpressured
|
||||
| FlowSchedulerState::Penalized
|
||||
| FlowSchedulerState::SheddingCandidate
|
||||
) {
|
||||
backpressured = backpressured.saturating_add(1);
|
||||
}
|
||||
}
|
||||
|
||||
self.standing_flow_count = standing;
|
||||
self.backpressured_flow_count = backpressured;
|
||||
|
||||
let _ = self.pressure.maybe_evaluate(
|
||||
now,
|
||||
&self.config.pressure,
|
||||
@@ -468,8 +536,8 @@ impl WorkerFairnessState {
|
||||
PressureSignals {
|
||||
active_flows: self.flows.len(),
|
||||
total_queued_bytes: self.total_queued_bytes,
|
||||
standing_flows: standing,
|
||||
backpressured_flows: backpressured,
|
||||
standing_flows: self.standing_flow_count,
|
||||
backpressured_flows: self.backpressured_flow_count,
|
||||
},
|
||||
force,
|
||||
);
|
||||
@@ -481,12 +549,39 @@ impl WorkerFairnessState {
|
||||
now: Instant,
|
||||
fairness: &mut FlowFairnessState,
|
||||
) {
|
||||
if fairness.pending_bytes == 0 {
|
||||
fairness.pressure_class = FlowPressureClass::Healthy;
|
||||
fairness.standing_state = StandingQueueState::Transient;
|
||||
fairness.scheduler_state = FlowSchedulerState::Idle;
|
||||
let (pressure_class, standing_state, scheduler_state, standing) =
|
||||
Self::derive_flow_classification(config, pressure_state, now, fairness);
|
||||
fairness.pressure_class = pressure_class;
|
||||
fairness.standing_state = standing_state;
|
||||
fairness.scheduler_state = scheduler_state;
|
||||
if scheduler_state == FlowSchedulerState::Idle {
|
||||
fairness.deficit_bytes = 0;
|
||||
}
|
||||
if standing {
|
||||
fairness.penalty_score = fairness.penalty_score.saturating_add(1);
|
||||
} else {
|
||||
fairness.penalty_score = fairness.penalty_score.saturating_sub(1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
fn derive_flow_classification(
|
||||
config: &WorkerFairnessConfig,
|
||||
pressure_state: PressureState,
|
||||
now: Instant,
|
||||
fairness: &FlowFairnessState,
|
||||
) -> (
|
||||
FlowPressureClass,
|
||||
StandingQueueState,
|
||||
FlowSchedulerState,
|
||||
bool,
|
||||
) {
|
||||
if fairness.pending_bytes == 0 {
|
||||
return (
|
||||
FlowPressureClass::Healthy,
|
||||
StandingQueueState::Transient,
|
||||
FlowSchedulerState::Idle,
|
||||
false,
|
||||
);
|
||||
}
|
||||
|
||||
let queue_age = fairness
|
||||
@@ -503,29 +598,165 @@ impl WorkerFairnessState {
|
||||
&& (fairness.consecutive_stalls >= config.standing_stall_threshold || drain_stalled);
|
||||
|
||||
if standing {
|
||||
fairness.standing_state = StandingQueueState::Standing;
|
||||
fairness.pressure_class = FlowPressureClass::Standing;
|
||||
fairness.penalty_score = fairness.penalty_score.saturating_add(1);
|
||||
fairness.scheduler_state = if pressure_state >= PressureState::Shedding {
|
||||
let scheduler_state = if pressure_state >= PressureState::Shedding {
|
||||
FlowSchedulerState::SheddingCandidate
|
||||
} else {
|
||||
FlowSchedulerState::Penalized
|
||||
};
|
||||
return;
|
||||
return (
|
||||
FlowPressureClass::Standing,
|
||||
StandingQueueState::Standing,
|
||||
scheduler_state,
|
||||
true,
|
||||
);
|
||||
}
|
||||
|
||||
fairness.standing_state = StandingQueueState::Transient;
|
||||
if fairness.consecutive_stalls > 0 {
|
||||
fairness.pressure_class = FlowPressureClass::Backpressured;
|
||||
fairness.scheduler_state = FlowSchedulerState::Backpressured;
|
||||
} else if fairness.pending_bytes >= config.standing_queue_min_backlog_bytes {
|
||||
fairness.pressure_class = FlowPressureClass::Bursty;
|
||||
fairness.scheduler_state = FlowSchedulerState::Active;
|
||||
} else {
|
||||
fairness.pressure_class = FlowPressureClass::Healthy;
|
||||
fairness.scheduler_state = FlowSchedulerState::Active;
|
||||
return (
|
||||
FlowPressureClass::Backpressured,
|
||||
StandingQueueState::Transient,
|
||||
FlowSchedulerState::Backpressured,
|
||||
false,
|
||||
);
|
||||
}
|
||||
fairness.penalty_score = fairness.penalty_score.saturating_sub(1);
|
||||
|
||||
if fairness.pending_bytes >= config.standing_queue_min_backlog_bytes {
|
||||
return (
|
||||
FlowPressureClass::Bursty,
|
||||
StandingQueueState::Transient,
|
||||
FlowSchedulerState::Active,
|
||||
false,
|
||||
);
|
||||
}
|
||||
|
||||
(
|
||||
FlowPressureClass::Healthy,
|
||||
StandingQueueState::Transient,
|
||||
FlowSchedulerState::Active,
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn flow_membership(fairness: &FlowFairnessState) -> (bool, bool) {
|
||||
(
|
||||
fairness.standing_state == StandingQueueState::Standing,
|
||||
Self::scheduler_state_is_backpressured(fairness.scheduler_state),
|
||||
)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn scheduler_state_is_backpressured(state: FlowSchedulerState) -> bool {
|
||||
matches!(
|
||||
state,
|
||||
FlowSchedulerState::Backpressured
|
||||
| FlowSchedulerState::Penalized
|
||||
| FlowSchedulerState::SheddingCandidate
|
||||
)
|
||||
}
|
||||
|
||||
fn apply_flow_membership_delta(
|
||||
&mut self,
|
||||
before_membership: (bool, bool),
|
||||
after_membership: (bool, bool),
|
||||
) {
|
||||
if before_membership.0 != after_membership.0 {
|
||||
if after_membership.0 {
|
||||
self.standing_flow_count = self.standing_flow_count.saturating_add(1);
|
||||
} else {
|
||||
self.standing_flow_count = self.standing_flow_count.saturating_sub(1);
|
||||
}
|
||||
}
|
||||
if before_membership.1 != after_membership.1 {
|
||||
if after_membership.1 {
|
||||
self.backpressured_flow_count = self.backpressured_flow_count.saturating_add(1);
|
||||
} else {
|
||||
self.backpressured_flow_count = self.backpressured_flow_count.saturating_sub(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn clamp_deficit_bytes(config: &WorkerFairnessConfig, fairness: &mut FlowFairnessState) {
|
||||
let max_deficit = config.max_flow_queued_bytes.min(i64::MAX as u64) as i64;
|
||||
fairness.deficit_bytes = fairness.deficit_bytes.clamp(0, max_deficit);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn enqueue_active_conn(&mut self, conn_id: u64) {
|
||||
if self.active_ring_members.insert(conn_id) {
|
||||
self.active_ring.push_back(conn_id);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn weight_for_flags(config: &WorkerFairnessConfig, flags: u32) -> u8 {
|
||||
if (flags & RPC_FLAG_QUICKACK) != 0 {
|
||||
return config.quickack_flow_weight.max(1);
|
||||
}
|
||||
config.default_flow_weight.max(1)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn debug_recompute_flow_counters(&self, now: Instant) -> (usize, usize) {
|
||||
let pressure_state = self.pressure.state();
|
||||
let mut standing = 0usize;
|
||||
let mut backpressured = 0usize;
|
||||
for flow in self.flows.values() {
|
||||
let (_, standing_state, scheduler_state, _) =
|
||||
Self::derive_flow_classification(&self.config, pressure_state, now, &flow.fairness);
|
||||
if standing_state == StandingQueueState::Standing {
|
||||
standing = standing.saturating_add(1);
|
||||
}
|
||||
if Self::scheduler_state_is_backpressured(scheduler_state) {
|
||||
backpressured = backpressured.saturating_add(1);
|
||||
}
|
||||
}
|
||||
(standing, backpressured)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn debug_check_active_ring_consistency(&self) -> bool {
|
||||
if self.active_ring.len() != self.active_ring_members.len() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let mut seen = HashSet::with_capacity(self.active_ring.len());
|
||||
for conn_id in self.active_ring.iter().copied() {
|
||||
if !seen.insert(conn_id) {
|
||||
return false;
|
||||
}
|
||||
if !self.active_ring_members.contains(&conn_id) {
|
||||
return false;
|
||||
}
|
||||
let Some(flow) = self.flows.get(&conn_id) else {
|
||||
return false;
|
||||
};
|
||||
if !flow.fairness.in_active_ring || flow.queue.is_empty() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (conn_id, flow) in self.flows.iter() {
|
||||
let in_ring = self.active_ring_members.contains(conn_id);
|
||||
if flow.fairness.in_active_ring != in_ring {
|
||||
return false;
|
||||
}
|
||||
if in_ring && flow.queue.is_empty() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn debug_max_deficit_bytes(&self) -> i64 {
|
||||
self.flows
|
||||
.values()
|
||||
.map(|entry| entry.fairness.deficit_bytes)
|
||||
.max()
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
fn effective_quantum_bytes(
|
||||
@@ -542,12 +773,14 @@ impl WorkerFairnessState {
|
||||
return config.penalized_quantum_bytes.max(1);
|
||||
}
|
||||
|
||||
match pressure_state {
|
||||
let base_quantum = match pressure_state {
|
||||
PressureState::Normal => config.base_quantum_bytes.max(1),
|
||||
PressureState::Pressured => config.pressured_quantum_bytes.max(1),
|
||||
PressureState::Shedding => config.pressured_quantum_bytes.max(1),
|
||||
PressureState::Saturated => config.penalized_quantum_bytes.max(1),
|
||||
}
|
||||
};
|
||||
let weighted_quantum = base_quantum.saturating_mul(fairness.weight_quanta.max(1) as u32);
|
||||
weighted_quantum.max(1)
|
||||
}
|
||||
|
||||
fn bucket_for(&self, conn_id: u64) -> usize {
|
||||
|
||||
@@ -2,6 +2,7 @@ use std::time::{Duration, Instant};
|
||||
|
||||
use bytes::Bytes;
|
||||
|
||||
use crate::protocol::constants::RPC_FLAG_QUICKACK;
|
||||
use crate::transport::middle_proxy::fairness::{
|
||||
AdmissionDecision, DispatchAction, DispatchFeedback, PressureState, SchedulerDecision,
|
||||
WorkerFairnessConfig, WorkerFairnessState,
|
||||
@@ -114,6 +115,62 @@ fn fairness_keeps_fast_flow_progress_under_slow_neighbor() {
|
||||
assert!(snapshot.total_queued_bytes <= 64 * 1024);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fairness_prioritizes_quickack_flow_when_weights_enabled() {
|
||||
let mut now = Instant::now();
|
||||
let mut fairness = WorkerFairnessState::new(
|
||||
WorkerFairnessConfig {
|
||||
max_total_queued_bytes: 256 * 1024,
|
||||
max_flow_queued_bytes: 64 * 1024,
|
||||
base_quantum_bytes: 8 * 1024,
|
||||
pressured_quantum_bytes: 8 * 1024,
|
||||
penalized_quantum_bytes: 8 * 1024,
|
||||
default_flow_weight: 1,
|
||||
quickack_flow_weight: 4,
|
||||
..WorkerFairnessConfig::default()
|
||||
},
|
||||
now,
|
||||
);
|
||||
|
||||
for _ in 0..8 {
|
||||
assert_eq!(
|
||||
fairness.enqueue_data(10, RPC_FLAG_QUICKACK, enqueue_payload(16 * 1024), now),
|
||||
AdmissionDecision::Admit
|
||||
);
|
||||
assert_eq!(
|
||||
fairness.enqueue_data(20, 0, enqueue_payload(16 * 1024), now),
|
||||
AdmissionDecision::Admit
|
||||
);
|
||||
}
|
||||
|
||||
let mut quickack_dispatched = 0u64;
|
||||
let mut bulk_dispatched = 0u64;
|
||||
for _ in 0..64 {
|
||||
now += Duration::from_millis(1);
|
||||
let SchedulerDecision::Dispatch(candidate) = fairness.next_decision(now) else {
|
||||
break;
|
||||
};
|
||||
|
||||
if candidate.frame.conn_id == 10 {
|
||||
quickack_dispatched = quickack_dispatched.saturating_add(1);
|
||||
} else if candidate.frame.conn_id == 20 {
|
||||
bulk_dispatched = bulk_dispatched.saturating_add(1);
|
||||
}
|
||||
|
||||
let _ = fairness.apply_dispatch_feedback(
|
||||
candidate.frame.conn_id,
|
||||
candidate,
|
||||
DispatchFeedback::Routed,
|
||||
now,
|
||||
);
|
||||
}
|
||||
|
||||
assert!(
|
||||
quickack_dispatched > bulk_dispatched,
|
||||
"quickack flow must receive higher dispatch rate with larger weight"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fairness_pressure_hysteresis_prevents_instant_flapping() {
|
||||
let mut now = Instant::now();
|
||||
@@ -180,6 +237,12 @@ fn fairness_randomized_sequence_preserves_memory_bounds() {
|
||||
}
|
||||
|
||||
let snapshot = fairness.snapshot();
|
||||
let (standing_recomputed, backpressured_recomputed) =
|
||||
fairness.debug_recompute_flow_counters(now);
|
||||
assert!(snapshot.total_queued_bytes <= 32 * 1024);
|
||||
assert_eq!(snapshot.standing_flows, standing_recomputed);
|
||||
assert_eq!(snapshot.backpressured_flows, backpressured_recomputed);
|
||||
assert!(fairness.debug_check_active_ring_consistency());
|
||||
assert!(fairness.debug_max_deficit_bytes() <= 4 * 1024);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user