Compare commits

...

39 Commits
3.2.2 ... 3.3.4

Author SHA1 Message Date
Alexey
dd12997744 Merge pull request #338 from telemt/flow-api
API Zero + API Docs
2026-03-06 13:08:12 +03:00
Alexey
fc160913bf Update API.md 2026-03-06 13:07:31 +03:00
Alexey
92c22ef16d API Zero
Added new endpoints:
- GET /v1/system/info
- GET /v1/runtime/gates
- GET /v1/limits/effective
- GET /v1/security/posture

Added API runtime state without impacting the hot path:
- config_reload_count
- last_config_reload_epoch_secs
- admission_open
- process_started_at_epoch_secs

Added background watcher tasks in api::serve:
- configuration reload tracking
- admission gate state tracking
2026-03-06 13:06:57 +03:00
Alexey
aff22d0855 Merge pull request #337 from telemt/readme
Update README.md
2026-03-06 12:47:06 +03:00
Alexey
b3d3bca15a Update README.md 2026-03-06 12:46:51 +03:00
Alexey
92f38392eb Merge pull request #336 from telemt/bump
Update Cargo.toml
2026-03-06 12:45:47 +03:00
Alexey
30ef8df1b3 Update Cargo.toml 2026-03-06 12:44:40 +03:00
Alexey
2e174adf16 Merge pull request #335 from telemt/flow-stunae
Update load.rs
2026-03-06 12:39:28 +03:00
Alexey
4e803b1412 Update load.rs 2026-03-06 12:08:43 +03:00
Alexey
9b174318ce Runtime Model: merge pull request #334 from telemt/docs
Runtime Model
2026-03-06 11:12:16 +03:00
Alexey
99edcbe818 Runtime Model 2026-03-06 11:11:44 +03:00
Alexey
ef7dc2b80f Merge pull request #332 from telemt/bump
Update Cargo.toml
2026-03-06 04:05:46 +03:00
Alexey
691607f269 Update Cargo.toml 2026-03-06 04:05:35 +03:00
Alexey
55561a23bc ME NoWait Routing + Upstream Connbudget + another fixes: merge pull request #331 from telemt/flow-hp
ME NoWait Routing + Upstream Connbudget + another fixes
2026-03-06 04:05:04 +03:00
Alexey
f32c34f126 ME NoWait Routing + Upstream Connbudget + PROXY Header t/o + allocation cuts 2026-03-06 03:58:08 +03:00
Alexey
8f3bdaec2c Merge pull request #329 from telemt/bump
Update Cargo.toml
2026-03-05 23:23:40 +03:00
Alexey
69b02caf77 Update Cargo.toml 2026-03-05 23:23:24 +03:00
Alexey
3854955069 Merge pull request #328 from telemt/flow-mep
Secret Atomic Snapshot + KDF Fingerprint on RwLock
2026-03-05 23:23:01 +03:00
Alexey
9b84fc7a5b Secret Atomic Snapshot + KDF Fingerprint on RwLock
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-05 23:18:26 +03:00
Alexey
e7cb9238dc Merge pull request #327 from telemt/bump
Update Cargo.toml
2026-03-05 22:32:20 +03:00
Alexey
0e2cbe6178 Update Cargo.toml 2026-03-05 22:32:08 +03:00
Alexey
cd076aeeeb Merge pull request #326 from telemt/flow-noroute
HybridAsyncPersistent - new ME Route NoWriter Mode
2026-03-05 22:31:46 +03:00
Alexey
d683faf922 HybridAsyncPersistent - new ME Route NoWriter Mode
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-05 22:31:01 +03:00
Alexey
0494f8ac8b Merge pull request #325 from telemt/bump
Update Cargo.toml
2026-03-05 16:40:40 +03:00
Alexey
48ce59900e Update Cargo.toml 2026-03-05 16:40:28 +03:00
Alexey
84e95fd229 ME Pool Init fixes: merge pull request #324 from telemt/flow-fixes
ME Pool Init fixes
2026-03-05 16:35:00 +03:00
Alexey
a80be78345 DC writer floor is below required only in runtime 2026-03-05 16:32:31 +03:00
Alexey
64130dd02e MEP not ready only after 3 attempts 2026-03-05 16:13:40 +03:00
Alexey
d62a6e0417 Shutdown Timer fixes 2026-03-05 16:04:32 +03:00
Alexey
3260746785 Init + Uptime timers 2026-03-05 15:48:09 +03:00
Alexey
8066ea2163 ME Pool Init fixes 2026-03-05 15:31:36 +03:00
Alexey
813f1df63e Performance improvements: merge pull request #323 from telemt/flow-perf
Performance improvements
2026-03-05 14:43:10 +03:00
Alexey
09bdafa718 Performance improvements 2026-03-05 14:39:32 +03:00
Alexey
fb0f75df43 Merge pull request #322 from Dimasssss/patch-3
Update README.md
2026-03-05 14:10:01 +03:00
Alexey
39255df549 Unique IP always in Metrics+API: merge pull request #321 from telemt/flow-iplimit
Unique IP always in Metrics+API
2026-03-05 14:09:40 +03:00
Dimasssss
456495fd62 Update README.md 2026-03-05 13:59:58 +03:00
Alexey
83cadc0bf3 No lock-contention in ip-tracker 2026-03-05 13:52:27 +03:00
Alexey
0b1a8cd3f8 IP Limit fixes 2026-03-05 13:41:41 +03:00
Alexey
565b4ee923 Unique IP always in Metrics+API
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-05 13:21:11 +03:00
34 changed files with 3043 additions and 543 deletions

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "telemt" name = "telemt"
version = "3.2.2" version = "3.3.4"
edition = "2024" edition = "2024"
[dependencies] [dependencies]

View File

@@ -3,7 +3,7 @@
***Löst Probleme, bevor andere überhaupt wissen, dass sie existieren*** / ***It solves problems before others even realize they exist*** ***Löst Probleme, bevor andere überhaupt wissen, dass sie existieren*** / ***It solves problems before others even realize they exist***
**Telemt** is a fast, secure, and feature-rich server written in Rust: it fully implements the official Telegram proxy algo and adds many production-ready improvements such as: **Telemt** is a fast, secure, and feature-rich server written in Rust: it fully implements the official Telegram proxy algo and adds many production-ready improvements such as:
- ME Pool + Reader/Writer + Registry + Refill + Adaptive Floor + Trio-State + Generation Lifecycle - [ME Pool + Reader/Writer + Registry + Refill + Adaptive Floor + Trio-State + Generation Lifecycle](https://github.com/telemt/telemt/blob/main/docs/model/MODEL.en.md)
- [Full-covered API w/ management](https://github.com/telemt/telemt/blob/main/docs/API.md) - [Full-covered API w/ management](https://github.com/telemt/telemt/blob/main/docs/API.md)
- Anti-Replay on Sliding Window - Anti-Replay on Sliding Window
- Prometheus-format Metrics - Prometheus-format Metrics
@@ -118,8 +118,8 @@ We welcome ideas, architectural feedback, and pull requests.
## Quick Start Guide ## Quick Start Guide
### [Quick Start Guid RU](docs/QUICK_START_GUIDE.ru.md) ### [Quick Start Guide RU](docs/QUICK_START_GUIDE.ru.md)
### [Quick Start Guid EN](docs/QUICK_START_GUIDE.en.md) ### [Quick Start Guide EN](docs/QUICK_START_GUIDE.en.md)
### Advanced ### Advanced

View File

@@ -76,6 +76,10 @@ Notes:
| Method | Path | Body | Success | `data` contract | | Method | Path | Body | Success | `data` contract |
| --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- |
| `GET` | `/v1/health` | none | `200` | `HealthData` | | `GET` | `/v1/health` | none | `200` | `HealthData` |
| `GET` | `/v1/system/info` | none | `200` | `SystemInfoData` |
| `GET` | `/v1/runtime/gates` | none | `200` | `RuntimeGatesData` |
| `GET` | `/v1/limits/effective` | none | `200` | `EffectiveLimitsData` |
| `GET` | `/v1/security/posture` | none | `200` | `SecurityPostureData` |
| `GET` | `/v1/stats/summary` | none | `200` | `SummaryData` | | `GET` | `/v1/stats/summary` | none | `200` | `SummaryData` |
| `GET` | `/v1/stats/zero/all` | none | `200` | `ZeroAllData` | | `GET` | `/v1/stats/zero/all` | none | `200` | `ZeroAllData` |
| `GET` | `/v1/stats/upstreams` | none | `200` | `UpstreamsData` | | `GET` | `/v1/stats/upstreams` | none | `200` | `UpstreamsData` |
@@ -176,6 +180,94 @@ Note: the request contract is defined, but the corresponding route currently ret
| `handshake_timeouts_total` | `u64` | Handshake timeout count. | | `handshake_timeouts_total` | `u64` | Handshake timeout count. |
| `configured_users` | `usize` | Number of configured users in config. | | `configured_users` | `usize` | Number of configured users in config. |
### `SystemInfoData`
| Field | Type | Description |
| --- | --- | --- |
| `version` | `string` | Binary version (`CARGO_PKG_VERSION`). |
| `target_arch` | `string` | Target architecture (`std::env::consts::ARCH`). |
| `target_os` | `string` | Target OS (`std::env::consts::OS`). |
| `build_profile` | `string` | Build profile (`PROFILE` env when available). |
| `git_commit` | `string?` | Optional commit hash from build env metadata. |
| `build_time_utc` | `string?` | Optional build timestamp from build env metadata. |
| `rustc_version` | `string?` | Optional compiler version from build env metadata. |
| `process_started_at_epoch_secs` | `u64` | Process start time as Unix epoch seconds. |
| `uptime_seconds` | `f64` | Process uptime in seconds. |
| `config_path` | `string` | Active config file path used by runtime. |
| `config_hash` | `string` | SHA-256 hash of current config content (same value as envelope `revision`). |
| `config_reload_count` | `u64` | Number of successfully observed config updates since process start. |
| `last_config_reload_epoch_secs` | `u64?` | Unix epoch seconds of the latest observed config reload; null/absent before first reload. |
### `RuntimeGatesData`
| Field | Type | Description |
| --- | --- | --- |
| `accepting_new_connections` | `bool` | Current admission-gate state for new listener accepts. |
| `conditional_cast_enabled` | `bool` | Whether conditional ME admission logic is enabled (`general.use_middle_proxy`). |
| `me_runtime_ready` | `bool` | Current ME runtime readiness status used for conditional gate decisions. |
| `me2dc_fallback_enabled` | `bool` | Whether ME -> direct fallback is enabled. |
| `use_middle_proxy` | `bool` | Current transport mode preference. |
### `EffectiveLimitsData`
| Field | Type | Description |
| --- | --- | --- |
| `update_every_secs` | `u64` | Effective unified updater interval. |
| `me_reinit_every_secs` | `u64` | Effective ME periodic reinit interval. |
| `me_pool_force_close_secs` | `u64` | Effective stale-writer force-close timeout. |
| `timeouts` | `EffectiveTimeoutLimits` | Effective timeout policy snapshot. |
| `upstream` | `EffectiveUpstreamLimits` | Effective upstream connect/retry limits. |
| `middle_proxy` | `EffectiveMiddleProxyLimits` | Effective ME pool/floor/reconnect limits. |
| `user_ip_policy` | `EffectiveUserIpPolicyLimits` | Effective unique-IP policy mode/window. |
#### `EffectiveTimeoutLimits`
| Field | Type | Description |
| --- | --- | --- |
| `client_handshake_secs` | `u64` | Client handshake timeout. |
| `tg_connect_secs` | `u64` | Upstream Telegram connect timeout. |
| `client_keepalive_secs` | `u64` | Client keepalive interval. |
| `client_ack_secs` | `u64` | ACK timeout. |
| `me_one_retry` | `u8` | Fast retry count for single-endpoint ME DC. |
| `me_one_timeout_ms` | `u64` | Fast retry timeout per attempt for single-endpoint ME DC. |
#### `EffectiveUpstreamLimits`
| Field | Type | Description |
| --- | --- | --- |
| `connect_retry_attempts` | `u32` | Upstream connect retry attempts. |
| `connect_retry_backoff_ms` | `u64` | Upstream retry backoff delay. |
| `connect_budget_ms` | `u64` | Total connect wall-clock budget across retries. |
| `unhealthy_fail_threshold` | `u32` | Consecutive fail threshold for unhealthy marking. |
| `connect_failfast_hard_errors` | `bool` | Whether hard errors skip additional retries. |
#### `EffectiveMiddleProxyLimits`
| Field | Type | Description |
| --- | --- | --- |
| `floor_mode` | `string` | Effective floor mode (`static` or `adaptive`). |
| `adaptive_floor_idle_secs` | `u64` | Adaptive floor idle threshold. |
| `adaptive_floor_min_writers_single_endpoint` | `u8` | Adaptive floor minimum for single-endpoint DCs. |
| `adaptive_floor_recover_grace_secs` | `u64` | Adaptive floor recovery grace period. |
| `reconnect_max_concurrent_per_dc` | `u32` | Max concurrent reconnects per DC. |
| `reconnect_backoff_base_ms` | `u64` | Reconnect base backoff. |
| `reconnect_backoff_cap_ms` | `u64` | Reconnect backoff cap. |
| `reconnect_fast_retry_count` | `u32` | Number of fast retries before standard backoff strategy. |
| `me2dc_fallback` | `bool` | Effective ME -> direct fallback flag. |
#### `EffectiveUserIpPolicyLimits`
| Field | Type | Description |
| --- | --- | --- |
| `mode` | `string` | Unique-IP policy mode (`active_window`, `time_window`, `combined`). |
| `window_secs` | `u64` | Time window length used by unique-IP policy. |
### `SecurityPostureData`
| Field | Type | Description |
| --- | --- | --- |
| `api_read_only` | `bool` | Current API read-only state. |
| `api_whitelist_enabled` | `bool` | Whether whitelist filtering is active. |
| `api_whitelist_entries` | `usize` | Number of configured whitelist CIDRs. |
| `api_auth_header_enabled` | `bool` | Whether `Authorization` header validation is active. |
| `proxy_protocol_enabled` | `bool` | Global PROXY protocol accept setting. |
| `log_level` | `string` | Effective log level (`debug`, `verbose`, `normal`, `silent`). |
| `telemetry_core_enabled` | `bool` | Core telemetry toggle. |
| `telemetry_user_enabled` | `bool` | Per-user telemetry toggle. |
| `telemetry_me_level` | `string` | ME telemetry level (`silent`, `normal`, `debug`). |
### `ZeroAllData` ### `ZeroAllData`
| Field | Type | Description | | Field | Type | Description |
| --- | --- | --- | | --- | --- | --- |

285
docs/model/MODEL.en.md Normal file
View File

@@ -0,0 +1,285 @@
# Telemt Runtime Model
## Scope
This document defines runtime concepts used by the Middle-End (ME) transport pipeline and the orchestration logic around it.
It focuses on:
- `ME Pool / Reader / Writer / Refill / Registry`
- `Adaptive Floor`
- `Trio-State`
- `Generation Lifecycle`
## Core Entities
### ME Pool
`ME Pool` is the runtime orchestrator for all Middle-End writers.
Responsibilities:
- Holds writer inventory by DC/family/endpoint.
- Maintains routing primitives and writer selection policy.
- Tracks generation state (`active`, `warm`, `draining` context).
- Applies runtime policies (floor mode, refill, reconnect, reinit, fallback behavior).
- Exposes readiness gates used by admission logic (for conditional accept/cast behavior).
Non-goals:
- It does not own client protocol decoding.
- It does not own per-client business policy (quotas/limits).
### ME Writer
`ME Writer` is a long-lived ME RPC tunnel bound to one concrete ME endpoint (`ip:port`), with:
- Outbound command channel (send path).
- Associated reader loop (inbound path).
- Health/degraded flags.
- Contour/state and generation metadata.
A writer is the actual data plane carrier for client sessions once bound.
### ME Reader
`ME Reader` is the inbound parser/dispatcher for one writer:
- Reads/decrypts ME RPC frames.
- Validates sequence/checksum.
- Routes payloads to client-connection channels via `Registry`.
- Emits close/ack/data events and updates telemetry.
Design intent:
- Reader must stay non-blocking as much as possible.
- Backpressure on a single client route must not stall the whole writer stream.
### Refill
`Refill` is the recovery mechanism that restores writer coverage when capacity drops:
- Per-endpoint restore (same endpoint first).
- Per-DC restore to satisfy required floor.
- Optional outage-mode/shadow behavior for fragile single-endpoint DCs.
Refill works asynchronously and should not block hot routing paths.
### Registry
`Registry` is the routing index between ME and client sessions:
- `conn_id -> client response channel`
- `conn_id <-> writer_id` binding map
- writer activity snapshots and idle tracking
Main invariants:
- A `conn_id` routes to at most one active response channel.
- Writer loss triggers safe unbind/cleanup and close propagation.
- Registry state is the source of truth for active ME-bound session mapping.
## Adaptive Floor
### What it is
`Adaptive Floor` is a runtime policy that changes target writer count per DC based on observed activity, instead of always holding static peak floor.
### Why it exists
Goals:
- Reduce idle writer churn under low traffic.
- Keep enough warm capacity to avoid client-visible stalls on burst recovery.
- Limit needless reconnect storms on unstable endpoints.
### Behavioral model
- Under activity: floor converges toward configured static requirement.
- Under prolonged idle: floor can shrink to a safe minimum.
- Recovery/grace windows prevent aggressive oscillation.
### Safety constraints
- Never violate minimal survivability floor for a DC group.
- Refill must still restore quickly on demand.
- Floor adaptation must not force-drop already bound healthy sessions.
## Trio-State
`Trio-State` is writer contouring:
- `Warm`
- `Active`
- `Draining`
### State semantics
- `Warm`: connected and validated, not primary for new binds.
- `Active`: preferred for new binds and normal traffic.
- `Draining`: no new regular binds; existing sessions continue until graceful retirement rules apply.
### Transition intent
- `Warm -> Active`: when coverage/readiness conditions are satisfied.
- `Active -> Draining`: on generation swap, endpoint replacement, or controlled retirement.
- `Draining -> removed`: after drain TTL/force-close policy (or when naturally empty).
This separation reduces SPOF and keeps cutovers predictable.
## Generation Lifecycle
Generation isolates pool epochs during reinit/reconfiguration.
### Lifecycle phases
1. `Bootstrap`: initial writers are established.
2. `Warmup`: next generation writers are created and validated.
3. `Activation`: generation promoted to active when coverage gate passes.
4. `Drain`: previous generation becomes draining, existing sessions are allowed to finish.
5. `Retire`: old generation writers are removed after graceful rules.
### Operational guarantees
- No partial generation activation without minimum coverage.
- Existing healthy client sessions should not be dropped just because a new generation appears.
- Draining generation exists to absorb in-flight traffic during swap.
### Readiness and admission
Pool readiness is not equivalent to “all endpoints fully saturated”.
Typical gating strategy:
- Open admission when per-DC minimal alive coverage exists.
- Continue background saturation for multi-endpoint DCs.
This keeps startup latency low while preserving eventual full capacity.
## Interactions Between Concepts
- `Generation` defines pool epochs.
- `Trio-State` defines per-writer role inside/around those epochs.
- `Adaptive Floor` defines how much capacity should be maintained right now.
- `Refill` is the actuator that closes the gap between desired and current capacity.
- `Registry` keeps per-session routing correctness while all of the above changes over time.
## Architectural Approach
### Layered Design
The runtime is intentionally split into two planes:
- `Control Plane`: decides desired topology and policy (`floor`, `generation swap`, `refill`, `fallback`).
- `Data Plane`: executes packet/session transport (`reader`, `writer`, routing, acks, close propagation).
Architectural rule:
- Control Plane may change writer inventory and policy.
- Data Plane must remain stable and low-latency while those changes happen.
### Ownership Model
Ownership is centered around explicit state domains:
- `MePool` owns writer lifecycle and policy state.
- `Registry` owns per-connection routing bindings.
- `Writer task` owns outbound ME socket send progression.
- `Reader task` owns inbound ME socket parsing and event dispatch.
This prevents accidental cross-layer mutation and keeps invariants local.
### Control Plane Responsibilities
Control Plane is event-driven and policy-driven:
- Startup initialization and readiness gates.
- Runtime reinit (periodic or config-triggered).
- Coverage checks per DC/family/endpoint group.
- Floor enforcement (static/adaptive).
- Refill scheduling and retry orchestration.
- Generation transition (`warm -> active`, previous `active -> draining`).
Control Plane must prioritize determinism over short-term aggressiveness.
### Data Plane Responsibilities
Data Plane is throughput-first and allocation-sensitive:
- Session bind to writer.
- Per-frame parsing/validation and dispatch.
- Ack and close signal propagation.
- Route drop behavior under missing connection or closed channel.
- Minimal critical logging in hot path.
Data Plane should avoid waiting on operations that are not strictly required for frame correctness.
## Concurrency and Synchronization
### Concurrency Principles
- Per-writer isolation: each writer has independent send/read task loops.
- Per-connection isolation: client channel state is scoped by `conn_id`.
- Asynchronous recovery: refill/reconnect runs outside the packet hot path.
### Synchronization Strategy
- Shared maps use fine-grained, short-lived locking.
- Read-mostly paths avoid broad write-lock windows.
- Backpressure decisions are localized at route/channel boundary.
Design target:
- A slow consumer should degrade only itself (or its route), not global writer progress.
### Cancellation and Shutdown
Writer and reader loops are cancellation-aware:
- explicit cancel token / close command support;
- safe unbind and cleanup via registry;
- deterministic order: stop admission -> drain/close -> release resources.
## Consistency Model
### Session Consistency
For one `conn_id`:
- exactly one active route target at a time;
- close and unbind must be idempotent;
- writer loss must not leave dangling bindings.
### Generation Consistency
Generational consistency guarantees:
- New generation is not promoted before minimum coverage gate.
- Previous generation remains available in `draining` state during handover.
- Forced retirement is policy-bound (`drain ttl`, optional force-close), not immediate.
### Policy Consistency
Policy changes (`adaptive/static floor`, fallback mode, retries) should apply without violating established active-session routing invariants.
## Backpressure and Flow Control
### Route-Level Backpressure
Route channels are bounded by design.
When pressure increases:
- short burst absorption is allowed;
- prolonged congestion triggers controlled drop semantics;
- drop accounting is explicit via metrics/counters.
### Reader Non-Blocking Priority
Inbound ME reader path should never be serialized behind one congested client route.
Practical implication:
- prefer non-blocking route attempt in the parser loop;
- move heavy recovery to async side paths.
## Failure Domain Strategy
### Endpoint-Level Failure
Failure of one endpoint should trigger endpoint-scoped recovery first:
- same endpoint reconnect;
- endpoint replacement within same DC group if applicable.
### DC-Level Degradation
If a DC group cannot satisfy floor:
- keep service via remaining coverage if policy allows;
- continue asynchronous refill saturation in background.
### Whole-Pool Readiness Loss
If no sufficient ME coverage exists:
- admission gate can hold new accepts (conditional policy);
- existing sessions should continue when their path remains healthy.
## Performance Architecture Notes
### Hotpath Discipline
Allowed in hotpath:
- fixed-size parsing and cheap validation;
- bounded channel operations;
- precomputed or low-allocation access patterns.
Avoid in hotpath:
- repeated expensive decoding;
- broad locks with awaits inside critical sections;
- verbose high-frequency logging.
### Throughput Stability Over Peak Spikes
Architecture prefers stable throughput and predictable latency over short peak gains that increase churn or long-tail reconnect times.
## Evolution and Extension Rules
To evolve this model safely:
- Add new policy knobs in Control Plane first.
- Keep Data Plane contracts stable (`conn_id`, route semantics, close semantics).
- Validate generation and registry invariants before enabling by default.
- Introduce new retry/recovery strategies behind explicit config.
## Failure and Recovery Notes
- Single-endpoint DC failure is a normal degraded mode case; policy should prioritize fast reconnect and optional shadow/probing strategies.
- Idle close by peer should be treated as expected when upstream enforces idle timeout.
- Reconnect backoff must protect against synchronized churn while still allowing fast first retries.
- Fallback (`ME -> direct DC`) is a policy switch, not a transport bug by itself.
## Terminology Summary
- `Coverage`: enough live writers to satisfy per-DC acceptance policy.
- `Floor`: target minimum writer count policy.
- `Churn`: frequent writer reconnect/remove cycles.
- `Hotpath`: per-packet/per-connection data path where extra waits/allocations are expensive.

285
docs/model/MODEL.ru.md Normal file
View File

@@ -0,0 +1,285 @@
# Runtime-модель Telemt
## Область описания
Документ фиксирует ключевые runtime-понятия пайплайна Middle-End (ME) и оркестрации вокруг него.
Фокус:
- `ME Pool / Reader / Writer / Refill / Registry`
- `Adaptive Floor`
- `Trio-State`
- `Generation Lifecycle`
## Базовые сущности
### ME Pool
`ME Pool` — центральный оркестратор всех Middle-End writer-ов.
Зона ответственности:
- хранит инвентарь writer-ов по DC/family/endpoint;
- управляет выбором writer-а и маршрутизацией;
- ведёт состояние поколений (`active`, `warm`, `draining` контекст);
- применяет runtime-политики (floor, refill, reconnect, reinit, fallback);
- отдаёт сигналы готовности для admission-логики (conditional accept/cast).
Что не делает:
- не декодирует клиентский протокол;
- не реализует бизнес-политику пользователя (квоты/лимиты).
### ME Writer
`ME Writer` — долгоживущий ME RPC-канал к конкретному endpoint (`ip:port`), у которого есть:
- канал команд на отправку;
- связанный reader loop для входящего потока;
- флаги состояния/деградации;
- метаданные contour/state и generation.
Writer — это фактический data-plane носитель клиентских сессий после бинда.
### ME Reader
`ME Reader` — входной parser/dispatcher одного writer-а:
- читает и расшифровывает ME RPC-фреймы;
- проверяет sequence/checksum;
- маршрутизирует payload в client-каналы через `Registry`;
- обрабатывает close/ack/data и обновляет телеметрию.
Инженерный принцип:
- Reader должен оставаться неблокирующим.
- Backpressure одной клиентской сессии не должен останавливать весь поток writer-а.
### Refill
`Refill` — механизм восстановления покрытия writer-ов при просадке:
- восстановление на том же endpoint в первую очередь;
- восстановление по DC до требуемого floor;
- опциональные outage/shadow-режимы для хрупких single-endpoint DC.
Refill работает асинхронно и не должен блокировать hotpath.
### Registry
`Registry` — маршрутизационный индекс между ME и клиентскими сессиями:
- `conn_id -> канал ответа клиенту`;
- map биндов `conn_id <-> writer_id`;
- снимки активности writer-ов и idle-трекинг.
Ключевые инварианты:
- один `conn_id` маршрутизируется максимум в один активный канал ответа;
- потеря writer-а приводит к безопасному unbind/cleanup и отправке close;
- именно `Registry` является источником истины по активным ME-биндам.
## Adaptive Floor
### Что это
`Adaptive Floor` — runtime-политика, которая динамически меняет целевое число writer-ов на DC в зависимости от активности, а не держит всегда фиксированный статический floor.
### Зачем
Цели:
- уменьшить churn на idle-трафике;
- сохранить достаточную прогретую ёмкость для быстрых всплесков;
- снизить лишние reconnect-штормы на нестабильных endpoint.
### Модель поведения
- при активности floor стремится к статическому требованию;
- при длительном idle floor может снижаться до безопасного минимума;
- grace/recovery окна не дают системе "флапать" слишком резко.
### Ограничения безопасности
- нельзя нарушать минимальный floor выживаемости DC-группы;
- refill обязан быстро нарастить покрытие по запросу;
- адаптация не должна принудительно ронять уже привязанные healthy-сессии.
## Trio-State
`Trio-State` — контурная роль writer-а:
- `Warm`
- `Active`
- `Draining`
### Семантика состояний
- `Warm`: writer подключён и валиден, но не основной для новых биндов.
- `Active`: приоритетный для новых биндов и обычного трафика.
- `Draining`: новые обычные бинды не назначаются; текущие сессии живут до правил graceful-вывода.
### Логика переходов
- `Warm -> Active`: когда достигнуты условия покрытия/готовности.
- `Active -> Draining`: при swap поколения, замене endpoint или контролируемом выводе.
- `Draining -> removed`: после drain TTL/force-close политики (или естественного опустошения).
Такое разделение снижает SPOF-риски и делает cutover предсказуемым.
## Generation Lifecycle
Generation изолирует эпохи пула при reinit/reconfiguration.
### Фазы жизненного цикла
1. `Bootstrap`: поднимается начальный набор writer-ов.
2. `Warmup`: создаётся и валидируется новое поколение.
3. `Activation`: новое поколение становится active после прохождения coverage-gate.
4. `Drain`: предыдущее поколение переводится в draining, текущим сессиям дают завершиться.
5. `Retire`: старое поколение удаляется по graceful-правилам.
### Операционные гарантии
- нельзя активировать поколение частично без минимального покрытия;
- healthy-клиенты не должны теряться только из-за появления нового поколения;
- draining-поколение служит буфером для in-flight трафика во время swap.
### Готовность и приём клиентов
Готовность пула не равна "все endpoint полностью насыщены".
Типичная стратегия:
- открыть admission при минимально достаточном alive-покрытии по DC;
- параллельно продолжать saturation для multi-endpoint DC.
Это уменьшает startup latency и сохраняет выход на полную ёмкость.
## Как понятия связаны между собой
- `Generation` задаёт эпохи пула.
- `Trio-State` задаёт роль каждого writer-а внутри/между эпохами.
- `Adaptive Floor` задаёт, сколько ёмкости нужно сейчас.
- `Refill` — исполнитель, который закрывает разницу между desired и current capacity.
- `Registry` гарантирует корректную маршрутизацию сессий, пока всё выше меняется.
## Архитектурный подход
### Слоистая модель
Runtime специально разделён на две плоскости:
- `Control Plane`: принимает решения о целевой топологии и политиках (`floor`, `generation swap`, `refill`, `fallback`).
- `Data Plane`: исполняет транспорт сессий и пакетов (`reader`, `writer`, маршрутизация, ack, close).
Ключевое правило:
- Control Plane может менять состав writer-ов и policy.
- Data Plane должен оставаться стабильным и низколатентным в момент этих изменений.
### Модель владения состоянием
Владение разделено по доменам:
- `MePool` владеет жизненным циклом writer-ов и policy-state.
- `Registry` владеет routing-биндами клиентских сессий.
- `Writer task` владеет исходящей прогрессией ME-сокета.
- `Reader task` владеет входящим парсингом и dispatch-событиями.
Это ограничивает побочные мутации и локализует инварианты.
### Обязанности Control Plane
Control Plane работает событийно и policy-ориентированно:
- стартовая инициализация и readiness-gate;
- runtime reinit (периодический и/или по изменению конфигурации);
- проверки покрытия по DC/family/endpoint group;
- применение floor-политики (static/adaptive);
- планирование refill и orchestration retry;
- переходы поколений (`warm -> active`, прежний `active -> draining`).
Для него важнее детерминизм, чем агрессивная краткосрочная реакция.
### Обязанности Data Plane
Data Plane ориентирован на пропускную способность и предсказуемую задержку:
- bind клиентской сессии к writer-у;
- per-frame parsing/validation/dispatch;
- распространение ack/close;
- корректная реакция на missing conn/closed channel;
- минимальный лог-шум в hotpath.
Data Plane не должен ждать операций, не критичных для корректности текущего фрейма.
## Конкурентность и синхронизация
### Принципы конкурентности
- Изоляция по writer-у: у каждого writer-а независимые send/read loop.
- Изоляция по сессии: состояние канала локально для `conn_id`.
- Асинхронное восстановление: refill/reconnect выполняются вне пакетного hotpath.
### Стратегия синхронизации
- Для shared map используются короткие и узкие lock-секции.
- Read-heavy пути избегают длительных write-lock окон.
- Решения по backpressure локализованы на границе route/channel.
Цель:
- медленный consumer должен деградировать локально, не останавливая глобальный прогресс writer-а.
### Cancellation и shutdown
Reader/Writer loop должны быть cancellation-aware:
- явные cancel token / close command;
- безопасный unbind/cleanup через registry;
- детерминированный порядок: stop admission -> drain/close -> release resources.
## Модель согласованности
### Согласованность сессии
Для одного `conn_id`:
- одновременно ровно один активный route-target;
- close/unbind операции идемпотентны;
- потеря writer-а не оставляет dangling-бинды.
### Согласованность поколения
Гарантии generation:
- новое поколение не активируется до прохождения минимального coverage-gate;
- предыдущее поколение остаётся в `draining` на время handover;
- принудительный вывод writer-ов ограничен policy (`drain ttl`, optional force-close), а не мгновенный.
### Согласованность политик
Изменение policy (`adaptive/static floor`, fallback mode, retries) не должно ломать инварианты маршрутизации уже активных сессий.
## Backpressure и управление потоком
### Route-level backpressure
Route-каналы намеренно bounded.
При росте нагрузки:
- кратковременный burst поглощается;
- длительная перегрузка переходит в контролируемую drop-семантику;
- все drop-сценарии должны быть прозрачно видны в метриках.
### Приоритет неблокирующего Reader
Входящий ME-reader path не должен сериализоваться из-за одной перегруженной клиентской сессии.
Практически это означает:
- использовать неблокирующую попытку route в parser loop;
- выносить тяжёлое восстановление в асинхронные side-path.
## Стратегия доменов отказа
### Отказ отдельного endpoint
Сначала применяется endpoint-local recovery:
- reconnect в тот же endpoint;
- затем замена endpoint внутри той же DC-группы (если доступно).
### Деградация уровня DC
Если DC-группа не набирает floor:
- сервис сохраняется на остаточном покрытии (если policy разрешает);
- saturation refill продолжается асинхронно в фоне.
### Потеря готовности всего пула
Если достаточного ME-покрытия нет:
- admission gate может временно закрыть приём новых подключений (conditional policy);
- уже активные сессии продолжают работать, пока их маршрут остаётся healthy.
## Архитектурные заметки по производительности
### Дисциплина hotpath
Допустимо в hotpath:
- фиксированный и дешёвый parsing/validation;
- bounded channel operations;
- precomputed/low-allocation доступ к данным.
Нежелательно в hotpath:
- повторные дорогие decode;
- широкие lock-секции с `await` внутри;
- высокочастотный подробный logging.
### Стабильность важнее пиков
Архитектура приоритетно выбирает стабильную пропускную способность и предсказуемую latency, а не краткосрочные пики ценой churn и long-tail reconnect.
## Правила эволюции модели
Чтобы расширять модель безопасно:
- новые policy knobs сначала внедрять в Control Plane;
- контракты Data Plane (`conn_id`, route/close семантика) держать стабильными;
- перед дефолтным включением проверять generation/registry инварианты;
- новые recovery/retry стратегии вводить через явный config-флаг.
## Нюансы отказов и восстановления
- падение single-endpoint DC — штатный деградированный сценарий; приоритет: быстрый reconnect и, при необходимости, shadow/probing;
- idle-close со стороны peer должен считаться нормальным событием при upstream idle-timeout;
- backoff reconnect-логики должен ограничивать синхронный churn, но сохранять быстрые первые попытки;
- fallback (`ME -> direct DC`) — это переключаемая policy-ветка, а не автоматический признак бага транспорта.
## Краткий словарь
- `Coverage`: достаточное число живых writer-ов для политики приёма по DC.
- `Floor`: целевая минимальная ёмкость writer-ов.
- `Churn`: частые циклы reconnect/remove writer-ов.
- `Hotpath`: пер-пакетный/пер-коннектный путь, где любые лишние ожидания и аллокации особенно дороги.

View File

@@ -2,7 +2,8 @@ use std::convert::Infallible;
use std::net::{IpAddr, SocketAddr}; use std::net::{IpAddr, SocketAddr};
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};
use http_body_util::{BodyExt, Full}; use http_body_util::{BodyExt, Full};
use hyper::body::{Bytes, Incoming}; use hyper::body::{Bytes, Incoming};
@@ -25,6 +26,7 @@ use crate::transport::UpstreamManager;
mod config_store; mod config_store;
mod model; mod model;
mod runtime_stats; mod runtime_stats;
mod runtime_zero;
mod users; mod users;
use config_store::{current_revision, parse_if_match}; use config_store::{current_revision, parse_if_match};
@@ -36,8 +38,19 @@ use runtime_stats::{
MinimalCacheEntry, build_dcs_data, build_me_writers_data, build_minimal_all_data, MinimalCacheEntry, build_dcs_data, build_me_writers_data, build_minimal_all_data,
build_upstreams_data, build_zero_all_data, build_upstreams_data, build_zero_all_data,
}; };
use runtime_zero::{
build_limits_effective_data, build_runtime_gates_data, build_security_posture_data,
build_system_info_data,
};
use users::{create_user, delete_user, patch_user, rotate_secret, users_from_config}; use users::{create_user, delete_user, patch_user, rotate_secret, users_from_config};
pub(super) struct ApiRuntimeState {
pub(super) process_started_at_epoch_secs: u64,
pub(super) config_reload_count: AtomicU64,
pub(super) last_config_reload_epoch_secs: AtomicU64,
pub(super) admission_open: AtomicBool,
}
#[derive(Clone)] #[derive(Clone)]
pub(super) struct ApiShared { pub(super) struct ApiShared {
pub(super) stats: Arc<Stats>, pub(super) stats: Arc<Stats>,
@@ -50,6 +63,7 @@ pub(super) struct ApiShared {
pub(super) mutation_lock: Arc<Mutex<()>>, pub(super) mutation_lock: Arc<Mutex<()>>,
pub(super) minimal_cache: Arc<Mutex<Option<MinimalCacheEntry>>>, pub(super) minimal_cache: Arc<Mutex<Option<MinimalCacheEntry>>>,
pub(super) request_id: Arc<AtomicU64>, pub(super) request_id: Arc<AtomicU64>,
pub(super) runtime_state: Arc<ApiRuntimeState>,
} }
impl ApiShared { impl ApiShared {
@@ -65,9 +79,11 @@ pub async fn serve(
me_pool: Option<Arc<MePool>>, me_pool: Option<Arc<MePool>>,
upstream_manager: Arc<UpstreamManager>, upstream_manager: Arc<UpstreamManager>,
config_rx: watch::Receiver<Arc<ProxyConfig>>, config_rx: watch::Receiver<Arc<ProxyConfig>>,
admission_rx: watch::Receiver<bool>,
config_path: PathBuf, config_path: PathBuf,
startup_detected_ip_v4: Option<IpAddr>, startup_detected_ip_v4: Option<IpAddr>,
startup_detected_ip_v6: Option<IpAddr>, startup_detected_ip_v6: Option<IpAddr>,
process_started_at_epoch_secs: u64,
) { ) {
let listener = match TcpListener::bind(listen).await { let listener = match TcpListener::bind(listen).await {
Ok(listener) => listener, Ok(listener) => listener,
@@ -83,6 +99,13 @@ pub async fn serve(
info!("API endpoint: http://{}/v1/*", listen); info!("API endpoint: http://{}/v1/*", listen);
let runtime_state = Arc::new(ApiRuntimeState {
process_started_at_epoch_secs,
config_reload_count: AtomicU64::new(0),
last_config_reload_epoch_secs: AtomicU64::new(0),
admission_open: AtomicBool::new(*admission_rx.borrow()),
});
let shared = Arc::new(ApiShared { let shared = Arc::new(ApiShared {
stats, stats,
ip_tracker, ip_tracker,
@@ -94,6 +117,38 @@ pub async fn serve(
mutation_lock: Arc::new(Mutex::new(())), mutation_lock: Arc::new(Mutex::new(())),
minimal_cache: Arc::new(Mutex::new(None)), minimal_cache: Arc::new(Mutex::new(None)),
request_id: Arc::new(AtomicU64::new(1)), request_id: Arc::new(AtomicU64::new(1)),
runtime_state: runtime_state.clone(),
});
let mut config_rx_reload = config_rx.clone();
let runtime_state_reload = runtime_state.clone();
tokio::spawn(async move {
loop {
if config_rx_reload.changed().await.is_err() {
break;
}
runtime_state_reload
.config_reload_count
.fetch_add(1, Ordering::Relaxed);
runtime_state_reload
.last_config_reload_epoch_secs
.store(now_epoch_secs(), Ordering::Relaxed);
}
});
let mut admission_rx_watch = admission_rx.clone();
tokio::spawn(async move {
runtime_state
.admission_open
.store(*admission_rx_watch.borrow(), Ordering::Relaxed);
loop {
if admission_rx_watch.changed().await.is_err() {
break;
}
runtime_state
.admission_open
.store(*admission_rx_watch.borrow(), Ordering::Relaxed);
}
}); });
loop { loop {
@@ -189,6 +244,26 @@ async fn handle(
}; };
Ok(success_response(StatusCode::OK, data, revision)) Ok(success_response(StatusCode::OK, data, revision))
} }
("GET", "/v1/system/info") => {
let revision = current_revision(&shared.config_path).await?;
let data = build_system_info_data(shared.as_ref(), cfg.as_ref(), &revision);
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/runtime/gates") => {
let revision = current_revision(&shared.config_path).await?;
let data = build_runtime_gates_data(shared.as_ref(), cfg.as_ref());
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/limits/effective") => {
let revision = current_revision(&shared.config_path).await?;
let data = build_limits_effective_data(cfg.as_ref());
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/security/posture") => {
let revision = current_revision(&shared.config_path).await?;
let data = build_security_posture_data(cfg.as_ref());
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/stats/summary") => { ("GET", "/v1/stats/summary") => {
let revision = current_revision(&shared.config_path).await?; let revision = current_revision(&shared.config_path).await?;
let data = SummaryData { let data = SummaryData {
@@ -441,3 +516,10 @@ async fn read_body_with_limit(body: Incoming, limit: usize) -> Result<Vec<u8>, A
} }
Ok(collected) Ok(collected)
} }
fn now_epoch_secs() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs()
}

View File

@@ -1,3 +1,5 @@
use std::net::IpAddr;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use hyper::StatusCode; use hyper::StatusCode;
use rand::Rng; use rand::Rng;
@@ -369,6 +371,9 @@ pub(super) struct UserInfo {
pub(super) max_unique_ips: Option<usize>, pub(super) max_unique_ips: Option<usize>,
pub(super) current_connections: u64, pub(super) current_connections: u64,
pub(super) active_unique_ips: usize, pub(super) active_unique_ips: usize,
pub(super) active_unique_ips_list: Vec<IpAddr>,
pub(super) recent_unique_ips: usize,
pub(super) recent_unique_ips_list: Vec<IpAddr>,
pub(super) total_octets: u64, pub(super) total_octets: u64,
pub(super) links: UserLinks, pub(super) links: UserLinks,
} }

227
src/api/runtime_zero.rs Normal file
View File

@@ -0,0 +1,227 @@
use std::sync::atomic::Ordering;
use serde::Serialize;
use crate::config::{MeFloorMode, ProxyConfig, UserMaxUniqueIpsMode};
use super::ApiShared;
#[derive(Serialize)]
pub(super) struct SystemInfoData {
pub(super) version: String,
pub(super) target_arch: String,
pub(super) target_os: String,
pub(super) build_profile: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) git_commit: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) build_time_utc: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) rustc_version: Option<String>,
pub(super) process_started_at_epoch_secs: u64,
pub(super) uptime_seconds: f64,
pub(super) config_path: String,
pub(super) config_hash: String,
pub(super) config_reload_count: u64,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) last_config_reload_epoch_secs: Option<u64>,
}
#[derive(Serialize)]
pub(super) struct RuntimeGatesData {
pub(super) accepting_new_connections: bool,
pub(super) conditional_cast_enabled: bool,
pub(super) me_runtime_ready: bool,
pub(super) me2dc_fallback_enabled: bool,
pub(super) use_middle_proxy: bool,
}
#[derive(Serialize)]
pub(super) struct EffectiveTimeoutLimits {
pub(super) client_handshake_secs: u64,
pub(super) tg_connect_secs: u64,
pub(super) client_keepalive_secs: u64,
pub(super) client_ack_secs: u64,
pub(super) me_one_retry: u8,
pub(super) me_one_timeout_ms: u64,
}
#[derive(Serialize)]
pub(super) struct EffectiveUpstreamLimits {
pub(super) connect_retry_attempts: u32,
pub(super) connect_retry_backoff_ms: u64,
pub(super) connect_budget_ms: u64,
pub(super) unhealthy_fail_threshold: u32,
pub(super) connect_failfast_hard_errors: bool,
}
#[derive(Serialize)]
pub(super) struct EffectiveMiddleProxyLimits {
pub(super) floor_mode: &'static str,
pub(super) adaptive_floor_idle_secs: u64,
pub(super) adaptive_floor_min_writers_single_endpoint: u8,
pub(super) adaptive_floor_recover_grace_secs: u64,
pub(super) reconnect_max_concurrent_per_dc: u32,
pub(super) reconnect_backoff_base_ms: u64,
pub(super) reconnect_backoff_cap_ms: u64,
pub(super) reconnect_fast_retry_count: u32,
pub(super) me2dc_fallback: bool,
}
#[derive(Serialize)]
pub(super) struct EffectiveUserIpPolicyLimits {
pub(super) mode: &'static str,
pub(super) window_secs: u64,
}
#[derive(Serialize)]
pub(super) struct EffectiveLimitsData {
pub(super) update_every_secs: u64,
pub(super) me_reinit_every_secs: u64,
pub(super) me_pool_force_close_secs: u64,
pub(super) timeouts: EffectiveTimeoutLimits,
pub(super) upstream: EffectiveUpstreamLimits,
pub(super) middle_proxy: EffectiveMiddleProxyLimits,
pub(super) user_ip_policy: EffectiveUserIpPolicyLimits,
}
#[derive(Serialize)]
pub(super) struct SecurityPostureData {
pub(super) api_read_only: bool,
pub(super) api_whitelist_enabled: bool,
pub(super) api_whitelist_entries: usize,
pub(super) api_auth_header_enabled: bool,
pub(super) proxy_protocol_enabled: bool,
pub(super) log_level: String,
pub(super) telemetry_core_enabled: bool,
pub(super) telemetry_user_enabled: bool,
pub(super) telemetry_me_level: String,
}
pub(super) fn build_system_info_data(
shared: &ApiShared,
_cfg: &ProxyConfig,
revision: &str,
) -> SystemInfoData {
let last_reload_epoch_secs = shared
.runtime_state
.last_config_reload_epoch_secs
.load(Ordering::Relaxed);
let last_config_reload_epoch_secs = (last_reload_epoch_secs > 0).then_some(last_reload_epoch_secs);
let git_commit = option_env!("TELEMT_GIT_COMMIT")
.or(option_env!("VERGEN_GIT_SHA"))
.or(option_env!("GIT_COMMIT"))
.map(ToString::to_string);
let build_time_utc = option_env!("BUILD_TIME_UTC")
.or(option_env!("VERGEN_BUILD_TIMESTAMP"))
.map(ToString::to_string);
let rustc_version = option_env!("RUSTC_VERSION")
.or(option_env!("VERGEN_RUSTC_SEMVER"))
.map(ToString::to_string);
SystemInfoData {
version: env!("CARGO_PKG_VERSION").to_string(),
target_arch: std::env::consts::ARCH.to_string(),
target_os: std::env::consts::OS.to_string(),
build_profile: option_env!("PROFILE").unwrap_or("unknown").to_string(),
git_commit,
build_time_utc,
rustc_version,
process_started_at_epoch_secs: shared.runtime_state.process_started_at_epoch_secs,
uptime_seconds: shared.stats.uptime_secs(),
config_path: shared.config_path.display().to_string(),
config_hash: revision.to_string(),
config_reload_count: shared.runtime_state.config_reload_count.load(Ordering::Relaxed),
last_config_reload_epoch_secs,
}
}
pub(super) fn build_runtime_gates_data(shared: &ApiShared, cfg: &ProxyConfig) -> RuntimeGatesData {
let me_runtime_ready = if !cfg.general.use_middle_proxy {
true
} else {
shared
.me_pool
.as_ref()
.map(|pool| pool.is_runtime_ready())
.unwrap_or(false)
};
RuntimeGatesData {
accepting_new_connections: shared.runtime_state.admission_open.load(Ordering::Relaxed),
conditional_cast_enabled: cfg.general.use_middle_proxy,
me_runtime_ready,
me2dc_fallback_enabled: cfg.general.me2dc_fallback,
use_middle_proxy: cfg.general.use_middle_proxy,
}
}
pub(super) fn build_limits_effective_data(cfg: &ProxyConfig) -> EffectiveLimitsData {
EffectiveLimitsData {
update_every_secs: cfg.general.effective_update_every_secs(),
me_reinit_every_secs: cfg.general.effective_me_reinit_every_secs(),
me_pool_force_close_secs: cfg.general.effective_me_pool_force_close_secs(),
timeouts: EffectiveTimeoutLimits {
client_handshake_secs: cfg.timeouts.client_handshake,
tg_connect_secs: cfg.timeouts.tg_connect,
client_keepalive_secs: cfg.timeouts.client_keepalive,
client_ack_secs: cfg.timeouts.client_ack,
me_one_retry: cfg.timeouts.me_one_retry,
me_one_timeout_ms: cfg.timeouts.me_one_timeout_ms,
},
upstream: EffectiveUpstreamLimits {
connect_retry_attempts: cfg.general.upstream_connect_retry_attempts,
connect_retry_backoff_ms: cfg.general.upstream_connect_retry_backoff_ms,
connect_budget_ms: cfg.general.upstream_connect_budget_ms,
unhealthy_fail_threshold: cfg.general.upstream_unhealthy_fail_threshold,
connect_failfast_hard_errors: cfg.general.upstream_connect_failfast_hard_errors,
},
middle_proxy: EffectiveMiddleProxyLimits {
floor_mode: me_floor_mode_label(cfg.general.me_floor_mode),
adaptive_floor_idle_secs: cfg.general.me_adaptive_floor_idle_secs,
adaptive_floor_min_writers_single_endpoint: cfg
.general
.me_adaptive_floor_min_writers_single_endpoint,
adaptive_floor_recover_grace_secs: cfg.general.me_adaptive_floor_recover_grace_secs,
reconnect_max_concurrent_per_dc: cfg.general.me_reconnect_max_concurrent_per_dc,
reconnect_backoff_base_ms: cfg.general.me_reconnect_backoff_base_ms,
reconnect_backoff_cap_ms: cfg.general.me_reconnect_backoff_cap_ms,
reconnect_fast_retry_count: cfg.general.me_reconnect_fast_retry_count,
me2dc_fallback: cfg.general.me2dc_fallback,
},
user_ip_policy: EffectiveUserIpPolicyLimits {
mode: user_max_unique_ips_mode_label(cfg.access.user_max_unique_ips_mode),
window_secs: cfg.access.user_max_unique_ips_window_secs,
},
}
}
pub(super) fn build_security_posture_data(cfg: &ProxyConfig) -> SecurityPostureData {
SecurityPostureData {
api_read_only: cfg.server.api.read_only,
api_whitelist_enabled: !cfg.server.api.whitelist.is_empty(),
api_whitelist_entries: cfg.server.api.whitelist.len(),
api_auth_header_enabled: !cfg.server.api.auth_header.is_empty(),
proxy_protocol_enabled: cfg.server.proxy_protocol,
log_level: cfg.general.log_level.to_string(),
telemetry_core_enabled: cfg.general.telemetry.core_enabled,
telemetry_user_enabled: cfg.general.telemetry.user_enabled,
telemetry_me_level: cfg.general.telemetry.me_level.to_string(),
}
}
fn user_max_unique_ips_mode_label(mode: UserMaxUniqueIpsMode) -> &'static str {
match mode {
UserMaxUniqueIpsMode::ActiveWindow => "active_window",
UserMaxUniqueIpsMode::TimeWindow => "time_window",
UserMaxUniqueIpsMode::Combined => "combined",
}
}
fn me_floor_mode_label(mode: MeFloorMode) -> &'static str {
match mode {
MeFloorMode::Static => "static",
MeFloorMode::Adaptive => "adaptive",
}
}

View File

@@ -1,4 +1,3 @@
use std::collections::HashMap;
use std::net::IpAddr; use std::net::IpAddr;
use hyper::StatusCode; use hyper::StatusCode;
@@ -112,6 +111,9 @@ pub(super) async fn create_user(
max_unique_ips: updated_limit, max_unique_ips: updated_limit,
current_connections: 0, current_connections: 0,
active_unique_ips: 0, active_unique_ips: 0,
active_unique_ips_list: Vec::new(),
recent_unique_ips: 0,
recent_unique_ips_list: Vec::new(),
total_octets: 0, total_octets: 0,
links: build_user_links( links: build_user_links(
&cfg, &cfg,
@@ -300,18 +302,21 @@ pub(super) async fn users_from_config(
startup_detected_ip_v4: Option<IpAddr>, startup_detected_ip_v4: Option<IpAddr>,
startup_detected_ip_v6: Option<IpAddr>, startup_detected_ip_v6: Option<IpAddr>,
) -> Vec<UserInfo> { ) -> Vec<UserInfo> {
let ip_counts = ip_tracker
.get_stats()
.await
.into_iter()
.map(|(user, count, _)| (user, count))
.collect::<HashMap<_, _>>();
let mut names = cfg.access.users.keys().cloned().collect::<Vec<_>>(); let mut names = cfg.access.users.keys().cloned().collect::<Vec<_>>();
names.sort(); names.sort();
let active_ip_lists = ip_tracker.get_active_ips_for_users(&names).await;
let recent_ip_lists = ip_tracker.get_recent_ips_for_users(&names).await;
let mut users = Vec::with_capacity(names.len()); let mut users = Vec::with_capacity(names.len());
for username in names { for username in names {
let active_ip_list = active_ip_lists
.get(&username)
.cloned()
.unwrap_or_else(Vec::new);
let recent_ip_list = recent_ip_lists
.get(&username)
.cloned()
.unwrap_or_else(Vec::new);
let links = cfg let links = cfg
.access .access
.users .users
@@ -340,7 +345,10 @@ pub(super) async fn users_from_config(
data_quota_bytes: cfg.access.user_data_quota.get(&username).copied(), data_quota_bytes: cfg.access.user_data_quota.get(&username).copied(),
max_unique_ips: cfg.access.user_max_unique_ips.get(&username).copied(), max_unique_ips: cfg.access.user_max_unique_ips.get(&username).copied(),
current_connections: stats.get_user_curr_connects(&username), current_connections: stats.get_user_curr_connects(&username),
active_unique_ips: ip_counts.get(&username).copied().unwrap_or(0), active_unique_ips: active_ip_list.len(),
active_unique_ips_list: active_ip_list,
recent_unique_ips: recent_ip_list.len(),
recent_unique_ips_list: recent_ip_list,
total_octets: stats.get_user_total_octets(&username), total_octets: stats.get_user_total_octets(&username),
links, links,
username, username,

View File

@@ -15,6 +15,7 @@ const DEFAULT_ME_ADAPTIVE_FLOOR_RECOVER_GRACE_SECS: u64 = 180;
const DEFAULT_USER_MAX_UNIQUE_IPS_WINDOW_SECS: u64 = 30; const DEFAULT_USER_MAX_UNIQUE_IPS_WINDOW_SECS: u64 = 30;
const DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS: u32 = 2; const DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS: u32 = 2;
const DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD: u32 = 5; const DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD: u32 = 5;
const DEFAULT_UPSTREAM_CONNECT_BUDGET_MS: u64 = 3000;
const DEFAULT_LISTEN_ADDR_IPV6: &str = "::"; const DEFAULT_LISTEN_ADDR_IPV6: &str = "::";
const DEFAULT_ACCESS_USER: &str = "default"; const DEFAULT_ACCESS_USER: &str = "default";
const DEFAULT_ACCESS_SECRET: &str = "00000000000000000000000000000000"; const DEFAULT_ACCESS_SECRET: &str = "00000000000000000000000000000000";
@@ -113,6 +114,10 @@ pub(crate) fn default_api_minimal_runtime_cache_ttl_ms() -> u64 {
1000 1000
} }
pub(crate) fn default_proxy_protocol_header_timeout_ms() -> u64 {
500
}
pub(crate) fn default_prefer_4() -> u8 { pub(crate) fn default_prefer_4() -> u8 {
4 4
} }
@@ -129,6 +134,10 @@ pub(crate) fn default_unknown_dc_log_path() -> Option<String> {
Some("unknown-dc.txt".to_string()) Some("unknown-dc.txt".to_string())
} }
pub(crate) fn default_unknown_dc_file_log_enabled() -> bool {
false
}
pub(crate) fn default_pool_size() -> usize { pub(crate) fn default_pool_size() -> usize {
8 8
} }
@@ -137,6 +146,14 @@ pub(crate) fn default_proxy_secret_path() -> Option<String> {
Some("proxy-secret".to_string()) Some("proxy-secret".to_string())
} }
pub(crate) fn default_proxy_config_v4_cache_path() -> Option<String> {
Some("cache/proxy-config-v4.txt".to_string())
}
pub(crate) fn default_proxy_config_v6_cache_path() -> Option<String> {
Some("cache/proxy-config-v6.txt".to_string())
}
pub(crate) fn default_middle_proxy_nat_stun() -> Option<String> { pub(crate) fn default_middle_proxy_nat_stun() -> Option<String> {
None None
} }
@@ -241,6 +258,10 @@ pub(crate) fn default_upstream_unhealthy_fail_threshold() -> u32 {
DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD
} }
pub(crate) fn default_upstream_connect_budget_ms() -> u64 {
DEFAULT_UPSTREAM_CONNECT_BUDGET_MS
}
pub(crate) fn default_upstream_connect_failfast_hard_errors() -> bool { pub(crate) fn default_upstream_connect_failfast_hard_errors() -> bool {
false false
} }
@@ -273,6 +294,18 @@ pub(crate) fn default_me_route_backpressure_high_watermark_pct() -> u8 {
80 80
} }
pub(crate) fn default_me_route_no_writer_wait_ms() -> u64 {
250
}
pub(crate) fn default_me_route_inline_recovery_attempts() -> u32 {
3
}
pub(crate) fn default_me_route_inline_recovery_wait_ms() -> u64 {
3000
}
pub(crate) fn default_beobachten_minutes() -> u64 { pub(crate) fn default_beobachten_minutes() -> u64 {
10 10
} }

View File

@@ -381,6 +381,22 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
warned = true; warned = true;
warn!("config reload: general.middle_proxy_pool_size changed; restart required"); warn!("config reload: general.middle_proxy_pool_size changed; restart required");
} }
if old.general.me_route_no_writer_mode != new.general.me_route_no_writer_mode
|| old.general.me_route_no_writer_wait_ms != new.general.me_route_no_writer_wait_ms
|| old.general.me_route_inline_recovery_attempts
!= new.general.me_route_inline_recovery_attempts
|| old.general.me_route_inline_recovery_wait_ms
!= new.general.me_route_inline_recovery_wait_ms
{
warned = true;
warn!("config reload: general.me_route_no_writer_* changed; restart required");
}
if old.general.unknown_dc_log_path != new.general.unknown_dc_log_path
|| old.general.unknown_dc_file_log_enabled != new.general.unknown_dc_file_log_enabled
{
warned = true;
warn!("config reload: general.unknown_dc_* changed; restart required");
}
if old.general.me_init_retry_attempts != new.general.me_init_retry_attempts { if old.general.me_init_retry_attempts != new.general.me_init_retry_attempts {
warned = true; warned = true;
warn!("config reload: general.me_init_retry_attempts changed; restart required"); warn!("config reload: general.me_init_retry_attempts changed; restart required");
@@ -389,6 +405,12 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
warned = true; warned = true;
warn!("config reload: general.me2dc_fallback changed; restart required"); warn!("config reload: general.me2dc_fallback changed; restart required");
} }
if old.general.proxy_config_v4_cache_path != new.general.proxy_config_v4_cache_path
|| old.general.proxy_config_v6_cache_path != new.general.proxy_config_v6_cache_path
{
warned = true;
warn!("config reload: general.proxy_config_*_cache_path changed; restart required");
}
if old.general.me_keepalive_enabled != new.general.me_keepalive_enabled if old.general.me_keepalive_enabled != new.general.me_keepalive_enabled
|| old.general.me_keepalive_interval_secs != new.general.me_keepalive_interval_secs || old.general.me_keepalive_interval_secs != new.general.me_keepalive_interval_secs
|| old.general.me_keepalive_jitter_secs != new.general.me_keepalive_jitter_secs || old.general.me_keepalive_jitter_secs != new.general.me_keepalive_jitter_secs

View File

@@ -203,6 +203,22 @@ impl ProxyConfig {
sanitize_ad_tag(&mut config.general.ad_tag); sanitize_ad_tag(&mut config.general.ad_tag);
if let Some(path) = &config.general.proxy_config_v4_cache_path
&& path.trim().is_empty()
{
return Err(ProxyError::Config(
"general.proxy_config_v4_cache_path cannot be empty when provided".to_string(),
));
}
if let Some(path) = &config.general.proxy_config_v6_cache_path
&& path.trim().is_empty()
{
return Err(ProxyError::Config(
"general.proxy_config_v6_cache_path cannot be empty when provided".to_string(),
));
}
if let Some(update_every) = config.general.update_every { if let Some(update_every) = config.general.update_every {
if update_every == 0 { if update_every == 0 {
return Err(ProxyError::Config( return Err(ProxyError::Config(
@@ -249,6 +265,12 @@ impl ProxyConfig {
)); ));
} }
if config.general.upstream_connect_budget_ms == 0 {
return Err(ProxyError::Config(
"general.upstream_connect_budget_ms must be > 0".to_string(),
));
}
if config.general.upstream_unhealthy_fail_threshold == 0 { if config.general.upstream_unhealthy_fail_threshold == 0 {
return Err(ProxyError::Config( return Err(ProxyError::Config(
"general.upstream_unhealthy_fail_threshold must be > 0".to_string(), "general.upstream_unhealthy_fail_threshold must be > 0".to_string(),
@@ -410,6 +432,24 @@ impl ProxyConfig {
)); ));
} }
if !(10..=5000).contains(&config.general.me_route_no_writer_wait_ms) {
return Err(ProxyError::Config(
"general.me_route_no_writer_wait_ms must be within [10, 5000]".to_string(),
));
}
if config.general.me_route_inline_recovery_attempts == 0 {
return Err(ProxyError::Config(
"general.me_route_inline_recovery_attempts must be > 0".to_string(),
));
}
if !(10..=30000).contains(&config.general.me_route_inline_recovery_wait_ms) {
return Err(ProxyError::Config(
"general.me_route_inline_recovery_wait_ms must be within [10, 30000]".to_string(),
));
}
if config.server.api.request_body_limit_bytes == 0 { if config.server.api.request_body_limit_bytes == 0 {
return Err(ProxyError::Config( return Err(ProxyError::Config(
"server.api.request_body_limit_bytes must be > 0".to_string(), "server.api.request_body_limit_bytes must be > 0".to_string(),
@@ -428,6 +468,12 @@ impl ProxyConfig {
)); ));
} }
if config.server.proxy_protocol_header_timeout_ms == 0 {
return Err(ProxyError::Config(
"server.proxy_protocol_header_timeout_ms must be > 0".to_string(),
));
}
if config.general.effective_me_pool_force_close_secs() > 0 if config.general.effective_me_pool_force_close_secs() > 0
&& config.general.effective_me_pool_force_close_secs() && config.general.effective_me_pool_force_close_secs()
< config.general.me_pool_drain_ttl_secs < config.general.me_pool_drain_ttl_secs
@@ -509,10 +555,11 @@ impl ProxyConfig {
warn!("prefer_ipv6 is deprecated, use [network].prefer = 6"); warn!("prefer_ipv6 is deprecated, use [network].prefer = 6");
} }
// Auto-enable NAT probe when Middle Proxy is requested. if config.general.use_middle_proxy && !config.general.me_secret_atomic_snapshot {
if config.general.use_middle_proxy && !config.general.middle_proxy_nat_probe { config.general.me_secret_atomic_snapshot = true;
config.general.middle_proxy_nat_probe = true; warn!(
warn!("Auto-enabled middle_proxy_nat_probe for middle proxy mode"); "Auto-enabled me_secret_atomic_snapshot for middle proxy mode to keep KDF key_selector/secret coherent"
);
} }
validate_network_cfg(&mut config.network)?; validate_network_cfg(&mut config.network)?;
@@ -673,6 +720,14 @@ mod tests {
cfg.general.me2dc_fallback, cfg.general.me2dc_fallback,
default_me2dc_fallback() default_me2dc_fallback()
); );
assert_eq!(
cfg.general.proxy_config_v4_cache_path,
default_proxy_config_v4_cache_path()
);
assert_eq!(
cfg.general.proxy_config_v6_cache_path,
default_proxy_config_v6_cache_path()
);
assert_eq!( assert_eq!(
cfg.general.me_single_endpoint_shadow_writers, cfg.general.me_single_endpoint_shadow_writers,
default_me_single_endpoint_shadow_writers() default_me_single_endpoint_shadow_writers()
@@ -783,6 +838,14 @@ mod tests {
default_me_init_retry_attempts() default_me_init_retry_attempts()
); );
assert_eq!(general.me2dc_fallback, default_me2dc_fallback()); assert_eq!(general.me2dc_fallback, default_me2dc_fallback());
assert_eq!(
general.proxy_config_v4_cache_path,
default_proxy_config_v4_cache_path()
);
assert_eq!(
general.proxy_config_v6_cache_path,
default_proxy_config_v6_cache_path()
);
assert_eq!( assert_eq!(
general.me_single_endpoint_shadow_writers, general.me_single_endpoint_shadow_writers,
default_me_single_endpoint_shadow_writers() default_me_single_endpoint_shadow_writers()
@@ -1206,6 +1269,85 @@ mod tests {
let _ = std::fs::remove_file(path_valid); let _ = std::fs::remove_file(path_valid);
} }
#[test]
fn me_route_no_writer_wait_ms_out_of_range_is_rejected() {
let toml = r#"
[general]
me_route_no_writer_wait_ms = 5
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_me_route_no_writer_wait_ms_out_of_range_test.toml");
std::fs::write(&path, toml).unwrap();
let err = ProxyConfig::load(&path).unwrap_err().to_string();
assert!(err.contains("general.me_route_no_writer_wait_ms must be within [10, 5000]"));
let _ = std::fs::remove_file(path);
}
#[test]
fn me_route_no_writer_mode_is_parsed() {
let toml = r#"
[general]
me_route_no_writer_mode = "inline_recovery_legacy"
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_me_route_no_writer_mode_parse_test.toml");
std::fs::write(&path, toml).unwrap();
let cfg = ProxyConfig::load(&path).unwrap();
assert_eq!(
cfg.general.me_route_no_writer_mode,
crate::config::MeRouteNoWriterMode::InlineRecoveryLegacy
);
let _ = std::fs::remove_file(path);
}
#[test]
fn proxy_config_cache_paths_empty_are_rejected() {
let toml = r#"
[general]
proxy_config_v4_cache_path = " "
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_proxy_config_v4_cache_path_empty_test.toml");
std::fs::write(&path, toml).unwrap();
let err = ProxyConfig::load(&path).unwrap_err().to_string();
assert!(err.contains("general.proxy_config_v4_cache_path cannot be empty"));
let _ = std::fs::remove_file(path);
let toml_v6 = r#"
[general]
proxy_config_v6_cache_path = ""
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let path_v6 = dir.join("telemt_proxy_config_v6_cache_path_empty_test.toml");
std::fs::write(&path_v6, toml_v6).unwrap();
let err_v6 = ProxyConfig::load(&path_v6).unwrap_err().to_string();
assert!(err_v6.contains("general.proxy_config_v6_cache_path cannot be empty"));
let _ = std::fs::remove_file(path_v6);
}
#[test] #[test]
fn me_hardswap_warmup_defaults_are_set() { fn me_hardswap_warmup_defaults_are_set() {
let toml = r#" let toml = r#"

View File

@@ -183,6 +183,35 @@ impl MeFloorMode {
} }
} }
/// Middle-End route behavior when no writer is immediately available.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MeRouteNoWriterMode {
AsyncRecoveryFailfast,
InlineRecoveryLegacy,
#[default]
HybridAsyncPersistent,
}
impl MeRouteNoWriterMode {
pub fn as_u8(self) -> u8 {
match self {
MeRouteNoWriterMode::AsyncRecoveryFailfast => 0,
MeRouteNoWriterMode::InlineRecoveryLegacy => 1,
MeRouteNoWriterMode::HybridAsyncPersistent => 2,
}
}
pub fn from_u8(raw: u8) -> Self {
match raw {
0 => MeRouteNoWriterMode::AsyncRecoveryFailfast,
1 => MeRouteNoWriterMode::InlineRecoveryLegacy,
2 => MeRouteNoWriterMode::HybridAsyncPersistent,
_ => MeRouteNoWriterMode::HybridAsyncPersistent,
}
}
}
/// Per-user unique source IP limit mode. /// Per-user unique source IP limit mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
@@ -318,6 +347,14 @@ pub struct GeneralConfig {
#[serde(default = "default_proxy_secret_path")] #[serde(default = "default_proxy_secret_path")]
pub proxy_secret_path: Option<String>, pub proxy_secret_path: Option<String>,
/// Optional path to cache raw getProxyConfig (IPv4) snapshot for startup fallback.
#[serde(default = "default_proxy_config_v4_cache_path")]
pub proxy_config_v4_cache_path: Option<String>,
/// Optional path to cache raw getProxyConfigV6 snapshot for startup fallback.
#[serde(default = "default_proxy_config_v6_cache_path")]
pub proxy_config_v6_cache_path: Option<String>,
/// Global ad_tag (32 hex chars from @MTProxybot). Fallback when user has no per-user tag in access.user_ad_tags. /// Global ad_tag (32 hex chars from @MTProxybot). Fallback when user has no per-user tag in access.user_ad_tags.
#[serde(default)] #[serde(default)]
pub ad_tag: Option<String>, pub ad_tag: Option<String>,
@@ -495,6 +532,10 @@ pub struct GeneralConfig {
#[serde(default = "default_upstream_connect_retry_backoff_ms")] #[serde(default = "default_upstream_connect_retry_backoff_ms")]
pub upstream_connect_retry_backoff_ms: u64, pub upstream_connect_retry_backoff_ms: u64,
/// Total wall-clock budget in milliseconds for one upstream connect request across retries.
#[serde(default = "default_upstream_connect_budget_ms")]
pub upstream_connect_budget_ms: u64,
/// Consecutive failed requests before upstream is marked unhealthy. /// Consecutive failed requests before upstream is marked unhealthy.
#[serde(default = "default_upstream_unhealthy_fail_threshold")] #[serde(default = "default_upstream_unhealthy_fail_threshold")]
pub upstream_unhealthy_fail_threshold: u32, pub upstream_unhealthy_fail_threshold: u32,
@@ -511,6 +552,10 @@ pub struct GeneralConfig {
#[serde(default = "default_unknown_dc_log_path")] #[serde(default = "default_unknown_dc_log_path")]
pub unknown_dc_log_path: Option<String>, pub unknown_dc_log_path: Option<String>,
/// Enable unknown-DC file logging.
#[serde(default = "default_unknown_dc_file_log_enabled")]
pub unknown_dc_file_log_enabled: bool,
#[serde(default)] #[serde(default)]
pub log_level: LogLevel, pub log_level: LogLevel,
@@ -538,6 +583,22 @@ pub struct GeneralConfig {
#[serde(default = "default_me_route_backpressure_high_watermark_pct")] #[serde(default = "default_me_route_backpressure_high_watermark_pct")]
pub me_route_backpressure_high_watermark_pct: u8, pub me_route_backpressure_high_watermark_pct: u8,
/// ME route behavior when no writer is immediately available.
#[serde(default)]
pub me_route_no_writer_mode: MeRouteNoWriterMode,
/// Maximum wait time in milliseconds for async-recovery failfast mode.
#[serde(default = "default_me_route_no_writer_wait_ms")]
pub me_route_no_writer_wait_ms: u64,
/// Number of inline recovery attempts in legacy mode.
#[serde(default = "default_me_route_inline_recovery_attempts")]
pub me_route_inline_recovery_attempts: u32,
/// Maximum wait time in milliseconds for inline recovery in legacy mode.
#[serde(default = "default_me_route_inline_recovery_wait_ms")]
pub me_route_inline_recovery_wait_ms: u64,
/// [general.links] — proxy link generation overrides. /// [general.links] — proxy link generation overrides.
#[serde(default)] #[serde(default)]
pub links: LinksConfig, pub links: LinksConfig,
@@ -682,6 +743,8 @@ impl Default for GeneralConfig {
use_middle_proxy: default_true(), use_middle_proxy: default_true(),
ad_tag: None, ad_tag: None,
proxy_secret_path: default_proxy_secret_path(), proxy_secret_path: default_proxy_secret_path(),
proxy_config_v4_cache_path: default_proxy_config_v4_cache_path(),
proxy_config_v6_cache_path: default_proxy_config_v6_cache_path(),
middle_proxy_nat_ip: None, middle_proxy_nat_ip: None,
middle_proxy_nat_probe: default_true(), middle_proxy_nat_probe: default_true(),
middle_proxy_nat_stun: default_middle_proxy_nat_stun(), middle_proxy_nat_stun: default_middle_proxy_nat_stun(),
@@ -715,10 +778,12 @@ impl Default for GeneralConfig {
me_adaptive_floor_recover_grace_secs: default_me_adaptive_floor_recover_grace_secs(), me_adaptive_floor_recover_grace_secs: default_me_adaptive_floor_recover_grace_secs(),
upstream_connect_retry_attempts: default_upstream_connect_retry_attempts(), upstream_connect_retry_attempts: default_upstream_connect_retry_attempts(),
upstream_connect_retry_backoff_ms: default_upstream_connect_retry_backoff_ms(), upstream_connect_retry_backoff_ms: default_upstream_connect_retry_backoff_ms(),
upstream_connect_budget_ms: default_upstream_connect_budget_ms(),
upstream_unhealthy_fail_threshold: default_upstream_unhealthy_fail_threshold(), upstream_unhealthy_fail_threshold: default_upstream_unhealthy_fail_threshold(),
upstream_connect_failfast_hard_errors: default_upstream_connect_failfast_hard_errors(), upstream_connect_failfast_hard_errors: default_upstream_connect_failfast_hard_errors(),
stun_iface_mismatch_ignore: false, stun_iface_mismatch_ignore: false,
unknown_dc_log_path: default_unknown_dc_log_path(), unknown_dc_log_path: default_unknown_dc_log_path(),
unknown_dc_file_log_enabled: default_unknown_dc_file_log_enabled(),
log_level: LogLevel::Normal, log_level: LogLevel::Normal,
disable_colors: false, disable_colors: false,
telemetry: TelemetryConfig::default(), telemetry: TelemetryConfig::default(),
@@ -726,6 +791,10 @@ impl Default for GeneralConfig {
me_route_backpressure_base_timeout_ms: default_me_route_backpressure_base_timeout_ms(), me_route_backpressure_base_timeout_ms: default_me_route_backpressure_base_timeout_ms(),
me_route_backpressure_high_timeout_ms: default_me_route_backpressure_high_timeout_ms(), me_route_backpressure_high_timeout_ms: default_me_route_backpressure_high_timeout_ms(),
me_route_backpressure_high_watermark_pct: default_me_route_backpressure_high_watermark_pct(), me_route_backpressure_high_watermark_pct: default_me_route_backpressure_high_watermark_pct(),
me_route_no_writer_mode: MeRouteNoWriterMode::default(),
me_route_no_writer_wait_ms: default_me_route_no_writer_wait_ms(),
me_route_inline_recovery_attempts: default_me_route_inline_recovery_attempts(),
me_route_inline_recovery_wait_ms: default_me_route_inline_recovery_wait_ms(),
links: LinksConfig::default(), links: LinksConfig::default(),
crypto_pending_buffer: default_crypto_pending_buffer(), crypto_pending_buffer: default_crypto_pending_buffer(),
max_client_frame: default_max_client_frame(), max_client_frame: default_max_client_frame(),
@@ -898,6 +967,10 @@ pub struct ServerConfig {
#[serde(default)] #[serde(default)]
pub proxy_protocol: bool, pub proxy_protocol: bool,
/// Timeout in milliseconds for reading and parsing PROXY protocol headers.
#[serde(default = "default_proxy_protocol_header_timeout_ms")]
pub proxy_protocol_header_timeout_ms: u64,
#[serde(default)] #[serde(default)]
pub metrics_port: Option<u16>, pub metrics_port: Option<u16>,
@@ -921,6 +994,7 @@ impl Default for ServerConfig {
listen_unix_sock_perm: None, listen_unix_sock_perm: None,
listen_tcp: None, listen_tcp: None,
proxy_protocol: false, proxy_protocol: false,
proxy_protocol_header_timeout_ms: default_proxy_protocol_header_timeout_ms(),
metrics_port: None, metrics_port: None,
metrics_whitelist: default_metrics_whitelist(), metrics_whitelist: default_metrics_whitelist(),
api: ApiConfig::default(), api: ApiConfig::default(),

View File

@@ -21,6 +21,7 @@ struct SecureRandomInner {
rng: StdRng, rng: StdRng,
cipher: AesCtr, cipher: AesCtr,
buffer: Vec<u8>, buffer: Vec<u8>,
buffer_start: usize,
} }
impl Drop for SecureRandomInner { impl Drop for SecureRandomInner {
@@ -48,6 +49,7 @@ impl SecureRandom {
rng, rng,
cipher, cipher,
buffer: Vec::with_capacity(1024), buffer: Vec::with_capacity(1024),
buffer_start: 0,
}), }),
} }
} }
@@ -59,16 +61,29 @@ impl SecureRandom {
let mut written = 0usize; let mut written = 0usize;
while written < out.len() { while written < out.len() {
if inner.buffer_start >= inner.buffer.len() {
inner.buffer.clear();
inner.buffer_start = 0;
}
if inner.buffer.is_empty() { if inner.buffer.is_empty() {
let mut chunk = vec![0u8; CHUNK_SIZE]; let mut chunk = vec![0u8; CHUNK_SIZE];
inner.rng.fill_bytes(&mut chunk); inner.rng.fill_bytes(&mut chunk);
inner.cipher.apply(&mut chunk); inner.cipher.apply(&mut chunk);
inner.buffer.extend_from_slice(&chunk); inner.buffer.extend_from_slice(&chunk);
inner.buffer_start = 0;
} }
let take = (out.len() - written).min(inner.buffer.len()); let available = inner.buffer.len().saturating_sub(inner.buffer_start);
out[written..written + take].copy_from_slice(&inner.buffer[..take]); let take = (out.len() - written).min(available);
inner.buffer.drain(..take); let start = inner.buffer_start;
let end = start + take;
out[written..written + take].copy_from_slice(&inner.buffer[start..end]);
inner.buffer_start = end;
if inner.buffer_start >= inner.buffer.len() {
inner.buffer.clear();
inner.buffer_start = 0;
}
written += take; written += take;
} }
} }

View File

@@ -2,7 +2,7 @@
#![allow(dead_code)] #![allow(dead_code)]
use std::collections::{HashMap, HashSet}; use std::collections::HashMap;
use std::net::IpAddr; use std::net::IpAddr;
use std::sync::Arc; use std::sync::Arc;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
@@ -13,7 +13,7 @@ use crate::config::UserMaxUniqueIpsMode;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct UserIpTracker { pub struct UserIpTracker {
active_ips: Arc<RwLock<HashMap<String, HashSet<IpAddr>>>>, active_ips: Arc<RwLock<HashMap<String, HashMap<IpAddr, usize>>>>,
recent_ips: Arc<RwLock<HashMap<String, HashMap<IpAddr, Instant>>>>, recent_ips: Arc<RwLock<HashMap<String, HashMap<IpAddr, Instant>>>>,
max_ips: Arc<RwLock<HashMap<String, usize>>>, max_ips: Arc<RwLock<HashMap<String, usize>>>,
limit_mode: Arc<RwLock<UserMaxUniqueIpsMode>>, limit_mode: Arc<RwLock<UserMaxUniqueIpsMode>>,
@@ -67,21 +67,14 @@ impl UserIpTracker {
let max_ips = self.max_ips.read().await; let max_ips = self.max_ips.read().await;
max_ips.get(username).copied() max_ips.get(username).copied()
}; };
let mode = *self.limit_mode.read().await;
let window = *self.limit_window.read().await;
let now = Instant::now();
let mut active_ips = self.active_ips.write().await; let mut active_ips = self.active_ips.write().await;
let user_active = active_ips let user_active = active_ips
.entry(username.to_string()) .entry(username.to_string())
.or_insert_with(HashSet::new); .or_insert_with(HashMap::new);
if limit.is_none() {
user_active.insert(ip);
return Ok(());
}
let limit = limit.unwrap_or_default();
let mode = *self.limit_mode.read().await;
let window = *self.limit_window.read().await;
let now = Instant::now();
let mut recent_ips = self.recent_ips.write().await; let mut recent_ips = self.recent_ips.write().await;
let user_recent = recent_ips let user_recent = recent_ips
@@ -89,32 +82,35 @@ impl UserIpTracker {
.or_insert_with(HashMap::new); .or_insert_with(HashMap::new);
Self::prune_recent(user_recent, now, window); Self::prune_recent(user_recent, now, window);
if user_active.contains(&ip) { if let Some(count) = user_active.get_mut(&ip) {
*count = count.saturating_add(1);
user_recent.insert(ip, now); user_recent.insert(ip, now);
return Ok(()); return Ok(());
} }
let active_limit_reached = user_active.len() >= limit; if let Some(limit) = limit {
let recent_limit_reached = user_recent.len() >= limit; let active_limit_reached = user_active.len() >= limit;
let deny = match mode { let recent_limit_reached = user_recent.len() >= limit;
UserMaxUniqueIpsMode::ActiveWindow => active_limit_reached, let deny = match mode {
UserMaxUniqueIpsMode::TimeWindow => recent_limit_reached, UserMaxUniqueIpsMode::ActiveWindow => active_limit_reached,
UserMaxUniqueIpsMode::Combined => active_limit_reached || recent_limit_reached, UserMaxUniqueIpsMode::TimeWindow => recent_limit_reached,
}; UserMaxUniqueIpsMode::Combined => active_limit_reached || recent_limit_reached,
};
if deny { if deny {
return Err(format!( return Err(format!(
"IP limit reached for user '{}': active={}/{} recent={}/{} mode={:?}", "IP limit reached for user '{}': active={}/{} recent={}/{} mode={:?}",
username, username,
user_active.len(), user_active.len(),
limit, limit,
user_recent.len(), user_recent.len(),
limit, limit,
mode mode
)); ));
}
} }
user_active.insert(ip); user_active.insert(ip, 1);
user_recent.insert(ip, now); user_recent.insert(ip, now);
Ok(()) Ok(())
} }
@@ -122,23 +118,73 @@ impl UserIpTracker {
pub async fn remove_ip(&self, username: &str, ip: IpAddr) { pub async fn remove_ip(&self, username: &str, ip: IpAddr) {
let mut active_ips = self.active_ips.write().await; let mut active_ips = self.active_ips.write().await;
if let Some(user_ips) = active_ips.get_mut(username) { if let Some(user_ips) = active_ips.get_mut(username) {
user_ips.remove(&ip); if let Some(count) = user_ips.get_mut(&ip) {
if *count > 1 {
*count -= 1;
} else {
user_ips.remove(&ip);
}
}
if user_ips.is_empty() { if user_ips.is_empty() {
active_ips.remove(username); active_ips.remove(username);
} }
} }
drop(active_ips); }
let mode = *self.limit_mode.read().await; pub async fn get_recent_counts_for_users(&self, users: &[String]) -> HashMap<String, usize> {
if matches!(mode, UserMaxUniqueIpsMode::ActiveWindow) { let window = *self.limit_window.read().await;
let mut recent_ips = self.recent_ips.write().await; let now = Instant::now();
if let Some(user_recent) = recent_ips.get_mut(username) { let recent_ips = self.recent_ips.read().await;
user_recent.remove(&ip);
if user_recent.is_empty() { let mut counts = HashMap::with_capacity(users.len());
recent_ips.remove(username); for user in users {
} let count = if let Some(user_recent) = recent_ips.get(user) {
} user_recent
.values()
.filter(|seen_at| now.duration_since(**seen_at) <= window)
.count()
} else {
0
};
counts.insert(user.clone(), count);
} }
counts
}
pub async fn get_active_ips_for_users(&self, users: &[String]) -> HashMap<String, Vec<IpAddr>> {
let active_ips = self.active_ips.read().await;
let mut out = HashMap::with_capacity(users.len());
for user in users {
let mut ips = active_ips
.get(user)
.map(|per_ip| per_ip.keys().copied().collect::<Vec<_>>())
.unwrap_or_else(Vec::new);
ips.sort();
out.insert(user.clone(), ips);
}
out
}
pub async fn get_recent_ips_for_users(&self, users: &[String]) -> HashMap<String, Vec<IpAddr>> {
let window = *self.limit_window.read().await;
let now = Instant::now();
let recent_ips = self.recent_ips.read().await;
let mut out = HashMap::with_capacity(users.len());
for user in users {
let mut ips = if let Some(user_recent) = recent_ips.get(user) {
user_recent
.iter()
.filter(|(_, seen_at)| now.duration_since(**seen_at) <= window)
.map(|(ip, _)| *ip)
.collect::<Vec<_>>()
} else {
Vec::new()
};
ips.sort();
out.insert(user.clone(), ips);
}
out
} }
pub async fn get_active_ip_count(&self, username: &str) -> usize { pub async fn get_active_ip_count(&self, username: &str) -> usize {
@@ -150,7 +196,7 @@ impl UserIpTracker {
let active_ips = self.active_ips.read().await; let active_ips = self.active_ips.read().await;
active_ips active_ips
.get(username) .get(username)
.map(|ips| ips.iter().copied().collect()) .map(|ips| ips.keys().copied().collect())
.unwrap_or_else(Vec::new) .unwrap_or_else(Vec::new)
} }
@@ -190,7 +236,7 @@ impl UserIpTracker {
let active_ips = self.active_ips.read().await; let active_ips = self.active_ips.read().await;
active_ips active_ips
.get(username) .get(username)
.map(|ips| ips.contains(&ip)) .map(|ips| ips.contains_key(&ip))
.unwrap_or(false) .unwrap_or(false)
} }
@@ -266,6 +312,26 @@ mod tests {
assert_eq!(tracker.get_active_ip_count("test_user").await, 2); assert_eq!(tracker.get_active_ip_count("test_user").await, 2);
} }
#[tokio::test]
async fn test_active_window_rejects_new_ip_and_keeps_existing_session() {
let tracker = UserIpTracker::new();
tracker.set_user_limit("test_user", 1).await;
tracker
.set_limit_policy(UserMaxUniqueIpsMode::ActiveWindow, 30)
.await;
let ip1 = test_ipv4(10, 10, 10, 1);
let ip2 = test_ipv4(10, 10, 10, 2);
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
assert!(tracker.is_ip_active("test_user", ip1).await);
assert!(tracker.check_and_add("test_user", ip2).await.is_err());
// Existing session remains active; only new unique IP is denied.
assert!(tracker.is_ip_active("test_user", ip1).await);
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
}
#[tokio::test] #[tokio::test]
async fn test_reconnection_from_same_ip() { async fn test_reconnection_from_same_ip() {
let tracker = UserIpTracker::new(); let tracker = UserIpTracker::new();
@@ -278,6 +344,24 @@ mod tests {
assert_eq!(tracker.get_active_ip_count("test_user").await, 1); assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
} }
#[tokio::test]
async fn test_same_ip_disconnect_keeps_active_while_other_session_alive() {
let tracker = UserIpTracker::new();
tracker.set_user_limit("test_user", 2).await;
let ip1 = test_ipv4(192, 168, 1, 1);
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
tracker.remove_ip("test_user", ip1).await;
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
tracker.remove_ip("test_user", ip1).await;
assert_eq!(tracker.get_active_ip_count("test_user").await, 0);
}
#[tokio::test] #[tokio::test]
async fn test_ip_removal() { async fn test_ip_removal() {
let tracker = UserIpTracker::new(); let tracker = UserIpTracker::new();

View File

@@ -4,11 +4,11 @@
use std::net::SocketAddr; use std::net::SocketAddr;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use rand::Rng; use rand::Rng;
use tokio::net::TcpListener; use tokio::net::TcpListener;
use tokio::signal; use tokio::signal;
use tokio::sync::{Semaphore, mpsc}; use tokio::sync::{Semaphore, mpsc, watch};
use tracing::{debug, error, info, warn}; use tracing::{debug, error, info, warn};
use tracing_subscriber::{EnvFilter, fmt, prelude::*, reload}; use tracing_subscriber::{EnvFilter, fmt, prelude::*, reload};
#[cfg(unix)] #[cfg(unix)]
@@ -41,8 +41,9 @@ use crate::stats::telemetry::TelemetryPolicy;
use crate::stats::{ReplayChecker, Stats}; use crate::stats::{ReplayChecker, Stats};
use crate::stream::BufferPool; use crate::stream::BufferPool;
use crate::transport::middle_proxy::{ use crate::transport::middle_proxy::{
MePool, fetch_proxy_config, run_me_ping, MePingFamily, MePingSample, MeReinitTrigger, format_sample_line, MePool, ProxyConfigData, fetch_proxy_config_with_raw, format_me_route, format_sample_line,
format_me_route, load_proxy_config_cache, run_me_ping, save_proxy_config_cache, MePingFamily, MePingSample,
MeReinitTrigger,
}; };
use crate::transport::{ListenOptions, UpstreamManager, create_listener, find_listener_processes}; use crate::transport::{ListenOptions, UpstreamManager, create_listener, find_listener_processes};
use crate::tls_front::TlsFrontCache; use crate::tls_front::TlsFrontCache;
@@ -172,8 +173,206 @@ async fn write_beobachten_snapshot(path: &str, payload: &str) -> std::io::Result
tokio::fs::write(path, payload).await tokio::fs::write(path, payload).await
} }
fn unit_label(value: u64, singular: &'static str, plural: &'static str) -> &'static str {
if value == 1 { singular } else { plural }
}
fn format_uptime(total_secs: u64) -> String {
const SECS_PER_MINUTE: u64 = 60;
const SECS_PER_HOUR: u64 = 60 * SECS_PER_MINUTE;
const SECS_PER_DAY: u64 = 24 * SECS_PER_HOUR;
const SECS_PER_MONTH: u64 = 30 * SECS_PER_DAY;
const SECS_PER_YEAR: u64 = 12 * SECS_PER_MONTH;
let mut remaining = total_secs;
let years = remaining / SECS_PER_YEAR;
remaining %= SECS_PER_YEAR;
let months = remaining / SECS_PER_MONTH;
remaining %= SECS_PER_MONTH;
let days = remaining / SECS_PER_DAY;
remaining %= SECS_PER_DAY;
let hours = remaining / SECS_PER_HOUR;
remaining %= SECS_PER_HOUR;
let minutes = remaining / SECS_PER_MINUTE;
let seconds = remaining % SECS_PER_MINUTE;
let mut parts = Vec::new();
if total_secs > SECS_PER_YEAR {
parts.push(format!(
"{} {}",
years,
unit_label(years, "year", "years")
));
}
if total_secs > SECS_PER_MONTH {
parts.push(format!(
"{} {}",
months,
unit_label(months, "month", "months")
));
}
if total_secs > SECS_PER_DAY {
parts.push(format!(
"{} {}",
days,
unit_label(days, "day", "days")
));
}
if total_secs > SECS_PER_HOUR {
parts.push(format!(
"{} {}",
hours,
unit_label(hours, "hour", "hours")
));
}
if total_secs > SECS_PER_MINUTE {
parts.push(format!(
"{} {}",
minutes,
unit_label(minutes, "minute", "minutes")
));
}
parts.push(format!(
"{} {}",
seconds,
unit_label(seconds, "second", "seconds")
));
format!("{} / {} seconds", parts.join(", "), total_secs)
}
async fn wait_until_admission_open(admission_rx: &mut watch::Receiver<bool>) -> bool {
loop {
if *admission_rx.borrow() {
return true;
}
if admission_rx.changed().await.is_err() {
return *admission_rx.borrow();
}
}
}
async fn load_startup_proxy_config_snapshot(
url: &str,
cache_path: Option<&str>,
me2dc_fallback: bool,
label: &'static str,
) -> Option<ProxyConfigData> {
loop {
match fetch_proxy_config_with_raw(url).await {
Ok((cfg, raw)) => {
if !cfg.map.is_empty() {
if let Some(path) = cache_path
&& let Err(e) = save_proxy_config_cache(path, &raw).await
{
warn!(error = %e, path, snapshot = label, "Failed to store startup proxy-config cache");
}
return Some(cfg);
}
warn!(snapshot = label, url, "Startup proxy-config is empty; trying disk cache");
if let Some(path) = cache_path {
match load_proxy_config_cache(path).await {
Ok(cached) if !cached.map.is_empty() => {
info!(
snapshot = label,
path,
proxy_for_lines = cached.proxy_for_lines,
"Loaded startup proxy-config from disk cache"
);
return Some(cached);
}
Ok(_) => {
warn!(
snapshot = label,
path,
"Startup proxy-config cache is empty; ignoring cache file"
);
}
Err(cache_err) => {
debug!(
snapshot = label,
path,
error = %cache_err,
"Startup proxy-config cache unavailable"
);
}
}
}
if me2dc_fallback {
error!(
snapshot = label,
"Startup proxy-config unavailable and no saved config found; falling back to direct mode"
);
return None;
}
warn!(
snapshot = label,
retry_in_secs = 2,
"Startup proxy-config unavailable and no saved config found; retrying because me2dc_fallback=false"
);
tokio::time::sleep(Duration::from_secs(2)).await;
}
Err(fetch_err) => {
if let Some(path) = cache_path {
match load_proxy_config_cache(path).await {
Ok(cached) if !cached.map.is_empty() => {
info!(
snapshot = label,
path,
proxy_for_lines = cached.proxy_for_lines,
"Loaded startup proxy-config from disk cache"
);
return Some(cached);
}
Ok(_) => {
warn!(
snapshot = label,
path,
"Startup proxy-config cache is empty; ignoring cache file"
);
}
Err(cache_err) => {
debug!(
snapshot = label,
path,
error = %cache_err,
"Startup proxy-config cache unavailable"
);
}
}
}
if me2dc_fallback {
error!(
snapshot = label,
error = %fetch_err,
"Startup proxy-config unavailable and no cached data; falling back to direct mode"
);
return None;
}
warn!(
snapshot = label,
error = %fetch_err,
retry_in_secs = 2,
"Startup proxy-config unavailable; retrying because me2dc_fallback=false"
);
tokio::time::sleep(Duration::from_secs(2)).await;
}
}
}
}
#[tokio::main] #[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> { async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
let process_started_at = Instant::now();
let process_started_at_epoch_secs = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
let (config_path, cli_silent, cli_log_level) = parse_cli(); let (config_path, cli_silent, cli_log_level) = parse_cli();
let mut config = match ProxyConfig::load(&config_path) { let mut config = match ProxyConfig::load(&config_path) {
@@ -269,6 +468,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
config.upstreams.clone(), config.upstreams.clone(),
config.general.upstream_connect_retry_attempts, config.general.upstream_connect_retry_attempts,
config.general.upstream_connect_retry_backoff_ms, config.general.upstream_connect_retry_backoff_ms,
config.general.upstream_connect_budget_ms,
config.general.upstream_unhealthy_fail_threshold, config.general.upstream_unhealthy_fail_threshold,
config.general.upstream_connect_failfast_hard_errors, config.general.upstream_connect_failfast_hard_errors,
stats.clone(), stats.clone(),
@@ -445,6 +645,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
let me2dc_fallback = config.general.me2dc_fallback; let me2dc_fallback = config.general.me2dc_fallback;
let me_init_retry_attempts = config.general.me_init_retry_attempts; let me_init_retry_attempts = config.general.me_init_retry_attempts;
let me_init_warn_after_attempts: u32 = 3;
if use_middle_proxy && !decision.ipv4_me && !decision.ipv6_me { if use_middle_proxy && !decision.ipv4_me && !decision.ipv6_me {
if me2dc_fallback { if me2dc_fallback {
warn!("No usable IP family for Middle Proxy detected; falling back to direct DC"); warn!("No usable IP family for Middle Proxy detected; falling back to direct DC");
@@ -484,189 +685,199 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
// ============================================================= // =============================================================
let proxy_secret_path = config.general.proxy_secret_path.as_deref(); let proxy_secret_path = config.general.proxy_secret_path.as_deref();
let pool_size = config.general.middle_proxy_pool_size.max(1); let pool_size = config.general.middle_proxy_pool_size.max(1);
let mut init_attempt: u32 = 0; let proxy_secret = loop {
loop { match crate::transport::middle_proxy::fetch_proxy_secret(
init_attempt = init_attempt.saturating_add(1);
let proxy_secret = match crate::transport::middle_proxy::fetch_proxy_secret(
proxy_secret_path, proxy_secret_path,
config.general.proxy_secret_len_max, config.general.proxy_secret_len_max,
) )
.await .await
{ {
Ok(proxy_secret) => proxy_secret, Ok(proxy_secret) => break Some(proxy_secret),
Err(e) => { Err(e) => {
let retries_limited = me2dc_fallback && me_init_retry_attempts > 0; if me2dc_fallback {
if retries_limited && init_attempt >= me_init_retry_attempts {
error!( error!(
error = %e, error = %e,
attempt = init_attempt, "ME startup failed: proxy-secret is unavailable and no saved secret found; falling back to direct mode"
retry_limit = me_init_retry_attempts,
"ME startup retries exhausted while loading proxy-secret; falling back to direct mode"
); );
break None; break None;
} }
warn!( warn!(
error = %e, error = %e,
attempt = init_attempt,
retry_limit = if me_init_retry_attempts == 0 {
String::from("unlimited")
} else {
me_init_retry_attempts.to_string()
},
me2dc_fallback = me2dc_fallback,
retry_in_secs = 2, retry_in_secs = 2,
"Failed to fetch proxy-secret; retrying ME startup" "ME startup failed: proxy-secret is unavailable and no saved secret found; retrying because me2dc_fallback=false"
); );
tokio::time::sleep(Duration::from_secs(2)).await; tokio::time::sleep(Duration::from_secs(2)).await;
continue;
}
};
info!(
secret_len = proxy_secret.len(),
key_sig = format_args!(
"0x{:08x}",
if proxy_secret.len() >= 4 {
u32::from_le_bytes([
proxy_secret[0],
proxy_secret[1],
proxy_secret[2],
proxy_secret[3],
])
} else {
0
}
),
"Proxy-secret loaded"
);
// Load ME config (v4/v6) + default DC
let mut cfg_v4 = fetch_proxy_config(
"https://core.telegram.org/getProxyConfig",
)
.await
.unwrap_or_default();
let mut cfg_v6 = fetch_proxy_config(
"https://core.telegram.org/getProxyConfigV6",
)
.await
.unwrap_or_default();
if cfg_v4.map.is_empty() {
cfg_v4.map = crate::protocol::constants::TG_MIDDLE_PROXIES_V4.clone();
}
if cfg_v6.map.is_empty() {
cfg_v6.map = crate::protocol::constants::TG_MIDDLE_PROXIES_V6.clone();
}
let pool = MePool::new(
proxy_tag.clone(),
proxy_secret,
config.general.middle_proxy_nat_ip,
me_nat_probe,
None,
config.network.stun_servers.clone(),
config.general.stun_nat_probe_concurrency,
probe.detected_ipv6,
config.timeouts.me_one_retry,
config.timeouts.me_one_timeout_ms,
cfg_v4.map.clone(),
cfg_v6.map.clone(),
cfg_v4.default_dc.or(cfg_v6.default_dc),
decision.clone(),
Some(upstream_manager.clone()),
rng.clone(),
stats.clone(),
config.general.me_keepalive_enabled,
config.general.me_keepalive_interval_secs,
config.general.me_keepalive_jitter_secs,
config.general.me_keepalive_payload_random,
config.general.rpc_proxy_req_every,
config.general.me_warmup_stagger_enabled,
config.general.me_warmup_step_delay_ms,
config.general.me_warmup_step_jitter_ms,
config.general.me_reconnect_max_concurrent_per_dc,
config.general.me_reconnect_backoff_base_ms,
config.general.me_reconnect_backoff_cap_ms,
config.general.me_reconnect_fast_retry_count,
config.general.me_single_endpoint_shadow_writers,
config.general.me_single_endpoint_outage_mode_enabled,
config.general.me_single_endpoint_outage_disable_quarantine,
config.general.me_single_endpoint_outage_backoff_min_ms,
config.general.me_single_endpoint_outage_backoff_max_ms,
config.general.me_single_endpoint_shadow_rotate_every_secs,
config.general.me_floor_mode,
config.general.me_adaptive_floor_idle_secs,
config.general.me_adaptive_floor_min_writers_single_endpoint,
config.general.me_adaptive_floor_recover_grace_secs,
config.general.hardswap,
config.general.me_pool_drain_ttl_secs,
config.general.effective_me_pool_force_close_secs(),
config.general.me_pool_min_fresh_ratio,
config.general.me_hardswap_warmup_delay_min_ms,
config.general.me_hardswap_warmup_delay_max_ms,
config.general.me_hardswap_warmup_extra_passes,
config.general.me_hardswap_warmup_pass_backoff_base_ms,
config.general.me_bind_stale_mode,
config.general.me_bind_stale_ttl_secs,
config.general.me_secret_atomic_snapshot,
config.general.me_deterministic_writer_sort,
config.general.me_socks_kdf_policy,
config.general.me_route_backpressure_base_timeout_ms,
config.general.me_route_backpressure_high_timeout_ms,
config.general.me_route_backpressure_high_watermark_pct,
);
match pool.init(pool_size, &rng).await {
Ok(()) => {
info!(
attempt = init_attempt,
"Middle-End pool initialized successfully"
);
// Phase 4: Start health monitor
let pool_clone = pool.clone();
let rng_clone = rng.clone();
let min_conns = pool_size;
tokio::spawn(async move {
crate::transport::middle_proxy::me_health_monitor(
pool_clone, rng_clone, min_conns,
)
.await;
});
break Some(pool);
}
Err(e) => {
let retries_limited = me2dc_fallback && me_init_retry_attempts > 0;
if retries_limited && init_attempt >= me_init_retry_attempts {
error!(
error = %e,
attempt = init_attempt,
retry_limit = me_init_retry_attempts,
"ME pool init retries exhausted; falling back to direct mode"
);
break None;
}
warn!(
error = %e,
attempt = init_attempt,
retry_limit = if me_init_retry_attempts == 0 {
String::from("unlimited")
} else {
me_init_retry_attempts.to_string()
},
me2dc_fallback = me2dc_fallback,
retry_in_secs = 2,
"ME pool is not ready yet; retrying startup initialization"
);
pool.reset_stun_state();
tokio::time::sleep(Duration::from_secs(2)).await;
} }
} }
};
match proxy_secret {
Some(proxy_secret) => {
info!(
secret_len = proxy_secret.len(),
key_sig = format_args!(
"0x{:08x}",
if proxy_secret.len() >= 4 {
u32::from_le_bytes([
proxy_secret[0],
proxy_secret[1],
proxy_secret[2],
proxy_secret[3],
])
} else {
0
}
),
"Proxy-secret loaded"
);
let cfg_v4 = load_startup_proxy_config_snapshot(
"https://core.telegram.org/getProxyConfig",
config.general.proxy_config_v4_cache_path.as_deref(),
me2dc_fallback,
"getProxyConfig",
)
.await;
let cfg_v6 = load_startup_proxy_config_snapshot(
"https://core.telegram.org/getProxyConfigV6",
config.general.proxy_config_v6_cache_path.as_deref(),
me2dc_fallback,
"getProxyConfigV6",
)
.await;
if let (Some(cfg_v4), Some(cfg_v6)) = (cfg_v4, cfg_v6) {
let pool = MePool::new(
proxy_tag.clone(),
proxy_secret,
config.general.middle_proxy_nat_ip,
me_nat_probe,
None,
config.network.stun_servers.clone(),
config.general.stun_nat_probe_concurrency,
probe.detected_ipv6,
config.timeouts.me_one_retry,
config.timeouts.me_one_timeout_ms,
cfg_v4.map.clone(),
cfg_v6.map.clone(),
cfg_v4.default_dc.or(cfg_v6.default_dc),
decision.clone(),
Some(upstream_manager.clone()),
rng.clone(),
stats.clone(),
config.general.me_keepalive_enabled,
config.general.me_keepalive_interval_secs,
config.general.me_keepalive_jitter_secs,
config.general.me_keepalive_payload_random,
config.general.rpc_proxy_req_every,
config.general.me_warmup_stagger_enabled,
config.general.me_warmup_step_delay_ms,
config.general.me_warmup_step_jitter_ms,
config.general.me_reconnect_max_concurrent_per_dc,
config.general.me_reconnect_backoff_base_ms,
config.general.me_reconnect_backoff_cap_ms,
config.general.me_reconnect_fast_retry_count,
config.general.me_single_endpoint_shadow_writers,
config.general.me_single_endpoint_outage_mode_enabled,
config.general.me_single_endpoint_outage_disable_quarantine,
config.general.me_single_endpoint_outage_backoff_min_ms,
config.general.me_single_endpoint_outage_backoff_max_ms,
config.general.me_single_endpoint_shadow_rotate_every_secs,
config.general.me_floor_mode,
config.general.me_adaptive_floor_idle_secs,
config.general.me_adaptive_floor_min_writers_single_endpoint,
config.general.me_adaptive_floor_recover_grace_secs,
config.general.hardswap,
config.general.me_pool_drain_ttl_secs,
config.general.effective_me_pool_force_close_secs(),
config.general.me_pool_min_fresh_ratio,
config.general.me_hardswap_warmup_delay_min_ms,
config.general.me_hardswap_warmup_delay_max_ms,
config.general.me_hardswap_warmup_extra_passes,
config.general.me_hardswap_warmup_pass_backoff_base_ms,
config.general.me_bind_stale_mode,
config.general.me_bind_stale_ttl_secs,
config.general.me_secret_atomic_snapshot,
config.general.me_deterministic_writer_sort,
config.general.me_socks_kdf_policy,
config.general.me_route_backpressure_base_timeout_ms,
config.general.me_route_backpressure_high_timeout_ms,
config.general.me_route_backpressure_high_watermark_pct,
config.general.me_route_no_writer_mode,
config.general.me_route_no_writer_wait_ms,
config.general.me_route_inline_recovery_attempts,
config.general.me_route_inline_recovery_wait_ms,
);
let mut init_attempt: u32 = 0;
loop {
init_attempt = init_attempt.saturating_add(1);
match pool.init(pool_size, &rng).await {
Ok(()) => {
info!(
attempt = init_attempt,
"Middle-End pool initialized successfully"
);
// Phase 4: Start health monitor
let pool_clone = pool.clone();
let rng_clone = rng.clone();
let min_conns = pool_size;
tokio::spawn(async move {
crate::transport::middle_proxy::me_health_monitor(
pool_clone, rng_clone, min_conns,
)
.await;
});
break Some(pool);
}
Err(e) => {
let retries_limited = me2dc_fallback && me_init_retry_attempts > 0;
if retries_limited && init_attempt >= me_init_retry_attempts {
error!(
error = %e,
attempt = init_attempt,
retry_limit = me_init_retry_attempts,
"ME pool init retries exhausted; falling back to direct mode"
);
break None;
}
let retry_limit = if !me2dc_fallback || me_init_retry_attempts == 0 {
String::from("unlimited")
} else {
me_init_retry_attempts.to_string()
};
if init_attempt >= me_init_warn_after_attempts {
warn!(
error = %e,
attempt = init_attempt,
retry_limit = retry_limit,
me2dc_fallback = me2dc_fallback,
retry_in_secs = 2,
"ME pool is not ready yet; retrying startup initialization"
);
} else {
info!(
error = %e,
attempt = init_attempt,
retry_limit = retry_limit,
me2dc_fallback = me2dc_fallback,
retry_in_secs = 2,
"ME pool startup warmup: retrying initialization"
);
}
pool.reset_stun_state();
tokio::time::sleep(Duration::from_secs(2)).await;
}
}
}
} else {
None
}
}
None => None,
} }
} else { } else {
None None
@@ -847,6 +1058,19 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
} }
} }
let initialized_secs = process_started_at.elapsed().as_secs();
let second_suffix = if initialized_secs == 1 { "" } else { "s" };
info!("===================== Telegram Startup =====================");
info!(
" DC/ME Initialized in {} second{}",
initialized_secs, second_suffix
);
info!("============================================================");
if let Some(ref pool) = me_pool {
pool.set_runtime_ready(true);
}
// Background tasks // Background tasks
let um_clone = upstream_manager.clone(); let um_clone = upstream_manager.clone();
let decision_clone = decision.clone(); let decision_clone = decision.clone();
@@ -1117,6 +1341,60 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
print_proxy_links(&host, port, &config); print_proxy_links(&host, port, &config);
} }
let (admission_tx, admission_rx) = watch::channel(true);
if config.general.use_middle_proxy {
if let Some(pool) = me_pool.as_ref() {
let initial_open = pool.admission_ready_conditional_cast().await;
admission_tx.send_replace(initial_open);
if initial_open {
info!("Conditional-admission gate: open (ME pool ready)");
} else {
warn!("Conditional-admission gate: closed (ME pool is not ready)");
}
let pool_for_gate = pool.clone();
let admission_tx_gate = admission_tx.clone();
tokio::spawn(async move {
let mut gate_open = initial_open;
let mut open_streak = if initial_open { 1u32 } else { 0u32 };
let mut close_streak = if initial_open { 0u32 } else { 1u32 };
loop {
let ready = pool_for_gate.admission_ready_conditional_cast().await;
if ready {
open_streak = open_streak.saturating_add(1);
close_streak = 0;
if !gate_open && open_streak >= 2 {
gate_open = true;
admission_tx_gate.send_replace(true);
info!(
open_streak,
"Conditional-admission gate opened (ME pool recovered)"
);
}
} else {
close_streak = close_streak.saturating_add(1);
open_streak = 0;
if gate_open && close_streak >= 2 {
gate_open = false;
admission_tx_gate.send_replace(false);
warn!(
close_streak,
"Conditional-admission gate closed (ME pool has uncovered DC groups)"
);
}
}
tokio::time::sleep(Duration::from_millis(250)).await;
}
});
} else {
admission_tx.send_replace(false);
warn!("Conditional-admission gate: closed (ME pool is unavailable)");
}
} else {
admission_tx.send_replace(true);
}
let _admission_tx_hold = admission_tx;
// Unix socket setup (before listeners check so unix-only config works) // Unix socket setup (before listeners check so unix-only config works)
let mut has_unix_listener = false; let mut has_unix_listener = false;
#[cfg(unix)] #[cfg(unix)]
@@ -1150,6 +1428,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
has_unix_listener = true; has_unix_listener = true;
let mut config_rx_unix: tokio::sync::watch::Receiver<Arc<ProxyConfig>> = config_rx.clone(); let mut config_rx_unix: tokio::sync::watch::Receiver<Arc<ProxyConfig>> = config_rx.clone();
let mut admission_rx_unix = admission_rx.clone();
let stats = stats.clone(); let stats = stats.clone();
let upstream_manager = upstream_manager.clone(); let upstream_manager = upstream_manager.clone();
let replay_checker = replay_checker.clone(); let replay_checker = replay_checker.clone();
@@ -1165,6 +1444,10 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
let unix_conn_counter = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(1)); let unix_conn_counter = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(1));
loop { loop {
if !wait_until_admission_open(&mut admission_rx_unix).await {
warn!("Conditional-admission gate channel closed for unix listener");
break;
}
match unix_listener.accept().await { match unix_listener.accept().await {
Ok((stream, _)) => { Ok((stream, _)) => {
let permit = match max_connections_unix.clone().acquire_owned().await { let permit = match max_connections_unix.clone().acquire_owned().await {
@@ -1277,6 +1560,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
let me_pool_api = me_pool.clone(); let me_pool_api = me_pool.clone();
let upstream_manager_api = upstream_manager.clone(); let upstream_manager_api = upstream_manager.clone();
let config_rx_api = config_rx.clone(); let config_rx_api = config_rx.clone();
let admission_rx_api = admission_rx.clone();
let config_path_api = std::path::PathBuf::from(&config_path); let config_path_api = std::path::PathBuf::from(&config_path);
let startup_detected_ip_v4 = detected_ip_v4; let startup_detected_ip_v4 = detected_ip_v4;
let startup_detected_ip_v6 = detected_ip_v6; let startup_detected_ip_v6 = detected_ip_v6;
@@ -1288,9 +1572,11 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
me_pool_api, me_pool_api,
upstream_manager_api, upstream_manager_api,
config_rx_api, config_rx_api,
admission_rx_api,
config_path_api, config_path_api,
startup_detected_ip_v4, startup_detected_ip_v4,
startup_detected_ip_v6, startup_detected_ip_v6,
process_started_at_epoch_secs,
) )
.await; .await;
}); });
@@ -1299,6 +1585,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
for (listener, listener_proxy_protocol) in listeners { for (listener, listener_proxy_protocol) in listeners {
let mut config_rx: tokio::sync::watch::Receiver<Arc<ProxyConfig>> = config_rx.clone(); let mut config_rx: tokio::sync::watch::Receiver<Arc<ProxyConfig>> = config_rx.clone();
let mut admission_rx_tcp = admission_rx.clone();
let stats = stats.clone(); let stats = stats.clone();
let upstream_manager = upstream_manager.clone(); let upstream_manager = upstream_manager.clone();
let replay_checker = replay_checker.clone(); let replay_checker = replay_checker.clone();
@@ -1312,6 +1599,10 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
tokio::spawn(async move { tokio::spawn(async move {
loop { loop {
if !wait_until_admission_open(&mut admission_rx_tcp).await {
warn!("Conditional-admission gate channel closed for tcp listener");
break;
}
match listener.accept().await { match listener.accept().await {
Ok((stream, peer_addr)) => { Ok((stream, peer_addr)) => {
let permit = match max_connections_tcp.clone().acquire_owned().await { let permit = match max_connections_tcp.clone().acquire_owned().await {
@@ -1400,7 +1691,36 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
} }
match signal::ctrl_c().await { match signal::ctrl_c().await {
Ok(()) => info!("Shutting down..."), Ok(()) => {
let shutdown_started_at = Instant::now();
info!("Shutting down...");
let uptime_secs = process_started_at.elapsed().as_secs();
info!("Uptime: {}", format_uptime(uptime_secs));
if let Some(pool) = &me_pool {
match tokio::time::timeout(
Duration::from_secs(2),
pool.shutdown_send_close_conn_all(),
)
.await
{
Ok(total) => {
info!(
close_conn_sent = total,
"ME shutdown: RPC_CLOSE_CONN broadcast completed"
);
}
Err(_) => {
warn!("ME shutdown: RPC_CLOSE_CONN broadcast timed out");
}
}
}
let shutdown_secs = shutdown_started_at.elapsed().as_secs();
info!(
"Shutdown completed successfully in {} {}.",
shutdown_secs,
unit_label(shutdown_secs, "second", "seconds")
);
}
Err(e) => error!("Signal error: {}", e), Err(e) => error!("Signal error: {}", e),
} }

View File

@@ -1199,6 +1199,48 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
0 0
} }
); );
let _ = writeln!(
out,
"# HELP telemt_me_no_writer_failfast_total ME route failfast errors due to missing writer in bounded wait window"
);
let _ = writeln!(out, "# TYPE telemt_me_no_writer_failfast_total counter");
let _ = writeln!(
out,
"telemt_me_no_writer_failfast_total {}",
if me_allows_normal {
stats.get_me_no_writer_failfast_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_async_recovery_trigger_total Async ME recovery trigger attempts from route path"
);
let _ = writeln!(out, "# TYPE telemt_me_async_recovery_trigger_total counter");
let _ = writeln!(
out,
"telemt_me_async_recovery_trigger_total {}",
if me_allows_normal {
stats.get_me_async_recovery_trigger_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_inline_recovery_total Legacy inline ME recovery attempts from route path"
);
let _ = writeln!(out, "# TYPE telemt_me_inline_recovery_total counter");
let _ = writeln!(
out,
"telemt_me_inline_recovery_total {}",
if me_allows_normal {
stats.get_me_inline_recovery_total()
} else {
0
}
);
let unresolved_writer_losses = if me_allows_normal { let unresolved_writer_losses = if me_allows_normal {
stats stats
@@ -1237,6 +1279,29 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
let _ = writeln!(out, "# TYPE telemt_user_msgs_from_client counter"); let _ = writeln!(out, "# TYPE telemt_user_msgs_from_client counter");
let _ = writeln!(out, "# HELP telemt_user_msgs_to_client Per-user messages sent"); let _ = writeln!(out, "# HELP telemt_user_msgs_to_client Per-user messages sent");
let _ = writeln!(out, "# TYPE telemt_user_msgs_to_client counter"); let _ = writeln!(out, "# TYPE telemt_user_msgs_to_client counter");
let _ = writeln!(
out,
"# HELP telemt_ip_reservation_rollback_total IP reservation rollbacks caused by later limit checks"
);
let _ = writeln!(out, "# TYPE telemt_ip_reservation_rollback_total counter");
let _ = writeln!(
out,
"telemt_ip_reservation_rollback_total{{reason=\"tcp_limit\"}} {}",
if core_enabled {
stats.get_ip_reservation_rollback_tcp_limit_total()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_ip_reservation_rollback_total{{reason=\"quota_limit\"}} {}",
if core_enabled {
stats.get_ip_reservation_rollback_quota_limit_total()
} else {
0
}
);
let _ = writeln!( let _ = writeln!(
out, out,
"# HELP telemt_telemetry_user_series_suppressed User-labeled metric series suppression flag" "# HELP telemt_telemetry_user_series_suppressed User-labeled metric series suppression flag"
@@ -1267,11 +1332,21 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
.collect(); .collect();
let mut unique_users = BTreeSet::new(); let mut unique_users = BTreeSet::new();
unique_users.extend(config.access.users.keys().cloned());
unique_users.extend(config.access.user_max_unique_ips.keys().cloned()); unique_users.extend(config.access.user_max_unique_ips.keys().cloned());
unique_users.extend(ip_counts.keys().cloned()); unique_users.extend(ip_counts.keys().cloned());
let unique_users_vec: Vec<String> = unique_users.iter().cloned().collect();
let recent_counts = ip_tracker
.get_recent_counts_for_users(&unique_users_vec)
.await;
let _ = writeln!(out, "# HELP telemt_user_unique_ips_current Per-user current number of unique active IPs"); let _ = writeln!(out, "# HELP telemt_user_unique_ips_current Per-user current number of unique active IPs");
let _ = writeln!(out, "# TYPE telemt_user_unique_ips_current gauge"); let _ = writeln!(out, "# TYPE telemt_user_unique_ips_current gauge");
let _ = writeln!(
out,
"# HELP telemt_user_unique_ips_recent_window Per-user unique IPs seen in configured observation window"
);
let _ = writeln!(out, "# TYPE telemt_user_unique_ips_recent_window gauge");
let _ = writeln!(out, "# HELP telemt_user_unique_ips_limit Per-user configured unique IP limit (0 means unlimited)"); let _ = writeln!(out, "# HELP telemt_user_unique_ips_limit Per-user configured unique IP limit (0 means unlimited)");
let _ = writeln!(out, "# TYPE telemt_user_unique_ips_limit gauge"); let _ = writeln!(out, "# TYPE telemt_user_unique_ips_limit gauge");
let _ = writeln!(out, "# HELP telemt_user_unique_ips_utilization Per-user unique IP usage ratio (0 for unlimited)"); let _ = writeln!(out, "# HELP telemt_user_unique_ips_utilization Per-user unique IP usage ratio (0 for unlimited)");
@@ -1286,6 +1361,12 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
0.0 0.0
}; };
let _ = writeln!(out, "telemt_user_unique_ips_current{{user=\"{}\"}} {}", user, current); let _ = writeln!(out, "telemt_user_unique_ips_current{{user=\"{}\"}} {}", user, current);
let _ = writeln!(
out,
"telemt_user_unique_ips_recent_window{{user=\"{}\"}} {}",
user,
recent_counts.get(&user).copied().unwrap_or(0)
);
let _ = writeln!(out, "telemt_user_unique_ips_limit{{user=\"{}\"}} {}", user, limit); let _ = writeln!(out, "telemt_user_unique_ips_limit{{user=\"{}\"}} {}", user, limit);
let _ = writeln!( let _ = writeln!(
out, out,
@@ -1378,6 +1459,7 @@ mod tests {
assert!(output.contains("telemt_user_msgs_from_client{user=\"alice\"} 1")); assert!(output.contains("telemt_user_msgs_from_client{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_msgs_to_client{user=\"alice\"} 2")); assert!(output.contains("telemt_user_msgs_to_client{user=\"alice\"} 2"));
assert!(output.contains("telemt_user_unique_ips_current{user=\"alice\"} 1")); assert!(output.contains("telemt_user_unique_ips_current{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_unique_ips_recent_window{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_unique_ips_limit{user=\"alice\"} 4")); assert!(output.contains("telemt_user_unique_ips_limit{user=\"alice\"} 4"));
assert!(output.contains("telemt_user_unique_ips_utilization{user=\"alice\"} 0.250000")); assert!(output.contains("telemt_user_unique_ips_utilization{user=\"alice\"} 0.250000"));
} }
@@ -1391,7 +1473,8 @@ mod tests {
assert!(output.contains("telemt_connections_total 0")); assert!(output.contains("telemt_connections_total 0"));
assert!(output.contains("telemt_connections_bad_total 0")); assert!(output.contains("telemt_connections_bad_total 0"));
assert!(output.contains("telemt_handshake_timeouts_total 0")); assert!(output.contains("telemt_handshake_timeouts_total 0"));
assert!(!output.contains("user=")); assert!(output.contains("telemt_user_unique_ips_current{user="));
assert!(output.contains("telemt_user_unique_ips_recent_window{user="));
} }
#[tokio::test] #[tokio::test]
@@ -1412,6 +1495,7 @@ mod tests {
"# TYPE telemt_me_writer_removed_unexpected_minus_restored_total gauge" "# TYPE telemt_me_writer_removed_unexpected_minus_restored_total gauge"
)); ));
assert!(output.contains("# TYPE telemt_user_unique_ips_current gauge")); assert!(output.contains("# TYPE telemt_user_unique_ips_current gauge"));
assert!(output.contains("# TYPE telemt_user_unique_ips_recent_window gauge"));
assert!(output.contains("# TYPE telemt_user_unique_ips_limit gauge")); assert!(output.contains("# TYPE telemt_user_unique_ips_limit gauge"));
assert!(output.contains("# TYPE telemt_user_unique_ips_utilization gauge")); assert!(output.contains("# TYPE telemt_user_unique_ips_utilization gauge"));
} }

View File

@@ -97,8 +97,11 @@ where
.unwrap_or_else(|_| "0.0.0.0:443".parse().unwrap()); .unwrap_or_else(|_| "0.0.0.0:443".parse().unwrap());
if proxy_protocol_enabled { if proxy_protocol_enabled {
match parse_proxy_protocol(&mut stream, peer).await { let proxy_header_timeout = Duration::from_millis(
Ok(info) => { config.server.proxy_protocol_header_timeout_ms.max(1),
);
match timeout(proxy_header_timeout, parse_proxy_protocol(&mut stream, peer)).await {
Ok(Ok(info)) => {
debug!( debug!(
peer = %peer, peer = %peer,
client = %info.src_addr, client = %info.src_addr,
@@ -110,12 +113,18 @@ where
local_addr = dst; local_addr = dst;
} }
} }
Err(e) => { Ok(Err(e)) => {
stats.increment_connects_bad(); stats.increment_connects_bad();
warn!(peer = %peer, error = %e, "Invalid PROXY protocol header"); warn!(peer = %peer, error = %e, "Invalid PROXY protocol header");
record_beobachten_class(&beobachten, &config, peer.ip(), "other"); record_beobachten_class(&beobachten, &config, peer.ip(), "other");
return Err(e); return Err(e);
} }
Err(_) => {
stats.increment_connects_bad();
warn!(peer = %peer, timeout_ms = proxy_header_timeout.as_millis(), "PROXY protocol header timeout");
record_beobachten_class(&beobachten, &config, peer.ip(), "other");
return Err(ProxyError::InvalidProxyProtocol);
}
} }
} }
@@ -161,7 +170,7 @@ where
let (read_half, write_half) = tokio::io::split(stream); let (read_half, write_half) = tokio::io::split(stream);
let (mut tls_reader, tls_writer, _tls_user) = match handle_tls_handshake( let (mut tls_reader, tls_writer, tls_user) = match handle_tls_handshake(
&handshake, read_half, write_half, real_peer, &handshake, read_half, write_half, real_peer,
&config, &replay_checker, &rng, tls_cache.clone(), &config, &replay_checker, &rng, tls_cache.clone(),
).await { ).await {
@@ -190,7 +199,7 @@ where
let (crypto_reader, crypto_writer, success) = match handle_mtproto_handshake( let (crypto_reader, crypto_writer, success) = match handle_mtproto_handshake(
&mtproto_handshake, tls_reader, tls_writer, real_peer, &mtproto_handshake, tls_reader, tls_writer, real_peer,
&config, &replay_checker, true, &config, &replay_checker, true, Some(tls_user.as_str()),
).await { ).await {
HandshakeResult::Success(result) => result, HandshakeResult::Success(result) => result,
HandshakeResult::BadClient { reader: _, writer: _ } => { HandshakeResult::BadClient { reader: _, writer: _ } => {
@@ -234,7 +243,7 @@ where
let (crypto_reader, crypto_writer, success) = match handle_mtproto_handshake( let (crypto_reader, crypto_writer, success) = match handle_mtproto_handshake(
&handshake, read_half, write_half, real_peer, &handshake, read_half, write_half, real_peer,
&config, &replay_checker, false, &config, &replay_checker, false, None,
).await { ).await {
HandshakeResult::Success(result) => result, HandshakeResult::Success(result) => result,
HandshakeResult::BadClient { reader, writer } => { HandshakeResult::BadClient { reader, writer } => {
@@ -415,8 +424,16 @@ impl RunningClientHandler {
let mut local_addr = self.stream.local_addr().map_err(ProxyError::Io)?; let mut local_addr = self.stream.local_addr().map_err(ProxyError::Io)?;
if self.proxy_protocol_enabled { if self.proxy_protocol_enabled {
match parse_proxy_protocol(&mut self.stream, self.peer).await { let proxy_header_timeout = Duration::from_millis(
Ok(info) => { self.config.server.proxy_protocol_header_timeout_ms.max(1),
);
match timeout(
proxy_header_timeout,
parse_proxy_protocol(&mut self.stream, self.peer),
)
.await
{
Ok(Ok(info)) => {
debug!( debug!(
peer = %self.peer, peer = %self.peer,
client = %info.src_addr, client = %info.src_addr,
@@ -428,7 +445,7 @@ impl RunningClientHandler {
local_addr = dst; local_addr = dst;
} }
} }
Err(e) => { Ok(Err(e)) => {
self.stats.increment_connects_bad(); self.stats.increment_connects_bad();
warn!(peer = %self.peer, error = %e, "Invalid PROXY protocol header"); warn!(peer = %self.peer, error = %e, "Invalid PROXY protocol header");
record_beobachten_class( record_beobachten_class(
@@ -439,6 +456,21 @@ impl RunningClientHandler {
); );
return Err(e); return Err(e);
} }
Err(_) => {
self.stats.increment_connects_bad();
warn!(
peer = %self.peer,
timeout_ms = proxy_header_timeout.as_millis(),
"PROXY protocol header timeout"
);
record_beobachten_class(
&self.beobachten,
&self.config,
self.peer.ip(),
"other",
);
return Err(ProxyError::InvalidProxyProtocol);
}
} }
} }
@@ -494,7 +526,7 @@ impl RunningClientHandler {
let (read_half, write_half) = self.stream.into_split(); let (read_half, write_half) = self.stream.into_split();
let (mut tls_reader, tls_writer, _tls_user) = match handle_tls_handshake( let (mut tls_reader, tls_writer, tls_user) = match handle_tls_handshake(
&handshake, &handshake,
read_half, read_half,
write_half, write_half,
@@ -538,6 +570,7 @@ impl RunningClientHandler {
&config, &config,
&replay_checker, &replay_checker,
true, true,
Some(tls_user.as_str()),
) )
.await .await
{ {
@@ -611,6 +644,7 @@ impl RunningClientHandler {
&config, &config,
&replay_checker, &replay_checker,
false, false,
None,
) )
.await .await
{ {
@@ -672,42 +706,16 @@ impl RunningClientHandler {
R: AsyncRead + Unpin + Send + 'static, R: AsyncRead + Unpin + Send + 'static,
W: AsyncWrite + Unpin + Send + 'static, W: AsyncWrite + Unpin + Send + 'static,
{ {
let user = &success.user; let user = success.user.clone();
if let Err(e) = Self::check_user_limits_static(user, &config, &stats, peer_addr, &ip_tracker).await { if let Err(e) = Self::check_user_limits_static(&user, &config, &stats, peer_addr, &ip_tracker).await {
warn!(user = %user, error = %e, "User limit exceeded"); warn!(user = %user, error = %e, "User limit exceeded");
return Err(e); return Err(e);
} }
// IP Cleanup Guard: автоматически удаляет IP при выходе из scope let relay_result = if config.general.use_middle_proxy {
struct IpCleanupGuard {
tracker: Arc<UserIpTracker>,
user: String,
ip: std::net::IpAddr,
}
impl Drop for IpCleanupGuard {
fn drop(&mut self) {
let tracker = self.tracker.clone();
let user = self.user.clone();
let ip = self.ip;
tokio::spawn(async move {
tracker.remove_ip(&user, ip).await;
debug!(user = %user, ip = %ip, "IP cleaned up on disconnect");
});
}
}
let _cleanup = IpCleanupGuard {
tracker: ip_tracker,
user: user.clone(),
ip: peer_addr.ip(),
};
// Decide: middle proxy or direct
if config.general.use_middle_proxy {
if let Some(ref pool) = me_pool { if let Some(ref pool) = me_pool {
return handle_via_middle_proxy( handle_via_middle_proxy(
client_reader, client_reader,
client_writer, client_writer,
success, success,
@@ -718,23 +726,38 @@ impl RunningClientHandler {
local_addr, local_addr,
rng, rng,
) )
.await; .await
} else {
warn!("use_middle_proxy=true but MePool not initialized, falling back to direct");
handle_via_direct(
client_reader,
client_writer,
success,
upstream_manager,
stats,
config,
buffer_pool,
rng,
)
.await
} }
warn!("use_middle_proxy=true but MePool not initialized, falling back to direct"); } else {
} // Direct mode (original behavior)
handle_via_direct(
client_reader,
client_writer,
success,
upstream_manager,
stats,
config,
buffer_pool,
rng,
)
.await
};
// Direct mode (original behavior) ip_tracker.remove_ip(&user, peer_addr.ip()).await;
handle_via_direct( relay_result
client_reader,
client_writer,
success,
upstream_manager,
stats,
config,
buffer_pool,
rng,
)
.await
} }
async fn check_user_limits_static( async fn check_user_limits_static(
@@ -752,22 +775,32 @@ impl RunningClientHandler {
}); });
} }
let mut ip_reserved = false;
// IP limit check // IP limit check
if let Err(reason) = ip_tracker.check_and_add(user, peer_addr.ip()).await { match ip_tracker.check_and_add(user, peer_addr.ip()).await {
warn!( Ok(()) => {
user = %user, ip_reserved = true;
ip = %peer_addr.ip(), }
reason = %reason, Err(reason) => {
"IP limit exceeded" warn!(
); user = %user,
return Err(ProxyError::ConnectionLimitExceeded { ip = %peer_addr.ip(),
user: user.to_string(), reason = %reason,
}); "IP limit exceeded"
);
return Err(ProxyError::ConnectionLimitExceeded {
user: user.to_string(),
});
}
} }
if let Some(limit) = config.access.user_max_tcp_conns.get(user) if let Some(limit) = config.access.user_max_tcp_conns.get(user)
&& stats.get_user_curr_connects(user) >= *limit as u64 && stats.get_user_curr_connects(user) >= *limit as u64
{ {
if ip_reserved {
ip_tracker.remove_ip(user, peer_addr.ip()).await;
stats.increment_ip_reservation_rollback_tcp_limit_total();
}
return Err(ProxyError::ConnectionLimitExceeded { return Err(ProxyError::ConnectionLimitExceeded {
user: user.to_string(), user: user.to_string(),
}); });
@@ -776,6 +809,10 @@ impl RunningClientHandler {
if let Some(quota) = config.access.user_data_quota.get(user) if let Some(quota) = config.access.user_data_quota.get(user)
&& stats.get_user_total_octets(user) >= *quota && stats.get_user_total_octets(user) >= *quota
{ {
if ip_reserved {
ip_tracker.remove_ip(user, peer_addr.ip()).await;
stats.increment_ip_reservation_rollback_quota_limit_total();
}
return Err(ProxyError::DataQuotaExceeded { return Err(ProxyError::DataQuotaExceeded {
user: user.to_string(), user: user.to_string(),
}); });

View File

@@ -34,7 +34,7 @@ where
let user = &success.user; let user = &success.user;
let dc_addr = get_dc_addr_static(success.dc_idx, &config)?; let dc_addr = get_dc_addr_static(success.dc_idx, &config)?;
info!( debug!(
user = %user, user = %user,
peer = %success.peer, peer = %success.peer,
dc = success.dc_idx, dc = success.dc_idx,
@@ -118,10 +118,16 @@ fn get_dc_addr_static(dc_idx: i16, config: &ProxyConfig) -> Result<SocketAddr> {
// Unknown DC requested by client without override: log and fall back. // Unknown DC requested by client without override: log and fall back.
if !config.dc_overrides.contains_key(&dc_key) { if !config.dc_overrides.contains_key(&dc_key) {
warn!(dc_idx = dc_idx, "Requested non-standard DC with no override; falling back to default cluster"); warn!(dc_idx = dc_idx, "Requested non-standard DC with no override; falling back to default cluster");
if let Some(path) = &config.general.unknown_dc_log_path if config.general.unknown_dc_file_log_enabled
&& let Ok(mut file) = OpenOptions::new().create(true).append(true).open(path) && let Some(path) = &config.general.unknown_dc_log_path
&& let Ok(handle) = tokio::runtime::Handle::try_current()
{ {
let _ = writeln!(file, "dc_idx={dc_idx}"); let path = path.clone();
handle.spawn_blocking(move || {
if let Ok(mut file) = OpenOptions::new().create(true).append(true).open(path) {
let _ = writeln!(file, "dc_idx={dc_idx}");
}
});
} }
} }

View File

@@ -6,7 +6,7 @@ use std::net::SocketAddr;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
use tracing::{debug, warn, trace, info}; use tracing::{debug, warn, trace};
use zeroize::Zeroize; use zeroize::Zeroize;
use crate::crypto::{sha256, AesCtr, SecureRandom}; use crate::crypto::{sha256, AesCtr, SecureRandom};
@@ -19,6 +19,31 @@ use crate::stats::ReplayChecker;
use crate::config::ProxyConfig; use crate::config::ProxyConfig;
use crate::tls_front::{TlsFrontCache, emulator}; use crate::tls_front::{TlsFrontCache, emulator};
fn decode_user_secrets(
config: &ProxyConfig,
preferred_user: Option<&str>,
) -> Vec<(String, Vec<u8>)> {
let mut secrets = Vec::with_capacity(config.access.users.len());
if let Some(preferred) = preferred_user
&& let Some(secret_hex) = config.access.users.get(preferred)
&& let Ok(bytes) = hex::decode(secret_hex)
{
secrets.push((preferred.to_string(), bytes));
}
for (name, secret_hex) in &config.access.users {
if preferred_user.is_some_and(|preferred| preferred == name.as_str()) {
continue;
}
if let Ok(bytes) = hex::decode(secret_hex) {
secrets.push((name.clone(), bytes));
}
}
secrets
}
/// Result of successful handshake /// Result of successful handshake
/// ///
/// Key material (`dec_key`, `dec_iv`, `enc_key`, `enc_iv`) is /// Key material (`dec_key`, `dec_iv`, `enc_key`, `enc_iv`) is
@@ -82,11 +107,7 @@ where
return HandshakeResult::BadClient { reader, writer }; return HandshakeResult::BadClient { reader, writer };
} }
let secrets: Vec<(String, Vec<u8>)> = config.access.users.iter() let secrets = decode_user_secrets(config, None);
.filter_map(|(name, hex)| {
hex::decode(hex).ok().map(|bytes| (name.clone(), bytes))
})
.collect();
let validation = match tls::validate_tls_handshake( let validation = match tls::validate_tls_handshake(
handshake, handshake,
@@ -201,7 +222,7 @@ where
return HandshakeResult::Error(ProxyError::Io(e)); return HandshakeResult::Error(ProxyError::Io(e));
} }
info!( debug!(
peer = %peer, peer = %peer,
user = %validation.user, user = %validation.user,
"TLS handshake successful" "TLS handshake successful"
@@ -223,6 +244,7 @@ pub async fn handle_mtproto_handshake<R, W>(
config: &ProxyConfig, config: &ProxyConfig,
replay_checker: &ReplayChecker, replay_checker: &ReplayChecker,
is_tls: bool, is_tls: bool,
preferred_user: Option<&str>,
) -> HandshakeResult<(CryptoReader<R>, CryptoWriter<W>, HandshakeSuccess), R, W> ) -> HandshakeResult<(CryptoReader<R>, CryptoWriter<W>, HandshakeSuccess), R, W>
where where
R: AsyncRead + Unpin + Send, R: AsyncRead + Unpin + Send,
@@ -239,11 +261,9 @@ where
let enc_prekey_iv: Vec<u8> = dec_prekey_iv.iter().rev().copied().collect(); let enc_prekey_iv: Vec<u8> = dec_prekey_iv.iter().rev().copied().collect();
for (user, secret_hex) in &config.access.users { let decoded_users = decode_user_secrets(config, preferred_user);
let secret = match hex::decode(secret_hex) {
Ok(s) => s, for (user, secret) in decoded_users {
Err(_) => continue,
};
let dec_prekey = &dec_prekey_iv[..PREKEY_LEN]; let dec_prekey = &dec_prekey_iv[..PREKEY_LEN];
let dec_iv_bytes = &dec_prekey_iv[PREKEY_LEN..]; let dec_iv_bytes = &dec_prekey_iv[PREKEY_LEN..];
@@ -311,7 +331,7 @@ where
is_tls, is_tls,
}; };
info!( debug!(
peer = %peer, peer = %peer,
user = %user, user = %user,
dc = dc_idx, dc = dc_idx,

View File

@@ -8,7 +8,7 @@ use std::time::{Duration, Instant};
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
use tokio::sync::{mpsc, oneshot}; use tokio::sync::{mpsc, oneshot};
use tracing::{debug, info, trace, warn}; use tracing::{debug, trace, warn};
use crate::config::ProxyConfig; use crate::config::ProxyConfig;
use crate::crypto::SecureRandom; use crate::crypto::SecureRandom;
@@ -210,7 +210,7 @@ where
let proto_tag = success.proto_tag; let proto_tag = success.proto_tag;
let pool_generation = me_pool.current_generation(); let pool_generation = me_pool.current_generation();
info!( debug!(
user = %user, user = %user,
peer = %peer, peer = %peer,
dc = success.dc_idx, dc = success.dc_idx,

View File

@@ -100,6 +100,11 @@ pub struct Stats {
me_refill_failed_total: AtomicU64, me_refill_failed_total: AtomicU64,
me_writer_restored_same_endpoint_total: AtomicU64, me_writer_restored_same_endpoint_total: AtomicU64,
me_writer_restored_fallback_total: AtomicU64, me_writer_restored_fallback_total: AtomicU64,
me_no_writer_failfast_total: AtomicU64,
me_async_recovery_trigger_total: AtomicU64,
me_inline_recovery_total: AtomicU64,
ip_reservation_rollback_tcp_limit_total: AtomicU64,
ip_reservation_rollback_quota_limit_total: AtomicU64,
telemetry_core_enabled: AtomicBool, telemetry_core_enabled: AtomicBool,
telemetry_user_enabled: AtomicBool, telemetry_user_enabled: AtomicBool,
telemetry_me_level: AtomicU8, telemetry_me_level: AtomicU8,
@@ -522,6 +527,34 @@ impl Stats {
.fetch_add(1, Ordering::Relaxed); .fetch_add(1, Ordering::Relaxed);
} }
} }
pub fn increment_me_no_writer_failfast_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_no_writer_failfast_total.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_async_recovery_trigger_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_async_recovery_trigger_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_inline_recovery_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_inline_recovery_total.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_ip_reservation_rollback_tcp_limit_total(&self) {
if self.telemetry_core_enabled() {
self.ip_reservation_rollback_tcp_limit_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_ip_reservation_rollback_quota_limit_total(&self) {
if self.telemetry_core_enabled() {
self.ip_reservation_rollback_quota_limit_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_endpoint_quarantine_total(&self) { pub fn increment_me_endpoint_quarantine_total(&self) {
if self.telemetry_me_allows_normal() { if self.telemetry_me_allows_normal() {
self.me_endpoint_quarantine_total self.me_endpoint_quarantine_total
@@ -791,21 +824,52 @@ impl Stats {
pub fn get_me_writer_restored_fallback_total(&self) -> u64 { pub fn get_me_writer_restored_fallback_total(&self) -> u64 {
self.me_writer_restored_fallback_total.load(Ordering::Relaxed) self.me_writer_restored_fallback_total.load(Ordering::Relaxed)
} }
pub fn get_me_no_writer_failfast_total(&self) -> u64 {
self.me_no_writer_failfast_total.load(Ordering::Relaxed)
}
pub fn get_me_async_recovery_trigger_total(&self) -> u64 {
self.me_async_recovery_trigger_total.load(Ordering::Relaxed)
}
pub fn get_me_inline_recovery_total(&self) -> u64 {
self.me_inline_recovery_total.load(Ordering::Relaxed)
}
pub fn get_ip_reservation_rollback_tcp_limit_total(&self) -> u64 {
self.ip_reservation_rollback_tcp_limit_total
.load(Ordering::Relaxed)
}
pub fn get_ip_reservation_rollback_quota_limit_total(&self) -> u64 {
self.ip_reservation_rollback_quota_limit_total
.load(Ordering::Relaxed)
}
pub fn increment_user_connects(&self, user: &str) { pub fn increment_user_connects(&self, user: &str) {
if !self.telemetry_user_enabled() { if !self.telemetry_user_enabled() {
return; return;
} }
self.user_stats.entry(user.to_string()).or_default() if let Some(stats) = self.user_stats.get(user) {
.connects.fetch_add(1, Ordering::Relaxed); stats.connects.fetch_add(1, Ordering::Relaxed);
return;
}
self.user_stats
.entry(user.to_string())
.or_default()
.connects
.fetch_add(1, Ordering::Relaxed);
} }
pub fn increment_user_curr_connects(&self, user: &str) { pub fn increment_user_curr_connects(&self, user: &str) {
if !self.telemetry_user_enabled() { if !self.telemetry_user_enabled() {
return; return;
} }
self.user_stats.entry(user.to_string()).or_default() if let Some(stats) = self.user_stats.get(user) {
.curr_connects.fetch_add(1, Ordering::Relaxed); stats.curr_connects.fetch_add(1, Ordering::Relaxed);
return;
}
self.user_stats
.entry(user.to_string())
.or_default()
.curr_connects
.fetch_add(1, Ordering::Relaxed);
} }
pub fn decrement_user_curr_connects(&self, user: &str) { pub fn decrement_user_curr_connects(&self, user: &str) {
@@ -839,32 +903,60 @@ impl Stats {
if !self.telemetry_user_enabled() { if !self.telemetry_user_enabled() {
return; return;
} }
self.user_stats.entry(user.to_string()).or_default() if let Some(stats) = self.user_stats.get(user) {
.octets_from_client.fetch_add(bytes, Ordering::Relaxed); stats.octets_from_client.fetch_add(bytes, Ordering::Relaxed);
return;
}
self.user_stats
.entry(user.to_string())
.or_default()
.octets_from_client
.fetch_add(bytes, Ordering::Relaxed);
} }
pub fn add_user_octets_to(&self, user: &str, bytes: u64) { pub fn add_user_octets_to(&self, user: &str, bytes: u64) {
if !self.telemetry_user_enabled() { if !self.telemetry_user_enabled() {
return; return;
} }
self.user_stats.entry(user.to_string()).or_default() if let Some(stats) = self.user_stats.get(user) {
.octets_to_client.fetch_add(bytes, Ordering::Relaxed); stats.octets_to_client.fetch_add(bytes, Ordering::Relaxed);
return;
}
self.user_stats
.entry(user.to_string())
.or_default()
.octets_to_client
.fetch_add(bytes, Ordering::Relaxed);
} }
pub fn increment_user_msgs_from(&self, user: &str) { pub fn increment_user_msgs_from(&self, user: &str) {
if !self.telemetry_user_enabled() { if !self.telemetry_user_enabled() {
return; return;
} }
self.user_stats.entry(user.to_string()).or_default() if let Some(stats) = self.user_stats.get(user) {
.msgs_from_client.fetch_add(1, Ordering::Relaxed); stats.msgs_from_client.fetch_add(1, Ordering::Relaxed);
return;
}
self.user_stats
.entry(user.to_string())
.or_default()
.msgs_from_client
.fetch_add(1, Ordering::Relaxed);
} }
pub fn increment_user_msgs_to(&self, user: &str) { pub fn increment_user_msgs_to(&self, user: &str) {
if !self.telemetry_user_enabled() { if !self.telemetry_user_enabled() {
return; return;
} }
self.user_stats.entry(user.to_string()).or_default() if let Some(stats) = self.user_stats.get(user) {
.msgs_to_client.fetch_add(1, Ordering::Relaxed); stats.msgs_to_client.fetch_add(1, Ordering::Relaxed);
return;
}
self.user_stats
.entry(user.to_string())
.or_default()
.msgs_to_client
.fetch_add(1, Ordering::Relaxed);
} }
pub fn get_user_total_octets(&self, user: &str) -> u64 { pub fn get_user_total_octets(&self, user: &str) -> u64 {

View File

@@ -1,6 +1,7 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::hash::{DefaultHasher, Hash, Hasher}; use std::hash::{DefaultHasher, Hash, Hasher};
use std::net::IpAddr; use std::net::IpAddr;
use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
@@ -42,6 +43,87 @@ pub struct ProxyConfigData {
pub proxy_for_lines: u32, pub proxy_for_lines: u32,
} }
pub fn parse_proxy_config_text(text: &str, http_status: u16) -> ProxyConfigData {
let mut map: HashMap<i32, Vec<(IpAddr, u16)>> = HashMap::new();
let mut proxy_for_lines: u32 = 0;
for line in text.lines() {
if let Some((dc, ip, port)) = parse_proxy_line(line) {
map.entry(dc).or_default().push((ip, port));
proxy_for_lines = proxy_for_lines.saturating_add(1);
}
}
let default_dc = text.lines().find_map(|l| {
let t = l.trim();
if let Some(rest) = t.strip_prefix("default") {
return rest.trim().trim_end_matches(';').parse::<i32>().ok();
}
None
});
ProxyConfigData {
map,
default_dc,
http_status,
proxy_for_lines,
}
}
pub async fn load_proxy_config_cache(path: &str) -> Result<ProxyConfigData> {
let text = tokio::fs::read_to_string(path).await.map_err(|e| {
crate::error::ProxyError::Proxy(format!("read proxy-config cache '{path}' failed: {e}"))
})?;
Ok(parse_proxy_config_text(&text, 200))
}
pub async fn save_proxy_config_cache(path: &str, raw_text: &str) -> Result<()> {
if let Some(parent) = Path::new(path).parent()
&& !parent.as_os_str().is_empty()
{
tokio::fs::create_dir_all(parent).await.map_err(|e| {
crate::error::ProxyError::Proxy(format!(
"create proxy-config cache dir '{}' failed: {e}",
parent.display()
))
})?;
}
tokio::fs::write(path, raw_text).await.map_err(|e| {
crate::error::ProxyError::Proxy(format!("write proxy-config cache '{path}' failed: {e}"))
})?;
Ok(())
}
pub async fn fetch_proxy_config_with_raw(url: &str) -> Result<(ProxyConfigData, String)> {
let resp = reqwest::get(url)
.await
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config GET failed: {e}")))?
;
let http_status = resp.status().as_u16();
if let Some(date) = resp.headers().get(reqwest::header::DATE)
&& let Ok(date_str) = date.to_str()
&& let Ok(server_time) = httpdate::parse_http_date(date_str)
&& let Ok(skew) = SystemTime::now().duration_since(server_time).or_else(|e| {
server_time.duration_since(SystemTime::now()).map_err(|_| e)
})
{
let skew_secs = skew.as_secs();
if skew_secs > 60 {
warn!(skew_secs, "Time skew >60s detected from fetch_proxy_config Date header");
} else if skew_secs > 30 {
warn!(skew_secs, "Time skew >30s detected from fetch_proxy_config Date header");
}
}
let text = resp
.text()
.await
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config read failed: {e}")))?;
let parsed = parse_proxy_config_text(&text, http_status);
Ok((parsed, text))
}
#[derive(Debug, Default)] #[derive(Debug, Default)]
struct StableSnapshot { struct StableSnapshot {
candidate_hash: Option<u64>, candidate_hash: Option<u64>,
@@ -170,61 +252,9 @@ fn parse_proxy_line(line: &str) -> Option<(i32, IpAddr, u16)> {
} }
pub async fn fetch_proxy_config(url: &str) -> Result<ProxyConfigData> { pub async fn fetch_proxy_config(url: &str) -> Result<ProxyConfigData> {
let resp = reqwest::get(url) fetch_proxy_config_with_raw(url)
.await .await
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config GET failed: {e}")))? .map(|(parsed, _raw)| parsed)
;
let http_status = resp.status().as_u16();
if let Some(date) = resp.headers().get(reqwest::header::DATE)
&& let Ok(date_str) = date.to_str()
&& let Ok(server_time) = httpdate::parse_http_date(date_str)
&& let Ok(skew) = SystemTime::now().duration_since(server_time).or_else(|e| {
server_time.duration_since(SystemTime::now()).map_err(|_| e)
})
{
let skew_secs = skew.as_secs();
if skew_secs > 60 {
warn!(skew_secs, "Time skew >60s detected from fetch_proxy_config Date header");
} else if skew_secs > 30 {
warn!(skew_secs, "Time skew >30s detected from fetch_proxy_config Date header");
}
}
let text = resp
.text()
.await
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config read failed: {e}")))?;
let mut map: HashMap<i32, Vec<(IpAddr, u16)>> = HashMap::new();
let mut proxy_for_lines: u32 = 0;
for line in text.lines() {
if let Some((dc, ip, port)) = parse_proxy_line(line) {
map.entry(dc).or_default().push((ip, port));
proxy_for_lines = proxy_for_lines.saturating_add(1);
}
}
let default_dc = text
.lines()
.find_map(|l| {
let t = l.trim();
if let Some(rest) = t.strip_prefix("default") {
return rest
.trim()
.trim_end_matches(';')
.parse::<i32>()
.ok();
}
None
});
Ok(ProxyConfigData {
map,
default_dc,
http_status,
proxy_for_lines,
})
} }
fn snapshot_passes_guards( fn snapshot_passes_guards(

View File

@@ -387,9 +387,11 @@ impl MePool {
socks_bound_addr.map(|value| value.ip()), socks_bound_addr.map(|value| value.ip()),
client_port_source, client_port_source,
); );
let mut kdf_fingerprint_guard = self.kdf_material_fingerprint.lock().await; let previous_kdf_fingerprint = {
if let Some((prev_fingerprint, prev_client_port)) = let kdf_fingerprint_guard = self.kdf_material_fingerprint.read().await;
kdf_fingerprint_guard.get(&peer_addr_nat).copied() kdf_fingerprint_guard.get(&peer_addr_nat).copied()
};
if let Some((prev_fingerprint, prev_client_port)) = previous_kdf_fingerprint
{ {
if prev_fingerprint != kdf_fingerprint { if prev_fingerprint != kdf_fingerprint {
self.stats.increment_me_kdf_drift_total(); self.stats.increment_me_kdf_drift_total();
@@ -416,6 +418,9 @@ impl MePool {
); );
} }
} }
// Keep fingerprint updates eventually consistent for diagnostics while avoiding
// serializing all concurrent handshakes on a single async mutex.
let mut kdf_fingerprint_guard = self.kdf_material_fingerprint.write().await;
kdf_fingerprint_guard.insert(peer_addr_nat, (kdf_fingerprint, client_port_for_kdf)); kdf_fingerprint_guard.insert(peer_addr_nat, (kdf_fingerprint, client_port_for_kdf));
drop(kdf_fingerprint_guard); drop(kdf_fingerprint_guard);

View File

@@ -295,15 +295,27 @@ async fn check_family(
let wait = Duration::from_millis(next_ms) let wait = Duration::from_millis(next_ms)
+ Duration::from_millis(rand::rng().random_range(0..=jitter.max(1))); + Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
next_attempt.insert(key, now + wait); next_attempt.insert(key, now + wait);
warn!( if pool.is_runtime_ready() {
dc = %dc, warn!(
?family, dc = %dc,
alive = now_alive, ?family,
required, alive = now_alive,
endpoint_count = endpoints.len(), required,
backoff_ms = next_ms, endpoint_count = endpoints.len(),
"DC writer floor is below required level, scheduled reconnect" backoff_ms = next_ms,
); "DC writer floor is below required level, scheduled reconnect"
);
} else {
info!(
dc = %dc,
?family,
alive = now_alive,
required,
endpoint_count = endpoints.len(),
backoff_ms = next_ms,
"DC writer floor is below required level during startup, scheduled reconnect"
);
}
} }
if let Some(v) = inflight.get_mut(&key) { if let Some(v) = inflight.get_mut(&key) {
*v = v.saturating_sub(1); *v = v.saturating_sub(1);

View File

@@ -30,7 +30,11 @@ pub use pool::MePool;
pub use pool_nat::{stun_probe, detect_public_ip}; pub use pool_nat::{stun_probe, detect_public_ip};
pub use registry::ConnRegistry; pub use registry::ConnRegistry;
pub use secret::fetch_proxy_secret; pub use secret::fetch_proxy_secret;
pub use config_updater::{fetch_proxy_config, me_config_updater}; #[allow(unused_imports)]
pub use config_updater::{
ProxyConfigData, fetch_proxy_config, fetch_proxy_config_with_raw, load_proxy_config_cache,
me_config_updater, save_proxy_config_cache,
};
pub use rotation::{MeReinitTrigger, me_reinit_scheduler, me_rotation_task}; pub use rotation::{MeReinitTrigger, me_reinit_scheduler, me_rotation_task};
pub use wire::proto_flags_for_tag; pub use wire::proto_flags_for_tag;

View File

@@ -7,7 +7,7 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use tokio::sync::{Mutex, Notify, RwLock, mpsc}; use tokio::sync::{Mutex, Notify, RwLock, mpsc};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use crate::config::{MeBindStaleMode, MeFloorMode, MeSocksKdfPolicy}; use crate::config::{MeBindStaleMode, MeFloorMode, MeRouteNoWriterMode, MeSocksKdfPolicy};
use crate::crypto::SecureRandom; use crate::crypto::SecureRandom;
use crate::network::IpFamily; use crate::network::IpFamily;
use crate::network::probe::NetworkDecision; use crate::network::probe::NetworkDecision;
@@ -119,6 +119,8 @@ pub struct MePool {
pub(super) ping_tracker: Arc<Mutex<HashMap<i64, (std::time::Instant, u64)>>>, pub(super) ping_tracker: Arc<Mutex<HashMap<i64, (std::time::Instant, u64)>>>,
pub(super) rtt_stats: Arc<Mutex<HashMap<u64, (f64, f64)>>>, pub(super) rtt_stats: Arc<Mutex<HashMap<u64, (f64, f64)>>>,
pub(super) nat_reflection_cache: Arc<Mutex<NatReflectionCache>>, pub(super) nat_reflection_cache: Arc<Mutex<NatReflectionCache>>,
pub(super) nat_reflection_singleflight_v4: Arc<Mutex<()>>,
pub(super) nat_reflection_singleflight_v6: Arc<Mutex<()>>,
pub(super) writer_available: Arc<Notify>, pub(super) writer_available: Arc<Notify>,
pub(super) refill_inflight: Arc<Mutex<HashSet<SocketAddr>>>, pub(super) refill_inflight: Arc<Mutex<HashSet<SocketAddr>>>,
pub(super) refill_inflight_dc: Arc<Mutex<HashSet<RefillDcKey>>>, pub(super) refill_inflight_dc: Arc<Mutex<HashSet<RefillDcKey>>>,
@@ -132,7 +134,7 @@ pub struct MePool {
pub(super) pending_hardswap_map_hash: AtomicU64, pub(super) pending_hardswap_map_hash: AtomicU64,
pub(super) hardswap: AtomicBool, pub(super) hardswap: AtomicBool,
pub(super) endpoint_quarantine: Arc<Mutex<HashMap<SocketAddr, Instant>>>, pub(super) endpoint_quarantine: Arc<Mutex<HashMap<SocketAddr, Instant>>>,
pub(super) kdf_material_fingerprint: Arc<Mutex<HashMap<SocketAddr, (u64, u16)>>>, pub(super) kdf_material_fingerprint: Arc<RwLock<HashMap<SocketAddr, (u64, u16)>>>,
pub(super) me_pool_drain_ttl_secs: AtomicU64, pub(super) me_pool_drain_ttl_secs: AtomicU64,
pub(super) me_pool_force_close_secs: AtomicU64, pub(super) me_pool_force_close_secs: AtomicU64,
pub(super) me_pool_min_fresh_ratio_permille: AtomicU32, pub(super) me_pool_min_fresh_ratio_permille: AtomicU32,
@@ -145,6 +147,11 @@ pub struct MePool {
pub(super) secret_atomic_snapshot: AtomicBool, pub(super) secret_atomic_snapshot: AtomicBool,
pub(super) me_deterministic_writer_sort: AtomicBool, pub(super) me_deterministic_writer_sort: AtomicBool,
pub(super) me_socks_kdf_policy: AtomicU8, pub(super) me_socks_kdf_policy: AtomicU8,
pub(super) me_route_no_writer_mode: AtomicU8,
pub(super) me_route_no_writer_wait: Duration,
pub(super) me_route_inline_recovery_attempts: u32,
pub(super) me_route_inline_recovery_wait: Duration,
pub(super) runtime_ready: AtomicBool,
pool_size: usize, pool_size: usize,
} }
@@ -227,6 +234,10 @@ impl MePool {
me_route_backpressure_base_timeout_ms: u64, me_route_backpressure_base_timeout_ms: u64,
me_route_backpressure_high_timeout_ms: u64, me_route_backpressure_high_timeout_ms: u64,
me_route_backpressure_high_watermark_pct: u8, me_route_backpressure_high_watermark_pct: u8,
me_route_no_writer_mode: MeRouteNoWriterMode,
me_route_no_writer_wait_ms: u64,
me_route_inline_recovery_attempts: u32,
me_route_inline_recovery_wait_ms: u64,
) -> Arc<Self> { ) -> Arc<Self> {
let registry = Arc::new(ConnRegistry::new()); let registry = Arc::new(ConnRegistry::new());
registry.update_route_backpressure_policy( registry.update_route_backpressure_policy(
@@ -314,6 +325,8 @@ impl MePool {
ping_tracker: Arc::new(Mutex::new(HashMap::new())), ping_tracker: Arc::new(Mutex::new(HashMap::new())),
rtt_stats: Arc::new(Mutex::new(HashMap::new())), rtt_stats: Arc::new(Mutex::new(HashMap::new())),
nat_reflection_cache: Arc::new(Mutex::new(NatReflectionCache::default())), nat_reflection_cache: Arc::new(Mutex::new(NatReflectionCache::default())),
nat_reflection_singleflight_v4: Arc::new(Mutex::new(())),
nat_reflection_singleflight_v6: Arc::new(Mutex::new(())),
writer_available: Arc::new(Notify::new()), writer_available: Arc::new(Notify::new()),
refill_inflight: Arc::new(Mutex::new(HashSet::new())), refill_inflight: Arc::new(Mutex::new(HashSet::new())),
refill_inflight_dc: Arc::new(Mutex::new(HashSet::new())), refill_inflight_dc: Arc::new(Mutex::new(HashSet::new())),
@@ -326,7 +339,7 @@ impl MePool {
pending_hardswap_map_hash: AtomicU64::new(0), pending_hardswap_map_hash: AtomicU64::new(0),
hardswap: AtomicBool::new(hardswap), hardswap: AtomicBool::new(hardswap),
endpoint_quarantine: Arc::new(Mutex::new(HashMap::new())), endpoint_quarantine: Arc::new(Mutex::new(HashMap::new())),
kdf_material_fingerprint: Arc::new(Mutex::new(HashMap::new())), kdf_material_fingerprint: Arc::new(RwLock::new(HashMap::new())),
me_pool_drain_ttl_secs: AtomicU64::new(me_pool_drain_ttl_secs), me_pool_drain_ttl_secs: AtomicU64::new(me_pool_drain_ttl_secs),
me_pool_force_close_secs: AtomicU64::new(me_pool_force_close_secs), me_pool_force_close_secs: AtomicU64::new(me_pool_force_close_secs),
me_pool_min_fresh_ratio_permille: AtomicU32::new(Self::ratio_to_permille( me_pool_min_fresh_ratio_permille: AtomicU32::new(Self::ratio_to_permille(
@@ -343,6 +356,11 @@ impl MePool {
secret_atomic_snapshot: AtomicBool::new(me_secret_atomic_snapshot), secret_atomic_snapshot: AtomicBool::new(me_secret_atomic_snapshot),
me_deterministic_writer_sort: AtomicBool::new(me_deterministic_writer_sort), me_deterministic_writer_sort: AtomicBool::new(me_deterministic_writer_sort),
me_socks_kdf_policy: AtomicU8::new(me_socks_kdf_policy.as_u8()), me_socks_kdf_policy: AtomicU8::new(me_socks_kdf_policy.as_u8()),
me_route_no_writer_mode: AtomicU8::new(me_route_no_writer_mode.as_u8()),
me_route_no_writer_wait: Duration::from_millis(me_route_no_writer_wait_ms),
me_route_inline_recovery_attempts,
me_route_inline_recovery_wait: Duration::from_millis(me_route_inline_recovery_wait_ms),
runtime_ready: AtomicBool::new(false),
}) })
} }
@@ -350,6 +368,14 @@ impl MePool {
self.active_generation.load(Ordering::Relaxed) self.active_generation.load(Ordering::Relaxed)
} }
pub fn set_runtime_ready(&self, ready: bool) {
self.runtime_ready.store(ready, Ordering::Relaxed);
}
pub fn is_runtime_ready(&self) -> bool {
self.runtime_ready.load(Ordering::Relaxed)
}
pub fn update_runtime_reinit_policy( pub fn update_runtime_reinit_policy(
&self, &self,
hardswap: bool, hardswap: bool,

View File

@@ -14,10 +14,12 @@ use super::pool::MePool;
impl MePool { impl MePool {
pub async fn init(self: &Arc<Self>, pool_size: usize, rng: &Arc<SecureRandom>) -> Result<()> { pub async fn init(self: &Arc<Self>, pool_size: usize, rng: &Arc<SecureRandom>) -> Result<()> {
let family_order = self.family_order(); let family_order = self.family_order();
let connect_concurrency = self.me_reconnect_max_concurrent_per_dc.max(1) as usize;
let ks = self.key_selector().await; let ks = self.key_selector().await;
info!( info!(
me_servers = self.proxy_map_v4.read().await.len(), me_servers = self.proxy_map_v4.read().await.len(),
pool_size, pool_size,
connect_concurrency,
key_selector = format_args!("0x{ks:08x}"), key_selector = format_args!("0x{ks:08x}"),
secret_len = self.proxy_secret.read().await.secret.len(), secret_len = self.proxy_secret.read().await.secret.len(),
"Initializing ME pool" "Initializing ME pool"
@@ -41,23 +43,39 @@ impl MePool {
}) })
.collect(); .collect();
dc_addrs.sort_unstable_by_key(|(dc, _)| *dc); dc_addrs.sort_unstable_by_key(|(dc, _)| *dc);
dc_addrs.sort_by_key(|(_, addrs)| (addrs.len() != 1, addrs.len()));
// Ensure at least one live writer per DC group; run missing DCs in parallel. // Stage 1: build base coverage for conditional-cast.
// Single-endpoint DCs are prefilled first; multi-endpoint DCs require one live writer.
let mut join = tokio::task::JoinSet::new(); let mut join = tokio::task::JoinSet::new();
for (dc, addrs) in dc_addrs.iter().cloned() { for (dc, addrs) in dc_addrs.iter().cloned() {
if addrs.is_empty() { if addrs.is_empty() {
continue; continue;
} }
let target_writers = if addrs.len() == 1 {
self.required_writers_for_dc_with_floor_mode(addrs.len(), false)
} else {
1usize
};
let endpoints: HashSet<SocketAddr> = addrs let endpoints: HashSet<SocketAddr> = addrs
.iter() .iter()
.map(|(ip, port)| SocketAddr::new(*ip, *port)) .map(|(ip, port)| SocketAddr::new(*ip, *port))
.collect(); .collect();
if self.active_writer_count_for_endpoints(&endpoints).await > 0 { if self.active_writer_count_for_endpoints(&endpoints).await >= target_writers {
continue; continue;
} }
let pool = Arc::clone(self); let pool = Arc::clone(self);
let rng_clone = Arc::clone(rng); let rng_clone = Arc::clone(rng);
join.spawn(async move { pool.connect_primary_for_dc(dc, addrs, rng_clone).await }); join.spawn(async move {
pool.connect_primary_for_dc(
dc,
addrs,
target_writers,
rng_clone,
connect_concurrency,
)
.await
});
} }
while join.join_next().await.is_some() {} while join.join_next().await.is_some() {}
@@ -77,47 +95,35 @@ impl MePool {
))); )));
} }
// Warm reserve writers asynchronously so startup does not block after first working pool is ready. // Stage 2: continue saturating multi-endpoint DC groups in background.
let pool = Arc::clone(self); let pool = Arc::clone(self);
let rng_clone = Arc::clone(rng); let rng_clone = Arc::clone(rng);
let dc_addrs_bg = dc_addrs.clone(); let dc_addrs_bg = dc_addrs.clone();
tokio::spawn(async move { tokio::spawn(async move {
if pool.me_warmup_stagger_enabled { let mut join_bg = tokio::task::JoinSet::new();
for (dc, addrs) in &dc_addrs_bg { for (dc, addrs) in dc_addrs_bg {
for (ip, port) in addrs { if addrs.len() <= 1 {
if pool.connection_count() >= pool_size { continue;
break;
}
let addr = SocketAddr::new(*ip, *port);
let jitter = rand::rng()
.random_range(0..=pool.me_warmup_step_jitter.as_millis() as u64);
let delay_ms = pool.me_warmup_step_delay.as_millis() as u64 + jitter;
tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
if let Err(e) = pool.connect_one(addr, rng_clone.as_ref()).await {
debug!(%addr, dc = %dc, error = %e, "Extra ME connect failed (staggered)");
}
}
}
} else {
for (dc, addrs) in &dc_addrs_bg {
for (ip, port) in addrs {
if pool.connection_count() >= pool_size {
break;
}
let addr = SocketAddr::new(*ip, *port);
if let Err(e) = pool.connect_one(addr, rng_clone.as_ref()).await {
debug!(%addr, dc = %dc, error = %e, "Extra ME connect failed");
}
}
if pool.connection_count() >= pool_size {
break;
}
} }
let target_writers = pool.required_writers_for_dc_with_floor_mode(addrs.len(), false);
let pool_clone = Arc::clone(&pool);
let rng_clone_local = Arc::clone(&rng_clone);
join_bg.spawn(async move {
pool_clone
.connect_primary_for_dc(
dc,
addrs,
target_writers,
rng_clone_local,
connect_concurrency,
)
.await
});
} }
while join_bg.join_next().await.is_some() {}
debug!( debug!(
target_pool_size = pool_size,
current_pool_size = pool.connection_count(), current_pool_size = pool.connection_count(),
"Background ME reserve warmup finished" "Background ME saturation warmup finished"
); );
}); });
@@ -140,62 +146,85 @@ impl MePool {
self: Arc<Self>, self: Arc<Self>,
dc: i32, dc: i32,
mut addrs: Vec<(IpAddr, u16)>, mut addrs: Vec<(IpAddr, u16)>,
target_writers: usize,
rng: Arc<SecureRandom>, rng: Arc<SecureRandom>,
connect_concurrency: usize,
) -> bool { ) -> bool {
if addrs.is_empty() { if addrs.is_empty() {
return false; return false;
} }
let target_writers = target_writers.max(1);
addrs.shuffle(&mut rand::rng()); addrs.shuffle(&mut rand::rng());
if addrs.len() > 1 { let endpoints: Vec<SocketAddr> = addrs
let concurrency = 2usize; .iter()
.map(|(ip, port)| SocketAddr::new(*ip, *port))
.collect();
let endpoint_set: HashSet<SocketAddr> = endpoints.iter().copied().collect();
loop {
let alive = self.active_writer_count_for_endpoints(&endpoint_set).await;
if alive >= target_writers {
info!(
dc = %dc,
alive,
target_writers,
"ME connected"
);
return true;
}
let missing = target_writers.saturating_sub(alive).max(1);
let concurrency = connect_concurrency.max(1).min(missing);
let mut join = tokio::task::JoinSet::new(); let mut join = tokio::task::JoinSet::new();
let mut next_idx = 0usize; for _ in 0..concurrency {
let pool = Arc::clone(&self);
let rng_clone = Arc::clone(&rng);
let endpoints_clone = endpoints.clone();
join.spawn(async move {
pool.connect_endpoints_round_robin(&endpoints_clone, rng_clone.as_ref())
.await
});
}
while next_idx < addrs.len() || !join.is_empty() { let mut progress = false;
while next_idx < addrs.len() && join.len() < concurrency { while let Some(res) = join.join_next().await {
let (ip, port) = addrs[next_idx];
next_idx += 1;
let addr = SocketAddr::new(ip, port);
let pool = Arc::clone(&self);
let rng_clone = Arc::clone(&rng);
join.spawn(async move {
(addr, pool.connect_one(addr, rng_clone.as_ref()).await)
});
}
let Some(res) = join.join_next().await else {
break;
};
match res { match res {
Ok((addr, Ok(()))) => { Ok(true) => {
info!(%addr, dc = %dc, "ME connected"); progress = true;
join.abort_all();
while join.join_next().await.is_some() {}
return true;
}
Ok((addr, Err(e))) => {
warn!(%addr, dc = %dc, error = %e, "ME connect failed, trying next");
} }
Ok(false) => {}
Err(e) => { Err(e) => {
warn!(dc = %dc, error = %e, "ME connect task failed"); warn!(dc = %dc, error = %e, "ME connect task failed");
} }
} }
} }
warn!(dc = %dc, "All ME servers for DC failed at init");
return false;
}
for (ip, port) in addrs { let alive_after = self.active_writer_count_for_endpoints(&endpoint_set).await;
let addr = SocketAddr::new(ip, port); if alive_after >= target_writers {
match self.connect_one(addr, rng.as_ref()).await { info!(
Ok(()) => { dc = %dc,
info!(%addr, dc = %dc, "ME connected"); alive = alive_after,
return true; target_writers,
} "ME connected"
Err(e) => warn!(%addr, dc = %dc, error = %e, "ME connect failed, trying next"), );
return true;
}
if !progress {
warn!(
dc = %dc,
alive = alive_after,
target_writers,
"All ME servers for DC failed at init"
);
return false;
}
if self.me_warmup_stagger_enabled {
let jitter = rand::rng()
.random_range(0..=self.me_warmup_step_jitter.as_millis() as u64);
let delay_ms = self.me_warmup_step_delay.as_millis() as u64 + jitter;
tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
} }
} }
warn!(dc = %dc, "All ME servers for DC failed at init");
false
} }
} }

View File

@@ -248,6 +248,43 @@ impl MePool {
} }
} }
let _singleflight_guard = if use_shared_cache {
Some(match family {
IpFamily::V4 => self.nat_reflection_singleflight_v4.lock().await,
IpFamily::V6 => self.nat_reflection_singleflight_v6.lock().await,
})
} else {
None
};
if use_shared_cache
&& let Some(until) = *self.stun_backoff_until.read().await
&& Instant::now() < until
{
if let Ok(cache) = self.nat_reflection_cache.try_lock() {
let slot = match family {
IpFamily::V4 => cache.v4,
IpFamily::V6 => cache.v6,
};
return slot.map(|(_, addr)| addr);
}
return None;
}
if use_shared_cache
&& let Ok(mut cache) = self.nat_reflection_cache.try_lock()
{
let slot = match family {
IpFamily::V4 => &mut cache.v4,
IpFamily::V6 => &mut cache.v6,
};
if let Some((ts, addr)) = slot
&& ts.elapsed() < STUN_CACHE_TTL
{
return Some(*addr);
}
}
let attempt = if use_shared_cache { let attempt = if use_shared_cache {
self.nat_probe_attempts.fetch_add(1, std::sync::atomic::Ordering::Relaxed) self.nat_probe_attempts.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
} else { } else {

View File

@@ -100,6 +100,134 @@ pub(crate) struct MeApiRuntimeSnapshot {
} }
impl MePool { impl MePool {
pub(crate) async fn admission_ready_conditional_cast(&self) -> bool {
let mut endpoints_by_dc = BTreeMap::<i16, BTreeSet<SocketAddr>>::new();
if self.decision.ipv4_me {
let map = self.proxy_map_v4.read().await.clone();
for (dc, addrs) in map {
let abs_dc = dc.abs();
if abs_dc == 0 {
continue;
}
let Ok(dc_idx) = i16::try_from(abs_dc) else {
continue;
};
let entry = endpoints_by_dc.entry(dc_idx).or_default();
for (ip, port) in addrs {
entry.insert(SocketAddr::new(ip, port));
}
}
}
if self.decision.ipv6_me {
let map = self.proxy_map_v6.read().await.clone();
for (dc, addrs) in map {
let abs_dc = dc.abs();
if abs_dc == 0 {
continue;
}
let Ok(dc_idx) = i16::try_from(abs_dc) else {
continue;
};
let entry = endpoints_by_dc.entry(dc_idx).or_default();
for (ip, port) in addrs {
entry.insert(SocketAddr::new(ip, port));
}
}
}
if endpoints_by_dc.is_empty() {
return false;
}
let writers = self.writers.read().await.clone();
let mut live_writers_by_endpoint = HashMap::<SocketAddr, usize>::new();
for writer in writers {
if writer.draining.load(Ordering::Relaxed) {
continue;
}
*live_writers_by_endpoint.entry(writer.addr).or_insert(0) += 1;
}
for endpoints in endpoints_by_dc.values() {
let alive: usize = endpoints
.iter()
.map(|endpoint| live_writers_by_endpoint.get(endpoint).copied().unwrap_or(0))
.sum();
if alive == 0 {
return false;
}
}
true
}
#[allow(dead_code)]
pub(crate) async fn admission_ready_full_floor(&self) -> bool {
let mut endpoints_by_dc = BTreeMap::<i16, BTreeSet<SocketAddr>>::new();
if self.decision.ipv4_me {
let map = self.proxy_map_v4.read().await.clone();
for (dc, addrs) in map {
let abs_dc = dc.abs();
if abs_dc == 0 {
continue;
}
let Ok(dc_idx) = i16::try_from(abs_dc) else {
continue;
};
let entry = endpoints_by_dc.entry(dc_idx).or_default();
for (ip, port) in addrs {
entry.insert(SocketAddr::new(ip, port));
}
}
}
if self.decision.ipv6_me {
let map = self.proxy_map_v6.read().await.clone();
for (dc, addrs) in map {
let abs_dc = dc.abs();
if abs_dc == 0 {
continue;
}
let Ok(dc_idx) = i16::try_from(abs_dc) else {
continue;
};
let entry = endpoints_by_dc.entry(dc_idx).or_default();
for (ip, port) in addrs {
entry.insert(SocketAddr::new(ip, port));
}
}
}
if endpoints_by_dc.is_empty() {
return false;
}
let writers = self.writers.read().await.clone();
let mut live_writers_by_endpoint = HashMap::<SocketAddr, usize>::new();
for writer in writers {
if writer.draining.load(Ordering::Relaxed) {
continue;
}
*live_writers_by_endpoint.entry(writer.addr).or_insert(0) += 1;
}
for endpoints in endpoints_by_dc.values() {
let endpoint_count = endpoints.len();
if endpoint_count == 0 {
return false;
}
let required = self.required_writers_for_dc_with_floor_mode(endpoint_count, false);
let alive: usize = endpoints
.iter()
.map(|endpoint| live_writers_by_endpoint.get(endpoint).copied().unwrap_or(0))
.sum();
if alive < required {
return false;
}
}
true
}
pub(crate) async fn api_status_snapshot(&self) -> MeApiStatusSnapshot { pub(crate) async fn api_status_snapshot(&self) -> MeApiStatusSnapshot {
let now_epoch_secs = Self::now_epoch_secs(); let now_epoch_secs = Self::now_epoch_secs();

View File

@@ -124,7 +124,7 @@ pub(crate) async fn reader_loop(
let data = Bytes::copy_from_slice(&body[12..]); let data = Bytes::copy_from_slice(&body[12..]);
trace!(cid, flags, len = data.len(), "RPC_PROXY_ANS"); trace!(cid, flags, len = data.len(), "RPC_PROXY_ANS");
let routed = reg.route(cid, MeResponse::Data { flags, data }).await; let routed = reg.route_nowait(cid, MeResponse::Data { flags, data }).await;
if !matches!(routed, RouteResult::Routed) { if !matches!(routed, RouteResult::Routed) {
match routed { match routed {
RouteResult::NoConn => stats.increment_me_route_drop_no_conn(), RouteResult::NoConn => stats.increment_me_route_drop_no_conn(),
@@ -147,7 +147,7 @@ pub(crate) async fn reader_loop(
let cfm = u32::from_le_bytes(body[8..12].try_into().unwrap()); let cfm = u32::from_le_bytes(body[8..12].try_into().unwrap());
trace!(cid, cfm, "RPC_SIMPLE_ACK"); trace!(cid, cfm, "RPC_SIMPLE_ACK");
let routed = reg.route(cid, MeResponse::Ack(cfm)).await; let routed = reg.route_nowait(cid, MeResponse::Ack(cfm)).await;
if !matches!(routed, RouteResult::Routed) { if !matches!(routed, RouteResult::Routed) {
match routed { match routed {
RouteResult::NoConn => stats.increment_me_route_drop_no_conn(), RouteResult::NoConn => stats.increment_me_route_drop_no_conn(),

View File

@@ -208,6 +208,23 @@ impl ConnRegistry {
} }
} }
pub async fn route_nowait(&self, id: u64, resp: MeResponse) -> RouteResult {
let tx = {
let inner = self.inner.read().await;
inner.map.get(&id).cloned()
};
let Some(tx) = tx else {
return RouteResult::NoConn;
};
match tx.try_send(resp) {
Ok(()) => RouteResult::Routed,
Err(TrySendError::Closed(_)) => RouteResult::ChannelClosed,
Err(TrySendError::Full(_)) => RouteResult::QueueFullBase,
}
}
pub async fn bind_writer( pub async fn bind_writer(
&self, &self,
conn_id: u64, conn_id: u64,
@@ -278,6 +295,11 @@ impl ConnRegistry {
Some(ConnWriter { writer_id, tx: writer }) Some(ConnWriter { writer_id, tx: writer })
} }
pub async fn active_conn_ids(&self) -> Vec<u64> {
let inner = self.inner.read().await;
inner.writer_for_conn.keys().copied().collect()
}
pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> { pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> {
let mut inner = self.inner.write().await; let mut inner = self.inner.write().await;
inner.writers.remove(&writer_id); inner.writers.remove(&writer_id);

View File

@@ -1,16 +1,17 @@
use std::cmp::Reverse; use std::cmp::Reverse;
use std::collections::HashMap; use std::collections::{HashMap, HashSet};
use std::net::SocketAddr; use std::net::SocketAddr;
use std::sync::Arc; use std::sync::Arc;
use std::sync::atomic::Ordering; use std::sync::atomic::Ordering;
use std::time::Duration; use std::time::{Duration, Instant};
use tokio::sync::mpsc::error::TrySendError; use tokio::sync::mpsc::error::TrySendError;
use tracing::{debug, warn}; use tracing::{debug, warn};
use crate::config::MeRouteNoWriterMode;
use crate::error::{ProxyError, Result}; use crate::error::{ProxyError, Result};
use crate::network::IpFamily; use crate::network::IpFamily;
use crate::protocol::constants::RPC_CLOSE_EXT_U32; use crate::protocol::constants::{RPC_CLOSE_CONN_U32, RPC_CLOSE_EXT_U32};
use super::MePool; use super::MePool;
use super::codec::WriterCommand; use super::codec::WriterCommand;
@@ -21,6 +22,7 @@ use super::registry::ConnMeta;
const IDLE_WRITER_PENALTY_MID_SECS: u64 = 45; const IDLE_WRITER_PENALTY_MID_SECS: u64 = 45;
const IDLE_WRITER_PENALTY_HIGH_SECS: u64 = 55; const IDLE_WRITER_PENALTY_HIGH_SECS: u64 = 55;
const HYBRID_GLOBAL_BURST_PERIOD_ROUNDS: u32 = 4;
impl MePool { impl MePool {
/// Send RPC_PROXY_REQ. `tag_override`: per-user ad_tag (from access.user_ad_tags); if None, uses pool default. /// Send RPC_PROXY_REQ. `tag_override`: per-user ad_tag (from access.user_ad_tags); if None, uses pool default.
@@ -49,7 +51,14 @@ impl MePool {
our_addr, our_addr,
proto_flags, proto_flags,
}; };
let mut emergency_attempts = 0; let no_writer_mode =
MeRouteNoWriterMode::from_u8(self.me_route_no_writer_mode.load(Ordering::Relaxed));
let mut no_writer_deadline: Option<Instant> = None;
let mut emergency_attempts = 0u32;
let mut async_recovery_triggered = false;
let mut hybrid_recovery_round = 0u32;
let mut hybrid_last_recovery_at: Option<Instant> = None;
let hybrid_wait_step = self.me_route_no_writer_wait.max(Duration::from_millis(50));
loop { loop {
if let Some(current) = self.registry.get_writer(conn_id).await { if let Some(current) = self.registry.get_writer(conn_id).await {
@@ -74,34 +83,78 @@ impl MePool {
let mut writers_snapshot = { let mut writers_snapshot = {
let ws = self.writers.read().await; let ws = self.writers.read().await;
if ws.is_empty() { if ws.is_empty() {
// Create waiter before recovery attempts so notify_one permits are not missed.
let waiter = self.writer_available.notified();
drop(ws); drop(ws);
for family in self.family_order() { match no_writer_mode {
let map = match family { MeRouteNoWriterMode::AsyncRecoveryFailfast => {
IpFamily::V4 => self.proxy_map_v4.read().await.clone(), let deadline = *no_writer_deadline.get_or_insert_with(|| {
IpFamily::V6 => self.proxy_map_v6.read().await.clone(), Instant::now() + self.me_route_no_writer_wait
}; });
for (_dc, addrs) in map.iter() { if !async_recovery_triggered {
for (ip, port) in addrs { let triggered =
let addr = SocketAddr::new(*ip, *port); self.trigger_async_recovery_for_target_dc(target_dc).await;
if self.connect_one(addr, self.rng.as_ref()).await.is_ok() { if !triggered {
self.writer_available.notify_one(); self.trigger_async_recovery_global().await;
}
async_recovery_triggered = true;
}
if self.wait_for_writer_until(deadline).await {
continue;
}
self.stats.increment_me_no_writer_failfast_total();
return Err(ProxyError::Proxy(
"No ME writer available in failfast window".into(),
));
}
MeRouteNoWriterMode::InlineRecoveryLegacy => {
self.stats.increment_me_inline_recovery_total();
for _ in 0..self.me_route_inline_recovery_attempts.max(1) {
for family in self.family_order() {
let map = match family {
IpFamily::V4 => self.proxy_map_v4.read().await.clone(),
IpFamily::V6 => self.proxy_map_v6.read().await.clone(),
};
for (_dc, addrs) in &map {
for (ip, port) in addrs {
let addr = SocketAddr::new(*ip, *port);
let _ = self.connect_one(addr, self.rng.as_ref()).await;
}
}
}
if !self.writers.read().await.is_empty() {
break; break;
} }
} }
} if !self.writers.read().await.is_empty() {
} continue;
if !self.writers.read().await.is_empty() { }
continue; let waiter = self.writer_available.notified();
} if tokio::time::timeout(self.me_route_inline_recovery_wait, waiter)
if tokio::time::timeout(Duration::from_secs(3), waiter).await.is_err() { .await
if !self.writers.read().await.is_empty() { .is_err()
{
if !self.writers.read().await.is_empty() {
continue;
}
self.stats.increment_me_no_writer_failfast_total();
return Err(ProxyError::Proxy(
"All ME connections dead (legacy wait timeout)".into(),
));
}
continue;
}
MeRouteNoWriterMode::HybridAsyncPersistent => {
self.maybe_trigger_hybrid_recovery(
target_dc,
&mut hybrid_recovery_round,
&mut hybrid_last_recovery_at,
hybrid_wait_step,
)
.await;
let deadline = Instant::now() + hybrid_wait_step;
let _ = self.wait_for_writer_until(deadline).await;
continue; continue;
} }
return Err(ProxyError::Proxy("All ME connections dead (waited 3s)".into()));
} }
continue;
} }
ws.clone() ws.clone()
}; };
@@ -115,45 +168,81 @@ impl MePool {
.await; .await;
} }
if candidate_indices.is_empty() { if candidate_indices.is_empty() {
// Emergency connect-on-demand match no_writer_mode {
if emergency_attempts >= 3 { MeRouteNoWriterMode::AsyncRecoveryFailfast => {
return Err(ProxyError::Proxy("No ME writers available for target DC".into())); let deadline = *no_writer_deadline.get_or_insert_with(|| {
} Instant::now() + self.me_route_no_writer_wait
emergency_attempts += 1; });
for family in self.family_order() { if !async_recovery_triggered {
let map_guard = match family { let triggered = self.trigger_async_recovery_for_target_dc(target_dc).await;
IpFamily::V4 => self.proxy_map_v4.read().await, if !triggered {
IpFamily::V6 => self.proxy_map_v6.read().await, self.trigger_async_recovery_global().await;
}; }
if let Some(addrs) = map_guard.get(&(target_dc as i32)) { async_recovery_triggered = true;
let mut shuffled = addrs.clone(); }
shuffled.shuffle(&mut rand::rng()); if self.wait_for_candidate_until(target_dc, deadline).await {
drop(map_guard); continue;
for (ip, port) in shuffled { }
let addr = SocketAddr::new(ip, port); self.stats.increment_me_no_writer_failfast_total();
if self.connect_one(addr, self.rng.as_ref()).await.is_ok() { return Err(ProxyError::Proxy(
break; "No ME writers available for target DC in failfast window".into(),
));
}
MeRouteNoWriterMode::InlineRecoveryLegacy => {
self.stats.increment_me_inline_recovery_total();
if emergency_attempts >= self.me_route_inline_recovery_attempts.max(1) {
self.stats.increment_me_no_writer_failfast_total();
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
}
emergency_attempts += 1;
for family in self.family_order() {
let map_guard = match family {
IpFamily::V4 => self.proxy_map_v4.read().await,
IpFamily::V6 => self.proxy_map_v6.read().await,
};
if let Some(addrs) = map_guard.get(&(target_dc as i32)) {
let mut shuffled = addrs.clone();
shuffled.shuffle(&mut rand::rng());
drop(map_guard);
for (ip, port) in shuffled {
let addr = SocketAddr::new(ip, port);
if self.connect_one(addr, self.rng.as_ref()).await.is_ok() {
break;
}
}
tokio::time::sleep(Duration::from_millis(100 * emergency_attempts as u64)).await;
let ws2 = self.writers.read().await;
writers_snapshot = ws2.clone();
drop(ws2);
candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, target_dc, false)
.await;
if candidate_indices.is_empty() {
candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, target_dc, true)
.await;
}
if !candidate_indices.is_empty() {
break;
}
} }
} }
tokio::time::sleep(Duration::from_millis(100 * emergency_attempts)).await;
let ws2 = self.writers.read().await;
writers_snapshot = ws2.clone();
drop(ws2);
candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, target_dc, false)
.await;
if candidate_indices.is_empty() { if candidate_indices.is_empty() {
candidate_indices = self return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
.candidate_indices_for_dc(&writers_snapshot, target_dc, true)
.await;
}
if !candidate_indices.is_empty() {
break;
} }
} }
} MeRouteNoWriterMode::HybridAsyncPersistent => {
if candidate_indices.is_empty() { self.maybe_trigger_hybrid_recovery(
return Err(ProxyError::Proxy("No ME writers available for target DC".into())); target_dc,
&mut hybrid_recovery_round,
&mut hybrid_last_recovery_at,
hybrid_wait_step,
)
.await;
let deadline = Instant::now() + hybrid_wait_step;
let _ = self.wait_for_candidate_until(target_dc, deadline).await;
continue;
}
} }
} }
let writer_idle_since = self.registry.writer_idle_since_snapshot().await; let writer_idle_since = self.registry.writer_idle_since_snapshot().await;
@@ -275,6 +364,151 @@ impl MePool {
} }
} }
async fn wait_for_writer_until(&self, deadline: Instant) -> bool {
let waiter = self.writer_available.notified();
if !self.writers.read().await.is_empty() {
return true;
}
let now = Instant::now();
if now >= deadline {
return !self.writers.read().await.is_empty();
}
let timeout = deadline.saturating_duration_since(now);
if tokio::time::timeout(timeout, waiter).await.is_ok() {
return true;
}
!self.writers.read().await.is_empty()
}
async fn wait_for_candidate_until(&self, target_dc: i16, deadline: Instant) -> bool {
loop {
if self.has_candidate_for_target_dc(target_dc).await {
return true;
}
let now = Instant::now();
if now >= deadline {
return self.has_candidate_for_target_dc(target_dc).await;
}
let remaining = deadline.saturating_duration_since(now);
let sleep_for = remaining.min(Duration::from_millis(25));
let waiter = self.writer_available.notified();
tokio::select! {
_ = waiter => {}
_ = tokio::time::sleep(sleep_for) => {}
}
}
}
async fn has_candidate_for_target_dc(&self, target_dc: i16) -> bool {
let writers_snapshot = {
let ws = self.writers.read().await;
if ws.is_empty() {
return false;
}
ws.clone()
};
let mut candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, target_dc, false)
.await;
if candidate_indices.is_empty() {
candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, target_dc, true)
.await;
}
!candidate_indices.is_empty()
}
async fn trigger_async_recovery_for_target_dc(self: &Arc<Self>, target_dc: i16) -> bool {
let endpoints = self.endpoint_candidates_for_target_dc(target_dc).await;
if endpoints.is_empty() {
return false;
}
self.stats.increment_me_async_recovery_trigger_total();
for addr in endpoints.into_iter().take(8) {
self.trigger_immediate_refill(addr);
}
true
}
async fn trigger_async_recovery_global(self: &Arc<Self>) {
self.stats.increment_me_async_recovery_trigger_total();
let mut seen = HashSet::<SocketAddr>::new();
for family in self.family_order() {
let map = match family {
IpFamily::V4 => self.proxy_map_v4.read().await.clone(),
IpFamily::V6 => self.proxy_map_v6.read().await.clone(),
};
for addrs in map.values() {
for (ip, port) in addrs {
let addr = SocketAddr::new(*ip, *port);
if seen.insert(addr) {
self.trigger_immediate_refill(addr);
}
if seen.len() >= 8 {
return;
}
}
}
}
}
async fn endpoint_candidates_for_target_dc(&self, target_dc: i16) -> Vec<SocketAddr> {
let key = target_dc as i32;
let mut preferred = Vec::<SocketAddr>::new();
let mut seen = HashSet::<SocketAddr>::new();
for family in self.family_order() {
let map = match family {
IpFamily::V4 => self.proxy_map_v4.read().await.clone(),
IpFamily::V6 => self.proxy_map_v6.read().await.clone(),
};
let mut lookup_keys = vec![key, key.abs(), -key.abs()];
let def = self.default_dc.load(Ordering::Relaxed);
if def != 0 {
lookup_keys.push(def);
}
for lookup in lookup_keys {
if let Some(addrs) = map.get(&lookup) {
for (ip, port) in addrs {
let addr = SocketAddr::new(*ip, *port);
if seen.insert(addr) {
preferred.push(addr);
}
}
}
}
if !preferred.is_empty() && !self.decision.effective_multipath {
break;
}
}
preferred
}
async fn maybe_trigger_hybrid_recovery(
self: &Arc<Self>,
target_dc: i16,
hybrid_recovery_round: &mut u32,
hybrid_last_recovery_at: &mut Option<Instant>,
hybrid_wait_step: Duration,
) {
if let Some(last) = *hybrid_last_recovery_at
&& last.elapsed() < hybrid_wait_step
{
return;
}
let round = *hybrid_recovery_round;
let target_triggered = self.trigger_async_recovery_for_target_dc(target_dc).await;
if !target_triggered || round % HYBRID_GLOBAL_BURST_PERIOD_ROUNDS == 0 {
self.trigger_async_recovery_global().await;
}
*hybrid_recovery_round = round.saturating_add(1);
*hybrid_last_recovery_at = Some(Instant::now());
}
pub async fn send_close(self: &Arc<Self>, conn_id: u64) -> Result<()> { pub async fn send_close(self: &Arc<Self>, conn_id: u64) -> Result<()> {
if let Some(w) = self.registry.get_writer(conn_id).await { if let Some(w) = self.registry.get_writer(conn_id).await {
let mut p = Vec::with_capacity(12); let mut p = Vec::with_capacity(12);
@@ -292,6 +526,37 @@ impl MePool {
Ok(()) Ok(())
} }
pub async fn send_close_conn(self: &Arc<Self>, conn_id: u64) -> Result<()> {
if let Some(w) = self.registry.get_writer(conn_id).await {
let mut p = Vec::with_capacity(12);
p.extend_from_slice(&RPC_CLOSE_CONN_U32.to_le_bytes());
p.extend_from_slice(&conn_id.to_le_bytes());
match w.tx.try_send(WriterCommand::DataAndFlush(p)) {
Ok(()) => {}
Err(TrySendError::Full(cmd)) => {
let _ = tokio::time::timeout(Duration::from_millis(50), w.tx.send(cmd)).await;
}
Err(TrySendError::Closed(_)) => {
debug!(conn_id, "ME close_conn skipped: writer channel closed");
}
}
} else {
debug!(conn_id, "ME close_conn skipped (writer missing)");
}
self.registry.unregister(conn_id).await;
Ok(())
}
pub async fn shutdown_send_close_conn_all(self: &Arc<Self>) -> usize {
let conn_ids = self.registry.active_conn_ids().await;
let total = conn_ids.len();
for conn_id in conn_ids {
let _ = self.send_close_conn(conn_id).await;
}
total
}
pub fn connection_count(&self) -> usize { pub fn connection_count(&self) -> usize {
self.conn_count.load(Ordering::Relaxed) self.conn_count.load(Ordering::Relaxed)
} }

View File

@@ -225,6 +225,7 @@ pub struct UpstreamManager {
upstreams: Arc<RwLock<Vec<UpstreamState>>>, upstreams: Arc<RwLock<Vec<UpstreamState>>>,
connect_retry_attempts: u32, connect_retry_attempts: u32,
connect_retry_backoff: Duration, connect_retry_backoff: Duration,
connect_budget: Duration,
unhealthy_fail_threshold: u32, unhealthy_fail_threshold: u32,
connect_failfast_hard_errors: bool, connect_failfast_hard_errors: bool,
stats: Arc<Stats>, stats: Arc<Stats>,
@@ -235,6 +236,7 @@ impl UpstreamManager {
configs: Vec<UpstreamConfig>, configs: Vec<UpstreamConfig>,
connect_retry_attempts: u32, connect_retry_attempts: u32,
connect_retry_backoff_ms: u64, connect_retry_backoff_ms: u64,
connect_budget_ms: u64,
unhealthy_fail_threshold: u32, unhealthy_fail_threshold: u32,
connect_failfast_hard_errors: bool, connect_failfast_hard_errors: bool,
stats: Arc<Stats>, stats: Arc<Stats>,
@@ -248,6 +250,7 @@ impl UpstreamManager {
upstreams: Arc::new(RwLock::new(states)), upstreams: Arc::new(RwLock::new(states)),
connect_retry_attempts: connect_retry_attempts.max(1), connect_retry_attempts: connect_retry_attempts.max(1),
connect_retry_backoff: Duration::from_millis(connect_retry_backoff_ms), connect_retry_backoff: Duration::from_millis(connect_retry_backoff_ms),
connect_budget: Duration::from_millis(connect_budget_ms.max(1)),
unhealthy_fail_threshold: unhealthy_fail_threshold.max(1), unhealthy_fail_threshold: unhealthy_fail_threshold.max(1),
connect_failfast_hard_errors, connect_failfast_hard_errors,
stats, stats,
@@ -593,11 +596,27 @@ impl UpstreamManager {
let mut last_error: Option<ProxyError> = None; let mut last_error: Option<ProxyError> = None;
let mut attempts_used = 0u32; let mut attempts_used = 0u32;
for attempt in 1..=self.connect_retry_attempts { for attempt in 1..=self.connect_retry_attempts {
let elapsed = connect_started_at.elapsed();
if elapsed >= self.connect_budget {
last_error = Some(ProxyError::ConnectionTimeout {
addr: target.to_string(),
});
break;
}
let remaining_budget = self.connect_budget.saturating_sub(elapsed);
let attempt_timeout = Duration::from_secs(DIRECT_CONNECT_TIMEOUT_SECS)
.min(remaining_budget);
if attempt_timeout.is_zero() {
last_error = Some(ProxyError::ConnectionTimeout {
addr: target.to_string(),
});
break;
}
attempts_used = attempt; attempts_used = attempt;
self.stats.increment_upstream_connect_attempt_total(); self.stats.increment_upstream_connect_attempt_total();
let start = Instant::now(); let start = Instant::now();
match self match self
.connect_via_upstream(&upstream, target, bind_rr.clone()) .connect_via_upstream(&upstream, target, bind_rr.clone(), attempt_timeout)
.await .await
{ {
Ok((stream, egress)) => { Ok((stream, egress)) => {
@@ -707,6 +726,7 @@ impl UpstreamManager {
config: &UpstreamConfig, config: &UpstreamConfig,
target: SocketAddr, target: SocketAddr,
bind_rr: Option<Arc<AtomicUsize>>, bind_rr: Option<Arc<AtomicUsize>>,
connect_timeout: Duration,
) -> Result<(TcpStream, UpstreamEgressInfo)> { ) -> Result<(TcpStream, UpstreamEgressInfo)> {
match &config.upstream_type { match &config.upstream_type {
UpstreamType::Direct { interface, bind_addresses } => { UpstreamType::Direct { interface, bind_addresses } => {
@@ -735,7 +755,6 @@ impl UpstreamManager {
let std_stream: std::net::TcpStream = socket.into(); let std_stream: std::net::TcpStream = socket.into();
let stream = TcpStream::from_std(std_stream)?; let stream = TcpStream::from_std(std_stream)?;
let connect_timeout = Duration::from_secs(DIRECT_CONNECT_TIMEOUT_SECS);
match tokio::time::timeout(connect_timeout, stream.writable()).await { match tokio::time::timeout(connect_timeout, stream.writable()).await {
Ok(Ok(())) => {} Ok(Ok(())) => {}
Ok(Err(e)) => return Err(ProxyError::Io(e)), Ok(Err(e)) => return Err(ProxyError::Io(e)),
@@ -762,7 +781,6 @@ impl UpstreamManager {
)) ))
}, },
UpstreamType::Socks4 { address, interface, user_id } => { UpstreamType::Socks4 { address, interface, user_id } => {
let connect_timeout = Duration::from_secs(DIRECT_CONNECT_TIMEOUT_SECS);
// Try to parse as SocketAddr first (IP:port), otherwise treat as hostname:port // Try to parse as SocketAddr first (IP:port), otherwise treat as hostname:port
let mut stream = if let Ok(proxy_addr) = address.parse::<SocketAddr>() { let mut stream = if let Ok(proxy_addr) = address.parse::<SocketAddr>() {
// IP:port format - use socket with optional interface binding // IP:port format - use socket with optional interface binding
@@ -841,7 +859,6 @@ impl UpstreamManager {
)) ))
}, },
UpstreamType::Socks5 { address, interface, username, password } => { UpstreamType::Socks5 { address, interface, username, password } => {
let connect_timeout = Duration::from_secs(DIRECT_CONNECT_TIMEOUT_SECS);
// Try to parse as SocketAddr first (IP:port), otherwise treat as hostname:port // Try to parse as SocketAddr first (IP:port), otherwise treat as hostname:port
let mut stream = if let Ok(proxy_addr) = address.parse::<SocketAddr>() { let mut stream = if let Ok(proxy_addr) = address.parse::<SocketAddr>() {
// IP:port format - use socket with optional interface binding // IP:port format - use socket with optional interface binding
@@ -1165,7 +1182,14 @@ impl UpstreamManager {
target: SocketAddr, target: SocketAddr,
) -> Result<f64> { ) -> Result<f64> {
let start = Instant::now(); let start = Instant::now();
let _ = self.connect_via_upstream(config, target, bind_rr).await?; let _ = self
.connect_via_upstream(
config,
target,
bind_rr,
Duration::from_secs(DC_PING_TIMEOUT_SECS),
)
.await?;
Ok(start.elapsed().as_secs_f64() * 1000.0) Ok(start.elapsed().as_secs_f64() * 1000.0)
} }
@@ -1337,7 +1361,12 @@ impl UpstreamManager {
let start = Instant::now(); let start = Instant::now();
let result = tokio::time::timeout( let result = tokio::time::timeout(
Duration::from_secs(HEALTH_CHECK_CONNECT_TIMEOUT_SECS), Duration::from_secs(HEALTH_CHECK_CONNECT_TIMEOUT_SECS),
self.connect_via_upstream(&config, endpoint, Some(bind_rr.clone())), self.connect_via_upstream(
&config,
endpoint,
Some(bind_rr.clone()),
Duration::from_secs(HEALTH_CHECK_CONNECT_TIMEOUT_SECS),
),
) )
.await; .await;