Mirror of https://github.com/telemt/telemt.git, synced 2026-04-14 17:14:09 +03:00

Compare commits (53 commits)
Commits included (SHA1): 9b3ba2e1c6, dbadbf0221, 173624c838, de2047adf2, 5df2fe9f97, 2510ebaa79, 314f30a434, c86a511638, f1efaf4491, 716b4adef2, 5876623bb0, 6b9c7f7862, 7ea6387278, 4c2bc2f41f, c86f35f059, 3492566842, 349bbbb8fa, ead08981e7, 068cf825b9, 7269dfbdc5, 533708f885, 5e93ce258f, 1236505502, f7d451e689, e11da6d2ae, d31b4cd6c8, f4ec6bb303, a6132bac38, 624870109e, cdf829de91, 6ef51dbfb0, af5f0b9692, bd0dcfff15, ec4e48808e, c293901669, f4e5a08614, 430a0ae6b4, 53d93880ad, 1706698a83, cb0832b803, c01ca40b6d, cfec6dbb3c, 1fe1acadd4, 225fc3e4ea, 4a0d88ad43, 58ff0c7971, 7d39bf1698, 3b8eea762b, 07ec84d071, 235642459a, 3799fc13c4, 71261522bd, 762deac511
Cargo.toml

```diff
@@ -1,6 +1,6 @@
 [package]
 name = "telemt"
-version = "3.1.4"
+version = "3.2.1"
 edition = "2024"

 [dependencies]
```
README.md

```diff
@@ -1,5 +1,7 @@
 # Telemt - MTProxy on Rust + Tokio

+***Löst Probleme, bevor andere überhaupt wissen, dass sie existieren*** / ***It solves problems before others even realize they exist***
+
 **Telemt** is a fast, secure, and feature-rich server written in Rust: it fully implements the official Telegram proxy algorithm and adds many production-ready improvements such as connection pooling, replay protection, detailed statistics, and masking from "prying" eyes.

 [**Telemt Chat in Telegram**](https://t.me/telemtrs)
```
```diff
@@ -34,6 +34,13 @@ port = 443
 # metrics_port = 9090
 # metrics_whitelist = ["127.0.0.1", "::1", "0.0.0.0/0"]

+[server.api]
+enabled = true
+listen = "0.0.0.0:9091"
+whitelist = ["127.0.0.0/8"]
+minimal_runtime_enabled = false
+minimal_runtime_cache_ttl_ms = 1000
+
 # Listen on multiple interfaces/IPs - IPv4
 [[server.listeners]]
 ip = "0.0.0.0"
```
||||
docker-compose.yml

```diff
@@ -6,7 +6,7 @@ services:
     restart: unless-stopped
     ports:
       - "443:443"
-      - "9090:9090"
+      - "127.0.0.1:9090:9090"
     # Allow caching 'proxy-secret' in read-only container
     working_dir: /run/telemt
     volumes:
```
||||
docs/API.md (new file, 427 lines)

@@ -0,0 +1,427 @@
# Telemt Control API

## Purpose

Control-plane HTTP API for runtime visibility and user/config management.
Data-plane MTProto traffic is out of scope.

## Runtime Configuration

API runtime is configured in `[server.api]`.

| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `enabled` | `bool` | `false` | Enables REST API listener. |
| `listen` | `string` (`IP:PORT`) | `127.0.0.1:9091` | API bind address. |
| `whitelist` | `CIDR[]` | `127.0.0.1/32, ::1/128` | Source IP allowlist. Empty list means allow all. |
| `auth_header` | `string` | `""` | Exact value for `Authorization` header. Empty disables header auth. |
| `request_body_limit_bytes` | `usize` | `65536` | Maximum request body size. |
| `minimal_runtime_enabled` | `bool` | `false` | Enables runtime snapshot endpoints requiring ME pool read-lock aggregation. |
| `minimal_runtime_cache_ttl_ms` | `u64` | `1000` | Cache TTL for minimal snapshots. `0` disables cache. |
| `read_only` | `bool` | `false` | Disables mutating endpoints. |

`server.admin_api` is accepted as an alias for backward compatibility.
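For orientation, a complete `[server.api]` block spelling out the defaults above might look like this (the `auth_header` comment shows an illustrative placeholder, not a required format):

```toml
[server.api]
enabled = true                      # default: false
listen = "127.0.0.1:9091"
whitelist = ["127.0.0.1/32", "::1/128"]
auth_header = ""                    # e.g. "Bearer <token>"; empty disables header auth
request_body_limit_bytes = 65536
minimal_runtime_enabled = false
minimal_runtime_cache_ttl_ms = 1000
read_only = false
```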
## Protocol Contract

| Item | Value |
| --- | --- |
| Transport | HTTP/1.1 |
| Content type | `application/json; charset=utf-8` |
| Prefix | `/v1` |
| Optimistic concurrency | `If-Match: <revision>` on mutating requests (optional) |
| Revision format | SHA-256 hex of current `config.toml` content |

### Success Envelope

```json
{
  "ok": true,
  "data": {},
  "revision": "sha256-hex"
}
```

### Error Envelope

```json
{
  "ok": false,
  "error": {
    "code": "machine_code",
    "message": "human-readable"
  },
  "request_id": 1
}
```
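Because the revision is plain SHA-256 over the config file content, a client can compute it locally. The sketch below mirrors the `compute_revision` helper from `src/api/config_store.rs` further down in this change; it assumes the same `sha2` and `hex` crates, and the config path is illustrative.

```rust
use sha2::{Digest, Sha256};

/// Same scheme as compute_revision: SHA-256 over the raw config bytes, hex-encoded.
fn revision_of(config_toml: &str) -> String {
    let mut hasher = Sha256::new();
    hasher.update(config_toml.as_bytes());
    hex::encode(hasher.finalize())
}

fn main() {
    // Adjust the path to your deployment.
    let content = std::fs::read_to_string("/etc/telemt.toml").expect("read config");
    // Send this value as `If-Match` on the next mutating request.
    println!("{}", revision_of(&content));
}
```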
## Endpoint Matrix

| Method | Path | Body | Success | `data` contract |
| --- | --- | --- | --- | --- |
| `GET` | `/v1/health` | none | `200` | `HealthData` |
| `GET` | `/v1/stats/summary` | none | `200` | `SummaryData` |
| `GET` | `/v1/stats/zero/all` | none | `200` | `ZeroAllData` |
| `GET` | `/v1/stats/upstreams` | none | `200` | `UpstreamsData` |
| `GET` | `/v1/stats/minimal/all` | none | `200` | `MinimalAllData` |
| `GET` | `/v1/stats/me-writers` | none | `200` | `MeWritersData` |
| `GET` | `/v1/stats/dcs` | none | `200` | `DcStatusData` |
| `GET` | `/v1/stats/users` | none | `200` | `UserInfo[]` |
| `GET` | `/v1/users` | none | `200` | `UserInfo[]` |
| `POST` | `/v1/users` | `CreateUserRequest` | `201` | `CreateUserResponse` |
| `GET` | `/v1/users/{username}` | none | `200` | `UserInfo` |
| `PATCH` | `/v1/users/{username}` | `PatchUserRequest` | `200` | `UserInfo` |
| `DELETE` | `/v1/users/{username}` | none | `200` | `string` (deleted username) |
| `POST` | `/v1/users/{username}/rotate-secret` | `RotateSecretRequest` or empty body | `200` | `CreateUserResponse` |
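As a concrete example, a successful `GET /v1/health` returns the standard envelope; the `revision` value below is an illustrative placeholder:

```json
{
  "ok": true,
  "data": {
    "status": "ok",
    "read_only": false
  },
  "revision": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}
```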
## Common Error Codes

| HTTP | `error.code` | Trigger |
| --- | --- | --- |
| `400` | `bad_request` | Invalid JSON, validation failures, malformed request body. |
| `401` | `unauthorized` | Missing/invalid `Authorization` when `auth_header` is configured. |
| `403` | `forbidden` | Source IP is not allowed by whitelist. |
| `403` | `read_only` | Mutating endpoint called while `read_only=true`. |
| `404` | `not_found` | Unknown route or unknown user. |
| `405` | `method_not_allowed` | Unsupported method for an existing user route. |
| `409` | `revision_conflict` | `If-Match` revision mismatch. |
| `409` | `user_exists` | User already exists on create. |
| `409` | `last_user_forbidden` | Attempt to delete last configured user. |
| `413` | `payload_too_large` | Body exceeds `request_body_limit_bytes`. |
| `500` | `internal_error` | Internal error (I/O, serialization, config load/save). |
| `503` | `api_disabled` | API disabled in config. |
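For instance, requesting an unknown user yields a `404` with the error envelope; the `request_id` is a per-process counter, so the value here is illustrative:

```json
{
  "ok": false,
  "error": {
    "code": "not_found",
    "message": "User not found"
  },
  "request_id": 42
}
```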
## Request Contracts

### `CreateUserRequest`

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| `username` | `string` | yes | `[A-Za-z0-9_.-]`, length `1..64`. |
| `secret` | `string` | no | Exactly 32 hex chars. If missing, generated automatically. |
| `user_ad_tag` | `string` | no | Exactly 32 hex chars. |
| `max_tcp_conns` | `usize` | no | Per-user concurrent TCP limit. |
| `expiration_rfc3339` | `string` | no | RFC3339 expiration timestamp. |
| `data_quota_bytes` | `u64` | no | Per-user traffic quota. |
| `max_unique_ips` | `usize` | no | Per-user unique source IP limit. |

### `PatchUserRequest`

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| `secret` | `string` | no | Exactly 32 hex chars. |
| `user_ad_tag` | `string` | no | Exactly 32 hex chars. |
| `max_tcp_conns` | `usize` | no | Per-user concurrent TCP limit. |
| `expiration_rfc3339` | `string` | no | RFC3339 expiration timestamp. |
| `data_quota_bytes` | `u64` | no | Per-user traffic quota. |
| `max_unique_ips` | `usize` | no | Per-user unique source IP limit. |

### `RotateSecretRequest`

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| `secret` | `string` | no | Exactly 32 hex chars. If missing, generated automatically. |
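A minimal create request needs only `username`; everything else is optional. An illustrative full payload (secret and tag are placeholder hex, quota is 10 GiB):

```json
{
  "username": "alice",
  "secret": "00112233445566778899aabbccddeeff",
  "user_ad_tag": "ffeeddccbbaa99887766554433221100",
  "max_tcp_conns": 64,
  "expiration_rfc3339": "2027-01-01T00:00:00Z",
  "data_quota_bytes": 10737418240,
  "max_unique_ips": 8
}
```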
## Response Data Contracts

### `HealthData`

| Field | Type | Description |
| --- | --- | --- |
| `status` | `string` | Always `"ok"`. |
| `read_only` | `bool` | Mirrors current API `read_only` mode. |

### `SummaryData`

| Field | Type | Description |
| --- | --- | --- |
| `uptime_seconds` | `f64` | Process uptime in seconds. |
| `connections_total` | `u64` | Total accepted client connections. |
| `connections_bad_total` | `u64` | Failed/invalid client connections. |
| `handshake_timeouts_total` | `u64` | Handshake timeout count. |
| `configured_users` | `usize` | Number of configured users in config. |

### `ZeroAllData`

| Field | Type | Description |
| --- | --- | --- |
| `generated_at_epoch_secs` | `u64` | Snapshot time (Unix epoch seconds). |
| `core` | `ZeroCoreData` | Core counters and telemetry policy snapshot. |
| `upstream` | `ZeroUpstreamData` | Upstream connect counters/histogram buckets. |
| `middle_proxy` | `ZeroMiddleProxyData` | ME protocol/health counters. |
| `pool` | `ZeroPoolData` | ME pool lifecycle counters. |
| `desync` | `ZeroDesyncData` | Frame desync counters. |

#### `ZeroCoreData`

| Field | Type | Description |
| --- | --- | --- |
| `uptime_seconds` | `f64` | Process uptime. |
| `connections_total` | `u64` | Total accepted connections. |
| `connections_bad_total` | `u64` | Failed/invalid connections. |
| `handshake_timeouts_total` | `u64` | Handshake timeouts. |
| `configured_users` | `usize` | Configured user count. |
| `telemetry_core_enabled` | `bool` | Core telemetry toggle. |
| `telemetry_user_enabled` | `bool` | User telemetry toggle. |
| `telemetry_me_level` | `string` | ME telemetry level (`off` \| `normal` \| `verbose`). |

#### `ZeroUpstreamData`

| Field | Type | Description |
| --- | --- | --- |
| `connect_attempt_total` | `u64` | Total upstream connect attempts. |
| `connect_success_total` | `u64` | Successful upstream connects. |
| `connect_fail_total` | `u64` | Failed upstream connects. |
| `connect_failfast_hard_error_total` | `u64` | Fail-fast hard errors. |
| `connect_attempts_bucket_1` | `u64` | Connect attempts resolved in 1 try. |
| `connect_attempts_bucket_2` | `u64` | Connect attempts resolved in 2 tries. |
| `connect_attempts_bucket_3_4` | `u64` | Connect attempts resolved in 3-4 tries. |
| `connect_attempts_bucket_gt_4` | `u64` | Connect attempts requiring more than 4 tries. |
| `connect_duration_success_bucket_le_100ms` | `u64` | Successful connects <=100 ms. |
| `connect_duration_success_bucket_101_500ms` | `u64` | Successful connects 101-500 ms. |
| `connect_duration_success_bucket_501_1000ms` | `u64` | Successful connects 501-1000 ms. |
| `connect_duration_success_bucket_gt_1000ms` | `u64` | Successful connects >1000 ms. |
| `connect_duration_fail_bucket_le_100ms` | `u64` | Failed connects <=100 ms. |
| `connect_duration_fail_bucket_101_500ms` | `u64` | Failed connects 101-500 ms. |
| `connect_duration_fail_bucket_501_1000ms` | `u64` | Failed connects 501-1000 ms. |
| `connect_duration_fail_bucket_gt_1000ms` | `u64` | Failed connects >1000 ms. |
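Reading the buckets together: a connect that succeeded on its second attempt after 230 ms lands in `connect_attempts_bucket_2` and `connect_duration_success_bucket_101_500ms`.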
#### `ZeroMiddleProxyData`

| Field | Type | Description |
| --- | --- | --- |
| `keepalive_sent_total` | `u64` | ME keepalive packets sent. |
| `keepalive_failed_total` | `u64` | ME keepalive send failures. |
| `keepalive_pong_total` | `u64` | Keepalive pong responses received. |
| `keepalive_timeout_total` | `u64` | Keepalive timeout events. |
| `rpc_proxy_req_signal_sent_total` | `u64` | RPC proxy activity signals sent. |
| `rpc_proxy_req_signal_failed_total` | `u64` | RPC proxy activity signal failures. |
| `rpc_proxy_req_signal_skipped_no_meta_total` | `u64` | Signals skipped due to missing metadata. |
| `rpc_proxy_req_signal_response_total` | `u64` | RPC proxy signal responses received. |
| `rpc_proxy_req_signal_close_sent_total` | `u64` | RPC proxy close signals sent. |
| `reconnect_attempt_total` | `u64` | ME reconnect attempts. |
| `reconnect_success_total` | `u64` | Successful reconnects. |
| `handshake_reject_total` | `u64` | ME handshake rejects. |
| `handshake_error_codes` | `ZeroCodeCount[]` | Handshake rejects grouped by code. |
| `reader_eof_total` | `u64` | ME reader EOF events. |
| `idle_close_by_peer_total` | `u64` | Idle closes initiated by peer. |
| `route_drop_no_conn_total` | `u64` | Route drops due to missing bound connection. |
| `route_drop_channel_closed_total` | `u64` | Route drops due to closed channel. |
| `route_drop_queue_full_total` | `u64` | Route drops due to full queue (total). |
| `route_drop_queue_full_base_total` | `u64` | Route drops in base queue mode. |
| `route_drop_queue_full_high_total` | `u64` | Route drops in high queue mode. |
| `socks_kdf_strict_reject_total` | `u64` | SOCKS KDF strict rejects. |
| `socks_kdf_compat_fallback_total` | `u64` | SOCKS KDF compat fallbacks. |
| `endpoint_quarantine_total` | `u64` | Endpoint quarantine activations. |
| `kdf_drift_total` | `u64` | KDF drift detections. |
| `kdf_port_only_drift_total` | `u64` | KDF port-only drift detections. |
| `hardswap_pending_reuse_total` | `u64` | Pending hardswap reused events. |
| `hardswap_pending_ttl_expired_total` | `u64` | Pending hardswap TTL expiry events. |
| `single_endpoint_outage_enter_total` | `u64` | Entered single-endpoint outage mode. |
| `single_endpoint_outage_exit_total` | `u64` | Exited single-endpoint outage mode. |
| `single_endpoint_outage_reconnect_attempt_total` | `u64` | Reconnect attempts in outage mode. |
| `single_endpoint_outage_reconnect_success_total` | `u64` | Reconnect successes in outage mode. |
| `single_endpoint_quarantine_bypass_total` | `u64` | Quarantine bypasses in outage mode. |
| `single_endpoint_shadow_rotate_total` | `u64` | Shadow writer rotations. |
| `single_endpoint_shadow_rotate_skipped_quarantine_total` | `u64` | Shadow rotations skipped because of quarantine. |
| `floor_mode_switch_total` | `u64` | Total floor mode switches. |
| `floor_mode_switch_static_to_adaptive_total` | `u64` | Static -> adaptive switches. |
| `floor_mode_switch_adaptive_to_static_total` | `u64` | Adaptive -> static switches. |

#### `ZeroCodeCount`

| Field | Type | Description |
| --- | --- | --- |
| `code` | `i32` | Handshake error code. |
| `total` | `u64` | Events with this code. |

#### `ZeroPoolData`

| Field | Type | Description |
| --- | --- | --- |
| `pool_swap_total` | `u64` | Pool swap count. |
| `pool_drain_active` | `u64` | Current active draining pools. |
| `pool_force_close_total` | `u64` | Forced pool closes by timeout. |
| `pool_stale_pick_total` | `u64` | Stale writer picks for binding. |
| `writer_removed_total` | `u64` | Writer removals total. |
| `writer_removed_unexpected_total` | `u64` | Unexpected writer removals. |
| `refill_triggered_total` | `u64` | Refill triggers. |
| `refill_skipped_inflight_total` | `u64` | Refill skipped because refill already in-flight. |
| `refill_failed_total` | `u64` | Refill failures. |
| `writer_restored_same_endpoint_total` | `u64` | Restores on same endpoint. |
| `writer_restored_fallback_total` | `u64` | Restores on fallback endpoint. |

#### `ZeroDesyncData`

| Field | Type | Description |
| --- | --- | --- |
| `secure_padding_invalid_total` | `u64` | Invalid secure padding events. |
| `desync_total` | `u64` | Desync events total. |
| `desync_full_logged_total` | `u64` | Fully logged desync events. |
| `desync_suppressed_total` | `u64` | Suppressed desync logs. |
| `desync_frames_bucket_0` | `u64` | Desync frames bucket 0. |
| `desync_frames_bucket_1_2` | `u64` | Desync frames bucket 1-2. |
| `desync_frames_bucket_3_10` | `u64` | Desync frames bucket 3-10. |
| `desync_frames_bucket_gt_10` | `u64` | Desync frames bucket >10. |
### `MinimalAllData`

| Field | Type | Description |
| --- | --- | --- |
| `enabled` | `bool` | Whether minimal runtime snapshots are enabled by config. |
| `reason` | `string?` | `feature_disabled` or `source_unavailable` when applicable. |
| `generated_at_epoch_secs` | `u64` | Snapshot generation time. |
| `data` | `MinimalAllPayload?` | Null when disabled; fallback payload when source unavailable. |

#### `MinimalAllPayload`

| Field | Type | Description |
| --- | --- | --- |
| `me_writers` | `MeWritersData` | ME writer status block. |
| `dcs` | `DcStatusData` | DC aggregate status block. |
| `me_runtime` | `MinimalMeRuntimeData?` | Runtime ME control snapshot. |
| `network_path` | `MinimalDcPathData[]` | Active IP path selection per DC. |

#### `MinimalMeRuntimeData`

| Field | Type | Description |
| --- | --- | --- |
| `active_generation` | `u64` | Active pool generation. |
| `warm_generation` | `u64` | Warm pool generation. |
| `pending_hardswap_generation` | `u64` | Pending hardswap generation. |
| `pending_hardswap_age_secs` | `u64?` | Pending hardswap age in seconds. |
| `hardswap_enabled` | `bool` | Hardswap mode toggle. |
| `floor_mode` | `string` | Writer floor mode. |
| `adaptive_floor_idle_secs` | `u64` | Idle threshold for adaptive floor. |
| `adaptive_floor_min_writers_single_endpoint` | `u8` | Minimum writers for single-endpoint DC in adaptive mode. |
| `adaptive_floor_recover_grace_secs` | `u64` | Grace period for floor recovery. |
| `me_keepalive_enabled` | `bool` | ME keepalive toggle. |
| `me_keepalive_interval_secs` | `u64` | Keepalive period. |
| `me_keepalive_jitter_secs` | `u64` | Keepalive jitter. |
| `me_keepalive_payload_random` | `bool` | Randomized keepalive payload toggle. |
| `rpc_proxy_req_every_secs` | `u64` | Period for RPC proxy request signal. |
| `me_reconnect_max_concurrent_per_dc` | `u32` | Reconnect concurrency per DC. |
| `me_reconnect_backoff_base_ms` | `u64` | Base reconnect backoff. |
| `me_reconnect_backoff_cap_ms` | `u64` | Max reconnect backoff. |
| `me_reconnect_fast_retry_count` | `u32` | Fast retry attempts before normal backoff. |
| `me_pool_drain_ttl_secs` | `u64` | Pool drain TTL. |
| `me_pool_force_close_secs` | `u64` | Hard close timeout for draining writers. |
| `me_pool_min_fresh_ratio` | `f32` | Minimum fresh ratio before swap. |
| `me_bind_stale_mode` | `string` | Stale writer bind policy. |
| `me_bind_stale_ttl_secs` | `u64` | Stale writer TTL. |
| `me_single_endpoint_shadow_writers` | `u8` | Shadow writers for single-endpoint DCs. |
| `me_single_endpoint_outage_mode_enabled` | `bool` | Outage mode toggle for single-endpoint DCs. |
| `me_single_endpoint_outage_disable_quarantine` | `bool` | Quarantine behavior in outage mode. |
| `me_single_endpoint_outage_backoff_min_ms` | `u64` | Outage mode min reconnect backoff. |
| `me_single_endpoint_outage_backoff_max_ms` | `u64` | Outage mode max reconnect backoff. |
| `me_single_endpoint_shadow_rotate_every_secs` | `u64` | Shadow rotation interval. |
| `me_deterministic_writer_sort` | `bool` | Deterministic writer ordering toggle. |
| `me_socks_kdf_policy` | `string` | Current SOCKS KDF policy mode. |
| `quarantined_endpoints_total` | `usize` | Total quarantined endpoints. |
| `quarantined_endpoints` | `MinimalQuarantineData[]` | Quarantine details. |

#### `MinimalQuarantineData`

| Field | Type | Description |
| --- | --- | --- |
| `endpoint` | `string` | Endpoint (`ip:port`). |
| `remaining_ms` | `u64` | Remaining quarantine duration. |

#### `MinimalDcPathData`

| Field | Type | Description |
| --- | --- | --- |
| `dc` | `i16` | Telegram DC identifier. |
| `ip_preference` | `string?` | Runtime IP family preference. |
| `selected_addr_v4` | `string?` | Selected IPv4 endpoint for this DC. |
| `selected_addr_v6` | `string?` | Selected IPv6 endpoint for this DC. |
### `MeWritersData`

| Field | Type | Description |
| --- | --- | --- |
| `middle_proxy_enabled` | `bool` | `false` when minimal runtime is disabled or source unavailable. |
| `reason` | `string?` | `feature_disabled` or `source_unavailable` when not fully available. |
| `generated_at_epoch_secs` | `u64` | Snapshot generation time. |
| `summary` | `MeWritersSummary` | Coverage/availability summary. |
| `writers` | `MeWriterStatus[]` | Per-writer statuses. |

#### `MeWritersSummary`

| Field | Type | Description |
| --- | --- | --- |
| `configured_dc_groups` | `usize` | Number of configured DC groups. |
| `configured_endpoints` | `usize` | Total configured ME endpoints. |
| `available_endpoints` | `usize` | Endpoints currently available. |
| `available_pct` | `f64` | `available_endpoints / configured_endpoints * 100`. |
| `required_writers` | `usize` | Required writers based on current floor policy. |
| `alive_writers` | `usize` | Writers currently alive. |
| `coverage_pct` | `f64` | `alive_writers / required_writers * 100`. |
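For example, with `required_writers = 4` and `alive_writers = 3`, `coverage_pct` is `3 / 4 * 100 = 75.0`; `available_pct` follows the same convention over endpoints.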
#### `MeWriterStatus`

| Field | Type | Description |
| --- | --- | --- |
| `writer_id` | `u64` | Runtime writer identifier. |
| `dc` | `i16?` | DC id if mapped. |
| `endpoint` | `string` | Endpoint (`ip:port`). |
| `generation` | `u64` | Pool generation owning this writer. |
| `state` | `string` | Writer state (`warm`, `active`, `draining`). |
| `draining` | `bool` | Draining flag. |
| `degraded` | `bool` | Degraded flag. |
| `bound_clients` | `usize` | Number of currently bound clients. |
| `idle_for_secs` | `u64?` | Idle age in seconds if idle. |
| `rtt_ema_ms` | `f64?` | RTT exponential moving average. |

### `DcStatusData`

| Field | Type | Description |
| --- | --- | --- |
| `middle_proxy_enabled` | `bool` | `false` when minimal runtime is disabled or source unavailable. |
| `reason` | `string?` | `feature_disabled` or `source_unavailable` when not fully available. |
| `generated_at_epoch_secs` | `u64` | Snapshot generation time. |
| `dcs` | `DcStatus[]` | Per-DC status rows. |

#### `DcStatus`

| Field | Type | Description |
| --- | --- | --- |
| `dc` | `i16` | Telegram DC id. |
| `endpoints` | `string[]` | Endpoints in this DC (`ip:port`). |
| `available_endpoints` | `usize` | Endpoints currently available in this DC. |
| `available_pct` | `f64` | `available_endpoints / endpoints_total * 100`. |
| `required_writers` | `usize` | Required writer count for this DC. |
| `alive_writers` | `usize` | Alive writers in this DC. |
| `coverage_pct` | `f64` | `alive_writers / required_writers * 100`. |
| `rtt_ms` | `f64?` | Aggregated RTT for DC. |
| `load` | `usize` | Active client sessions bound to this DC. |
### `UserInfo`

| Field | Type | Description |
| --- | --- | --- |
| `username` | `string` | Username. |
| `user_ad_tag` | `string?` | Optional ad tag (32 hex chars). |
| `max_tcp_conns` | `usize?` | Optional max concurrent TCP limit. |
| `expiration_rfc3339` | `string?` | Optional expiration timestamp. |
| `data_quota_bytes` | `u64?` | Optional data quota. |
| `max_unique_ips` | `usize?` | Optional unique IP limit. |
| `current_connections` | `u64` | Current live connections. |
| `active_unique_ips` | `usize` | Current active unique source IPs. |
| `total_octets` | `u64` | Total traffic octets for this user. |
| `links` | `UserLinks` | Active connection links derived from current config. |

#### `UserLinks`

| Field | Type | Description |
| --- | --- | --- |
| `classic` | `string[]` | Active `tg://proxy` links for classic mode. |
| `secure` | `string[]` | Active `tg://proxy` links for secure/DD mode. |
| `tls` | `string[]` | Active `tg://proxy` links for EE-TLS mode (one for each host+TLS domain). |

Link generation uses active config and enabled modes:

- `[general.links].public_host/public_port` have priority.
- Fallback host sources: listener `announce`, `announce_ip`, explicit listener `ip`.
- Legacy fallback: `listen_addr_ipv4` and `listen_addr_ipv6` when routable.
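All three lists use Telegram's standard `tg://proxy` URI scheme. An illustrative secure-mode entry, with placeholder host, port, and secret (the conventional `dd` prefix marks DD/secure mode), would look like:

```
tg://proxy?server=proxy.example.com&port=443&secret=dd00112233445566778899aabbccddeeff
```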
### `CreateUserResponse`

| Field | Type | Description |
| --- | --- | --- |
| `user` | `UserInfo` | Created or updated user view. |
| `secret` | `string` | Effective user secret. |
## Mutation Semantics

| Endpoint | Notes |
| --- | --- |
| `POST /v1/users` | Creates user and validates resulting config before atomic save. |
| `PATCH /v1/users/{username}` | Partial update of provided fields only. Missing fields remain unchanged. |
| `POST /v1/users/{username}/rotate-secret` | Replaces secret. Empty body is allowed and auto-generates secret. |
| `DELETE /v1/users/{username}` | Deletes user and related optional settings. Last user deletion is blocked. |

All mutating endpoints:

- Respect `read_only` mode.
- Accept optional `If-Match` for optimistic concurrency.
- Return new `revision` after successful write.
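A typical optimistic-concurrency round trip, with placeholder values, looks like this:

```
GET /v1/users/alice            -> 200, envelope carries "revision": "<hex>"
PATCH /v1/users/alice
  If-Match: <hex>
  {"max_tcp_conns": 128}       -> 200 with the new revision, or
                                  409 revision_conflict if the config changed
                                  in between (re-read, then retry)
```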
## Operational Notes

| Topic | Details |
| --- | --- |
| API startup | API binds only when `[server.api].enabled=true`. |
| Restart requirements | Changes in `server.api` settings require process restart. |
| Runtime apply path | Successful writes are picked up by existing config watcher/hot-reload path. |
| Exposure | Built-in TLS/mTLS is not provided. Use loopback bind + reverse proxy if needed. |
| Pagination | User list currently has no pagination/filtering. |
| Serialization side effect | Config comments/manual formatting are not preserved on write. |
```diff
@@ -6,6 +6,8 @@
 4. Open the config: `nano /etc/telemt.toml`.
 5. Copy the user secret from the [access.users] section and send it to the bot.
 6. Copy the tag returned by the bot, e.g. 1234567890abcdef1234567890abcdef.
+> [!WARNING]
+> The link issued by the bot will not work. Do not copy or use it!
 7. Uncomment the ad_tag parameter and enter the tag received from the bot.
 8. Uncomment/add the parameter use_middle_proxy = true.
```
```diff
@@ -61,4 +63,3 @@ metrics_whitelist = ["127.0.0.1/32", "::1/128", "0.0.0.0/0"]
 4. Metrics are available at SERVER_IP:9090/metrics.
 > [!WARNING]
 > "0.0.0.0/0" in metrics_whitelist opens access from any IP. Replace it with your own IP, e.g. "1.2.3.4".
```
```diff
@@ -115,6 +115,8 @@ WantedBy=multi-user.target
 **5.** To start automatically at system boot, run `systemctl enable telemt`

 **6.** To get the link, run `journalctl -u telemt -n -g "links" --no-pager -o cat | tac`
+> [!WARNING]
+> Only the command from step 6 produces a working link. Do not try to construct the link yourself or copy it from anywhere else!

 ---
```
src/api/config_store.rs (new file, 107 lines)
@@ -0,0 +1,107 @@

```rust
use std::io::Write;
use std::path::{Path, PathBuf};

use hyper::header::IF_MATCH;
use sha2::{Digest, Sha256};

use crate::config::ProxyConfig;

use super::model::ApiFailure;

/// Extract the expected revision from `If-Match`, stripping optional ETag-style quotes.
pub(super) fn parse_if_match(headers: &hyper::HeaderMap) -> Option<String> {
    headers
        .get(IF_MATCH)
        .and_then(|value| value.to_str().ok())
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .map(|value| value.trim_matches('"').to_string())
}

pub(super) async fn ensure_expected_revision(
    config_path: &Path,
    expected_revision: Option<&str>,
) -> Result<(), ApiFailure> {
    let Some(expected) = expected_revision else {
        return Ok(());
    };
    let current = current_revision(config_path).await?;
    if current != expected {
        return Err(ApiFailure::new(
            hyper::StatusCode::CONFLICT,
            "revision_conflict",
            "Config revision mismatch",
        ));
    }
    Ok(())
}

pub(super) async fn current_revision(config_path: &Path) -> Result<String, ApiFailure> {
    let content = tokio::fs::read_to_string(config_path)
        .await
        .map_err(|e| ApiFailure::internal(format!("failed to read config: {}", e)))?;
    Ok(compute_revision(&content))
}

/// Revision = SHA-256 hex digest of the raw config file content.
pub(super) fn compute_revision(content: &str) -> String {
    let mut hasher = Sha256::new();
    hasher.update(content.as_bytes());
    hex::encode(hasher.finalize())
}

pub(super) async fn load_config_from_disk(config_path: &Path) -> Result<ProxyConfig, ApiFailure> {
    let config_path = config_path.to_path_buf();
    tokio::task::spawn_blocking(move || ProxyConfig::load(config_path))
        .await
        .map_err(|e| ApiFailure::internal(format!("failed to join config loader: {}", e)))?
        .map_err(|e| ApiFailure::internal(format!("failed to load config: {}", e)))
}

pub(super) async fn save_config_to_disk(
    config_path: &Path,
    cfg: &ProxyConfig,
) -> Result<String, ApiFailure> {
    let serialized = toml::to_string_pretty(cfg)
        .map_err(|e| ApiFailure::internal(format!("failed to serialize config: {}", e)))?;
    write_atomic(config_path.to_path_buf(), serialized.clone()).await?;
    Ok(compute_revision(&serialized))
}

async fn write_atomic(path: PathBuf, contents: String) -> Result<(), ApiFailure> {
    tokio::task::spawn_blocking(move || write_atomic_sync(&path, &contents))
        .await
        .map_err(|e| ApiFailure::internal(format!("failed to join writer: {}", e)))?
        .map_err(|e| ApiFailure::internal(format!("failed to write config: {}", e)))
}

/// Classic atomic write: create a unique temp file, fsync it, rename it over the
/// target, then fsync the directory, so a crash never leaves a torn config.
fn write_atomic_sync(path: &Path, contents: &str) -> std::io::Result<()> {
    let parent = path.parent().unwrap_or_else(|| Path::new("."));
    std::fs::create_dir_all(parent)?;

    let tmp_name = format!(
        ".{}.tmp-{}",
        path.file_name()
            .and_then(|s| s.to_str())
            .unwrap_or("config.toml"),
        rand::random::<u64>()
    );
    let tmp_path = parent.join(tmp_name);

    let write_result = (|| {
        let mut file = std::fs::OpenOptions::new()
            .create_new(true)
            .write(true)
            .open(&tmp_path)?;
        file.write_all(contents.as_bytes())?;
        file.sync_all()?;
        std::fs::rename(&tmp_path, path)?;
        if let Ok(dir) = std::fs::File::open(parent) {
            let _ = dir.sync_all();
        }
        Ok(())
    })();

    // Best-effort cleanup of the temp file if anything above failed.
    if write_result.is_err() {
        let _ = std::fs::remove_file(&tmp_path);
    }
    write_result
}
```
src/api/mod.rs (new file, 443 lines)
@@ -0,0 +1,443 @@

```rust
use std::convert::Infallible;
use std::net::{IpAddr, SocketAddr};
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};

use http_body_util::{BodyExt, Full};
use hyper::body::{Bytes, Incoming};
use hyper::header::AUTHORIZATION;
use hyper::server::conn::http1;
use hyper::service::service_fn;
use hyper::{Method, Request, Response, StatusCode};
use serde::Serialize;
use serde::de::DeserializeOwned;
use tokio::net::TcpListener;
use tokio::sync::{Mutex, watch};
use tracing::{debug, info, warn};

use crate::config::ProxyConfig;
use crate::ip_tracker::UserIpTracker;
use crate::stats::Stats;
use crate::transport::middle_proxy::MePool;
use crate::transport::UpstreamManager;

mod config_store;
mod model;
mod runtime_stats;
mod users;

use config_store::{current_revision, parse_if_match};
use model::{
    ApiFailure, CreateUserRequest, ErrorBody, ErrorResponse, HealthData, PatchUserRequest,
    RotateSecretRequest, SuccessResponse, SummaryData,
};
use runtime_stats::{
    MinimalCacheEntry, build_dcs_data, build_me_writers_data, build_minimal_all_data,
    build_upstreams_data, build_zero_all_data,
};
use users::{create_user, delete_user, patch_user, rotate_secret, users_from_config};

/// State shared by every API connection handler.
#[derive(Clone)]
pub(super) struct ApiShared {
    pub(super) stats: Arc<Stats>,
    pub(super) ip_tracker: Arc<UserIpTracker>,
    pub(super) me_pool: Option<Arc<MePool>>,
    pub(super) upstream_manager: Arc<UpstreamManager>,
    pub(super) config_path: PathBuf,
    pub(super) startup_detected_ip_v4: Option<IpAddr>,
    pub(super) startup_detected_ip_v6: Option<IpAddr>,
    pub(super) mutation_lock: Arc<Mutex<()>>,
    pub(super) minimal_cache: Arc<Mutex<Option<MinimalCacheEntry>>>,
    pub(super) request_id: Arc<AtomicU64>,
}

impl ApiShared {
    fn next_request_id(&self) -> u64 {
        self.request_id.fetch_add(1, Ordering::Relaxed)
    }
}

pub async fn serve(
    listen: SocketAddr,
    stats: Arc<Stats>,
    ip_tracker: Arc<UserIpTracker>,
    me_pool: Option<Arc<MePool>>,
    upstream_manager: Arc<UpstreamManager>,
    config_rx: watch::Receiver<Arc<ProxyConfig>>,
    config_path: PathBuf,
    startup_detected_ip_v4: Option<IpAddr>,
    startup_detected_ip_v6: Option<IpAddr>,
) {
    let listener = match TcpListener::bind(listen).await {
        Ok(listener) => listener,
        Err(error) => {
            warn!(
                error = %error,
                listen = %listen,
                "Failed to bind API listener"
            );
            return;
        }
    };

    info!("API endpoint: http://{}/v1/*", listen);

    let shared = Arc::new(ApiShared {
        stats,
        ip_tracker,
        me_pool,
        upstream_manager,
        config_path,
        startup_detected_ip_v4,
        startup_detected_ip_v6,
        mutation_lock: Arc::new(Mutex::new(())),
        minimal_cache: Arc::new(Mutex::new(None)),
        request_id: Arc::new(AtomicU64::new(1)),
    });

    loop {
        let (stream, peer) = match listener.accept().await {
            Ok(v) => v,
            Err(error) => {
                warn!(error = %error, "API accept error");
                continue;
            }
        };

        let shared_conn = shared.clone();
        let config_rx_conn = config_rx.clone();
        tokio::spawn(async move {
            let svc = service_fn(move |req: Request<Incoming>| {
                let shared_req = shared_conn.clone();
                let config_rx_req = config_rx_conn.clone();
                async move { handle(req, peer, shared_req, config_rx_req).await }
            });
            if let Err(error) = http1::Builder::new()
                .serve_connection(hyper_util::rt::TokioIo::new(stream), svc)
                .await
            {
                debug!(error = %error, "API connection error");
            }
        });
    }
}

async fn handle(
    req: Request<Incoming>,
    peer: SocketAddr,
    shared: Arc<ApiShared>,
    config_rx: watch::Receiver<Arc<ProxyConfig>>,
) -> Result<Response<Full<Bytes>>, Infallible> {
    let request_id = shared.next_request_id();
    let cfg = config_rx.borrow().clone();
    let api_cfg = &cfg.server.api;

    if !api_cfg.enabled {
        return Ok(error_response(
            request_id,
            ApiFailure::new(
                StatusCode::SERVICE_UNAVAILABLE,
                "api_disabled",
                "API is disabled",
            ),
        ));
    }

    // Source IP allowlist: an empty whitelist allows all peers.
    if !api_cfg.whitelist.is_empty()
        && !api_cfg
            .whitelist
            .iter()
            .any(|net| net.contains(peer.ip()))
    {
        return Ok(error_response(
            request_id,
            ApiFailure::new(StatusCode::FORBIDDEN, "forbidden", "Source IP is not allowed"),
        ));
    }

    if !api_cfg.auth_header.is_empty() {
        let auth_ok = req
            .headers()
            .get(AUTHORIZATION)
            .and_then(|v| v.to_str().ok())
            .map(|v| v == api_cfg.auth_header)
            .unwrap_or(false);
        if !auth_ok {
            return Ok(error_response(
                request_id,
                ApiFailure::new(
                    StatusCode::UNAUTHORIZED,
                    "unauthorized",
                    "Missing or invalid Authorization header",
                ),
            ));
        }
    }

    let method = req.method().clone();
    let path = req.uri().path().to_string();
    let body_limit = api_cfg.request_body_limit_bytes;

    let result: Result<Response<Full<Bytes>>, ApiFailure> = async {
        match (method.as_str(), path.as_str()) {
            ("GET", "/v1/health") => {
                let revision = current_revision(&shared.config_path).await?;
                let data = HealthData {
                    status: "ok",
                    read_only: api_cfg.read_only,
                };
                Ok(success_response(StatusCode::OK, data, revision))
            }
            ("GET", "/v1/stats/summary") => {
                let revision = current_revision(&shared.config_path).await?;
                let data = SummaryData {
                    uptime_seconds: shared.stats.uptime_secs(),
                    connections_total: shared.stats.get_connects_all(),
                    connections_bad_total: shared.stats.get_connects_bad(),
                    handshake_timeouts_total: shared.stats.get_handshake_timeouts(),
                    configured_users: cfg.access.users.len(),
                };
                Ok(success_response(StatusCode::OK, data, revision))
            }
            ("GET", "/v1/stats/zero/all") => {
                let revision = current_revision(&shared.config_path).await?;
                let data = build_zero_all_data(&shared.stats, cfg.access.users.len());
                Ok(success_response(StatusCode::OK, data, revision))
            }
            ("GET", "/v1/stats/upstreams") => {
                let revision = current_revision(&shared.config_path).await?;
                let data = build_upstreams_data(shared.as_ref(), api_cfg);
                Ok(success_response(StatusCode::OK, data, revision))
            }
            ("GET", "/v1/stats/minimal/all") => {
                let revision = current_revision(&shared.config_path).await?;
                let data = build_minimal_all_data(shared.as_ref(), api_cfg).await;
                Ok(success_response(StatusCode::OK, data, revision))
            }
            ("GET", "/v1/stats/me-writers") => {
                let revision = current_revision(&shared.config_path).await?;
                let data = build_me_writers_data(shared.as_ref(), api_cfg).await;
                Ok(success_response(StatusCode::OK, data, revision))
            }
            ("GET", "/v1/stats/dcs") => {
                let revision = current_revision(&shared.config_path).await?;
                let data = build_dcs_data(shared.as_ref(), api_cfg).await;
                Ok(success_response(StatusCode::OK, data, revision))
            }
            ("GET", "/v1/stats/users") | ("GET", "/v1/users") => {
                let revision = current_revision(&shared.config_path).await?;
                let users = users_from_config(
                    &cfg,
                    &shared.stats,
                    &shared.ip_tracker,
                    shared.startup_detected_ip_v4,
                    shared.startup_detected_ip_v6,
                )
                .await;
                Ok(success_response(StatusCode::OK, users, revision))
            }
            ("POST", "/v1/users") => {
                if api_cfg.read_only {
                    return Ok(error_response(
                        request_id,
                        ApiFailure::new(
                            StatusCode::FORBIDDEN,
                            "read_only",
                            "API runs in read-only mode",
                        ),
                    ));
                }
                let expected_revision = parse_if_match(req.headers());
                let body = read_json::<CreateUserRequest>(req.into_body(), body_limit).await?;
                let (data, revision) = create_user(body, expected_revision, &shared).await?;
                Ok(success_response(StatusCode::CREATED, data, revision))
            }
            _ => {
                // `/v1/users/{username}` and `/v1/users/{username}/rotate-secret`.
                // `user` may still contain '/' here; only the rotate-secret
                // sub-route below accepts a multi-segment remainder.
                if let Some(user) = path.strip_prefix("/v1/users/")
                    && !user.is_empty()
                {
                    if method == Method::GET && !user.contains('/') {
                        let revision = current_revision(&shared.config_path).await?;
                        let users = users_from_config(
                            &cfg,
                            &shared.stats,
                            &shared.ip_tracker,
                            shared.startup_detected_ip_v4,
                            shared.startup_detected_ip_v6,
                        )
                        .await;
                        if let Some(user_info) = users.into_iter().find(|entry| entry.username == user)
                        {
                            return Ok(success_response(StatusCode::OK, user_info, revision));
                        }
                        return Ok(error_response(
                            request_id,
                            ApiFailure::new(StatusCode::NOT_FOUND, "not_found", "User not found"),
                        ));
                    }
                    if method == Method::PATCH && !user.contains('/') {
                        if api_cfg.read_only {
                            return Ok(error_response(
                                request_id,
                                ApiFailure::new(
                                    StatusCode::FORBIDDEN,
                                    "read_only",
                                    "API runs in read-only mode",
                                ),
                            ));
                        }
                        let expected_revision = parse_if_match(req.headers());
                        let body = read_json::<PatchUserRequest>(req.into_body(), body_limit).await?;
                        let (data, revision) =
                            patch_user(user, body, expected_revision, &shared).await?;
                        return Ok(success_response(StatusCode::OK, data, revision));
                    }
                    if method == Method::DELETE && !user.contains('/') {
                        if api_cfg.read_only {
                            return Ok(error_response(
                                request_id,
                                ApiFailure::new(
                                    StatusCode::FORBIDDEN,
                                    "read_only",
                                    "API runs in read-only mode",
                                ),
                            ));
                        }
                        let expected_revision = parse_if_match(req.headers());
                        let (deleted_user, revision) =
                            delete_user(user, expected_revision, &shared).await?;
                        return Ok(success_response(StatusCode::OK, deleted_user, revision));
                    }
                    if method == Method::POST
                        && let Some(base_user) = user.strip_suffix("/rotate-secret")
                        && !base_user.is_empty()
                        && !base_user.contains('/')
                    {
                        if api_cfg.read_only {
                            return Ok(error_response(
                                request_id,
                                ApiFailure::new(
                                    StatusCode::FORBIDDEN,
                                    "read_only",
                                    "API runs in read-only mode",
                                ),
                            ));
                        }
                        let expected_revision = parse_if_match(req.headers());
                        let body =
                            read_optional_json::<RotateSecretRequest>(req.into_body(), body_limit)
                                .await?;
                        let (data, revision) =
                            rotate_secret(base_user, body.unwrap_or_default(), expected_revision, &shared)
                                .await?;
                        return Ok(success_response(StatusCode::OK, data, revision));
                    }
                    if method == Method::POST {
                        return Ok(error_response(
                            request_id,
                            ApiFailure::new(StatusCode::NOT_FOUND, "not_found", "Route not found"),
                        ));
                    }
                    return Ok(error_response(
                        request_id,
                        ApiFailure::new(
                            StatusCode::METHOD_NOT_ALLOWED,
                            "method_not_allowed",
                            "Unsupported HTTP method for this route",
                        ),
                    ));
                }
                Ok(error_response(
                    request_id,
                    ApiFailure::new(StatusCode::NOT_FOUND, "not_found", "Route not found"),
                ))
            }
        }
    }
    .await;

    match result {
        Ok(resp) => Ok(resp),
        Err(error) => Ok(error_response(request_id, error)),
    }
}

fn success_response<T: Serialize>(
    status: StatusCode,
    data: T,
    revision: String,
) -> Response<Full<Bytes>> {
    let payload = SuccessResponse {
        ok: true,
        data,
        revision,
    };
    let body = serde_json::to_vec(&payload).unwrap_or_else(|_| b"{\"ok\":false}".to_vec());
    Response::builder()
        .status(status)
        .header("content-type", "application/json; charset=utf-8")
        .body(Full::new(Bytes::from(body)))
        .unwrap()
}

fn error_response(request_id: u64, failure: ApiFailure) -> Response<Full<Bytes>> {
    let payload = ErrorResponse {
        ok: false,
        error: ErrorBody {
            code: failure.code,
            message: failure.message,
        },
        request_id,
    };
    let body = serde_json::to_vec(&payload).unwrap_or_else(|_| {
        format!(
            "{{\"ok\":false,\"error\":{{\"code\":\"internal_error\",\"message\":\"serialization failed\"}},\"request_id\":{}}}",
            request_id
        )
        .into_bytes()
    });
    Response::builder()
        .status(failure.status)
        .header("content-type", "application/json; charset=utf-8")
        .body(Full::new(Bytes::from(body)))
        .unwrap()
}

async fn read_json<T: DeserializeOwned>(body: Incoming, limit: usize) -> Result<T, ApiFailure> {
    let bytes = read_body_with_limit(body, limit).await?;
    serde_json::from_slice(&bytes).map_err(|_| ApiFailure::bad_request("Invalid JSON body"))
}

async fn read_optional_json<T: DeserializeOwned>(
    body: Incoming,
    limit: usize,
) -> Result<Option<T>, ApiFailure> {
    let bytes = read_body_with_limit(body, limit).await?;
    if bytes.is_empty() {
        return Ok(None);
    }
    serde_json::from_slice(&bytes)
        .map(Some)
        .map_err(|_| ApiFailure::bad_request("Invalid JSON body"))
}

/// Collect the request body, rejecting it as soon as the running total
/// exceeds `request_body_limit_bytes`.
async fn read_body_with_limit(body: Incoming, limit: usize) -> Result<Vec<u8>, ApiFailure> {
    let mut collected = Vec::new();
    let mut body = body;
    while let Some(frame_result) = body.frame().await {
        let frame = frame_result.map_err(|_| ApiFailure::bad_request("Invalid request body"))?;
        if let Some(chunk) = frame.data_ref() {
            if collected.len().saturating_add(chunk.len()) > limit {
                return Err(ApiFailure::new(
                    StatusCode::PAYLOAD_TOO_LARGE,
                    "payload_too_large",
                    format!("Body exceeds {} bytes", limit),
                ));
            }
            collected.extend_from_slice(chunk);
        }
    }
    Ok(collected)
}
```
src/api/model.rs (new file, 439 lines)
@@ -0,0 +1,439 @@

```rust
use chrono::{DateTime, Utc};
use hyper::StatusCode;
use rand::Rng;
use serde::{Deserialize, Serialize};

const MAX_USERNAME_LEN: usize = 64;

#[derive(Debug)]
pub(super) struct ApiFailure {
    pub(super) status: StatusCode,
    pub(super) code: &'static str,
    pub(super) message: String,
}

impl ApiFailure {
    pub(super) fn new(status: StatusCode, code: &'static str, message: impl Into<String>) -> Self {
        Self {
            status,
            code,
            message: message.into(),
        }
    }

    pub(super) fn internal(message: impl Into<String>) -> Self {
        Self::new(StatusCode::INTERNAL_SERVER_ERROR, "internal_error", message)
    }

    pub(super) fn bad_request(message: impl Into<String>) -> Self {
        Self::new(StatusCode::BAD_REQUEST, "bad_request", message)
    }
}

#[derive(Serialize)]
pub(super) struct ErrorBody {
    pub(super) code: &'static str,
    pub(super) message: String,
}

#[derive(Serialize)]
pub(super) struct ErrorResponse {
    pub(super) ok: bool,
    pub(super) error: ErrorBody,
    pub(super) request_id: u64,
}

#[derive(Serialize)]
pub(super) struct SuccessResponse<T> {
    pub(super) ok: bool,
    pub(super) data: T,
    pub(super) revision: String,
}

#[derive(Serialize)]
pub(super) struct HealthData {
    pub(super) status: &'static str,
    pub(super) read_only: bool,
}

#[derive(Serialize)]
pub(super) struct SummaryData {
    pub(super) uptime_seconds: f64,
    pub(super) connections_total: u64,
    pub(super) connections_bad_total: u64,
    pub(super) handshake_timeouts_total: u64,
    pub(super) configured_users: usize,
}

#[derive(Serialize, Clone)]
pub(super) struct ZeroCodeCount {
    pub(super) code: i32,
    pub(super) total: u64,
}

#[derive(Serialize, Clone)]
pub(super) struct ZeroCoreData {
    pub(super) uptime_seconds: f64,
    pub(super) connections_total: u64,
    pub(super) connections_bad_total: u64,
    pub(super) handshake_timeouts_total: u64,
    pub(super) configured_users: usize,
    pub(super) telemetry_core_enabled: bool,
    pub(super) telemetry_user_enabled: bool,
    pub(super) telemetry_me_level: String,
}

#[derive(Serialize, Clone)]
pub(super) struct ZeroUpstreamData {
    pub(super) connect_attempt_total: u64,
    pub(super) connect_success_total: u64,
    pub(super) connect_fail_total: u64,
    pub(super) connect_failfast_hard_error_total: u64,
    pub(super) connect_attempts_bucket_1: u64,
    pub(super) connect_attempts_bucket_2: u64,
    pub(super) connect_attempts_bucket_3_4: u64,
    pub(super) connect_attempts_bucket_gt_4: u64,
    pub(super) connect_duration_success_bucket_le_100ms: u64,
    pub(super) connect_duration_success_bucket_101_500ms: u64,
    pub(super) connect_duration_success_bucket_501_1000ms: u64,
    pub(super) connect_duration_success_bucket_gt_1000ms: u64,
    pub(super) connect_duration_fail_bucket_le_100ms: u64,
    pub(super) connect_duration_fail_bucket_101_500ms: u64,
    pub(super) connect_duration_fail_bucket_501_1000ms: u64,
    pub(super) connect_duration_fail_bucket_gt_1000ms: u64,
}

#[derive(Serialize, Clone)]
pub(super) struct UpstreamDcStatus {
    pub(super) dc: i16,
    pub(super) latency_ema_ms: Option<f64>,
    pub(super) ip_preference: &'static str,
}

#[derive(Serialize, Clone)]
pub(super) struct UpstreamStatus {
    pub(super) upstream_id: usize,
    pub(super) route_kind: &'static str,
    pub(super) address: String,
    pub(super) weight: u16,
    pub(super) scopes: String,
    pub(super) healthy: bool,
    pub(super) fails: u32,
    pub(super) last_check_age_secs: u64,
    pub(super) effective_latency_ms: Option<f64>,
    pub(super) dc: Vec<UpstreamDcStatus>,
}

#[derive(Serialize, Clone)]
pub(super) struct UpstreamSummaryData {
    pub(super) configured_total: usize,
    pub(super) healthy_total: usize,
    pub(super) unhealthy_total: usize,
    pub(super) direct_total: usize,
    pub(super) socks4_total: usize,
    pub(super) socks5_total: usize,
}

#[derive(Serialize, Clone)]
pub(super) struct UpstreamsData {
    pub(super) enabled: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) reason: Option<&'static str>,
    pub(super) generated_at_epoch_secs: u64,
    pub(super) zero: ZeroUpstreamData,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) summary: Option<UpstreamSummaryData>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) upstreams: Option<Vec<UpstreamStatus>>,
}

#[derive(Serialize, Clone)]
pub(super) struct ZeroMiddleProxyData {
    pub(super) keepalive_sent_total: u64,
    pub(super) keepalive_failed_total: u64,
    pub(super) keepalive_pong_total: u64,
    pub(super) keepalive_timeout_total: u64,
    pub(super) rpc_proxy_req_signal_sent_total: u64,
    pub(super) rpc_proxy_req_signal_failed_total: u64,
    pub(super) rpc_proxy_req_signal_skipped_no_meta_total: u64,
    pub(super) rpc_proxy_req_signal_response_total: u64,
    pub(super) rpc_proxy_req_signal_close_sent_total: u64,
    pub(super) reconnect_attempt_total: u64,
    pub(super) reconnect_success_total: u64,
    pub(super) handshake_reject_total: u64,
    pub(super) handshake_error_codes: Vec<ZeroCodeCount>,
    pub(super) reader_eof_total: u64,
    pub(super) idle_close_by_peer_total: u64,
    pub(super) route_drop_no_conn_total: u64,
    pub(super) route_drop_channel_closed_total: u64,
    pub(super) route_drop_queue_full_total: u64,
    pub(super) route_drop_queue_full_base_total: u64,
    pub(super) route_drop_queue_full_high_total: u64,
    pub(super) socks_kdf_strict_reject_total: u64,
    pub(super) socks_kdf_compat_fallback_total: u64,
    pub(super) endpoint_quarantine_total: u64,
    pub(super) kdf_drift_total: u64,
    pub(super) kdf_port_only_drift_total: u64,
    pub(super) hardswap_pending_reuse_total: u64,
    pub(super) hardswap_pending_ttl_expired_total: u64,
    pub(super) single_endpoint_outage_enter_total: u64,
    pub(super) single_endpoint_outage_exit_total: u64,
    pub(super) single_endpoint_outage_reconnect_attempt_total: u64,
    pub(super) single_endpoint_outage_reconnect_success_total: u64,
    pub(super) single_endpoint_quarantine_bypass_total: u64,
    pub(super) single_endpoint_shadow_rotate_total: u64,
    pub(super) single_endpoint_shadow_rotate_skipped_quarantine_total: u64,
    pub(super) floor_mode_switch_total: u64,
    pub(super) floor_mode_switch_static_to_adaptive_total: u64,
    pub(super) floor_mode_switch_adaptive_to_static_total: u64,
}

#[derive(Serialize, Clone)]
pub(super) struct ZeroPoolData {
    pub(super) pool_swap_total: u64,
    pub(super) pool_drain_active: u64,
    pub(super) pool_force_close_total: u64,
    pub(super) pool_stale_pick_total: u64,
    pub(super) writer_removed_total: u64,
    pub(super) writer_removed_unexpected_total: u64,
    pub(super) refill_triggered_total: u64,
    pub(super) refill_skipped_inflight_total: u64,
    pub(super) refill_failed_total: u64,
    pub(super) writer_restored_same_endpoint_total: u64,
    pub(super) writer_restored_fallback_total: u64,
}

#[derive(Serialize, Clone)]
pub(super) struct ZeroDesyncData {
    pub(super) secure_padding_invalid_total: u64,
    pub(super) desync_total: u64,
    pub(super) desync_full_logged_total: u64,
    pub(super) desync_suppressed_total: u64,
    pub(super) desync_frames_bucket_0: u64,
    pub(super) desync_frames_bucket_1_2: u64,
    pub(super) desync_frames_bucket_3_10: u64,
    pub(super) desync_frames_bucket_gt_10: u64,
}

#[derive(Serialize, Clone)]
pub(super) struct ZeroAllData {
    pub(super) generated_at_epoch_secs: u64,
    pub(super) core: ZeroCoreData,
    pub(super) upstream: ZeroUpstreamData,
    pub(super) middle_proxy: ZeroMiddleProxyData,
    pub(super) pool: ZeroPoolData,
    pub(super) desync: ZeroDesyncData,
}

#[derive(Serialize, Clone)]
pub(super) struct MeWritersSummary {
    pub(super) configured_dc_groups: usize,
    pub(super) configured_endpoints: usize,
    pub(super) available_endpoints: usize,
    pub(super) available_pct: f64,
    pub(super) required_writers: usize,
    pub(super) alive_writers: usize,
    pub(super) coverage_pct: f64,
}

#[derive(Serialize, Clone)]
pub(super) struct MeWriterStatus {
    pub(super) writer_id: u64,
    pub(super) dc: Option<i16>,
    pub(super) endpoint: String,
    pub(super) generation: u64,
    pub(super) state: &'static str,
    pub(super) draining: bool,
    pub(super) degraded: bool,
    pub(super) bound_clients: usize,
    pub(super) idle_for_secs: Option<u64>,
    pub(super) rtt_ema_ms: Option<f64>,
}

#[derive(Serialize, Clone)]
pub(super) struct MeWritersData {
    pub(super) middle_proxy_enabled: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) reason: Option<&'static str>,
    pub(super) generated_at_epoch_secs: u64,
    pub(super) summary: MeWritersSummary,
    pub(super) writers: Vec<MeWriterStatus>,
}

#[derive(Serialize, Clone)]
pub(super) struct DcStatus {
    pub(super) dc: i16,
    pub(super) endpoints: Vec<String>,
    pub(super) available_endpoints: usize,
    pub(super) available_pct: f64,
    pub(super) required_writers: usize,
    pub(super) alive_writers: usize,
    pub(super) coverage_pct: f64,
    pub(super) rtt_ms: Option<f64>,
    pub(super) load: usize,
}

#[derive(Serialize, Clone)]
pub(super) struct DcStatusData {
    pub(super) middle_proxy_enabled: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) reason: Option<&'static str>,
    pub(super) generated_at_epoch_secs: u64,
    pub(super) dcs: Vec<DcStatus>,
}

#[derive(Serialize, Clone)]
pub(super) struct MinimalQuarantineData {
    pub(super) endpoint: String,
    pub(super) remaining_ms: u64,
}

#[derive(Serialize, Clone)]
pub(super) struct MinimalDcPathData {
    pub(super) dc: i16,
    pub(super) ip_preference: Option<&'static str>,
    pub(super) selected_addr_v4: Option<String>,
    pub(super) selected_addr_v6: Option<String>,
}

#[derive(Serialize, Clone)]
pub(super) struct MinimalMeRuntimeData {
    pub(super) active_generation: u64,
    pub(super) warm_generation: u64,
    pub(super) pending_hardswap_generation: u64,
    pub(super) pending_hardswap_age_secs: Option<u64>,
    pub(super) hardswap_enabled: bool,
    pub(super) floor_mode: &'static str,
    pub(super) adaptive_floor_idle_secs: u64,
    pub(super) adaptive_floor_min_writers_single_endpoint: u8,
    pub(super) adaptive_floor_recover_grace_secs: u64,
    pub(super) me_keepalive_enabled: bool,
    pub(super) me_keepalive_interval_secs: u64,
    pub(super) me_keepalive_jitter_secs: u64,
    pub(super) me_keepalive_payload_random: bool,
    pub(super) rpc_proxy_req_every_secs: u64,
    pub(super) me_reconnect_max_concurrent_per_dc: u32,
    pub(super) me_reconnect_backoff_base_ms: u64,
    pub(super) me_reconnect_backoff_cap_ms: u64,
    pub(super) me_reconnect_fast_retry_count: u32,
    pub(super) me_pool_drain_ttl_secs: u64,
    pub(super) me_pool_force_close_secs: u64,
    pub(super) me_pool_min_fresh_ratio: f32,
    pub(super) me_bind_stale_mode: &'static str,
    pub(super) me_bind_stale_ttl_secs: u64,
    pub(super) me_single_endpoint_shadow_writers: u8,
    pub(super) me_single_endpoint_outage_mode_enabled: bool,
    pub(super) me_single_endpoint_outage_disable_quarantine: bool,
    pub(super) me_single_endpoint_outage_backoff_min_ms: u64,
    pub(super) me_single_endpoint_outage_backoff_max_ms: u64,
    pub(super) me_single_endpoint_shadow_rotate_every_secs: u64,
    pub(super) me_deterministic_writer_sort: bool,
    pub(super) me_socks_kdf_policy: &'static str,
    pub(super) quarantined_endpoints_total: usize,
    pub(super) quarantined_endpoints: Vec<MinimalQuarantineData>,
}

#[derive(Serialize, Clone)]
```
|
||||
pub(super) struct MinimalAllPayload {
|
||||
pub(super) me_writers: MeWritersData,
|
||||
pub(super) dcs: DcStatusData,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) me_runtime: Option<MinimalMeRuntimeData>,
|
||||
pub(super) network_path: Vec<MinimalDcPathData>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub(super) struct MinimalAllData {
|
||||
pub(super) enabled: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) reason: Option<&'static str>,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) data: Option<MinimalAllPayload>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct UserLinks {
|
||||
pub(super) classic: Vec<String>,
|
||||
pub(super) secure: Vec<String>,
|
||||
pub(super) tls: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct UserInfo {
|
||||
pub(super) username: String,
|
||||
pub(super) user_ad_tag: Option<String>,
|
||||
pub(super) max_tcp_conns: Option<usize>,
|
||||
pub(super) expiration_rfc3339: Option<String>,
|
||||
pub(super) data_quota_bytes: Option<u64>,
|
||||
pub(super) max_unique_ips: Option<usize>,
|
||||
pub(super) current_connections: u64,
|
||||
pub(super) active_unique_ips: usize,
|
||||
pub(super) total_octets: u64,
|
||||
pub(super) links: UserLinks,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct CreateUserResponse {
|
||||
pub(super) user: UserInfo,
|
||||
pub(super) secret: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub(super) struct CreateUserRequest {
|
||||
pub(super) username: String,
|
||||
pub(super) secret: Option<String>,
|
||||
pub(super) user_ad_tag: Option<String>,
|
||||
pub(super) max_tcp_conns: Option<usize>,
|
||||
pub(super) expiration_rfc3339: Option<String>,
|
||||
pub(super) data_quota_bytes: Option<u64>,
|
||||
pub(super) max_unique_ips: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub(super) struct PatchUserRequest {
|
||||
pub(super) secret: Option<String>,
|
||||
pub(super) user_ad_tag: Option<String>,
|
||||
pub(super) max_tcp_conns: Option<usize>,
|
||||
pub(super) expiration_rfc3339: Option<String>,
|
||||
pub(super) data_quota_bytes: Option<u64>,
|
||||
pub(super) max_unique_ips: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Default, Deserialize)]
|
||||
pub(super) struct RotateSecretRequest {
|
||||
pub(super) secret: Option<String>,
|
||||
}
|
||||
|
||||
pub(super) fn parse_optional_expiration(
|
||||
value: Option<&str>,
|
||||
) -> Result<Option<DateTime<Utc>>, ApiFailure> {
|
||||
let Some(raw) = value else {
|
||||
return Ok(None);
|
||||
};
|
||||
let parsed = DateTime::parse_from_rfc3339(raw)
|
||||
.map_err(|_| ApiFailure::bad_request("expiration_rfc3339 must be valid RFC3339"))?;
|
||||
Ok(Some(parsed.with_timezone(&Utc)))
|
||||
}
|
||||
|
||||
pub(super) fn is_valid_user_secret(secret: &str) -> bool {
|
||||
secret.len() == 32 && secret.chars().all(|c| c.is_ascii_hexdigit())
|
||||
}
|
||||
|
||||
pub(super) fn is_valid_ad_tag(tag: &str) -> bool {
|
||||
tag.len() == 32 && tag.chars().all(|c| c.is_ascii_hexdigit())
|
||||
}
|
||||
|
||||
pub(super) fn is_valid_username(user: &str) -> bool {
|
||||
!user.is_empty()
|
||||
&& user.len() <= MAX_USERNAME_LEN
|
||||
&& user
|
||||
.chars()
|
||||
.all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-' | '.'))
|
||||
}
|
||||
|
||||
pub(super) fn random_user_secret() -> String {
|
||||
let mut bytes = [0u8; 16];
|
||||
rand::rng().fill(&mut bytes);
|
||||
hex::encode(bytes)
|
||||
}
|
||||
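Note: the two secret helpers above are consistent by construction: random_user_secret hex-encodes 16 random bytes, which is exactly the 32-hex-character shape that is_valid_user_secret accepts. A minimal sketch of that invariant as a unit test (hypothetical test module, not part of the diff; it only assumes the two helpers are in scope):

#[cfg(test)]
mod secret_shape_tests {
    use super::{is_valid_user_secret, random_user_secret};

    // 16 random bytes always hex-encode to 32 lowercase hex chars,
    // so a freshly generated secret must pass the validator.
    #[test]
    fn generated_secret_passes_validation() {
        for _ in 0..100 {
            let secret = random_user_secret();
            assert_eq!(secret.len(), 32);
            assert!(is_valid_user_secret(&secret));
        }
    }
}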
484
src/api/runtime_stats.rs
Normal file
@@ -0,0 +1,484 @@
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};

use crate::config::ApiConfig;
use crate::stats::Stats;
use crate::transport::upstream::IpPreference;
use crate::transport::UpstreamRouteKind;

use super::ApiShared;
use super::model::{
    DcStatus, DcStatusData, MeWriterStatus, MeWritersData, MeWritersSummary, MinimalAllData,
    MinimalAllPayload, MinimalDcPathData, MinimalMeRuntimeData, MinimalQuarantineData,
    UpstreamDcStatus, UpstreamStatus, UpstreamSummaryData, UpstreamsData, ZeroAllData,
    ZeroCodeCount, ZeroCoreData, ZeroDesyncData, ZeroMiddleProxyData, ZeroPoolData,
    ZeroUpstreamData,
};

const FEATURE_DISABLED_REASON: &str = "feature_disabled";
const SOURCE_UNAVAILABLE_REASON: &str = "source_unavailable";

#[derive(Clone)]
pub(crate) struct MinimalCacheEntry {
    pub(super) expires_at: Instant,
    pub(super) payload: MinimalAllPayload,
    pub(super) generated_at_epoch_secs: u64,
}

pub(super) fn build_zero_all_data(stats: &Stats, configured_users: usize) -> ZeroAllData {
    let telemetry = stats.telemetry_policy();
    let handshake_error_codes = stats
        .get_me_handshake_error_code_counts()
        .into_iter()
        .map(|(code, total)| ZeroCodeCount { code, total })
        .collect();

    ZeroAllData {
        generated_at_epoch_secs: now_epoch_secs(),
        core: ZeroCoreData {
            uptime_seconds: stats.uptime_secs(),
            connections_total: stats.get_connects_all(),
            connections_bad_total: stats.get_connects_bad(),
            handshake_timeouts_total: stats.get_handshake_timeouts(),
            configured_users,
            telemetry_core_enabled: telemetry.core_enabled,
            telemetry_user_enabled: telemetry.user_enabled,
            telemetry_me_level: telemetry.me_level.to_string(),
        },
        upstream: build_zero_upstream_data(stats),
        middle_proxy: ZeroMiddleProxyData {
            keepalive_sent_total: stats.get_me_keepalive_sent(),
            keepalive_failed_total: stats.get_me_keepalive_failed(),
            keepalive_pong_total: stats.get_me_keepalive_pong(),
            keepalive_timeout_total: stats.get_me_keepalive_timeout(),
            rpc_proxy_req_signal_sent_total: stats.get_me_rpc_proxy_req_signal_sent_total(),
            rpc_proxy_req_signal_failed_total: stats.get_me_rpc_proxy_req_signal_failed_total(),
            rpc_proxy_req_signal_skipped_no_meta_total: stats
                .get_me_rpc_proxy_req_signal_skipped_no_meta_total(),
            rpc_proxy_req_signal_response_total: stats.get_me_rpc_proxy_req_signal_response_total(),
            rpc_proxy_req_signal_close_sent_total: stats
                .get_me_rpc_proxy_req_signal_close_sent_total(),
            reconnect_attempt_total: stats.get_me_reconnect_attempts(),
            reconnect_success_total: stats.get_me_reconnect_success(),
            handshake_reject_total: stats.get_me_handshake_reject_total(),
            handshake_error_codes,
            reader_eof_total: stats.get_me_reader_eof_total(),
            idle_close_by_peer_total: stats.get_me_idle_close_by_peer_total(),
            route_drop_no_conn_total: stats.get_me_route_drop_no_conn(),
            route_drop_channel_closed_total: stats.get_me_route_drop_channel_closed(),
            route_drop_queue_full_total: stats.get_me_route_drop_queue_full(),
            route_drop_queue_full_base_total: stats.get_me_route_drop_queue_full_base(),
            route_drop_queue_full_high_total: stats.get_me_route_drop_queue_full_high(),
            socks_kdf_strict_reject_total: stats.get_me_socks_kdf_strict_reject(),
            socks_kdf_compat_fallback_total: stats.get_me_socks_kdf_compat_fallback(),
            endpoint_quarantine_total: stats.get_me_endpoint_quarantine_total(),
            kdf_drift_total: stats.get_me_kdf_drift_total(),
            kdf_port_only_drift_total: stats.get_me_kdf_port_only_drift_total(),
            hardswap_pending_reuse_total: stats.get_me_hardswap_pending_reuse_total(),
            hardswap_pending_ttl_expired_total: stats.get_me_hardswap_pending_ttl_expired_total(),
            single_endpoint_outage_enter_total: stats.get_me_single_endpoint_outage_enter_total(),
            single_endpoint_outage_exit_total: stats.get_me_single_endpoint_outage_exit_total(),
            single_endpoint_outage_reconnect_attempt_total: stats
                .get_me_single_endpoint_outage_reconnect_attempt_total(),
            single_endpoint_outage_reconnect_success_total: stats
                .get_me_single_endpoint_outage_reconnect_success_total(),
            single_endpoint_quarantine_bypass_total: stats
                .get_me_single_endpoint_quarantine_bypass_total(),
            single_endpoint_shadow_rotate_total: stats.get_me_single_endpoint_shadow_rotate_total(),
            single_endpoint_shadow_rotate_skipped_quarantine_total: stats
                .get_me_single_endpoint_shadow_rotate_skipped_quarantine_total(),
            floor_mode_switch_total: stats.get_me_floor_mode_switch_total(),
            floor_mode_switch_static_to_adaptive_total: stats
                .get_me_floor_mode_switch_static_to_adaptive_total(),
            floor_mode_switch_adaptive_to_static_total: stats
                .get_me_floor_mode_switch_adaptive_to_static_total(),
        },
        pool: ZeroPoolData {
            pool_swap_total: stats.get_pool_swap_total(),
            pool_drain_active: stats.get_pool_drain_active(),
            pool_force_close_total: stats.get_pool_force_close_total(),
            pool_stale_pick_total: stats.get_pool_stale_pick_total(),
            writer_removed_total: stats.get_me_writer_removed_total(),
            writer_removed_unexpected_total: stats.get_me_writer_removed_unexpected_total(),
            refill_triggered_total: stats.get_me_refill_triggered_total(),
            refill_skipped_inflight_total: stats.get_me_refill_skipped_inflight_total(),
            refill_failed_total: stats.get_me_refill_failed_total(),
            writer_restored_same_endpoint_total: stats.get_me_writer_restored_same_endpoint_total(),
            writer_restored_fallback_total: stats.get_me_writer_restored_fallback_total(),
        },
        desync: ZeroDesyncData {
            secure_padding_invalid_total: stats.get_secure_padding_invalid(),
            desync_total: stats.get_desync_total(),
            desync_full_logged_total: stats.get_desync_full_logged(),
            desync_suppressed_total: stats.get_desync_suppressed(),
            desync_frames_bucket_0: stats.get_desync_frames_bucket_0(),
            desync_frames_bucket_1_2: stats.get_desync_frames_bucket_1_2(),
            desync_frames_bucket_3_10: stats.get_desync_frames_bucket_3_10(),
            desync_frames_bucket_gt_10: stats.get_desync_frames_bucket_gt_10(),
        },
    }
}

fn build_zero_upstream_data(stats: &Stats) -> ZeroUpstreamData {
    ZeroUpstreamData {
        connect_attempt_total: stats.get_upstream_connect_attempt_total(),
        connect_success_total: stats.get_upstream_connect_success_total(),
        connect_fail_total: stats.get_upstream_connect_fail_total(),
        connect_failfast_hard_error_total: stats.get_upstream_connect_failfast_hard_error_total(),
        connect_attempts_bucket_1: stats.get_upstream_connect_attempts_bucket_1(),
        connect_attempts_bucket_2: stats.get_upstream_connect_attempts_bucket_2(),
        connect_attempts_bucket_3_4: stats.get_upstream_connect_attempts_bucket_3_4(),
        connect_attempts_bucket_gt_4: stats.get_upstream_connect_attempts_bucket_gt_4(),
        connect_duration_success_bucket_le_100ms: stats
            .get_upstream_connect_duration_success_bucket_le_100ms(),
        connect_duration_success_bucket_101_500ms: stats
            .get_upstream_connect_duration_success_bucket_101_500ms(),
        connect_duration_success_bucket_501_1000ms: stats
            .get_upstream_connect_duration_success_bucket_501_1000ms(),
        connect_duration_success_bucket_gt_1000ms: stats
            .get_upstream_connect_duration_success_bucket_gt_1000ms(),
        connect_duration_fail_bucket_le_100ms: stats.get_upstream_connect_duration_fail_bucket_le_100ms(),
        connect_duration_fail_bucket_101_500ms: stats
            .get_upstream_connect_duration_fail_bucket_101_500ms(),
        connect_duration_fail_bucket_501_1000ms: stats
            .get_upstream_connect_duration_fail_bucket_501_1000ms(),
        connect_duration_fail_bucket_gt_1000ms: stats
            .get_upstream_connect_duration_fail_bucket_gt_1000ms(),
    }
}

pub(super) fn build_upstreams_data(shared: &ApiShared, api_cfg: &ApiConfig) -> UpstreamsData {
    let generated_at_epoch_secs = now_epoch_secs();
    let zero = build_zero_upstream_data(&shared.stats);
    if !api_cfg.minimal_runtime_enabled {
        return UpstreamsData {
            enabled: false,
            reason: Some(FEATURE_DISABLED_REASON),
            generated_at_epoch_secs,
            zero,
            summary: None,
            upstreams: None,
        };
    }

    let Some(snapshot) = shared.upstream_manager.try_api_snapshot() else {
        return UpstreamsData {
            enabled: true,
            reason: Some(SOURCE_UNAVAILABLE_REASON),
            generated_at_epoch_secs,
            zero,
            summary: None,
            upstreams: None,
        };
    };

    let summary = UpstreamSummaryData {
        configured_total: snapshot.summary.configured_total,
        healthy_total: snapshot.summary.healthy_total,
        unhealthy_total: snapshot.summary.unhealthy_total,
        direct_total: snapshot.summary.direct_total,
        socks4_total: snapshot.summary.socks4_total,
        socks5_total: snapshot.summary.socks5_total,
    };
    let upstreams = snapshot
        .upstreams
        .into_iter()
        .map(|upstream| UpstreamStatus {
            upstream_id: upstream.upstream_id,
            route_kind: map_route_kind(upstream.route_kind),
            address: upstream.address,
            weight: upstream.weight,
            scopes: upstream.scopes,
            healthy: upstream.healthy,
            fails: upstream.fails,
            last_check_age_secs: upstream.last_check_age_secs,
            effective_latency_ms: upstream.effective_latency_ms,
            dc: upstream
                .dc
                .into_iter()
                .map(|dc| UpstreamDcStatus {
                    dc: dc.dc,
                    latency_ema_ms: dc.latency_ema_ms,
                    ip_preference: map_ip_preference(dc.ip_preference),
                })
                .collect(),
        })
        .collect();

    UpstreamsData {
        enabled: true,
        reason: None,
        generated_at_epoch_secs,
        zero,
        summary: Some(summary),
        upstreams: Some(upstreams),
    }
}

pub(super) async fn build_minimal_all_data(
    shared: &ApiShared,
    api_cfg: &ApiConfig,
) -> MinimalAllData {
    let now = now_epoch_secs();
    if !api_cfg.minimal_runtime_enabled {
        return MinimalAllData {
            enabled: false,
            reason: Some(FEATURE_DISABLED_REASON),
            generated_at_epoch_secs: now,
            data: None,
        };
    }

    let Some((generated_at_epoch_secs, payload)) =
        get_minimal_payload_cached(shared, api_cfg.minimal_runtime_cache_ttl_ms).await
    else {
        return MinimalAllData {
            enabled: true,
            reason: Some(SOURCE_UNAVAILABLE_REASON),
            generated_at_epoch_secs: now,
            data: Some(MinimalAllPayload {
                me_writers: disabled_me_writers(now, SOURCE_UNAVAILABLE_REASON),
                dcs: disabled_dcs(now, SOURCE_UNAVAILABLE_REASON),
                me_runtime: None,
                network_path: Vec::new(),
            }),
        };
    };

    MinimalAllData {
        enabled: true,
        reason: None,
        generated_at_epoch_secs,
        data: Some(payload),
    }
}

pub(super) async fn build_me_writers_data(
    shared: &ApiShared,
    api_cfg: &ApiConfig,
) -> MeWritersData {
    let now = now_epoch_secs();
    if !api_cfg.minimal_runtime_enabled {
        return disabled_me_writers(now, FEATURE_DISABLED_REASON);
    }

    let Some((_, payload)) =
        get_minimal_payload_cached(shared, api_cfg.minimal_runtime_cache_ttl_ms).await
    else {
        return disabled_me_writers(now, SOURCE_UNAVAILABLE_REASON);
    };
    payload.me_writers
}

pub(super) async fn build_dcs_data(shared: &ApiShared, api_cfg: &ApiConfig) -> DcStatusData {
    let now = now_epoch_secs();
    if !api_cfg.minimal_runtime_enabled {
        return disabled_dcs(now, FEATURE_DISABLED_REASON);
    }

    let Some((_, payload)) =
        get_minimal_payload_cached(shared, api_cfg.minimal_runtime_cache_ttl_ms).await
    else {
        return disabled_dcs(now, SOURCE_UNAVAILABLE_REASON);
    };
    payload.dcs
}

async fn get_minimal_payload_cached(
    shared: &ApiShared,
    cache_ttl_ms: u64,
) -> Option<(u64, MinimalAllPayload)> {
    if cache_ttl_ms > 0 {
        let now = Instant::now();
        let cached = shared.minimal_cache.lock().await.clone();
        if let Some(entry) = cached
            && now < entry.expires_at
        {
            return Some((entry.generated_at_epoch_secs, entry.payload));
        }
    }

    let pool = shared.me_pool.as_ref()?;
    let status = pool.api_status_snapshot().await;
    let runtime = pool.api_runtime_snapshot().await;
    let generated_at_epoch_secs = status.generated_at_epoch_secs;

    let me_writers = MeWritersData {
        middle_proxy_enabled: true,
        reason: None,
        generated_at_epoch_secs,
        summary: MeWritersSummary {
            configured_dc_groups: status.configured_dc_groups,
            configured_endpoints: status.configured_endpoints,
            available_endpoints: status.available_endpoints,
            available_pct: status.available_pct,
            required_writers: status.required_writers,
            alive_writers: status.alive_writers,
            coverage_pct: status.coverage_pct,
        },
        writers: status
            .writers
            .into_iter()
            .map(|entry| MeWriterStatus {
                writer_id: entry.writer_id,
                dc: entry.dc,
                endpoint: entry.endpoint.to_string(),
                generation: entry.generation,
                state: entry.state,
                draining: entry.draining,
                degraded: entry.degraded,
                bound_clients: entry.bound_clients,
                idle_for_secs: entry.idle_for_secs,
                rtt_ema_ms: entry.rtt_ema_ms,
            })
            .collect(),
    };
    let dcs = DcStatusData {
        middle_proxy_enabled: true,
        reason: None,
        generated_at_epoch_secs,
        dcs: status
            .dcs
            .into_iter()
            .map(|entry| DcStatus {
                dc: entry.dc,
                endpoints: entry
                    .endpoints
                    .into_iter()
                    .map(|value| value.to_string())
                    .collect(),
                available_endpoints: entry.available_endpoints,
                available_pct: entry.available_pct,
                required_writers: entry.required_writers,
                alive_writers: entry.alive_writers,
                coverage_pct: entry.coverage_pct,
                rtt_ms: entry.rtt_ms,
                load: entry.load,
            })
            .collect(),
    };
    let me_runtime = MinimalMeRuntimeData {
        active_generation: runtime.active_generation,
        warm_generation: runtime.warm_generation,
        pending_hardswap_generation: runtime.pending_hardswap_generation,
        pending_hardswap_age_secs: runtime.pending_hardswap_age_secs,
        hardswap_enabled: runtime.hardswap_enabled,
        floor_mode: runtime.floor_mode,
        adaptive_floor_idle_secs: runtime.adaptive_floor_idle_secs,
        adaptive_floor_min_writers_single_endpoint: runtime
            .adaptive_floor_min_writers_single_endpoint,
        adaptive_floor_recover_grace_secs: runtime.adaptive_floor_recover_grace_secs,
        me_keepalive_enabled: runtime.me_keepalive_enabled,
        me_keepalive_interval_secs: runtime.me_keepalive_interval_secs,
        me_keepalive_jitter_secs: runtime.me_keepalive_jitter_secs,
        me_keepalive_payload_random: runtime.me_keepalive_payload_random,
        rpc_proxy_req_every_secs: runtime.rpc_proxy_req_every_secs,
        me_reconnect_max_concurrent_per_dc: runtime.me_reconnect_max_concurrent_per_dc,
        me_reconnect_backoff_base_ms: runtime.me_reconnect_backoff_base_ms,
        me_reconnect_backoff_cap_ms: runtime.me_reconnect_backoff_cap_ms,
        me_reconnect_fast_retry_count: runtime.me_reconnect_fast_retry_count,
        me_pool_drain_ttl_secs: runtime.me_pool_drain_ttl_secs,
        me_pool_force_close_secs: runtime.me_pool_force_close_secs,
        me_pool_min_fresh_ratio: runtime.me_pool_min_fresh_ratio,
        me_bind_stale_mode: runtime.me_bind_stale_mode,
        me_bind_stale_ttl_secs: runtime.me_bind_stale_ttl_secs,
        me_single_endpoint_shadow_writers: runtime.me_single_endpoint_shadow_writers,
        me_single_endpoint_outage_mode_enabled: runtime.me_single_endpoint_outage_mode_enabled,
        me_single_endpoint_outage_disable_quarantine: runtime
            .me_single_endpoint_outage_disable_quarantine,
        me_single_endpoint_outage_backoff_min_ms: runtime.me_single_endpoint_outage_backoff_min_ms,
        me_single_endpoint_outage_backoff_max_ms: runtime.me_single_endpoint_outage_backoff_max_ms,
        me_single_endpoint_shadow_rotate_every_secs: runtime
            .me_single_endpoint_shadow_rotate_every_secs,
        me_deterministic_writer_sort: runtime.me_deterministic_writer_sort,
        me_socks_kdf_policy: runtime.me_socks_kdf_policy,
        quarantined_endpoints_total: runtime.quarantined_endpoints.len(),
        quarantined_endpoints: runtime
            .quarantined_endpoints
            .into_iter()
            .map(|entry| MinimalQuarantineData {
                endpoint: entry.endpoint.to_string(),
                remaining_ms: entry.remaining_ms,
            })
            .collect(),
    };
    let network_path = runtime
        .network_path
        .into_iter()
        .map(|entry| MinimalDcPathData {
            dc: entry.dc,
            ip_preference: entry.ip_preference,
            selected_addr_v4: entry.selected_addr_v4.map(|value| value.to_string()),
            selected_addr_v6: entry.selected_addr_v6.map(|value| value.to_string()),
        })
        .collect();

    let payload = MinimalAllPayload {
        me_writers,
        dcs,
        me_runtime: Some(me_runtime),
        network_path,
    };

    if cache_ttl_ms > 0 {
        let entry = MinimalCacheEntry {
            expires_at: Instant::now() + Duration::from_millis(cache_ttl_ms),
            payload: payload.clone(),
            generated_at_epoch_secs,
        };
        *shared.minimal_cache.lock().await = Some(entry);
    }

    Some((generated_at_epoch_secs, payload))
}

fn disabled_me_writers(now_epoch_secs: u64, reason: &'static str) -> MeWritersData {
    MeWritersData {
        middle_proxy_enabled: false,
        reason: Some(reason),
        generated_at_epoch_secs: now_epoch_secs,
        summary: MeWritersSummary {
            configured_dc_groups: 0,
            configured_endpoints: 0,
            available_endpoints: 0,
            available_pct: 0.0,
            required_writers: 0,
            alive_writers: 0,
            coverage_pct: 0.0,
        },
        writers: Vec::new(),
    }
}

fn disabled_dcs(now_epoch_secs: u64, reason: &'static str) -> DcStatusData {
    DcStatusData {
        middle_proxy_enabled: false,
        reason: Some(reason),
        generated_at_epoch_secs: now_epoch_secs,
        dcs: Vec::new(),
    }
}

fn map_route_kind(value: UpstreamRouteKind) -> &'static str {
    match value {
        UpstreamRouteKind::Direct => "direct",
        UpstreamRouteKind::Socks4 => "socks4",
        UpstreamRouteKind::Socks5 => "socks5",
    }
}

fn map_ip_preference(value: IpPreference) -> &'static str {
    match value {
        IpPreference::Unknown => "unknown",
        IpPreference::PreferV6 => "prefer_v6",
        IpPreference::PreferV4 => "prefer_v4",
        IpPreference::BothWork => "both_work",
        IpPreference::Unavailable => "unavailable",
    }
}

fn now_epoch_secs() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs()
}
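Note: the read path in get_minimal_payload_cached is plain TTL memoization: serve the cached payload while it is fresh, otherwise rebuild from the pool snapshots and store the result with a new deadline; a TTL of 0 disables caching entirely. A standalone sketch of the same pattern with hypothetical names, assuming a tokio async mutex around the slot as the file above does:

use std::time::{Duration, Instant};
use tokio::sync::Mutex;

struct Cached<T: Clone> {
    expires_at: Instant,
    value: T,
}

// Serve the cached value while fresh; otherwise rebuild and remember it.
// ttl_ms == 0 bypasses the cache, matching the builder above.
async fn get_or_build<T: Clone>(
    slot: &Mutex<Option<Cached<T>>>,
    ttl_ms: u64,
    build: impl FnOnce() -> T,
) -> T {
    if ttl_ms > 0 {
        if let Some(entry) = slot.lock().await.as_ref() {
            if Instant::now() < entry.expires_at {
                return entry.value.clone();
            }
        }
    }
    let value = build();
    if ttl_ms > 0 {
        *slot.lock().await = Some(Cached {
            expires_at: Instant::now() + Duration::from_millis(ttl_ms),
            value: value.clone(),
        });
    }
    value
}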
490
src/api/users.rs
Normal file
@@ -0,0 +1,490 @@
use std::collections::HashMap;
use std::net::IpAddr;

use hyper::StatusCode;

use crate::config::ProxyConfig;
use crate::ip_tracker::UserIpTracker;
use crate::stats::Stats;

use super::ApiShared;
use super::config_store::{
    ensure_expected_revision, load_config_from_disk, save_config_to_disk,
};
use super::model::{
    ApiFailure, CreateUserRequest, CreateUserResponse, PatchUserRequest, RotateSecretRequest,
    UserInfo, UserLinks, is_valid_ad_tag, is_valid_user_secret, is_valid_username,
    parse_optional_expiration, random_user_secret,
};

pub(super) async fn create_user(
    body: CreateUserRequest,
    expected_revision: Option<String>,
    shared: &ApiShared,
) -> Result<(CreateUserResponse, String), ApiFailure> {
    if !is_valid_username(&body.username) {
        return Err(ApiFailure::bad_request(
            "username must match [A-Za-z0-9_.-] and be 1..64 chars",
        ));
    }

    let secret = match body.secret {
        Some(secret) => {
            if !is_valid_user_secret(&secret) {
                return Err(ApiFailure::bad_request(
                    "secret must be exactly 32 hex characters",
                ));
            }
            secret
        }
        None => random_user_secret(),
    };

    if let Some(ad_tag) = body.user_ad_tag.as_ref() && !is_valid_ad_tag(ad_tag) {
        return Err(ApiFailure::bad_request(
            "user_ad_tag must be exactly 32 hex characters",
        ));
    }

    let expiration = parse_optional_expiration(body.expiration_rfc3339.as_deref())?;
    let _guard = shared.mutation_lock.lock().await;
    let mut cfg = load_config_from_disk(&shared.config_path).await?;
    ensure_expected_revision(&shared.config_path, expected_revision.as_deref()).await?;

    if cfg.access.users.contains_key(&body.username) {
        return Err(ApiFailure::new(
            StatusCode::CONFLICT,
            "user_exists",
            "User already exists",
        ));
    }

    cfg.access.users.insert(body.username.clone(), secret.clone());
    if let Some(ad_tag) = body.user_ad_tag {
        cfg.access.user_ad_tags.insert(body.username.clone(), ad_tag);
    }
    if let Some(limit) = body.max_tcp_conns {
        cfg.access.user_max_tcp_conns.insert(body.username.clone(), limit);
    }
    if let Some(expiration) = expiration {
        cfg.access
            .user_expirations
            .insert(body.username.clone(), expiration);
    }
    if let Some(quota) = body.data_quota_bytes {
        cfg.access.user_data_quota.insert(body.username.clone(), quota);
    }

    let updated_limit = body.max_unique_ips;
    if let Some(limit) = updated_limit {
        cfg.access
            .user_max_unique_ips
            .insert(body.username.clone(), limit);
    }

    cfg.validate()
        .map_err(|e| ApiFailure::bad_request(format!("config validation failed: {}", e)))?;

    let revision = save_config_to_disk(&shared.config_path, &cfg).await?;
    drop(_guard);

    if let Some(limit) = updated_limit {
        shared.ip_tracker.set_user_limit(&body.username, limit).await;
    }

    let users = users_from_config(
        &cfg,
        &shared.stats,
        &shared.ip_tracker,
        shared.startup_detected_ip_v4,
        shared.startup_detected_ip_v6,
    )
    .await;
    let user = users
        .into_iter()
        .find(|entry| entry.username == body.username)
        .unwrap_or(UserInfo {
            username: body.username.clone(),
            user_ad_tag: None,
            max_tcp_conns: None,
            expiration_rfc3339: None,
            data_quota_bytes: None,
            max_unique_ips: updated_limit,
            current_connections: 0,
            active_unique_ips: 0,
            total_octets: 0,
            links: build_user_links(
                &cfg,
                &secret,
                shared.startup_detected_ip_v4,
                shared.startup_detected_ip_v6,
            ),
        });

    Ok((CreateUserResponse { user, secret }, revision))
}

pub(super) async fn patch_user(
    user: &str,
    body: PatchUserRequest,
    expected_revision: Option<String>,
    shared: &ApiShared,
) -> Result<(UserInfo, String), ApiFailure> {
    if let Some(secret) = body.secret.as_ref() && !is_valid_user_secret(secret) {
        return Err(ApiFailure::bad_request(
            "secret must be exactly 32 hex characters",
        ));
    }
    if let Some(ad_tag) = body.user_ad_tag.as_ref() && !is_valid_ad_tag(ad_tag) {
        return Err(ApiFailure::bad_request(
            "user_ad_tag must be exactly 32 hex characters",
        ));
    }
    let expiration = parse_optional_expiration(body.expiration_rfc3339.as_deref())?;
    let _guard = shared.mutation_lock.lock().await;
    let mut cfg = load_config_from_disk(&shared.config_path).await?;
    ensure_expected_revision(&shared.config_path, expected_revision.as_deref()).await?;

    if !cfg.access.users.contains_key(user) {
        return Err(ApiFailure::new(
            StatusCode::NOT_FOUND,
            "not_found",
            "User not found",
        ));
    }

    if let Some(secret) = body.secret {
        cfg.access.users.insert(user.to_string(), secret);
    }
    if let Some(ad_tag) = body.user_ad_tag {
        cfg.access.user_ad_tags.insert(user.to_string(), ad_tag);
    }
    if let Some(limit) = body.max_tcp_conns {
        cfg.access.user_max_tcp_conns.insert(user.to_string(), limit);
    }
    if let Some(expiration) = expiration {
        cfg.access.user_expirations.insert(user.to_string(), expiration);
    }
    if let Some(quota) = body.data_quota_bytes {
        cfg.access.user_data_quota.insert(user.to_string(), quota);
    }

    let mut updated_limit = None;
    if let Some(limit) = body.max_unique_ips {
        cfg.access.user_max_unique_ips.insert(user.to_string(), limit);
        updated_limit = Some(limit);
    }

    cfg.validate()
        .map_err(|e| ApiFailure::bad_request(format!("config validation failed: {}", e)))?;

    let revision = save_config_to_disk(&shared.config_path, &cfg).await?;
    drop(_guard);
    if let Some(limit) = updated_limit {
        shared.ip_tracker.set_user_limit(user, limit).await;
    }
    let users = users_from_config(
        &cfg,
        &shared.stats,
        &shared.ip_tracker,
        shared.startup_detected_ip_v4,
        shared.startup_detected_ip_v6,
    )
    .await;
    let user_info = users
        .into_iter()
        .find(|entry| entry.username == user)
        .ok_or_else(|| ApiFailure::internal("failed to build updated user view"))?;

    Ok((user_info, revision))
}

pub(super) async fn rotate_secret(
    user: &str,
    body: RotateSecretRequest,
    expected_revision: Option<String>,
    shared: &ApiShared,
) -> Result<(CreateUserResponse, String), ApiFailure> {
    let secret = body.secret.unwrap_or_else(random_user_secret);
    if !is_valid_user_secret(&secret) {
        return Err(ApiFailure::bad_request(
            "secret must be exactly 32 hex characters",
        ));
    }

    let _guard = shared.mutation_lock.lock().await;
    let mut cfg = load_config_from_disk(&shared.config_path).await?;
    ensure_expected_revision(&shared.config_path, expected_revision.as_deref()).await?;

    if !cfg.access.users.contains_key(user) {
        return Err(ApiFailure::new(
            StatusCode::NOT_FOUND,
            "not_found",
            "User not found",
        ));
    }

    cfg.access.users.insert(user.to_string(), secret.clone());
    cfg.validate()
        .map_err(|e| ApiFailure::bad_request(format!("config validation failed: {}", e)))?;
    let revision = save_config_to_disk(&shared.config_path, &cfg).await?;
    drop(_guard);

    let users = users_from_config(
        &cfg,
        &shared.stats,
        &shared.ip_tracker,
        shared.startup_detected_ip_v4,
        shared.startup_detected_ip_v6,
    )
    .await;
    let user_info = users
        .into_iter()
        .find(|entry| entry.username == user)
        .ok_or_else(|| ApiFailure::internal("failed to build updated user view"))?;

    Ok((
        CreateUserResponse {
            user: user_info,
            secret,
        },
        revision,
    ))
}

pub(super) async fn delete_user(
    user: &str,
    expected_revision: Option<String>,
    shared: &ApiShared,
) -> Result<(String, String), ApiFailure> {
    let _guard = shared.mutation_lock.lock().await;
    let mut cfg = load_config_from_disk(&shared.config_path).await?;
    ensure_expected_revision(&shared.config_path, expected_revision.as_deref()).await?;

    if !cfg.access.users.contains_key(user) {
        return Err(ApiFailure::new(
            StatusCode::NOT_FOUND,
            "not_found",
            "User not found",
        ));
    }
    if cfg.access.users.len() <= 1 {
        return Err(ApiFailure::new(
            StatusCode::CONFLICT,
            "last_user_forbidden",
            "Cannot delete the last configured user",
        ));
    }

    cfg.access.users.remove(user);
    cfg.access.user_ad_tags.remove(user);
    cfg.access.user_max_tcp_conns.remove(user);
    cfg.access.user_expirations.remove(user);
    cfg.access.user_data_quota.remove(user);
    cfg.access.user_max_unique_ips.remove(user);

    cfg.validate()
        .map_err(|e| ApiFailure::bad_request(format!("config validation failed: {}", e)))?;
    let revision = save_config_to_disk(&shared.config_path, &cfg).await?;
    drop(_guard);
    shared.ip_tracker.clear_user_ips(user).await;

    Ok((user.to_string(), revision))
}

pub(super) async fn users_from_config(
    cfg: &ProxyConfig,
    stats: &Stats,
    ip_tracker: &UserIpTracker,
    startup_detected_ip_v4: Option<IpAddr>,
    startup_detected_ip_v6: Option<IpAddr>,
) -> Vec<UserInfo> {
    let ip_counts = ip_tracker
        .get_stats()
        .await
        .into_iter()
        .map(|(user, count, _)| (user, count))
        .collect::<HashMap<_, _>>();

    let mut names = cfg.access.users.keys().cloned().collect::<Vec<_>>();
    names.sort();

    let mut users = Vec::with_capacity(names.len());
    for username in names {
        let links = cfg
            .access
            .users
            .get(&username)
            .map(|secret| {
                build_user_links(
                    cfg,
                    secret,
                    startup_detected_ip_v4,
                    startup_detected_ip_v6,
                )
            })
            .unwrap_or(UserLinks {
                classic: Vec::new(),
                secure: Vec::new(),
                tls: Vec::new(),
            });
        users.push(UserInfo {
            user_ad_tag: cfg.access.user_ad_tags.get(&username).cloned(),
            max_tcp_conns: cfg.access.user_max_tcp_conns.get(&username).copied(),
            expiration_rfc3339: cfg
                .access
                .user_expirations
                .get(&username)
                .map(chrono::DateTime::<chrono::Utc>::to_rfc3339),
            data_quota_bytes: cfg.access.user_data_quota.get(&username).copied(),
            max_unique_ips: cfg.access.user_max_unique_ips.get(&username).copied(),
            current_connections: stats.get_user_curr_connects(&username),
            active_unique_ips: ip_counts.get(&username).copied().unwrap_or(0),
            total_octets: stats.get_user_total_octets(&username),
            links,
            username,
        });
    }
    users
}

fn build_user_links(
    cfg: &ProxyConfig,
    secret: &str,
    startup_detected_ip_v4: Option<IpAddr>,
    startup_detected_ip_v6: Option<IpAddr>,
) -> UserLinks {
    let hosts = resolve_link_hosts(cfg, startup_detected_ip_v4, startup_detected_ip_v6);
    let port = cfg.general.links.public_port.unwrap_or(cfg.server.port);
    let tls_domains = resolve_tls_domains(cfg);

    let mut classic = Vec::new();
    let mut secure = Vec::new();
    let mut tls = Vec::new();

    for host in &hosts {
        if cfg.general.modes.classic {
            classic.push(format!(
                "tg://proxy?server={}&port={}&secret={}",
                host, port, secret
            ));
        }
        if cfg.general.modes.secure {
            secure.push(format!(
                "tg://proxy?server={}&port={}&secret=dd{}",
                host, port, secret
            ));
        }
        if cfg.general.modes.tls {
            for domain in &tls_domains {
                let domain_hex = hex::encode(domain);
                tls.push(format!(
                    "tg://proxy?server={}&port={}&secret=ee{}{}",
                    host, port, secret, domain_hex
                ));
            }
        }
    }

    UserLinks {
        classic,
        secure,
        tls,
    }
}

fn resolve_link_hosts(
    cfg: &ProxyConfig,
    startup_detected_ip_v4: Option<IpAddr>,
    startup_detected_ip_v6: Option<IpAddr>,
) -> Vec<String> {
    if let Some(host) = cfg
        .general
        .links
        .public_host
        .as_deref()
        .map(str::trim)
        .filter(|value| !value.is_empty())
    {
        return vec![host.to_string()];
    }

    let mut startup_hosts = Vec::new();
    if let Some(ip) = startup_detected_ip_v4 {
        push_unique_host(&mut startup_hosts, &ip.to_string());
    }
    if let Some(ip) = startup_detected_ip_v6 {
        push_unique_host(&mut startup_hosts, &ip.to_string());
    }
    if !startup_hosts.is_empty() {
        return startup_hosts;
    }

    let mut hosts = Vec::new();
    for listener in &cfg.server.listeners {
        if let Some(host) = listener
            .announce
            .as_deref()
            .map(str::trim)
            .filter(|value| !value.is_empty())
        {
            push_unique_host(&mut hosts, host);
            continue;
        }
        if let Some(ip) = listener.announce_ip {
            if !ip.is_unspecified() {
                push_unique_host(&mut hosts, &ip.to_string());
            }
            continue;
        }
        if !listener.ip.is_unspecified() {
            push_unique_host(&mut hosts, &listener.ip.to_string());
        }
    }

    if hosts.is_empty() {
        if let Some(host) = cfg.server.listen_addr_ipv4.as_deref() {
            push_host_from_legacy_listen(&mut hosts, host);
        }
        if let Some(host) = cfg.server.listen_addr_ipv6.as_deref() {
            push_host_from_legacy_listen(&mut hosts, host);
        }
    }

    hosts
}

fn push_host_from_legacy_listen(hosts: &mut Vec<String>, raw: &str) {
    let candidate = raw.trim();
    if candidate.is_empty() {
        return;
    }

    match candidate.parse::<IpAddr>() {
        Ok(ip) if ip.is_unspecified() => {}
        Ok(ip) => push_unique_host(hosts, &ip.to_string()),
        Err(_) => push_unique_host(hosts, candidate),
    }
}

fn push_unique_host(hosts: &mut Vec<String>, candidate: &str) {
    if !hosts.iter().any(|existing| existing == candidate) {
        hosts.push(candidate.to_string());
    }
}

fn resolve_tls_domains(cfg: &ProxyConfig) -> Vec<&str> {
    let mut domains = Vec::with_capacity(1 + cfg.censorship.tls_domains.len());
    let primary = cfg.censorship.tls_domain.as_str();
    if !primary.is_empty() {
        domains.push(primary);
    }
    for domain in &cfg.censorship.tls_domains {
        let value = domain.as_str();
        if value.is_empty() || domains.contains(&value) {
            continue;
        }
        domains.push(value);
    }
    domains
}
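Note: the three link flavors emitted by build_user_links differ only in the secret prefix: bare hex for classic, dd for secure (padded) mode, and ee plus the hex-encoded fake-TLS domain. With a hypothetical host 203.0.113.10, port 443, the all-zero example secret, and tls_domain = "example.com" (hex 6578616d706c652e636f6d), the generated links would look like:

tg://proxy?server=203.0.113.10&port=443&secret=00000000000000000000000000000000
tg://proxy?server=203.0.113.10&port=443&secret=dd00000000000000000000000000000000
tg://proxy?server=203.0.113.10&port=443&secret=ee000000000000000000000000000000006578616d706c652e636f6d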
@@ -9,8 +9,11 @@ const DEFAULT_MIDDLE_PROXY_WARM_STANDBY: usize = 16;
const DEFAULT_ME_RECONNECT_MAX_CONCURRENT_PER_DC: u32 = 8;
const DEFAULT_ME_RECONNECT_FAST_RETRY_COUNT: u32 = 16;
const DEFAULT_ME_SINGLE_ENDPOINT_SHADOW_WRITERS: u8 = 2;
const DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS: u32 = 3;
const DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD: u32 = 4;
const DEFAULT_ME_ADAPTIVE_FLOOR_IDLE_SECS: u64 = 90;
const DEFAULT_ME_ADAPTIVE_FLOOR_MIN_WRITERS_SINGLE_ENDPOINT: u8 = 1;
const DEFAULT_ME_ADAPTIVE_FLOOR_RECOVER_GRACE_SECS: u64 = 180;
const DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS: u32 = 2;
const DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD: u32 = 5;
const DEFAULT_LISTEN_ADDR_IPV6: &str = "::";
const DEFAULT_ACCESS_USER: &str = "default";
const DEFAULT_ACCESS_SECRET: &str = "00000000000000000000000000000000";
@@ -89,6 +92,26 @@ pub(crate) fn default_metrics_whitelist() -> Vec<IpNetwork> {
    ]
}

pub(crate) fn default_api_listen() -> String {
    "127.0.0.1:9091".to_string()
}

pub(crate) fn default_api_whitelist() -> Vec<IpNetwork> {
    default_metrics_whitelist()
}

pub(crate) fn default_api_request_body_limit_bytes() -> usize {
    64 * 1024
}

pub(crate) fn default_api_minimal_runtime_enabled() -> bool {
    false
}

pub(crate) fn default_api_minimal_runtime_cache_ttl_ms() -> u64 {
    1000
}

pub(crate) fn default_prefer_4() -> u8 {
    4
}
@@ -130,11 +153,11 @@ pub(crate) fn default_middle_proxy_warm_standby() -> usize {
}

pub(crate) fn default_keepalive_interval() -> u64 {
    25
    8
}

pub(crate) fn default_keepalive_jitter() -> u64 {
    5
    2
}

pub(crate) fn default_warmup_step_delay_ms() -> u64 {
@@ -185,18 +208,38 @@ pub(crate) fn default_me_single_endpoint_shadow_rotate_every_secs() -> u64 {
    900
}

pub(crate) fn default_me_adaptive_floor_idle_secs() -> u64 {
    DEFAULT_ME_ADAPTIVE_FLOOR_IDLE_SECS
}

pub(crate) fn default_me_adaptive_floor_min_writers_single_endpoint() -> u8 {
    DEFAULT_ME_ADAPTIVE_FLOOR_MIN_WRITERS_SINGLE_ENDPOINT
}

pub(crate) fn default_me_adaptive_floor_recover_grace_secs() -> u64 {
    DEFAULT_ME_ADAPTIVE_FLOOR_RECOVER_GRACE_SECS
}

pub(crate) fn default_upstream_connect_retry_attempts() -> u32 {
    DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS
}

pub(crate) fn default_upstream_connect_retry_backoff_ms() -> u64 {
    250
    100
}

pub(crate) fn default_upstream_unhealthy_fail_threshold() -> u32 {
    DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD
}

pub(crate) fn default_upstream_connect_failfast_hard_errors() -> bool {
    false
}

pub(crate) fn default_rpc_proxy_req_every() -> u64 {
    0
}

pub(crate) fn default_crypto_pending_buffer() -> usize {
    256 * 1024
}
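Note: for orientation, the adaptive-floor defaults above map onto [general] keys. A hedged TOML sketch with the default values spelled out explicitly (the me_floor_mode = "adaptive" spelling is confirmed by the parser test further down; the values shown are the defaults from this hunk, so setting them is optional):

[general]
me_floor_mode = "adaptive"
me_adaptive_floor_idle_secs = 90                    # DEFAULT_ME_ADAPTIVE_FLOOR_IDLE_SECS
me_adaptive_floor_min_writers_single_endpoint = 1   # validated to [1, 32]
me_adaptive_floor_recover_grace_secs = 180          # DEFAULT_ME_ADAPTIVE_FLOOR_RECOVER_GRACE_SECS
rpc_proxy_req_every = 0                             # 0 disables; otherwise must be in [10, 300]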
@@ -32,7 +32,7 @@ use notify::{EventKind, RecursiveMode, Watcher, recommended_watcher};
use tokio::sync::{mpsc, watch};
use tracing::{error, info, warn};

use crate::config::{LogLevel, MeSocksKdfPolicy, MeTelemetryLevel};
use crate::config::{LogLevel, MeFloorMode, MeSocksKdfPolicy, MeTelemetryLevel};
use super::load::ProxyConfig;

// ── Hot fields ────────────────────────────────────────────────────────────────
@@ -58,6 +58,10 @@ pub struct HotFields {
    pub telemetry_user_enabled: bool,
    pub telemetry_me_level: MeTelemetryLevel,
    pub me_socks_kdf_policy: MeSocksKdfPolicy,
    pub me_floor_mode: MeFloorMode,
    pub me_adaptive_floor_idle_secs: u64,
    pub me_adaptive_floor_min_writers_single_endpoint: u8,
    pub me_adaptive_floor_recover_grace_secs: u64,
    pub me_route_backpressure_base_timeout_ms: u64,
    pub me_route_backpressure_high_timeout_ms: u64,
    pub me_route_backpressure_high_watermark_pct: u8,
@@ -85,6 +89,14 @@ impl HotFields {
            telemetry_user_enabled: cfg.general.telemetry.user_enabled,
            telemetry_me_level: cfg.general.telemetry.me_level,
            me_socks_kdf_policy: cfg.general.me_socks_kdf_policy,
            me_floor_mode: cfg.general.me_floor_mode,
            me_adaptive_floor_idle_secs: cfg.general.me_adaptive_floor_idle_secs,
            me_adaptive_floor_min_writers_single_endpoint: cfg
                .general
                .me_adaptive_floor_min_writers_single_endpoint,
            me_adaptive_floor_recover_grace_secs: cfg
                .general
                .me_adaptive_floor_recover_grace_secs,
            me_route_backpressure_base_timeout_ms: cfg.general.me_route_backpressure_base_timeout_ms,
            me_route_backpressure_high_timeout_ms: cfg.general.me_route_backpressure_high_timeout_ms,
            me_route_backpressure_high_watermark_pct: cfg.general.me_route_backpressure_high_watermark_pct,
@@ -103,6 +115,18 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig) {
            old.server.port, new.server.port
        );
    }
    if old.server.api.enabled != new.server.api.enabled
        || old.server.api.listen != new.server.api.listen
        || old.server.api.whitelist != new.server.api.whitelist
        || old.server.api.auth_header != new.server.api.auth_header
        || old.server.api.request_body_limit_bytes != new.server.api.request_body_limit_bytes
        || old.server.api.minimal_runtime_enabled != new.server.api.minimal_runtime_enabled
        || old.server.api.minimal_runtime_cache_ttl_ms
            != new.server.api.minimal_runtime_cache_ttl_ms
        || old.server.api.read_only != new.server.api.read_only
    {
        warn!("config reload: server.api changed; restart required");
    }
    if old.censorship.tls_domain != new.censorship.tls_domain {
        warn!(
            "config reload: censorship.tls_domain changed ('{}' → '{}'); restart required",
@@ -123,6 +147,9 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig) {
            != new.general.upstream_connect_retry_backoff_ms
        || old.general.upstream_unhealthy_fail_threshold
            != new.general.upstream_unhealthy_fail_threshold
        || old.general.upstream_connect_failfast_hard_errors
            != new.general.upstream_connect_failfast_hard_errors
        || old.general.rpc_proxy_req_every != new.general.rpc_proxy_req_every
    {
        warn!("config reload: general.upstream_* changed; restart required");
    }
@@ -309,6 +336,22 @@ fn log_changes(
        );
    }

    if old_hot.me_floor_mode != new_hot.me_floor_mode
        || old_hot.me_adaptive_floor_idle_secs != new_hot.me_adaptive_floor_idle_secs
        || old_hot.me_adaptive_floor_min_writers_single_endpoint
            != new_hot.me_adaptive_floor_min_writers_single_endpoint
        || old_hot.me_adaptive_floor_recover_grace_secs
            != new_hot.me_adaptive_floor_recover_grace_secs
    {
        info!(
            "config reload: me_floor: mode={:?} idle={}s min_single={} recover_grace={}s",
            new_hot.me_floor_mode,
            new_hot.me_adaptive_floor_idle_secs,
            new_hot.me_adaptive_floor_min_writers_single_endpoint,
            new_hot.me_adaptive_floor_recover_grace_secs,
        );
    }

    if old_hot.me_route_backpressure_base_timeout_ms
        != new_hot.me_route_backpressure_base_timeout_ms
        || old_hot.me_route_backpressure_high_timeout_ms
@@ -1,7 +1,7 @@
#![allow(deprecated)]

use std::collections::HashMap;
use std::net::IpAddr;
use std::net::{IpAddr, SocketAddr};
use std::path::Path;

use rand::Rng;
@@ -249,6 +249,14 @@ impl ProxyConfig {
            ));
        }

        if config.general.rpc_proxy_req_every != 0
            && !(10..=300).contains(&config.general.rpc_proxy_req_every)
        {
            return Err(ProxyError::Config(
                "general.rpc_proxy_req_every must be 0 or within [10, 300]".to_string(),
            ));
        }

        if config.general.me_reinit_every_secs == 0 {
            return Err(ProxyError::Config(
                "general.me_reinit_every_secs must be > 0".to_string(),
@@ -261,6 +269,15 @@ impl ProxyConfig {
            ));
        }

        if config.general.me_adaptive_floor_min_writers_single_endpoint == 0
            || config.general.me_adaptive_floor_min_writers_single_endpoint > 32
        {
            return Err(ProxyError::Config(
                "general.me_adaptive_floor_min_writers_single_endpoint must be within [1, 32]"
                    .to_string(),
            ));
        }

        if config.general.me_single_endpoint_outage_backoff_min_ms == 0 {
            return Err(ProxyError::Config(
                "general.me_single_endpoint_outage_backoff_min_ms must be > 0".to_string(),
@@ -381,6 +398,24 @@ impl ProxyConfig {
            ));
        }

        if config.server.api.request_body_limit_bytes == 0 {
            return Err(ProxyError::Config(
                "server.api.request_body_limit_bytes must be > 0".to_string(),
            ));
        }

        if config.server.api.minimal_runtime_cache_ttl_ms > 60_000 {
            return Err(ProxyError::Config(
                "server.api.minimal_runtime_cache_ttl_ms must be within [0, 60000]".to_string(),
            ));
        }

        if config.server.api.listen.parse::<SocketAddr>().is_err() {
            return Err(ProxyError::Config(
                "server.api.listen must be in IP:PORT format".to_string(),
            ));
        }

        if config.general.effective_me_pool_force_close_secs() > 0
            && config.general.effective_me_pool_force_close_secs()
                < config.general.me_pool_drain_ttl_secs
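Note: the three server.api checks above catch a zero body limit, a cache TTL above one minute, and a listen value that is not a literal socket address. As an illustration (hypothetical fragment), this would fail the last check, since SocketAddr parsing accepts only IP:PORT, not hostnames:

[server.api]
enabled = true
listen = "localhost:9091"   # rejected: server.api.listen must be in IP:PORT format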
@@ -642,6 +677,19 @@ mod tests {
            cfg.general.me_single_endpoint_shadow_rotate_every_secs,
            default_me_single_endpoint_shadow_rotate_every_secs()
        );
        assert_eq!(cfg.general.me_floor_mode, MeFloorMode::default());
        assert_eq!(
            cfg.general.me_adaptive_floor_idle_secs,
            default_me_adaptive_floor_idle_secs()
        );
        assert_eq!(
            cfg.general.me_adaptive_floor_min_writers_single_endpoint,
            default_me_adaptive_floor_min_writers_single_endpoint()
        );
        assert_eq!(
            cfg.general.me_adaptive_floor_recover_grace_secs,
            default_me_adaptive_floor_recover_grace_secs()
        );
        assert_eq!(
            cfg.general.upstream_connect_retry_attempts,
            default_upstream_connect_retry_attempts()
@@ -654,9 +702,31 @@ mod tests {
            cfg.general.upstream_unhealthy_fail_threshold,
            default_upstream_unhealthy_fail_threshold()
        );
        assert_eq!(
            cfg.general.upstream_connect_failfast_hard_errors,
            default_upstream_connect_failfast_hard_errors()
        );
        assert_eq!(
            cfg.general.rpc_proxy_req_every,
            default_rpc_proxy_req_every()
        );
        assert_eq!(cfg.general.update_every, default_update_every());
        assert_eq!(cfg.server.listen_addr_ipv4, default_listen_addr_ipv4());
        assert_eq!(cfg.server.listen_addr_ipv6, default_listen_addr_ipv6_opt());
        assert_eq!(cfg.server.api.listen, default_api_listen());
        assert_eq!(cfg.server.api.whitelist, default_api_whitelist());
        assert_eq!(
            cfg.server.api.request_body_limit_bytes,
            default_api_request_body_limit_bytes()
        );
        assert_eq!(
            cfg.server.api.minimal_runtime_enabled,
            default_api_minimal_runtime_enabled()
        );
        assert_eq!(
            cfg.server.api.minimal_runtime_cache_ttl_ms,
            default_api_minimal_runtime_cache_ttl_ms()
        );
        assert_eq!(cfg.access.users, default_access_users());
    }

@@ -704,6 +774,19 @@ mod tests {
            general.me_single_endpoint_shadow_rotate_every_secs,
            default_me_single_endpoint_shadow_rotate_every_secs()
        );
        assert_eq!(general.me_floor_mode, MeFloorMode::default());
        assert_eq!(
            general.me_adaptive_floor_idle_secs,
            default_me_adaptive_floor_idle_secs()
        );
        assert_eq!(
            general.me_adaptive_floor_min_writers_single_endpoint,
            default_me_adaptive_floor_min_writers_single_endpoint()
        );
        assert_eq!(
            general.me_adaptive_floor_recover_grace_secs,
            default_me_adaptive_floor_recover_grace_secs()
        );
        assert_eq!(
            general.upstream_connect_retry_attempts,
            default_upstream_connect_retry_attempts()
@@ -716,10 +799,29 @@ mod tests {
            general.upstream_unhealthy_fail_threshold,
            default_upstream_unhealthy_fail_threshold()
        );
        assert_eq!(
            general.upstream_connect_failfast_hard_errors,
            default_upstream_connect_failfast_hard_errors()
        );
        assert_eq!(general.rpc_proxy_req_every, default_rpc_proxy_req_every());
        assert_eq!(general.update_every, default_update_every());

        let server = ServerConfig::default();
        assert_eq!(server.listen_addr_ipv6, Some(default_listen_addr_ipv6()));
        assert_eq!(server.api.listen, default_api_listen());
        assert_eq!(server.api.whitelist, default_api_whitelist());
        assert_eq!(
            server.api.request_body_limit_bytes,
            default_api_request_body_limit_bytes()
        );
        assert_eq!(
            server.api.minimal_runtime_enabled,
            default_api_minimal_runtime_enabled()
        );
        assert_eq!(
            server.api.minimal_runtime_cache_ttl_ms,
            default_api_minimal_runtime_cache_ttl_ms()
        );

        let access = AccessConfig::default();
        assert_eq!(access.users, default_access_users());
@@ -931,6 +1033,50 @@ mod tests {
        let _ = std::fs::remove_file(path);
    }

    #[test]
    fn me_adaptive_floor_min_writers_out_of_range_is_rejected() {
        let toml = r#"
[general]
me_adaptive_floor_min_writers_single_endpoint = 0

[censorship]
tls_domain = "example.com"

[access.users]
user = "00000000000000000000000000000000"
"#;
        let dir = std::env::temp_dir();
        let path = dir.join("telemt_me_adaptive_floor_min_writers_out_of_range_test.toml");
        std::fs::write(&path, toml).unwrap();
        let err = ProxyConfig::load(&path).unwrap_err().to_string();
        assert!(
            err.contains(
                "general.me_adaptive_floor_min_writers_single_endpoint must be within [1, 32]"
            )
        );
        let _ = std::fs::remove_file(path);
    }

    #[test]
    fn me_floor_mode_adaptive_is_parsed() {
        let toml = r#"
[general]
me_floor_mode = "adaptive"

[censorship]
tls_domain = "example.com"

[access.users]
user = "00000000000000000000000000000000"
"#;
        let dir = std::env::temp_dir();
        let path = dir.join("telemt_me_floor_mode_adaptive_test.toml");
        std::fs::write(&path, toml).unwrap();
        let cfg = ProxyConfig::load(&path).unwrap();
        assert_eq!(cfg.general.me_floor_mode, MeFloorMode::Adaptive);
        let _ = std::fs::remove_file(path);
    }

    #[test]
    fn upstream_connect_retry_attempts_zero_is_rejected() {
        let toml = r#"
@@ -971,6 +1117,62 @@ mod tests {
        let _ = std::fs::remove_file(path);
    }

    #[test]
    fn rpc_proxy_req_every_out_of_range_is_rejected() {
        let toml = r#"
[general]
rpc_proxy_req_every = 9

[censorship]
tls_domain = "example.com"

[access.users]
user = "00000000000000000000000000000000"
"#;
        let dir = std::env::temp_dir();
        let path = dir.join("telemt_rpc_proxy_req_every_out_of_range_test.toml");
        std::fs::write(&path, toml).unwrap();
        let err = ProxyConfig::load(&path).unwrap_err().to_string();
        assert!(err.contains("general.rpc_proxy_req_every must be 0 or within [10, 300]"));
        let _ = std::fs::remove_file(path);
    }

    #[test]
    fn rpc_proxy_req_every_zero_and_valid_range_are_accepted() {
        let toml_zero = r#"
[general]
rpc_proxy_req_every = 0

[censorship]
tls_domain = "example.com"

[access.users]
user = "00000000000000000000000000000000"
"#;
        let dir = std::env::temp_dir();
        let path_zero = dir.join("telemt_rpc_proxy_req_every_zero_ok_test.toml");
        std::fs::write(&path_zero, toml_zero).unwrap();
        let cfg_zero = ProxyConfig::load(&path_zero).unwrap();
        assert_eq!(cfg_zero.general.rpc_proxy_req_every, 0);
        let _ = std::fs::remove_file(path_zero);

        let toml_valid = r#"
[general]
rpc_proxy_req_every = 40

[censorship]
tls_domain = "example.com"

[access.users]
user = "00000000000000000000000000000000"
"#;
        let path_valid = dir.join("telemt_rpc_proxy_req_every_valid_ok_test.toml");
        std::fs::write(&path_valid, toml_valid).unwrap();
        let cfg_valid = ProxyConfig::load(&path_valid).unwrap();
        assert_eq!(cfg_valid.general.rpc_proxy_req_every, 40);
        let _ = std::fs::remove_file(path_valid);
|
||||
}
|
||||
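    // Illustrative sketch, not the crate's actual validator: the rule the two
    // tests above pin down, written as a hypothetical standalone function.
    // rpc_proxy_req_every is accepted when it is 0 (service signals disabled)
    // or within [10, 300].
    fn validate_rpc_proxy_req_every(value: u64) -> Result<(), String> {
        if value == 0 || (10..=300).contains(&value) {
            Ok(())
        } else {
            Err("general.rpc_proxy_req_every must be 0 or within [10, 300]".to_string())
        }
    }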
    #[test]
    fn me_hardswap_warmup_defaults_are_set() {
        let toml = r#"

@@ -1166,6 +1368,28 @@ mod tests {
        let _ = std::fs::remove_file(path);
    }

    #[test]
    fn api_minimal_runtime_cache_ttl_out_of_range_is_rejected() {
        let toml = r#"
[server.api]
enabled = true
listen = "127.0.0.1:9091"
minimal_runtime_cache_ttl_ms = 70000

[censorship]
tls_domain = "example.com"

[access.users]
user = "00000000000000000000000000000000"
"#;
        let dir = std::env::temp_dir();
        let path = dir.join("telemt_api_minimal_runtime_cache_ttl_invalid_test.toml");
        std::fs::write(&path, toml).unwrap();
        let err = ProxyConfig::load(&path).unwrap_err().to_string();
        assert!(err.contains("server.api.minimal_runtime_cache_ttl_ms must be within [0, 60000]"));
        let _ = std::fs::remove_file(path);
    }

    #[test]
    fn force_close_bumped_when_below_drain_ttl() {
        let toml = r#"

@@ -158,6 +158,31 @@ impl MeBindStaleMode {
    }
}

/// Middle-End writer floor policy mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum MeFloorMode {
    Static,
    #[default]
    Adaptive,
}

impl MeFloorMode {
    pub fn as_u8(self) -> u8 {
        match self {
            MeFloorMode::Static => 0,
            MeFloorMode::Adaptive => 1,
        }
    }

    pub fn from_u8(raw: u8) -> Self {
        match raw {
            1 => MeFloorMode::Adaptive,
            _ => MeFloorMode::Static,
        }
    }
}
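// Illustration only (not part of the diff): from_u8 is total, so any byte
// decodes to a valid mode, and as_u8/from_u8 round-trip for both defined
// values. Unknown raw bytes fall back to Static instead of failing.
fn floor_mode_byte_roundtrip() {
    for mode in [MeFloorMode::Static, MeFloorMode::Adaptive] {
        assert_eq!(MeFloorMode::from_u8(mode.as_u8()), mode);
    }
    assert_eq!(MeFloorMode::from_u8(200), MeFloorMode::Static);
}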
/// Telemetry controls for hot-path counters and ME diagnostics.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TelemetryConfig {

@@ -331,6 +356,11 @@ pub struct GeneralConfig {
    #[serde(default = "default_true")]
    pub me_keepalive_payload_random: bool,

    /// Interval in seconds for service RPC_PROXY_REQ activity signals to ME.
    /// 0 disables service activity signals.
    #[serde(default = "default_rpc_proxy_req_every")]
    pub rpc_proxy_req_every: u64,

    /// Max pending ciphertext buffer per client writer (bytes).
    /// Controls FakeTLS backpressure vs throughput.
    #[serde(default = "default_crypto_pending_buffer")]

@@ -419,6 +449,22 @@ pub struct GeneralConfig {
    #[serde(default = "default_me_single_endpoint_shadow_rotate_every_secs")]
    pub me_single_endpoint_shadow_rotate_every_secs: u64,

    /// Floor policy mode for ME writer targets.
    #[serde(default)]
    pub me_floor_mode: MeFloorMode,

    /// Idle time in seconds before adaptive floor can reduce single-endpoint writer target.
    #[serde(default = "default_me_adaptive_floor_idle_secs")]
    pub me_adaptive_floor_idle_secs: u64,

    /// Minimum writer target for single-endpoint DC groups in adaptive floor mode.
    #[serde(default = "default_me_adaptive_floor_min_writers_single_endpoint")]
    pub me_adaptive_floor_min_writers_single_endpoint: u8,

    /// Grace period in seconds to hold static floor after activity in adaptive mode.
    #[serde(default = "default_me_adaptive_floor_recover_grace_secs")]
    pub me_adaptive_floor_recover_grace_secs: u64,

    /// Connect attempts for the selected upstream before returning error/fallback.
    #[serde(default = "default_upstream_connect_retry_attempts")]
    pub upstream_connect_retry_attempts: u32,
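// Illustrative sketch of how the three adaptive-floor knobs above interact,
// as a hypothetical standalone helper (the real logic lives in the ME health
// monitor shown later in this diff): after me_adaptive_floor_idle_secs of
// idleness the writer target may drop to the adaptive minimum, while recent
// activity holds the static floor for me_adaptive_floor_recover_grace_secs.
fn adaptive_floor_target(
    idle_for_secs: u64,
    since_activity_secs: u64,
    static_floor: usize,
    adaptive_min: usize,
    idle_secs: u64,
    recover_grace_secs: u64,
) -> usize {
    if since_activity_secs < recover_grace_secs {
        // Grace period after activity: keep the static floor.
        static_floor
    } else if idle_for_secs >= idle_secs {
        // Long enough idle: reduce to the configured adaptive minimum.
        adaptive_min
    } else {
        static_floor
    }
}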
@@ -431,6 +477,10 @@ pub struct GeneralConfig {
    #[serde(default = "default_upstream_unhealthy_fail_threshold")]
    pub upstream_unhealthy_fail_threshold: u32,

    /// Skip additional retries for hard non-transient upstream connect errors.
    #[serde(default = "default_upstream_connect_failfast_hard_errors")]
    pub upstream_connect_failfast_hard_errors: bool,

    /// Ignore STUN/interface IP mismatch (keep using Middle Proxy even if NAT detected).
    #[serde(default)]
    pub stun_iface_mismatch_ignore: bool,

@@ -621,6 +671,7 @@ impl Default for GeneralConfig {
            me_keepalive_interval_secs: default_keepalive_interval(),
            me_keepalive_jitter_secs: default_keepalive_jitter(),
            me_keepalive_payload_random: default_true(),
            rpc_proxy_req_every: default_rpc_proxy_req_every(),
            me_warmup_stagger_enabled: default_true(),
            me_warmup_step_delay_ms: default_warmup_step_delay_ms(),
            me_warmup_step_jitter_ms: default_warmup_step_jitter_ms(),

@@ -634,9 +685,14 @@ impl Default for GeneralConfig {
            me_single_endpoint_outage_backoff_min_ms: default_me_single_endpoint_outage_backoff_min_ms(),
            me_single_endpoint_outage_backoff_max_ms: default_me_single_endpoint_outage_backoff_max_ms(),
            me_single_endpoint_shadow_rotate_every_secs: default_me_single_endpoint_shadow_rotate_every_secs(),
            me_floor_mode: MeFloorMode::default(),
            me_adaptive_floor_idle_secs: default_me_adaptive_floor_idle_secs(),
            me_adaptive_floor_min_writers_single_endpoint: default_me_adaptive_floor_min_writers_single_endpoint(),
            me_adaptive_floor_recover_grace_secs: default_me_adaptive_floor_recover_grace_secs(),
            upstream_connect_retry_attempts: default_upstream_connect_retry_attempts(),
            upstream_connect_retry_backoff_ms: default_upstream_connect_retry_backoff_ms(),
            upstream_unhealthy_fail_threshold: default_upstream_unhealthy_fail_threshold(),
            upstream_connect_failfast_hard_errors: default_upstream_connect_failfast_hard_errors(),
            stun_iface_mismatch_ignore: false,
            unknown_dc_log_path: default_unknown_dc_log_path(),
            log_level: LogLevel::Normal,

@@ -737,6 +793,58 @@ impl Default for LinksConfig {
    }
}

/// API settings for control-plane endpoints.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ApiConfig {
    /// Enable or disable REST API.
    #[serde(default)]
    pub enabled: bool,

    /// Listen address for API in `IP:PORT` format.
    #[serde(default = "default_api_listen")]
    pub listen: String,

    /// CIDR whitelist allowed to access API.
    #[serde(default = "default_api_whitelist")]
    pub whitelist: Vec<IpNetwork>,

    /// Optional static value for `Authorization` header validation.
    /// Empty string disables header auth.
    #[serde(default)]
    pub auth_header: String,

    /// Maximum accepted HTTP request body size in bytes.
    #[serde(default = "default_api_request_body_limit_bytes")]
    pub request_body_limit_bytes: usize,

    /// Enable runtime snapshots that require read-lock aggregation on API request path.
    #[serde(default = "default_api_minimal_runtime_enabled")]
    pub minimal_runtime_enabled: bool,

    /// Cache TTL for minimal runtime snapshots in milliseconds (0 disables caching).
    #[serde(default = "default_api_minimal_runtime_cache_ttl_ms")]
    pub minimal_runtime_cache_ttl_ms: u64,

    /// Read-only mode: mutating endpoints are rejected.
    #[serde(default)]
    pub read_only: bool,
}

impl Default for ApiConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            listen: default_api_listen(),
            whitelist: default_api_whitelist(),
            auth_header: String::new(),
            request_body_limit_bytes: default_api_request_body_limit_bytes(),
            minimal_runtime_enabled: default_api_minimal_runtime_enabled(),
            minimal_runtime_cache_ttl_ms: default_api_minimal_runtime_cache_ttl_ms(),
            read_only: false,
        }
    }
}
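// Illustrative sketch of the TTL-gated snapshot cache that
// minimal_runtime_cache_ttl_ms implies (hypothetical types, not the crate's
// implementation): a TTL of 0 disables caching, so every API request
// aggregates fresh runtime state.
struct CachedSnapshot<T> {
    value: T,
    taken_at: std::time::Instant,
}

fn snapshot_with_ttl<T: Clone>(
    cache: &mut Option<CachedSnapshot<T>>,
    ttl_ms: u64,
    take_fresh: impl FnOnce() -> T,
) -> T {
    if ttl_ms > 0 {
        if let Some(cached) = cache.as_ref() {
            // Serve the cached snapshot while it is still within the TTL.
            if cached.taken_at.elapsed() < std::time::Duration::from_millis(ttl_ms) {
                return cached.value.clone();
            }
        }
    }
    // Cache disabled or expired: rebuild and remember the new snapshot.
    let fresh = take_fresh();
    *cache = Some(CachedSnapshot {
        value: fresh.clone(),
        taken_at: std::time::Instant::now(),
    });
    fresh
}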
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerConfig {
    #[serde(default = "default_port")]

@@ -772,6 +880,9 @@ pub struct ServerConfig {
    #[serde(default = "default_metrics_whitelist")]
    pub metrics_whitelist: Vec<IpNetwork>,

    #[serde(default, alias = "admin_api")]
    pub api: ApiConfig,

    #[serde(default)]
    pub listeners: Vec<ListenerConfig>,
}

@@ -788,6 +899,7 @@ impl Default for ServerConfig {
            proxy_protocol: false,
            metrics_port: None,
            metrics_whitelist: default_metrics_whitelist(),
            api: ApiConfig::default(),
            listeners: Vec::new(),
        }
    }

src/main.rs (51 changes)

@@ -15,6 +15,7 @@ use tracing_subscriber::{EnvFilter, fmt, prelude::*, reload};
use tokio::net::UnixListener;

mod cli;
mod api;
mod config;
mod crypto;
mod error;

@@ -261,11 +262,16 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
        warn!("Using default tls_domain. Consider setting a custom domain.");
    }

    let stats = Arc::new(Stats::new());
    stats.apply_telemetry_policy(TelemetryPolicy::from_config(&config.general.telemetry));

    let upstream_manager = Arc::new(UpstreamManager::new(
        config.upstreams.clone(),
        config.general.upstream_connect_retry_attempts,
        config.general.upstream_connect_retry_backoff_ms,
        config.general.upstream_unhealthy_fail_threshold,
        config.general.upstream_connect_failfast_hard_errors,
        stats.clone(),
    ));

    let mut tls_domains = Vec::with_capacity(1 + config.censorship.tls_domains.len());

@@ -411,8 +417,6 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {

    let prefer_ipv6 = decision.prefer_ipv6();
    let mut use_middle_proxy = config.general.use_middle_proxy && (decision.ipv4_me || decision.ipv6_me);
    let stats = Arc::new(Stats::new());
    stats.apply_telemetry_policy(TelemetryPolicy::from_config(&config.general.telemetry));
    let beobachten = Arc::new(BeobachtenStore::new());
    let rng = Arc::new(SecureRandom::new());

@@ -531,6 +535,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
        config.general.me_keepalive_interval_secs,
        config.general.me_keepalive_jitter_secs,
        config.general.me_keepalive_payload_random,
        config.general.rpc_proxy_req_every,
        config.general.me_warmup_stagger_enabled,
        config.general.me_warmup_step_delay_ms,
        config.general.me_warmup_step_jitter_ms,

@@ -544,6 +549,10 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
        config.general.me_single_endpoint_outage_backoff_min_ms,
        config.general.me_single_endpoint_outage_backoff_max_ms,
        config.general.me_single_endpoint_shadow_rotate_every_secs,
        config.general.me_floor_mode,
        config.general.me_adaptive_floor_idle_secs,
        config.general.me_adaptive_floor_min_writers_single_endpoint,
        config.general.me_adaptive_floor_recover_grace_secs,
        config.general.hardswap,
        config.general.me_pool_drain_ttl_secs,
        config.general.effective_me_pool_force_close_secs(),

@@ -1144,6 +1153,44 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
        });
    }

    if config.server.api.enabled {
        let listen = match config.server.api.listen.parse::<SocketAddr>() {
            Ok(listen) => listen,
            Err(error) => {
                warn!(
                    error = %error,
                    listen = %config.server.api.listen,
                    "Invalid server.api.listen; API is disabled"
                );
                SocketAddr::from(([127, 0, 0, 1], 0))
            }
        };
        if listen.port() != 0 {
            let stats = stats.clone();
            let ip_tracker_api = ip_tracker.clone();
            let me_pool_api = me_pool.clone();
            let upstream_manager_api = upstream_manager.clone();
            let config_rx_api = config_rx.clone();
            let config_path_api = std::path::PathBuf::from(&config_path);
            let startup_detected_ip_v4 = detected_ip_v4;
            let startup_detected_ip_v6 = detected_ip_v6;
            tokio::spawn(async move {
                api::serve(
                    listen,
                    stats,
                    ip_tracker_api,
                    me_pool_api,
                    upstream_manager_api,
                    config_rx_api,
                    config_path_api,
                    startup_detected_ip_v4,
                    startup_detected_ip_v6,
                )
                .await;
            });
        }
    }
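    // Condensed sketch (hypothetical helper, not main.rs code) of the
    // parse-or-disable pattern above: an unparsable server.api.listen degrades
    // to a 127.0.0.1:0 sentinel, and port 0 means the API task is never
    // spawned rather than the whole process aborting.
    fn parse_api_listen(raw: &str) -> Option<std::net::SocketAddr> {
        let listen = raw
            .parse::<std::net::SocketAddr>()
            .unwrap_or_else(|_| std::net::SocketAddr::from(([127, 0, 0, 1], 0)));
        (listen.port() != 0).then_some(listen)
    }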
    for (listener, listener_proxy_protocol) in listeners {
        let mut config_rx: tokio::sync::watch::Receiver<Arc<ProxyConfig>> = config_rx.clone();
        let stats = stats.clone();

src/metrics.rs (422 changes)

@@ -202,6 +202,195 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_upstream_connect_attempt_total Upstream connect attempts across all requests"
    );
    let _ = writeln!(out, "# TYPE telemt_upstream_connect_attempt_total counter");
    let _ = writeln!(
        out,
        "telemt_upstream_connect_attempt_total {}",
        if core_enabled {
            stats.get_upstream_connect_attempt_total()
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_upstream_connect_success_total Successful upstream connect request cycles"
    );
    let _ = writeln!(out, "# TYPE telemt_upstream_connect_success_total counter");
    let _ = writeln!(
        out,
        "telemt_upstream_connect_success_total {}",
        if core_enabled {
            stats.get_upstream_connect_success_total()
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_upstream_connect_fail_total Failed upstream connect request cycles"
    );
    let _ = writeln!(out, "# TYPE telemt_upstream_connect_fail_total counter");
    let _ = writeln!(
        out,
        "telemt_upstream_connect_fail_total {}",
        if core_enabled {
            stats.get_upstream_connect_fail_total()
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_upstream_connect_failfast_hard_error_total Hard errors that triggered upstream connect failfast"
    );
    let _ = writeln!(
        out,
        "# TYPE telemt_upstream_connect_failfast_hard_error_total counter"
    );
    let _ = writeln!(
        out,
        "telemt_upstream_connect_failfast_hard_error_total {}",
        if core_enabled {
            stats.get_upstream_connect_failfast_hard_error_total()
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_upstream_connect_attempts_per_request Histogram-like buckets for attempts per upstream connect request cycle"
    );
    let _ = writeln!(out, "# TYPE telemt_upstream_connect_attempts_per_request counter");
    let _ = writeln!(
        out,
        "telemt_upstream_connect_attempts_per_request{{bucket=\"1\"}} {}",
        if core_enabled {
            stats.get_upstream_connect_attempts_bucket_1()
        } else {
            0
        }
    );
    let _ = writeln!(
        out,
        "telemt_upstream_connect_attempts_per_request{{bucket=\"2\"}} {}",
        if core_enabled {
            stats.get_upstream_connect_attempts_bucket_2()
        } else {
            0
        }
    );
    let _ = writeln!(
        out,
        "telemt_upstream_connect_attempts_per_request{{bucket=\"3_4\"}} {}",
        if core_enabled {
            stats.get_upstream_connect_attempts_bucket_3_4()
        } else {
            0
        }
    );
    let _ = writeln!(
        out,
        "telemt_upstream_connect_attempts_per_request{{bucket=\"gt_4\"}} {}",
        if core_enabled {
            stats.get_upstream_connect_attempts_bucket_gt_4()
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_upstream_connect_duration_success_total Histogram-like buckets of successful upstream connect cycle duration"
    );
    let _ = writeln!(out, "# TYPE telemt_upstream_connect_duration_success_total counter");
    let _ = writeln!(
        out,
        "telemt_upstream_connect_duration_success_total{{bucket=\"le_100ms\"}} {}",
        if core_enabled {
            stats.get_upstream_connect_duration_success_bucket_le_100ms()
        } else {
            0
        }
    );
    let _ = writeln!(
        out,
        "telemt_upstream_connect_duration_success_total{{bucket=\"101_500ms\"}} {}",
        if core_enabled {
            stats.get_upstream_connect_duration_success_bucket_101_500ms()
        } else {
            0
        }
    );
    let _ = writeln!(
        out,
        "telemt_upstream_connect_duration_success_total{{bucket=\"501_1000ms\"}} {}",
        if core_enabled {
            stats.get_upstream_connect_duration_success_bucket_501_1000ms()
        } else {
            0
        }
    );
    let _ = writeln!(
        out,
        "telemt_upstream_connect_duration_success_total{{bucket=\"gt_1000ms\"}} {}",
        if core_enabled {
            stats.get_upstream_connect_duration_success_bucket_gt_1000ms()
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_upstream_connect_duration_fail_total Histogram-like buckets of failed upstream connect cycle duration"
    );
    let _ = writeln!(out, "# TYPE telemt_upstream_connect_duration_fail_total counter");
    let _ = writeln!(
        out,
        "telemt_upstream_connect_duration_fail_total{{bucket=\"le_100ms\"}} {}",
        if core_enabled {
            stats.get_upstream_connect_duration_fail_bucket_le_100ms()
        } else {
            0
        }
    );
    let _ = writeln!(
        out,
        "telemt_upstream_connect_duration_fail_total{{bucket=\"101_500ms\"}} {}",
        if core_enabled {
            stats.get_upstream_connect_duration_fail_bucket_101_500ms()
        } else {
            0
        }
    );
    let _ = writeln!(
        out,
        "telemt_upstream_connect_duration_fail_total{{bucket=\"501_1000ms\"}} {}",
        if core_enabled {
            stats.get_upstream_connect_duration_fail_bucket_501_1000ms()
        } else {
            0
        }
    );
    let _ = writeln!(
        out,
        "telemt_upstream_connect_duration_fail_total{{bucket=\"gt_1000ms\"}} {}",
        if core_enabled {
            stats.get_upstream_connect_duration_fail_bucket_gt_1000ms()
        } else {
            0
        }
    );

    let _ = writeln!(out, "# HELP telemt_me_keepalive_sent_total ME keepalive frames sent");
    let _ = writeln!(out, "# TYPE telemt_me_keepalive_sent_total counter");
    let _ = writeln!(
@@ -250,6 +439,93 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_rpc_proxy_req_signal_sent_total Service RPC_PROXY_REQ activity signals sent"
    );
    let _ = writeln!(out, "# TYPE telemt_me_rpc_proxy_req_signal_sent_total counter");
    let _ = writeln!(
        out,
        "telemt_me_rpc_proxy_req_signal_sent_total {}",
        if me_allows_normal {
            stats.get_me_rpc_proxy_req_signal_sent_total()
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_rpc_proxy_req_signal_failed_total Service RPC_PROXY_REQ activity signal failures"
    );
    let _ = writeln!(
        out,
        "# TYPE telemt_me_rpc_proxy_req_signal_failed_total counter"
    );
    let _ = writeln!(
        out,
        "telemt_me_rpc_proxy_req_signal_failed_total {}",
        if me_allows_normal {
            stats.get_me_rpc_proxy_req_signal_failed_total()
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_rpc_proxy_req_signal_skipped_no_meta_total Service RPC_PROXY_REQ skipped due to missing writer metadata"
    );
    let _ = writeln!(
        out,
        "# TYPE telemt_me_rpc_proxy_req_signal_skipped_no_meta_total counter"
    );
    let _ = writeln!(
        out,
        "telemt_me_rpc_proxy_req_signal_skipped_no_meta_total {}",
        if me_allows_normal {
            stats.get_me_rpc_proxy_req_signal_skipped_no_meta_total()
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_rpc_proxy_req_signal_response_total Service RPC_PROXY_REQ responses observed"
    );
    let _ = writeln!(
        out,
        "# TYPE telemt_me_rpc_proxy_req_signal_response_total counter"
    );
    let _ = writeln!(
        out,
        "telemt_me_rpc_proxy_req_signal_response_total {}",
        if me_allows_normal {
            stats.get_me_rpc_proxy_req_signal_response_total()
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_rpc_proxy_req_signal_close_sent_total Service RPC_CLOSE_EXT sent after activity signals"
    );
    let _ = writeln!(
        out,
        "# TYPE telemt_me_rpc_proxy_req_signal_close_sent_total counter"
    );
    let _ = writeln!(
        out,
        "telemt_me_rpc_proxy_req_signal_close_sent_total {}",
        if me_allows_normal {
            stats.get_me_rpc_proxy_req_signal_close_sent_total()
        } else {
            0
        }
    );

    let _ = writeln!(out, "# HELP telemt_me_reconnect_attempts_total ME reconnect attempts");
    let _ = writeln!(out, "# TYPE telemt_me_reconnect_attempts_total counter");
    let _ = writeln!(

@@ -311,6 +587,21 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_idle_close_by_peer_total ME idle writers closed by peer"
    );
    let _ = writeln!(out, "# TYPE telemt_me_idle_close_by_peer_total counter");
    let _ = writeln!(
        out,
        "telemt_me_idle_close_by_peer_total {}",
        if me_allows_normal {
            stats.get_me_idle_close_by_peer_total()
        } else {
            0
        }
    );

    let _ = writeln!(out, "# HELP telemt_me_crc_mismatch_total ME CRC mismatches");
    let _ = writeln!(out, "# TYPE telemt_me_crc_mismatch_total counter");
    let _ = writeln!(

@@ -449,6 +740,21 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_kdf_port_only_drift_total ME KDF client-port changes with stable non-port material"
    );
    let _ = writeln!(out, "# TYPE telemt_me_kdf_port_only_drift_total counter");
    let _ = writeln!(
        out,
        "telemt_me_kdf_port_only_drift_total {}",
        if me_allows_debug {
            stats.get_me_kdf_port_only_drift_total()
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_hardswap_pending_reuse_total Hardswap cycles that reused an existing pending generation"

@@ -587,6 +893,82 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_single_endpoint_shadow_rotate_skipped_quarantine_total Shadow rotations skipped because endpoint is quarantined"
    );
    let _ = writeln!(
        out,
        "# TYPE telemt_me_single_endpoint_shadow_rotate_skipped_quarantine_total counter"
    );
    let _ = writeln!(
        out,
        "telemt_me_single_endpoint_shadow_rotate_skipped_quarantine_total {}",
        if me_allows_normal {
            stats.get_me_single_endpoint_shadow_rotate_skipped_quarantine_total()
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_floor_mode Runtime ME writer floor policy mode"
    );
    let _ = writeln!(out, "# TYPE telemt_me_floor_mode gauge");
    let floor_mode = config.general.me_floor_mode;
    let _ = writeln!(
        out,
        "telemt_me_floor_mode{{mode=\"static\"}} {}",
        if matches!(floor_mode, crate::config::MeFloorMode::Static) {
            1
        } else {
            0
        }
    );
    let _ = writeln!(
        out,
        "telemt_me_floor_mode{{mode=\"adaptive\"}} {}",
        if matches!(floor_mode, crate::config::MeFloorMode::Adaptive) {
            1
        } else {
            0
        }
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_floor_mode_switch_all_total Runtime ME floor mode switches"
    );
    let _ = writeln!(out, "# TYPE telemt_me_floor_mode_switch_all_total counter");
    let _ = writeln!(
        out,
        "telemt_me_floor_mode_switch_all_total {}",
        if me_allows_normal {
            stats.get_me_floor_mode_switch_total()
        } else {
            0
        }
    );
    let _ = writeln!(
        out,
        "telemt_me_floor_mode_switch_total{{from=\"static\",to=\"adaptive\"}} {}",
        if me_allows_normal {
            stats.get_me_floor_mode_switch_static_to_adaptive_total()
        } else {
            0
        }
    );
    let _ = writeln!(
        out,
        "telemt_me_floor_mode_switch_total{{from=\"adaptive\",to=\"static\"}} {}",
        if me_allows_normal {
            stats.get_me_floor_mode_switch_adaptive_to_static_total()
        } else {
            0
        }
    );

    let _ = writeln!(out, "# HELP telemt_secure_padding_invalid_total Invalid secure frame lengths");
    let _ = writeln!(out, "# TYPE telemt_secure_padding_invalid_total counter");
    let _ = writeln!(
@@ -679,7 +1061,7 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
    let _ = writeln!(
        out,
        "telemt_pool_swap_total {}",
        if me_allows_debug {
        if me_allows_normal {
            stats.get_pool_swap_total()
        } else {
            0

@@ -937,6 +1319,20 @@ mod tests {
        stats.increment_connects_all();
        stats.increment_connects_bad();
        stats.increment_handshake_timeouts();
        stats.increment_upstream_connect_attempt_total();
        stats.increment_upstream_connect_attempt_total();
        stats.increment_upstream_connect_success_total();
        stats.increment_upstream_connect_fail_total();
        stats.increment_upstream_connect_failfast_hard_error_total();
        stats.observe_upstream_connect_attempts_per_request(2);
        stats.observe_upstream_connect_duration_ms(220, true);
        stats.observe_upstream_connect_duration_ms(1500, false);
        stats.increment_me_rpc_proxy_req_signal_sent_total();
        stats.increment_me_rpc_proxy_req_signal_failed_total();
        stats.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
        stats.increment_me_rpc_proxy_req_signal_response_total();
        stats.increment_me_rpc_proxy_req_signal_close_sent_total();
        stats.increment_me_idle_close_by_peer_total();
        stats.increment_user_connects("alice");
        stats.increment_user_curr_connects("alice");
        stats.add_user_octets_from("alice", 1024);

@@ -954,6 +1350,27 @@ mod tests {
        assert!(output.contains("telemt_connections_total 2"));
        assert!(output.contains("telemt_connections_bad_total 1"));
        assert!(output.contains("telemt_handshake_timeouts_total 1"));
        assert!(output.contains("telemt_upstream_connect_attempt_total 2"));
        assert!(output.contains("telemt_upstream_connect_success_total 1"));
        assert!(output.contains("telemt_upstream_connect_fail_total 1"));
        assert!(output.contains("telemt_upstream_connect_failfast_hard_error_total 1"));
        assert!(
            output.contains("telemt_upstream_connect_attempts_per_request{bucket=\"2\"} 1")
        );
        assert!(
            output.contains(
                "telemt_upstream_connect_duration_success_total{bucket=\"101_500ms\"} 1"
            )
        );
        assert!(
            output.contains("telemt_upstream_connect_duration_fail_total{bucket=\"gt_1000ms\"} 1")
        );
        assert!(output.contains("telemt_me_rpc_proxy_req_signal_sent_total 1"));
        assert!(output.contains("telemt_me_rpc_proxy_req_signal_failed_total 1"));
        assert!(output.contains("telemt_me_rpc_proxy_req_signal_skipped_no_meta_total 1"));
        assert!(output.contains("telemt_me_rpc_proxy_req_signal_response_total 1"));
        assert!(output.contains("telemt_me_rpc_proxy_req_signal_close_sent_total 1"));
        assert!(output.contains("telemt_me_idle_close_by_peer_total 1"));
        assert!(output.contains("telemt_user_connections_total{user=\"alice\"} 1"));
        assert!(output.contains("telemt_user_connections_current{user=\"alice\"} 1"));
        assert!(output.contains("telemt_user_octets_from_client{user=\"alice\"} 1024"));

@@ -987,6 +1404,9 @@ mod tests {
        assert!(output.contains("# TYPE telemt_connections_total counter"));
        assert!(output.contains("# TYPE telemt_connections_bad_total counter"));
        assert!(output.contains("# TYPE telemt_handshake_timeouts_total counter"));
        assert!(output.contains("# TYPE telemt_upstream_connect_attempt_total counter"));
        assert!(output.contains("# TYPE telemt_me_rpc_proxy_req_signal_sent_total counter"));
        assert!(output.contains("# TYPE telemt_me_idle_close_by_peer_total counter"));
        assert!(output.contains("# TYPE telemt_me_writer_removed_total counter"));
        assert!(output.contains(
            "# TYPE telemt_me_writer_removed_unexpected_minus_restored_total gauge"
src/stats/mod.rs (288 changes)

@@ -26,18 +26,41 @@ pub struct Stats {
    connects_all: AtomicU64,
    connects_bad: AtomicU64,
    handshake_timeouts: AtomicU64,
    upstream_connect_attempt_total: AtomicU64,
    upstream_connect_success_total: AtomicU64,
    upstream_connect_fail_total: AtomicU64,
    upstream_connect_failfast_hard_error_total: AtomicU64,
    upstream_connect_attempts_bucket_1: AtomicU64,
    upstream_connect_attempts_bucket_2: AtomicU64,
    upstream_connect_attempts_bucket_3_4: AtomicU64,
    upstream_connect_attempts_bucket_gt_4: AtomicU64,
    upstream_connect_duration_success_bucket_le_100ms: AtomicU64,
    upstream_connect_duration_success_bucket_101_500ms: AtomicU64,
    upstream_connect_duration_success_bucket_501_1000ms: AtomicU64,
    upstream_connect_duration_success_bucket_gt_1000ms: AtomicU64,
    upstream_connect_duration_fail_bucket_le_100ms: AtomicU64,
    upstream_connect_duration_fail_bucket_101_500ms: AtomicU64,
    upstream_connect_duration_fail_bucket_501_1000ms: AtomicU64,
    upstream_connect_duration_fail_bucket_gt_1000ms: AtomicU64,
    me_keepalive_sent: AtomicU64,
    me_keepalive_failed: AtomicU64,
    me_keepalive_pong: AtomicU64,
    me_keepalive_timeout: AtomicU64,
    me_rpc_proxy_req_signal_sent_total: AtomicU64,
    me_rpc_proxy_req_signal_failed_total: AtomicU64,
    me_rpc_proxy_req_signal_skipped_no_meta_total: AtomicU64,
    me_rpc_proxy_req_signal_response_total: AtomicU64,
    me_rpc_proxy_req_signal_close_sent_total: AtomicU64,
    me_reconnect_attempts: AtomicU64,
    me_reconnect_success: AtomicU64,
    me_handshake_reject_total: AtomicU64,
    me_reader_eof_total: AtomicU64,
    me_idle_close_by_peer_total: AtomicU64,
    me_crc_mismatch: AtomicU64,
    me_seq_mismatch: AtomicU64,
    me_endpoint_quarantine_total: AtomicU64,
    me_kdf_drift_total: AtomicU64,
    me_kdf_port_only_drift_total: AtomicU64,
    me_hardswap_pending_reuse_total: AtomicU64,
    me_hardswap_pending_ttl_expired_total: AtomicU64,
    me_single_endpoint_outage_enter_total: AtomicU64,

@@ -46,6 +69,10 @@ pub struct Stats {
    me_single_endpoint_outage_reconnect_success_total: AtomicU64,
    me_single_endpoint_quarantine_bypass_total: AtomicU64,
    me_single_endpoint_shadow_rotate_total: AtomicU64,
    me_single_endpoint_shadow_rotate_skipped_quarantine_total: AtomicU64,
    me_floor_mode_switch_total: AtomicU64,
    me_floor_mode_switch_static_to_adaptive_total: AtomicU64,
    me_floor_mode_switch_adaptive_to_static_total: AtomicU64,
    me_handshake_error_codes: DashMap<i32, AtomicU64>,
    me_route_drop_no_conn: AtomicU64,
    me_route_drop_channel_closed: AtomicU64,

@@ -150,6 +177,99 @@ impl Stats {
            self.handshake_timeouts.fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_upstream_connect_attempt_total(&self) {
        if self.telemetry_core_enabled() {
            self.upstream_connect_attempt_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_upstream_connect_success_total(&self) {
        if self.telemetry_core_enabled() {
            self.upstream_connect_success_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_upstream_connect_fail_total(&self) {
        if self.telemetry_core_enabled() {
            self.upstream_connect_fail_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_upstream_connect_failfast_hard_error_total(&self) {
        if self.telemetry_core_enabled() {
            self.upstream_connect_failfast_hard_error_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn observe_upstream_connect_attempts_per_request(&self, attempts: u32) {
        if !self.telemetry_core_enabled() {
            return;
        }
        match attempts {
            0 => {}
            1 => {
                self.upstream_connect_attempts_bucket_1
                    .fetch_add(1, Ordering::Relaxed);
            }
            2 => {
                self.upstream_connect_attempts_bucket_2
                    .fetch_add(1, Ordering::Relaxed);
            }
            3..=4 => {
                self.upstream_connect_attempts_bucket_3_4
                    .fetch_add(1, Ordering::Relaxed);
            }
            _ => {
                self.upstream_connect_attempts_bucket_gt_4
                    .fetch_add(1, Ordering::Relaxed);
            }
        }
    }
    pub fn observe_upstream_connect_duration_ms(&self, duration_ms: u64, success: bool) {
        if !self.telemetry_core_enabled() {
            return;
        }
        let bucket = match duration_ms {
            0..=100 => 0u8,
            101..=500 => 1u8,
            501..=1000 => 2u8,
            _ => 3u8,
        };
        match (success, bucket) {
            (true, 0) => {
                self.upstream_connect_duration_success_bucket_le_100ms
                    .fetch_add(1, Ordering::Relaxed);
            }
            (true, 1) => {
                self.upstream_connect_duration_success_bucket_101_500ms
                    .fetch_add(1, Ordering::Relaxed);
            }
            (true, 2) => {
                self.upstream_connect_duration_success_bucket_501_1000ms
                    .fetch_add(1, Ordering::Relaxed);
            }
            (true, _) => {
                self.upstream_connect_duration_success_bucket_gt_1000ms
                    .fetch_add(1, Ordering::Relaxed);
            }
            (false, 0) => {
                self.upstream_connect_duration_fail_bucket_le_100ms
                    .fetch_add(1, Ordering::Relaxed);
            }
            (false, 1) => {
                self.upstream_connect_duration_fail_bucket_101_500ms
                    .fetch_add(1, Ordering::Relaxed);
            }
            (false, 2) => {
                self.upstream_connect_duration_fail_bucket_501_1000ms
                    .fetch_add(1, Ordering::Relaxed);
            }
            (false, _) => {
                self.upstream_connect_duration_fail_bucket_gt_1000ms
                    .fetch_add(1, Ordering::Relaxed);
            }
        }
    }
    pub fn increment_me_keepalive_sent(&self) {
        if self.telemetry_me_allows_debug() {
            self.me_keepalive_sent.fetch_add(1, Ordering::Relaxed);
||||
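    // Illustrative usage sketch (hypothetical caller, not pool code): the two
    // observers above give a fixed-bucket view of one upstream connect cycle.
    // Attempts land in buckets 1, 2, 3..=4, >4 (0 is ignored); durations land
    // in <=100ms, 101..=500ms, 501..=1000ms, >1000ms, split by outcome.
    fn record_connect_cycle(stats: &Stats, attempts: u32, started: std::time::Instant, ok: bool) {
        if ok {
            stats.increment_upstream_connect_success_total();
        } else {
            stats.increment_upstream_connect_fail_total();
        }
        stats.observe_upstream_connect_attempts_per_request(attempts);
        let elapsed_ms = started.elapsed().as_millis() as u64;
        stats.observe_upstream_connect_duration_ms(elapsed_ms, ok);
    }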
@@ -175,6 +295,36 @@ impl Stats {
            self.me_keepalive_timeout.fetch_add(value, Ordering::Relaxed);
        }
    }
    pub fn increment_me_rpc_proxy_req_signal_sent_total(&self) {
        if self.telemetry_me_allows_normal() {
            self.me_rpc_proxy_req_signal_sent_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_rpc_proxy_req_signal_failed_total(&self) {
        if self.telemetry_me_allows_normal() {
            self.me_rpc_proxy_req_signal_failed_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_rpc_proxy_req_signal_skipped_no_meta_total(&self) {
        if self.telemetry_me_allows_normal() {
            self.me_rpc_proxy_req_signal_skipped_no_meta_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_rpc_proxy_req_signal_response_total(&self) {
        if self.telemetry_me_allows_normal() {
            self.me_rpc_proxy_req_signal_response_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_rpc_proxy_req_signal_close_sent_total(&self) {
        if self.telemetry_me_allows_normal() {
            self.me_rpc_proxy_req_signal_close_sent_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_reconnect_attempt(&self) {
        if self.telemetry_me_allows_normal() {
            self.me_reconnect_attempts.fetch_add(1, Ordering::Relaxed);

@@ -205,6 +355,12 @@ impl Stats {
            self.me_reader_eof_total.fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_idle_close_by_peer_total(&self) {
        if self.telemetry_me_allows_normal() {
            self.me_idle_close_by_peer_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_crc_mismatch(&self) {
        if self.telemetry_me_allows_normal() {
            self.me_crc_mismatch.fetch_add(1, Ordering::Relaxed);

@@ -290,7 +446,7 @@ impl Stats {
        }
    }
    pub fn increment_pool_swap_total(&self) {
        if self.telemetry_me_allows_debug() {
        if self.telemetry_me_allows_normal() {
            self.pool_swap_total.fetch_add(1, Ordering::Relaxed);
        }
    }

@@ -377,6 +533,12 @@ impl Stats {
            self.me_kdf_drift_total.fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_kdf_port_only_drift_total(&self) {
        if self.telemetry_me_allows_debug() {
            self.me_kdf_port_only_drift_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_hardswap_pending_reuse_total(&self) {
        if self.telemetry_me_allows_debug() {
            self.me_hardswap_pending_reuse_total

@@ -425,12 +587,56 @@ impl Stats {
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_single_endpoint_shadow_rotate_skipped_quarantine_total(&self) {
        if self.telemetry_me_allows_normal() {
            self.me_single_endpoint_shadow_rotate_skipped_quarantine_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_floor_mode_switch_total(&self) {
        if self.telemetry_me_allows_normal() {
            self.me_floor_mode_switch_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_floor_mode_switch_static_to_adaptive_total(&self) {
        if self.telemetry_me_allows_normal() {
            self.me_floor_mode_switch_static_to_adaptive_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn increment_me_floor_mode_switch_adaptive_to_static_total(&self) {
        if self.telemetry_me_allows_normal() {
            self.me_floor_mode_switch_adaptive_to_static_total
                .fetch_add(1, Ordering::Relaxed);
        }
    }
    pub fn get_connects_all(&self) -> u64 { self.connects_all.load(Ordering::Relaxed) }
    pub fn get_connects_bad(&self) -> u64 { self.connects_bad.load(Ordering::Relaxed) }
    pub fn get_me_keepalive_sent(&self) -> u64 { self.me_keepalive_sent.load(Ordering::Relaxed) }
    pub fn get_me_keepalive_failed(&self) -> u64 { self.me_keepalive_failed.load(Ordering::Relaxed) }
    pub fn get_me_keepalive_pong(&self) -> u64 { self.me_keepalive_pong.load(Ordering::Relaxed) }
    pub fn get_me_keepalive_timeout(&self) -> u64 { self.me_keepalive_timeout.load(Ordering::Relaxed) }
    pub fn get_me_rpc_proxy_req_signal_sent_total(&self) -> u64 {
        self.me_rpc_proxy_req_signal_sent_total
            .load(Ordering::Relaxed)
    }
    pub fn get_me_rpc_proxy_req_signal_failed_total(&self) -> u64 {
        self.me_rpc_proxy_req_signal_failed_total
            .load(Ordering::Relaxed)
    }
    pub fn get_me_rpc_proxy_req_signal_skipped_no_meta_total(&self) -> u64 {
        self.me_rpc_proxy_req_signal_skipped_no_meta_total
            .load(Ordering::Relaxed)
    }
    pub fn get_me_rpc_proxy_req_signal_response_total(&self) -> u64 {
        self.me_rpc_proxy_req_signal_response_total
            .load(Ordering::Relaxed)
    }
    pub fn get_me_rpc_proxy_req_signal_close_sent_total(&self) -> u64 {
        self.me_rpc_proxy_req_signal_close_sent_total
            .load(Ordering::Relaxed)
    }
    pub fn get_me_reconnect_attempts(&self) -> u64 { self.me_reconnect_attempts.load(Ordering::Relaxed) }
    pub fn get_me_reconnect_success(&self) -> u64 { self.me_reconnect_success.load(Ordering::Relaxed) }
    pub fn get_me_handshake_reject_total(&self) -> u64 {

@@ -439,6 +645,9 @@ impl Stats {
    pub fn get_me_reader_eof_total(&self) -> u64 {
        self.me_reader_eof_total.load(Ordering::Relaxed)
    }
    pub fn get_me_idle_close_by_peer_total(&self) -> u64 {
        self.me_idle_close_by_peer_total.load(Ordering::Relaxed)
    }
    pub fn get_me_crc_mismatch(&self) -> u64 { self.me_crc_mismatch.load(Ordering::Relaxed) }
    pub fn get_me_seq_mismatch(&self) -> u64 { self.me_seq_mismatch.load(Ordering::Relaxed) }
    pub fn get_me_endpoint_quarantine_total(&self) -> u64 {

@@ -447,6 +656,9 @@ impl Stats {
    pub fn get_me_kdf_drift_total(&self) -> u64 {
        self.me_kdf_drift_total.load(Ordering::Relaxed)
    }
    pub fn get_me_kdf_port_only_drift_total(&self) -> u64 {
        self.me_kdf_port_only_drift_total.load(Ordering::Relaxed)
    }
    pub fn get_me_hardswap_pending_reuse_total(&self) -> u64 {
        self.me_hardswap_pending_reuse_total
            .load(Ordering::Relaxed)

@@ -479,6 +691,21 @@ impl Stats {
        self.me_single_endpoint_shadow_rotate_total
            .load(Ordering::Relaxed)
    }
    pub fn get_me_single_endpoint_shadow_rotate_skipped_quarantine_total(&self) -> u64 {
        self.me_single_endpoint_shadow_rotate_skipped_quarantine_total
            .load(Ordering::Relaxed)
    }
    pub fn get_me_floor_mode_switch_total(&self) -> u64 {
        self.me_floor_mode_switch_total.load(Ordering::Relaxed)
    }
    pub fn get_me_floor_mode_switch_static_to_adaptive_total(&self) -> u64 {
        self.me_floor_mode_switch_static_to_adaptive_total
            .load(Ordering::Relaxed)
    }
    pub fn get_me_floor_mode_switch_adaptive_to_static_total(&self) -> u64 {
        self.me_floor_mode_switch_adaptive_to_static_total
            .load(Ordering::Relaxed)
    }
    pub fn get_me_handshake_error_code_counts(&self) -> Vec<(i32, u64)> {
        let mut out: Vec<(i32, u64)> = self
            .me_handshake_error_codes

@@ -650,6 +877,65 @@ impl Stats {
    }

    pub fn get_handshake_timeouts(&self) -> u64 { self.handshake_timeouts.load(Ordering::Relaxed) }
    pub fn get_upstream_connect_attempt_total(&self) -> u64 {
        self.upstream_connect_attempt_total.load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_success_total(&self) -> u64 {
        self.upstream_connect_success_total.load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_fail_total(&self) -> u64 {
        self.upstream_connect_fail_total.load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_failfast_hard_error_total(&self) -> u64 {
        self.upstream_connect_failfast_hard_error_total
            .load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_attempts_bucket_1(&self) -> u64 {
        self.upstream_connect_attempts_bucket_1.load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_attempts_bucket_2(&self) -> u64 {
        self.upstream_connect_attempts_bucket_2.load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_attempts_bucket_3_4(&self) -> u64 {
        self.upstream_connect_attempts_bucket_3_4
            .load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_attempts_bucket_gt_4(&self) -> u64 {
        self.upstream_connect_attempts_bucket_gt_4
            .load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_duration_success_bucket_le_100ms(&self) -> u64 {
        self.upstream_connect_duration_success_bucket_le_100ms
            .load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_duration_success_bucket_101_500ms(&self) -> u64 {
        self.upstream_connect_duration_success_bucket_101_500ms
            .load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_duration_success_bucket_501_1000ms(&self) -> u64 {
        self.upstream_connect_duration_success_bucket_501_1000ms
            .load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_duration_success_bucket_gt_1000ms(&self) -> u64 {
        self.upstream_connect_duration_success_bucket_gt_1000ms
            .load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_duration_fail_bucket_le_100ms(&self) -> u64 {
        self.upstream_connect_duration_fail_bucket_le_100ms
            .load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_duration_fail_bucket_101_500ms(&self) -> u64 {
        self.upstream_connect_duration_fail_bucket_101_500ms
            .load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_duration_fail_bucket_501_1000ms(&self) -> u64 {
        self.upstream_connect_duration_fail_bucket_501_1000ms
            .load(Ordering::Relaxed)
    }
    pub fn get_upstream_connect_duration_fail_bucket_gt_1000ms(&self) -> u64 {
        self.upstream_connect_duration_fail_bucket_gt_1000ms
            .load(Ordering::Relaxed)
    }

    pub fn iter_user_stats(&self) -> dashmap::iter::Iter<'_, String, UserStats> {
        self.user_stats.iter()

@@ -282,6 +282,10 @@ async fn run_update_cycle(
        cfg.general.me_single_endpoint_outage_backoff_min_ms,
        cfg.general.me_single_endpoint_outage_backoff_max_ms,
        cfg.general.me_single_endpoint_shadow_rotate_every_secs,
        cfg.general.me_floor_mode,
        cfg.general.me_adaptive_floor_idle_secs,
        cfg.general.me_adaptive_floor_min_writers_single_endpoint,
        cfg.general.me_adaptive_floor_recover_grace_secs,
    );

    let required_cfg_snapshots = cfg.general.me_config_stable_snapshots.max(1);

@@ -490,6 +494,10 @@ pub async fn me_config_updater(
        cfg.general.me_single_endpoint_outage_backoff_min_ms,
        cfg.general.me_single_endpoint_outage_backoff_max_ms,
        cfg.general.me_single_endpoint_shadow_rotate_every_secs,
        cfg.general.me_floor_mode,
        cfg.general.me_adaptive_floor_idle_secs,
        cfg.general.me_adaptive_floor_min_writers_single_endpoint,
        cfg.general.me_adaptive_floor_recover_grace_secs,
    );
    let new_secs = cfg.general.effective_update_every_secs().max(1);
    if new_secs == update_every_secs {
@@ -38,6 +38,22 @@ use super::MePool;

const ME_KDF_DRIFT_STRICT: bool = false;

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum KdfClientPortSource {
    LocalSocket = 0,
    SocksBound = 1,
}

impl KdfClientPortSource {
    fn from_socks_bound_port(socks_bound_port: Option<u16>) -> Self {
        if socks_bound_port.is_some() {
            Self::SocksBound
        } else {
            Self::LocalSocket
        }
    }
}

/// Result of a successful ME handshake with timings.
pub(crate) struct HandshakeOutput {
    pub rd: ReadHalf<TcpStream>,

@@ -52,18 +68,18 @@ pub(crate) struct HandshakeOutput {

impl MePool {
    fn kdf_material_fingerprint(
        local_addr_nat: SocketAddr,
        local_ip_nat: IpAddr,
        peer_addr_nat: SocketAddr,
        client_port_for_kdf: u16,
        reflected: Option<SocketAddr>,
        socks_bound_addr: Option<SocketAddr>,
        reflected_ip: Option<IpAddr>,
        socks_bound_ip: Option<IpAddr>,
        client_port_source: KdfClientPortSource,
    ) -> u64 {
        let mut hasher = DefaultHasher::new();
        local_addr_nat.hash(&mut hasher);
        local_ip_nat.hash(&mut hasher);
        peer_addr_nat.hash(&mut hasher);
        client_port_for_kdf.hash(&mut hasher);
        reflected.hash(&mut hasher);
        socks_bound_addr.hash(&mut hasher);
        reflected_ip.hash(&mut hasher);
        socks_bound_ip.hash(&mut hasher);
        client_port_source.hash(&mut hasher);
        hasher.finish()
    }

@@ -359,35 +375,48 @@ impl MePool {

        let ts_bytes = crypto_ts.to_le_bytes();
        let server_port_bytes = peer_addr_nat.port().to_le_bytes();
        let client_port_for_kdf = socks_bound_addr
        let socks_bound_port = socks_bound_addr
            .map(|bound| bound.port())
            .filter(|port| *port != 0)
            .unwrap_or(local_addr_nat.port());
            .filter(|port| *port != 0);
        let client_port_for_kdf = socks_bound_port.unwrap_or(local_addr_nat.port());
        let client_port_source = KdfClientPortSource::from_socks_bound_port(socks_bound_port);
        let kdf_fingerprint = Self::kdf_material_fingerprint(
            local_addr_nat,
            local_addr_nat.ip(),
            peer_addr_nat,
            client_port_for_kdf,
            reflected,
            socks_bound_addr,
            reflected.map(|value| value.ip()),
            socks_bound_addr.map(|value| value.ip()),
            client_port_source,
        );
        let mut kdf_fingerprint_guard = self.kdf_material_fingerprint.lock().await;
        if let Some(prev_fingerprint) = kdf_fingerprint_guard.get(&peer_addr_nat).copied()
            && prev_fingerprint != kdf_fingerprint
        if let Some((prev_fingerprint, prev_client_port)) =
            kdf_fingerprint_guard.get(&peer_addr_nat).copied()
        {
            self.stats.increment_me_kdf_drift_total();
            warn!(
                %peer_addr_nat,
                %local_addr_nat,
                client_port_for_kdf,
                "ME KDF input drift detected for endpoint"
            );
            if ME_KDF_DRIFT_STRICT {
                return Err(ProxyError::InvalidHandshake(
                    "ME KDF input drift detected (strict mode)".to_string(),
                ));
            if prev_fingerprint != kdf_fingerprint {
                self.stats.increment_me_kdf_drift_total();
                warn!(
                    %peer_addr_nat,
                    %local_addr_nat,
                    client_port_for_kdf,
                    client_port_source = ?client_port_source,
                    "ME KDF material drift detected for endpoint"
                );
                if ME_KDF_DRIFT_STRICT {
                    return Err(ProxyError::InvalidHandshake(
                        "ME KDF material drift detected (strict mode)".to_string(),
                    ));
                }
            } else if prev_client_port != client_port_for_kdf {
                self.stats.increment_me_kdf_port_only_drift_total();
                debug!(
                    %peer_addr_nat,
                    previous_client_port_for_kdf = prev_client_port,
                    client_port_for_kdf,
                    client_port_source = ?client_port_source,
                    "ME KDF client port changed with stable material"
                );
            }
        }
        kdf_fingerprint_guard.insert(peer_addr_nat, kdf_fingerprint);
        kdf_fingerprint_guard.insert(peer_addr_nat, (kdf_fingerprint, client_port_for_kdf));
        drop(kdf_fingerprint_guard);

        let client_port_bytes = client_port_for_kdf.to_le_bytes();
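        // Reduced sketch (hypothetical standalone form, not pool code) of the
        // two drift classes the branch above distinguishes per endpoint: a
        // changed fingerprint is material drift (warned, optionally fatal in
        // strict mode), while a stable fingerprint with a changed client port
        // is the benign port-only case, logged at debug level. The tuple
        // mirrors the (fingerprint, client_port) value stored in the map.
        enum KdfDrift {
            None,
            PortOnly,
            Material,
        }

        fn classify_kdf_drift(
            prev: Option<(u64, u16)>,
            fingerprint: u64,
            client_port: u16,
        ) -> KdfDrift {
            match prev {
                Some((prev_fp, _)) if prev_fp != fingerprint => KdfDrift::Material,
                Some((_, prev_port)) if prev_port != client_port => KdfDrift::PortOnly,
                _ => KdfDrift::None,
            }
        }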
@@ -7,6 +7,7 @@ use std::time::{Duration, Instant};
use rand::Rng;
use tracing::{debug, info, warn};

use crate::config::MeFloorMode;
use crate::crypto::SecureRandom;
use crate::network::IpFamily;

@@ -17,6 +18,10 @@ const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff
#[allow(dead_code)]
const MAX_CONCURRENT_PER_DC_DEFAULT: usize = 1;
const SHADOW_ROTATE_RETRY_SECS: u64 = 30;
const IDLE_REFRESH_TRIGGER_BASE_SECS: u64 = 45;
const IDLE_REFRESH_TRIGGER_JITTER_SECS: u64 = 5;
const IDLE_REFRESH_RETRY_SECS: u64 = 8;
const IDLE_REFRESH_SUCCESS_GUARD_SECS: u64 = 5;
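// Hypothetical sketch of how a jittered idle-refresh trigger could be derived
// from the two constants above (the real scheduling lives in
// maybe_refresh_idle_writer_for_dc below): a 45s base plus 0..=5s of jitter,
// so refreshes across DC groups do not fire in lockstep. `jitter_seed` stands
// in for a value drawn from the pool's SecureRandom.
fn idle_refresh_trigger_secs(jitter_seed: u64) -> u64 {
    IDLE_REFRESH_TRIGGER_BASE_SECS + jitter_seed % (IDLE_REFRESH_TRIGGER_JITTER_SECS + 1)
}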
pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_connections: usize) {
    let mut backoff: HashMap<(i32, IpFamily), u64> = HashMap::new();

@@ -26,6 +31,9 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
    let mut outage_next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new();
    let mut single_endpoint_outage: HashSet<(i32, IpFamily)> = HashSet::new();
    let mut shadow_rotate_deadline: HashMap<(i32, IpFamily), Instant> = HashMap::new();
    let mut idle_refresh_next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new();
    let mut adaptive_idle_since: HashMap<(i32, IpFamily), Instant> = HashMap::new();
    let mut adaptive_recover_until: HashMap<(i32, IpFamily), Instant> = HashMap::new();
    loop {
        tokio::time::sleep(Duration::from_secs(HEALTH_INTERVAL_SECS)).await;
        pool.prune_closed_writers().await;

@@ -40,6 +48,9 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
            &mut outage_next_attempt,
            &mut single_endpoint_outage,
            &mut shadow_rotate_deadline,
            &mut idle_refresh_next_attempt,
            &mut adaptive_idle_since,
            &mut adaptive_recover_until,
        )
        .await;
        check_family(

@@ -53,6 +64,9 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
            &mut outage_next_attempt,
            &mut single_endpoint_outage,
            &mut shadow_rotate_deadline,
            &mut idle_refresh_next_attempt,
            &mut adaptive_idle_since,
            &mut adaptive_recover_until,
        )
        .await;
    }

@@ -69,6 +83,9 @@ async fn check_family(
    outage_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
    single_endpoint_outage: &mut HashSet<(i32, IpFamily)>,
    shadow_rotate_deadline: &mut HashMap<(i32, IpFamily), Instant>,
    idle_refresh_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
    adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
    adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
) {
    let enabled = match family {
        IpFamily::V4 => pool.decision.ipv4_me,

@@ -95,6 +112,11 @@ async fn check_family(
        endpoints.dedup();
    }

    if pool.floor_mode() == MeFloorMode::Static {
        adaptive_idle_since.clear();
        adaptive_recover_until.clear();
    }

    let mut live_addr_counts = HashMap::<SocketAddr, usize>::new();
    let mut live_writer_ids_by_addr = HashMap::<SocketAddr, Vec<u64>>::new();
    for writer in pool.writers.read().await.iter().filter(|w| {

@@ -106,17 +128,27 @@ async fn check_family(
            .or_default()
            .push(writer.id);
    }
    let writer_idle_since = pool.registry.writer_idle_since_snapshot().await;

    for (dc, endpoints) in dc_endpoints {
        if endpoints.is_empty() {
            continue;
        }
        let required = pool.required_writers_for_dc(endpoints.len());
        let key = (dc, family);
        let reduce_for_idle = should_reduce_floor_for_idle(
            pool,
            key,
            &endpoints,
            &live_writer_ids_by_addr,
            adaptive_idle_since,
            adaptive_recover_until,
        )
        .await;
        let required = pool.required_writers_for_dc_with_floor_mode(endpoints.len(), reduce_for_idle);
        let alive = endpoints
            .iter()
            .map(|addr| *live_addr_counts.get(addr).unwrap_or(&0))
            .sum::<usize>();
        let key = (dc, family);

        if endpoints.len() == 1 && pool.single_endpoint_outage_mode_enabled() && alive == 0 {
            if single_endpoint_outage.insert(key) {

@@ -148,6 +180,9 @@ async fn check_family(
            outage_backoff.remove(&key);
            outage_next_attempt.remove(&key);
            shadow_rotate_deadline.remove(&key);
            idle_refresh_next_attempt.remove(&key);
            adaptive_idle_since.remove(&key);
            adaptive_recover_until.remove(&key);
            info!(
                dc = %dc,
                ?family,

@@ -159,6 +194,20 @@ async fn check_family(
        }

        if alive >= required {
            maybe_refresh_idle_writer_for_dc(
                pool,
                rng,
                key,
                dc,
                family,
                &endpoints,
                alive,
                required,
                &live_writer_ids_by_addr,
                &writer_idle_since,
                idle_refresh_next_attempt,
            )
            .await;
            maybe_rotate_single_endpoint_shadow(
                pool,
                rng,

@@ -262,6 +311,161 @@ async fn check_family(
    }
}

async fn maybe_refresh_idle_writer_for_dc(
    pool: &Arc<MePool>,
rng: &Arc<SecureRandom>,
|
||||
key: (i32, IpFamily),
|
||||
dc: i32,
|
||||
family: IpFamily,
|
||||
endpoints: &[SocketAddr],
|
||||
alive: usize,
|
||||
required: usize,
|
||||
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
|
||||
writer_idle_since: &HashMap<u64, u64>,
|
||||
idle_refresh_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
) {
|
||||
if alive < required {
|
||||
return;
|
||||
}
|
||||
|
||||
let now = Instant::now();
|
||||
if let Some(next) = idle_refresh_next_attempt.get(&key)
|
||||
&& now < *next
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let mut candidate: Option<(u64, SocketAddr, u64, u64)> = None;
|
||||
for endpoint in endpoints {
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(endpoint) else {
|
||||
continue;
|
||||
};
|
||||
for writer_id in writer_ids {
|
||||
let Some(idle_since_epoch_secs) = writer_idle_since.get(writer_id).copied() else {
|
||||
continue;
|
||||
};
|
||||
let idle_age_secs = now_epoch_secs.saturating_sub(idle_since_epoch_secs);
|
||||
let threshold_secs = IDLE_REFRESH_TRIGGER_BASE_SECS
|
||||
+ (*writer_id % (IDLE_REFRESH_TRIGGER_JITTER_SECS + 1));
|
||||
if idle_age_secs < threshold_secs {
|
||||
continue;
|
||||
}
|
||||
if candidate
|
||||
.as_ref()
|
||||
.map(|(_, _, age, _)| idle_age_secs > *age)
|
||||
.unwrap_or(true)
|
||||
{
|
||||
candidate = Some((*writer_id, *endpoint, idle_age_secs, threshold_secs));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let Some((old_writer_id, endpoint, idle_age_secs, threshold_secs)) = candidate else {
|
||||
return;
|
||||
};
|
||||
|
||||
let rotate_ok = match tokio::time::timeout(pool.me_one_timeout, pool.connect_one(endpoint, rng.as_ref())).await {
|
||||
Ok(Ok(())) => true,
|
||||
Ok(Err(error)) => {
|
||||
debug!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
%endpoint,
|
||||
old_writer_id,
|
||||
idle_age_secs,
|
||||
threshold_secs,
|
||||
%error,
|
||||
"Idle writer pre-refresh connect failed"
|
||||
);
|
||||
false
|
||||
}
|
||||
Err(_) => {
|
||||
debug!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
%endpoint,
|
||||
old_writer_id,
|
||||
idle_age_secs,
|
||||
threshold_secs,
|
||||
"Idle writer pre-refresh connect timed out"
|
||||
);
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
if !rotate_ok {
|
||||
idle_refresh_next_attempt.insert(key, now + Duration::from_secs(IDLE_REFRESH_RETRY_SECS));
|
||||
return;
|
||||
}
|
||||
|
||||
pool.mark_writer_draining_with_timeout(old_writer_id, pool.force_close_timeout(), false)
|
||||
.await;
|
||||
idle_refresh_next_attempt.insert(
|
||||
key,
|
||||
now + Duration::from_secs(IDLE_REFRESH_SUCCESS_GUARD_SECS),
|
||||
);
|
||||
info!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
%endpoint,
|
||||
old_writer_id,
|
||||
idle_age_secs,
|
||||
threshold_secs,
|
||||
alive,
|
||||
required,
|
||||
"Idle writer refreshed before upstream idle timeout"
|
||||
);
|
||||
}
|
||||
|
||||
async fn should_reduce_floor_for_idle(
|
||||
pool: &Arc<MePool>,
|
||||
key: (i32, IpFamily),
|
||||
endpoints: &[SocketAddr],
|
||||
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
|
||||
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
) -> bool {
|
||||
if endpoints.len() != 1 || pool.floor_mode() != MeFloorMode::Adaptive {
|
||||
adaptive_idle_since.remove(&key);
|
||||
adaptive_recover_until.remove(&key);
|
||||
return false;
|
||||
}
|
||||
|
||||
let now = Instant::now();
|
||||
let endpoint = endpoints[0];
|
||||
let writer_ids = live_writer_ids_by_addr
|
||||
.get(&endpoint)
|
||||
.map(Vec::as_slice)
|
||||
.unwrap_or(&[]);
|
||||
let has_bound_clients = has_bound_clients_on_endpoint(pool, writer_ids).await;
|
||||
if has_bound_clients {
|
||||
adaptive_idle_since.remove(&key);
|
||||
adaptive_recover_until.insert(key, now + pool.adaptive_floor_recover_grace_duration());
|
||||
return false;
|
||||
}
|
||||
|
||||
if let Some(recover_until) = adaptive_recover_until.get(&key)
|
||||
&& now < *recover_until
|
||||
{
|
||||
adaptive_idle_since.remove(&key);
|
||||
return false;
|
||||
}
|
||||
adaptive_recover_until.remove(&key);
|
||||
|
||||
let idle_since = adaptive_idle_since.entry(key).or_insert(now);
|
||||
now.saturating_duration_since(*idle_since) >= pool.adaptive_floor_idle_duration()
|
||||
}
|
||||
|
||||
async fn has_bound_clients_on_endpoint(pool: &Arc<MePool>, writer_ids: &[u64]) -> bool {
|
||||
for writer_id in writer_ids {
|
||||
if !pool.registry.is_writer_empty(*writer_id).await {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
async fn recover_single_endpoint_outage(
|
||||
pool: &Arc<MePool>,
|
||||
rng: &Arc<SecureRandom>,
|
||||
@@ -395,6 +599,19 @@ async fn maybe_rotate_single_endpoint_shadow(
|
||||
}
|
||||
|
||||
let endpoint = endpoints[0];
|
||||
if pool.is_endpoint_quarantined(endpoint).await {
|
||||
pool.stats
|
||||
.increment_me_single_endpoint_shadow_rotate_skipped_quarantine_total();
|
||||
shadow_rotate_deadline.insert(key, now + Duration::from_secs(SHADOW_ROTATE_RETRY_SECS));
|
||||
debug!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
%endpoint,
|
||||
"Single-endpoint shadow rotation skipped: endpoint is quarantined"
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(&endpoint) else {
|
||||
shadow_rotate_deadline.insert(key, now + Duration::from_secs(SHADOW_ROTATE_RETRY_SECS));
|
||||
return;
|
||||
|
||||
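Note on the idle-refresh hunks above: each writer's trigger threshold is the 45 s base plus a deterministic 0..=5 s offset derived from its id, so refreshes spread across health ticks instead of all firing at once. A self-contained sketch of that math:

// Sketch of the idle-refresh trigger computation from the health monitor above.
const IDLE_REFRESH_TRIGGER_BASE_SECS: u64 = 45;
const IDLE_REFRESH_TRIGGER_JITTER_SECS: u64 = 5;

fn idle_refresh_due(writer_id: u64, idle_since_epoch_secs: u64, now_epoch_secs: u64) -> bool {
    let idle_age_secs = now_epoch_secs.saturating_sub(idle_since_epoch_secs);
    // Per-writer offset is writer_id % 6, i.e. thresholds of 45..=50 seconds.
    let threshold_secs =
        IDLE_REFRESH_TRIGGER_BASE_SECS + (writer_id % (IDLE_REFRESH_TRIGGER_JITTER_SECS + 1));
    idle_age_secs >= threshold_secs
}

fn main() {
    assert!(idle_refresh_due(0, 0, 45)); // writer 0: threshold 45 s
    assert!(!idle_refresh_due(5, 0, 49)); // writer 5: threshold 50 s, not yet due
    assert!(idle_refresh_due(5, 0, 50));
}
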
@@ -18,6 +18,7 @@ mod rotation;
 mod send;
 mod secret;
 mod wire;
+mod pool_status;
 
 use bytes::Bytes;
 

@@ -7,7 +7,7 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
 use tokio::sync::{Mutex, Notify, RwLock, mpsc};
 use tokio_util::sync::CancellationToken;
 
-use crate::config::{MeBindStaleMode, MeSocksKdfPolicy};
+use crate::config::{MeBindStaleMode, MeFloorMode, MeSocksKdfPolicy};
 use crate::crypto::SecureRandom;
 use crate::network::IpFamily;
 use crate::network::probe::NetworkDecision;
@@ -94,6 +94,7 @@ pub struct MePool {
     pub(super) me_keepalive_interval: Duration,
     pub(super) me_keepalive_jitter: Duration,
     pub(super) me_keepalive_payload_random: bool,
+    pub(super) rpc_proxy_req_every_secs: AtomicU64,
     pub(super) me_warmup_stagger_enabled: bool,
     pub(super) me_warmup_step_delay: Duration,
     pub(super) me_warmup_step_jitter: Duration,
@@ -107,6 +108,10 @@ pub struct MePool {
     pub(super) me_single_endpoint_outage_backoff_min_ms: AtomicU64,
     pub(super) me_single_endpoint_outage_backoff_max_ms: AtomicU64,
     pub(super) me_single_endpoint_shadow_rotate_every_secs: AtomicU64,
+    pub(super) me_floor_mode: AtomicU8,
+    pub(super) me_adaptive_floor_idle_secs: AtomicU64,
+    pub(super) me_adaptive_floor_min_writers_single_endpoint: AtomicU8,
+    pub(super) me_adaptive_floor_recover_grace_secs: AtomicU64,
     pub(super) proxy_map_v4: Arc<RwLock<HashMap<i32, Vec<(IpAddr, u16)>>>>,
     pub(super) proxy_map_v6: Arc<RwLock<HashMap<i32, Vec<(IpAddr, u16)>>>>,
     pub(super) default_dc: AtomicI32,
@@ -127,7 +132,7 @@ pub struct MePool {
     pub(super) pending_hardswap_map_hash: AtomicU64,
     pub(super) hardswap: AtomicBool,
     pub(super) endpoint_quarantine: Arc<Mutex<HashMap<SocketAddr, Instant>>>,
-    pub(super) kdf_material_fingerprint: Arc<Mutex<HashMap<SocketAddr, u64>>>,
+    pub(super) kdf_material_fingerprint: Arc<Mutex<HashMap<SocketAddr, (u64, u16)>>>,
     pub(super) me_pool_drain_ttl_secs: AtomicU64,
     pub(super) me_pool_force_close_secs: AtomicU64,
     pub(super) me_pool_min_fresh_ratio_permille: AtomicU32,
@@ -188,6 +193,7 @@ impl MePool {
         me_keepalive_interval_secs: u64,
         me_keepalive_jitter_secs: u64,
         me_keepalive_payload_random: bool,
+        rpc_proxy_req_every_secs: u64,
         me_warmup_stagger_enabled: bool,
         me_warmup_step_delay_ms: u64,
         me_warmup_step_jitter_ms: u64,
@@ -201,6 +207,10 @@ impl MePool {
         me_single_endpoint_outage_backoff_min_ms: u64,
         me_single_endpoint_outage_backoff_max_ms: u64,
         me_single_endpoint_shadow_rotate_every_secs: u64,
+        me_floor_mode: MeFloorMode,
+        me_adaptive_floor_idle_secs: u64,
+        me_adaptive_floor_min_writers_single_endpoint: u8,
+        me_adaptive_floor_recover_grace_secs: u64,
         hardswap: bool,
         me_pool_drain_ttl_secs: u64,
         me_pool_force_close_secs: u64,
@@ -264,6 +274,7 @@ impl MePool {
             me_keepalive_interval: Duration::from_secs(me_keepalive_interval_secs),
             me_keepalive_jitter: Duration::from_secs(me_keepalive_jitter_secs),
             me_keepalive_payload_random,
+            rpc_proxy_req_every_secs: AtomicU64::new(rpc_proxy_req_every_secs),
             me_warmup_stagger_enabled,
             me_warmup_step_delay: Duration::from_millis(me_warmup_step_delay_ms),
             me_warmup_step_jitter: Duration::from_millis(me_warmup_step_jitter_ms),
@@ -287,6 +298,14 @@ impl MePool {
             me_single_endpoint_shadow_rotate_every_secs: AtomicU64::new(
                 me_single_endpoint_shadow_rotate_every_secs,
             ),
+            me_floor_mode: AtomicU8::new(me_floor_mode.as_u8()),
+            me_adaptive_floor_idle_secs: AtomicU64::new(me_adaptive_floor_idle_secs),
+            me_adaptive_floor_min_writers_single_endpoint: AtomicU8::new(
+                me_adaptive_floor_min_writers_single_endpoint,
+            ),
+            me_adaptive_floor_recover_grace_secs: AtomicU64::new(
+                me_adaptive_floor_recover_grace_secs,
+            ),
             pool_size: 2,
             proxy_map_v4: Arc::new(RwLock::new(proxy_map_v4)),
             proxy_map_v6: Arc::new(RwLock::new(proxy_map_v6)),
@@ -351,6 +370,10 @@ impl MePool {
         single_endpoint_outage_backoff_min_ms: u64,
         single_endpoint_outage_backoff_max_ms: u64,
         single_endpoint_shadow_rotate_every_secs: u64,
+        floor_mode: MeFloorMode,
+        adaptive_floor_idle_secs: u64,
+        adaptive_floor_min_writers_single_endpoint: u8,
+        adaptive_floor_recover_grace_secs: u64,
     ) {
         self.hardswap.store(hardswap, Ordering::Relaxed);
         self.me_pool_drain_ttl_secs
@@ -387,6 +410,29 @@ impl MePool {
             .store(single_endpoint_outage_backoff_max_ms, Ordering::Relaxed);
         self.me_single_endpoint_shadow_rotate_every_secs
             .store(single_endpoint_shadow_rotate_every_secs, Ordering::Relaxed);
+        let previous_floor_mode = self.floor_mode();
+        self.me_floor_mode
+            .store(floor_mode.as_u8(), Ordering::Relaxed);
+        self.me_adaptive_floor_idle_secs
+            .store(adaptive_floor_idle_secs, Ordering::Relaxed);
+        self.me_adaptive_floor_min_writers_single_endpoint
+            .store(adaptive_floor_min_writers_single_endpoint, Ordering::Relaxed);
+        self.me_adaptive_floor_recover_grace_secs
+            .store(adaptive_floor_recover_grace_secs, Ordering::Relaxed);
+        if previous_floor_mode != floor_mode {
+            self.stats.increment_me_floor_mode_switch_total();
+            match (previous_floor_mode, floor_mode) {
+                (MeFloorMode::Static, MeFloorMode::Adaptive) => {
+                    self.stats
+                        .increment_me_floor_mode_switch_static_to_adaptive_total();
+                }
+                (MeFloorMode::Adaptive, MeFloorMode::Static) => {
+                    self.stats
+                        .increment_me_floor_mode_switch_adaptive_to_static_total();
+                }
+                _ => {}
+            }
+        }
     }
 
     pub fn reset_stun_state(&self) {
@@ -464,6 +510,40 @@ impl MePool {
         endpoint_count.max(3)
     }
 
+    pub(super) fn floor_mode(&self) -> MeFloorMode {
+        MeFloorMode::from_u8(self.me_floor_mode.load(Ordering::Relaxed))
+    }
+
+    pub(super) fn adaptive_floor_idle_duration(&self) -> Duration {
+        Duration::from_secs(self.me_adaptive_floor_idle_secs.load(Ordering::Relaxed))
+    }
+
+    pub(super) fn adaptive_floor_recover_grace_duration(&self) -> Duration {
+        Duration::from_secs(
+            self.me_adaptive_floor_recover_grace_secs
+                .load(Ordering::Relaxed),
+        )
+    }
+
+    pub(super) fn required_writers_for_dc_with_floor_mode(
+        &self,
+        endpoint_count: usize,
+        reduce_for_idle: bool,
+    ) -> usize {
+        let base_required = self.required_writers_for_dc(endpoint_count);
+        if !reduce_for_idle {
+            return base_required;
+        }
+        if endpoint_count != 1 || self.floor_mode() != MeFloorMode::Adaptive {
+            return base_required;
+        }
+        let min_writers = (self
+            .me_adaptive_floor_min_writers_single_endpoint
+            .load(Ordering::Relaxed) as usize)
+            .max(1);
+        base_required.min(min_writers)
+    }
+
     pub(super) fn single_endpoint_outage_mode_enabled(&self) -> bool {
         self.me_single_endpoint_outage_mode_enabled
             .load(Ordering::Relaxed)

@@ -37,7 +37,7 @@ impl MePool {
         );
     }
 
-    async fn is_endpoint_quarantined(&self, addr: SocketAddr) -> bool {
+    pub(super) async fn is_endpoint_quarantined(&self, addr: SocketAddr) -> bool {
         let mut guard = self.endpoint_quarantine.lock().await;
         let now = Instant::now();
         guard.retain(|_, expiry| *expiry > now);

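Note on the pool hunks above: `MeFloorMode` is stored in an `AtomicU8` through `as_u8()`/`from_u8()`, whose definitions live in `crate::config` and are not part of this diff. A plausible round-trip sketch of that encoding, hypothetical and assuming only the two variants visible here:

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MeFloorMode {
    Static,
    Adaptive,
}

impl MeFloorMode {
    // Hypothetical encoding; the real impl is not shown in this diff.
    // Unknown values fall back to Static as the conservative default.
    pub fn as_u8(self) -> u8 {
        match self {
            MeFloorMode::Static => 0,
            MeFloorMode::Adaptive => 1,
        }
    }

    pub fn from_u8(value: u8) -> Self {
        match value {
            1 => MeFloorMode::Adaptive,
            _ => MeFloorMode::Static,
        }
    }
}
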
src/transport/middle_proxy/pool_status.rs (new file, 424 lines)
@@ -0,0 +1,424 @@
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::net::SocketAddr;
use std::sync::atomic::Ordering;
use std::time::Instant;

use super::pool::{MePool, WriterContour};
use crate::config::{MeBindStaleMode, MeFloorMode, MeSocksKdfPolicy};
use crate::transport::upstream::IpPreference;

#[derive(Clone, Debug)]
pub(crate) struct MeApiWriterStatusSnapshot {
    pub writer_id: u64,
    pub dc: Option<i16>,
    pub endpoint: SocketAddr,
    pub generation: u64,
    pub state: &'static str,
    pub draining: bool,
    pub degraded: bool,
    pub bound_clients: usize,
    pub idle_for_secs: Option<u64>,
    pub rtt_ema_ms: Option<f64>,
}

#[derive(Clone, Debug)]
pub(crate) struct MeApiDcStatusSnapshot {
    pub dc: i16,
    pub endpoints: Vec<SocketAddr>,
    pub available_endpoints: usize,
    pub available_pct: f64,
    pub required_writers: usize,
    pub alive_writers: usize,
    pub coverage_pct: f64,
    pub rtt_ms: Option<f64>,
    pub load: usize,
}

#[derive(Clone, Debug)]
pub(crate) struct MeApiStatusSnapshot {
    pub generated_at_epoch_secs: u64,
    pub configured_dc_groups: usize,
    pub configured_endpoints: usize,
    pub available_endpoints: usize,
    pub available_pct: f64,
    pub required_writers: usize,
    pub alive_writers: usize,
    pub coverage_pct: f64,
    pub writers: Vec<MeApiWriterStatusSnapshot>,
    pub dcs: Vec<MeApiDcStatusSnapshot>,
}

#[derive(Clone, Debug)]
pub(crate) struct MeApiQuarantinedEndpointSnapshot {
    pub endpoint: SocketAddr,
    pub remaining_ms: u64,
}

#[derive(Clone, Debug)]
pub(crate) struct MeApiDcPathSnapshot {
    pub dc: i16,
    pub ip_preference: Option<&'static str>,
    pub selected_addr_v4: Option<SocketAddr>,
    pub selected_addr_v6: Option<SocketAddr>,
}

#[derive(Clone, Debug)]
pub(crate) struct MeApiRuntimeSnapshot {
    pub active_generation: u64,
    pub warm_generation: u64,
    pub pending_hardswap_generation: u64,
    pub pending_hardswap_age_secs: Option<u64>,
    pub hardswap_enabled: bool,
    pub floor_mode: &'static str,
    pub adaptive_floor_idle_secs: u64,
    pub adaptive_floor_min_writers_single_endpoint: u8,
    pub adaptive_floor_recover_grace_secs: u64,
    pub me_keepalive_enabled: bool,
    pub me_keepalive_interval_secs: u64,
    pub me_keepalive_jitter_secs: u64,
    pub me_keepalive_payload_random: bool,
    pub rpc_proxy_req_every_secs: u64,
    pub me_reconnect_max_concurrent_per_dc: u32,
    pub me_reconnect_backoff_base_ms: u64,
    pub me_reconnect_backoff_cap_ms: u64,
    pub me_reconnect_fast_retry_count: u32,
    pub me_pool_drain_ttl_secs: u64,
    pub me_pool_force_close_secs: u64,
    pub me_pool_min_fresh_ratio: f32,
    pub me_bind_stale_mode: &'static str,
    pub me_bind_stale_ttl_secs: u64,
    pub me_single_endpoint_shadow_writers: u8,
    pub me_single_endpoint_outage_mode_enabled: bool,
    pub me_single_endpoint_outage_disable_quarantine: bool,
    pub me_single_endpoint_outage_backoff_min_ms: u64,
    pub me_single_endpoint_outage_backoff_max_ms: u64,
    pub me_single_endpoint_shadow_rotate_every_secs: u64,
    pub me_deterministic_writer_sort: bool,
    pub me_socks_kdf_policy: &'static str,
    pub quarantined_endpoints: Vec<MeApiQuarantinedEndpointSnapshot>,
    pub network_path: Vec<MeApiDcPathSnapshot>,
}

impl MePool {
    pub(crate) async fn api_status_snapshot(&self) -> MeApiStatusSnapshot {
        let now_epoch_secs = Self::now_epoch_secs();

        let mut endpoints_by_dc = BTreeMap::<i16, BTreeSet<SocketAddr>>::new();
        if self.decision.ipv4_me {
            let map = self.proxy_map_v4.read().await.clone();
            for (dc, addrs) in map {
                let abs_dc = dc.abs();
                if abs_dc == 0 {
                    continue;
                }
                let Ok(dc_idx) = i16::try_from(abs_dc) else {
                    continue;
                };
                let entry = endpoints_by_dc.entry(dc_idx).or_default();
                for (ip, port) in addrs {
                    entry.insert(SocketAddr::new(ip, port));
                }
            }
        }
        if self.decision.ipv6_me {
            let map = self.proxy_map_v6.read().await.clone();
            for (dc, addrs) in map {
                let abs_dc = dc.abs();
                if abs_dc == 0 {
                    continue;
                }
                let Ok(dc_idx) = i16::try_from(abs_dc) else {
                    continue;
                };
                let entry = endpoints_by_dc.entry(dc_idx).or_default();
                for (ip, port) in addrs {
                    entry.insert(SocketAddr::new(ip, port));
                }
            }
        }

        let mut endpoint_to_dc = HashMap::<SocketAddr, i16>::new();
        for (dc, endpoints) in &endpoints_by_dc {
            for endpoint in endpoints {
                endpoint_to_dc.entry(*endpoint).or_insert(*dc);
            }
        }

        let configured_dc_groups = endpoints_by_dc.len();
        let configured_endpoints = endpoints_by_dc.values().map(BTreeSet::len).sum();

        let required_writers = endpoints_by_dc
            .values()
            .map(|endpoints| self.required_writers_for_dc_with_floor_mode(endpoints.len(), false))
            .sum();

        let idle_since = self.registry.writer_idle_since_snapshot().await;
        let activity = self.registry.writer_activity_snapshot().await;
        let rtt = self.rtt_stats.lock().await.clone();
        let writers = self.writers.read().await.clone();

        let mut live_writers_by_endpoint = HashMap::<SocketAddr, usize>::new();
        let mut live_writers_by_dc = HashMap::<i16, usize>::new();
        let mut dc_rtt_agg = HashMap::<i16, (f64, u64)>::new();
        let mut writer_rows = Vec::<MeApiWriterStatusSnapshot>::with_capacity(writers.len());

        for writer in writers {
            let endpoint = writer.addr;
            let dc = endpoint_to_dc.get(&endpoint).copied();
            let draining = writer.draining.load(Ordering::Relaxed);
            let degraded = writer.degraded.load(Ordering::Relaxed);
            let bound_clients = activity
                .bound_clients_by_writer
                .get(&writer.id)
                .copied()
                .unwrap_or(0);
            let idle_for_secs = idle_since
                .get(&writer.id)
                .map(|idle_ts| now_epoch_secs.saturating_sub(*idle_ts));
            let rtt_ema_ms = rtt.get(&writer.id).map(|(_, ema)| *ema);
            let state = match WriterContour::from_u8(writer.contour.load(Ordering::Relaxed)) {
                WriterContour::Warm => "warm",
                WriterContour::Active => "active",
                WriterContour::Draining => "draining",
            };

            if !draining {
                *live_writers_by_endpoint.entry(endpoint).or_insert(0) += 1;
                if let Some(dc_idx) = dc {
                    *live_writers_by_dc.entry(dc_idx).or_insert(0) += 1;
                    if let Some(ema_ms) = rtt_ema_ms {
                        let entry = dc_rtt_agg.entry(dc_idx).or_insert((0.0, 0));
                        entry.0 += ema_ms;
                        entry.1 += 1;
                    }
                }
            }

            writer_rows.push(MeApiWriterStatusSnapshot {
                writer_id: writer.id,
                dc,
                endpoint,
                generation: writer.generation,
                state,
                draining,
                degraded,
                bound_clients,
                idle_for_secs,
                rtt_ema_ms,
            });
        }

        writer_rows.sort_by_key(|row| (row.dc.unwrap_or(i16::MAX), row.endpoint, row.writer_id));

        let mut dcs = Vec::<MeApiDcStatusSnapshot>::with_capacity(endpoints_by_dc.len());
        let mut available_endpoints = 0usize;
        let mut alive_writers = 0usize;
        for (dc, endpoints) in endpoints_by_dc {
            let endpoint_count = endpoints.len();
            let dc_available_endpoints = endpoints
                .iter()
                .filter(|endpoint| live_writers_by_endpoint.contains_key(endpoint))
                .count();
            let dc_required_writers =
                self.required_writers_for_dc_with_floor_mode(endpoint_count, false);
            let dc_alive_writers = live_writers_by_dc.get(&dc).copied().unwrap_or(0);
            let dc_load = activity
                .active_sessions_by_target_dc
                .get(&dc)
                .copied()
                .unwrap_or(0);
            let dc_rtt_ms = dc_rtt_agg
                .get(&dc)
                .and_then(|(sum, count)| (*count > 0).then_some(*sum / (*count as f64)));

            available_endpoints += dc_available_endpoints;
            alive_writers += dc_alive_writers;

            dcs.push(MeApiDcStatusSnapshot {
                dc,
                endpoints: endpoints.into_iter().collect(),
                available_endpoints: dc_available_endpoints,
                available_pct: ratio_pct(dc_available_endpoints, endpoint_count),
                required_writers: dc_required_writers,
                alive_writers: dc_alive_writers,
                coverage_pct: ratio_pct(dc_alive_writers, dc_required_writers),
                rtt_ms: dc_rtt_ms,
                load: dc_load,
            });
        }

        MeApiStatusSnapshot {
            generated_at_epoch_secs: now_epoch_secs,
            configured_dc_groups,
            configured_endpoints,
            available_endpoints,
            available_pct: ratio_pct(available_endpoints, configured_endpoints),
            required_writers,
            alive_writers,
            coverage_pct: ratio_pct(alive_writers, required_writers),
            writers: writer_rows,
            dcs,
        }
    }

    pub(crate) async fn api_runtime_snapshot(&self) -> MeApiRuntimeSnapshot {
        let now = Instant::now();
        let now_epoch_secs = Self::now_epoch_secs();
        let pending_started_at = self
            .pending_hardswap_started_at_epoch_secs
            .load(Ordering::Relaxed);
        let pending_hardswap_age_secs = (pending_started_at > 0)
            .then_some(now_epoch_secs.saturating_sub(pending_started_at));

        let mut quarantined_endpoints = Vec::<MeApiQuarantinedEndpointSnapshot>::new();
        {
            let guard = self.endpoint_quarantine.lock().await;
            for (endpoint, expires_at) in guard.iter() {
                if *expires_at <= now {
                    continue;
                }
                let remaining_ms = expires_at.duration_since(now).as_millis() as u64;
                quarantined_endpoints.push(MeApiQuarantinedEndpointSnapshot {
                    endpoint: *endpoint,
                    remaining_ms,
                });
            }
        }
        quarantined_endpoints.sort_by_key(|entry| entry.endpoint);

        let mut network_path = Vec::<MeApiDcPathSnapshot>::new();
        if let Some(upstream) = &self.upstream {
            for dc in 1..=5 {
                let dc_idx = dc as i16;
                let ip_preference = upstream
                    .get_dc_ip_preference(dc_idx)
                    .await
                    .map(ip_preference_label);
                let selected_addr_v4 = upstream.get_dc_addr(dc_idx, false).await;
                let selected_addr_v6 = upstream.get_dc_addr(dc_idx, true).await;
                network_path.push(MeApiDcPathSnapshot {
                    dc: dc_idx,
                    ip_preference,
                    selected_addr_v4,
                    selected_addr_v6,
                });
            }
        }

        MeApiRuntimeSnapshot {
            active_generation: self.active_generation.load(Ordering::Relaxed),
            warm_generation: self.warm_generation.load(Ordering::Relaxed),
            pending_hardswap_generation: self.pending_hardswap_generation.load(Ordering::Relaxed),
            pending_hardswap_age_secs,
            hardswap_enabled: self.hardswap.load(Ordering::Relaxed),
            floor_mode: floor_mode_label(self.floor_mode()),
            adaptive_floor_idle_secs: self.me_adaptive_floor_idle_secs.load(Ordering::Relaxed),
            adaptive_floor_min_writers_single_endpoint: self
                .me_adaptive_floor_min_writers_single_endpoint
                .load(Ordering::Relaxed),
            adaptive_floor_recover_grace_secs: self
                .me_adaptive_floor_recover_grace_secs
                .load(Ordering::Relaxed),
            me_keepalive_enabled: self.me_keepalive_enabled,
            me_keepalive_interval_secs: self.me_keepalive_interval.as_secs(),
            me_keepalive_jitter_secs: self.me_keepalive_jitter.as_secs(),
            me_keepalive_payload_random: self.me_keepalive_payload_random,
            rpc_proxy_req_every_secs: self.rpc_proxy_req_every_secs.load(Ordering::Relaxed),
            me_reconnect_max_concurrent_per_dc: self.me_reconnect_max_concurrent_per_dc,
            me_reconnect_backoff_base_ms: self.me_reconnect_backoff_base.as_millis() as u64,
            me_reconnect_backoff_cap_ms: self.me_reconnect_backoff_cap.as_millis() as u64,
            me_reconnect_fast_retry_count: self.me_reconnect_fast_retry_count,
            me_pool_drain_ttl_secs: self.me_pool_drain_ttl_secs.load(Ordering::Relaxed),
            me_pool_force_close_secs: self.me_pool_force_close_secs.load(Ordering::Relaxed),
            me_pool_min_fresh_ratio: Self::permille_to_ratio(
                self.me_pool_min_fresh_ratio_permille.load(Ordering::Relaxed),
            ),
            me_bind_stale_mode: bind_stale_mode_label(self.bind_stale_mode()),
            me_bind_stale_ttl_secs: self.me_bind_stale_ttl_secs.load(Ordering::Relaxed),
            me_single_endpoint_shadow_writers: self
                .me_single_endpoint_shadow_writers
                .load(Ordering::Relaxed),
            me_single_endpoint_outage_mode_enabled: self
                .me_single_endpoint_outage_mode_enabled
                .load(Ordering::Relaxed),
            me_single_endpoint_outage_disable_quarantine: self
                .me_single_endpoint_outage_disable_quarantine
                .load(Ordering::Relaxed),
            me_single_endpoint_outage_backoff_min_ms: self
                .me_single_endpoint_outage_backoff_min_ms
                .load(Ordering::Relaxed),
            me_single_endpoint_outage_backoff_max_ms: self
                .me_single_endpoint_outage_backoff_max_ms
                .load(Ordering::Relaxed),
            me_single_endpoint_shadow_rotate_every_secs: self
                .me_single_endpoint_shadow_rotate_every_secs
                .load(Ordering::Relaxed),
            me_deterministic_writer_sort: self
                .me_deterministic_writer_sort
                .load(Ordering::Relaxed),
            me_socks_kdf_policy: socks_kdf_policy_label(self.socks_kdf_policy()),
            quarantined_endpoints,
            network_path,
        }
    }
}

fn ratio_pct(part: usize, total: usize) -> f64 {
    if total == 0 {
        return 0.0;
    }
    let pct = ((part as f64) / (total as f64)) * 100.0;
    pct.clamp(0.0, 100.0)
}

fn floor_mode_label(mode: MeFloorMode) -> &'static str {
    match mode {
        MeFloorMode::Static => "static",
        MeFloorMode::Adaptive => "adaptive",
    }
}

fn bind_stale_mode_label(mode: MeBindStaleMode) -> &'static str {
    match mode {
        MeBindStaleMode::Never => "never",
        MeBindStaleMode::Ttl => "ttl",
        MeBindStaleMode::Always => "always",
    }
}

fn socks_kdf_policy_label(policy: MeSocksKdfPolicy) -> &'static str {
    match policy {
        MeSocksKdfPolicy::Strict => "strict",
        MeSocksKdfPolicy::Compat => "compat",
    }
}

fn ip_preference_label(preference: IpPreference) -> &'static str {
    match preference {
        IpPreference::Unknown => "unknown",
        IpPreference::PreferV6 => "prefer_v6",
        IpPreference::PreferV4 => "prefer_v4",
        IpPreference::BothWork => "both",
        IpPreference::Unavailable => "unavailable",
    }
}

#[cfg(test)]
mod tests {
    use super::ratio_pct;

    #[test]
    fn ratio_pct_is_zero_when_denominator_is_zero() {
        assert_eq!(ratio_pct(1, 0), 0.0);
    }

    #[test]
    fn ratio_pct_is_capped_at_100() {
        assert_eq!(ratio_pct(7, 3), 100.0);
    }

    #[test]
    fn ratio_pct_reports_expected_value() {
        assert_eq!(ratio_pct(1, 4), 25.0);
    }
}

@@ -2,6 +2,7 @@ use std::net::SocketAddr;
 use std::sync::Arc;
 use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering};
 use std::time::{Duration, Instant};
+use std::io::ErrorKind;
 
 use bytes::BytesMut;
 use rand::Rng;
@@ -12,16 +13,22 @@ use tracing::{debug, info, warn};
 use crate::config::MeBindStaleMode;
 use crate::crypto::SecureRandom;
 use crate::error::{ProxyError, Result};
-use crate::protocol::constants::RPC_PING_U32;
+use crate::protocol::constants::{RPC_CLOSE_EXT_U32, RPC_PING_U32};
 
 use super::codec::{RpcWriter, WriterCommand};
 use super::pool::{MePool, MeWriter, WriterContour};
 use super::reader::reader_loop;
 use super::registry::BoundConn;
+use super::wire::build_proxy_req_payload;
 
 const ME_ACTIVE_PING_SECS: u64 = 25;
 const ME_ACTIVE_PING_JITTER_SECS: i64 = 5;
 const ME_IDLE_KEEPALIVE_MAX_SECS: u64 = 5;
+const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700;
+
+fn is_me_peer_closed_error(error: &ProxyError) -> bool {
+    matches!(error, ProxyError::Io(ioe) if ioe.kind() == ErrorKind::UnexpectedEof)
+}
 
 impl MePool {
     pub(crate) async fn prune_closed_writers(self: &Arc<Self>) {
@@ -115,6 +122,7 @@ impl MePool {
             allow_drain_fallback: allow_drain_fallback.clone(),
         };
         self.writers.write().await.push(writer.clone());
+        self.registry.mark_writer_idle(writer_id).await;
         self.conn_count.fetch_add(1, Ordering::Relaxed);
         self.writer_available.notify_one();
 
@@ -124,6 +132,7 @@ impl MePool {
         let ping_tracker_reader = ping_tracker.clone();
         let rtt_stats = self.rtt_stats.clone();
         let stats_reader = self.stats.clone();
+        let stats_reader_close = self.stats.clone();
         let stats_ping = self.stats.clone();
         let pool = Arc::downgrade(self);
         let cancel_ping = cancel.clone();
@@ -135,6 +144,13 @@ impl MePool {
         let keepalive_enabled = self.me_keepalive_enabled;
         let keepalive_interval = self.me_keepalive_interval;
         let keepalive_jitter = self.me_keepalive_jitter;
+        let rpc_proxy_req_every_secs = self.rpc_proxy_req_every_secs.load(Ordering::Relaxed);
+        let tx_signal = tx.clone();
+        let stats_signal = self.stats.clone();
+        let cancel_signal = cancel.clone();
+        let cleanup_for_signal = cleanup_done.clone();
+        let pool_signal = Arc::downgrade(self);
+        let keepalive_jitter_signal = self.me_keepalive_jitter;
         let cancel_reader_token = cancel.clone();
         let cancel_ping_token = cancel_ping.clone();
 
@@ -156,6 +172,15 @@ impl MePool {
                 cancel_reader_token.clone(),
             )
             .await;
+            let idle_close_by_peer = if let Err(e) = res.as_ref() {
+                is_me_peer_closed_error(e) && reg.is_writer_empty(writer_id).await
+            } else {
+                false
+            };
+            if idle_close_by_peer {
+                stats_reader_close.increment_me_idle_close_by_peer_total();
+                info!(writer_id, "ME socket closed by peer on idle writer");
+            }
             if let Some(pool) = pool.upgrade()
                 && cleanup_for_reader
                     .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
@@ -164,7 +189,9 @@ impl MePool {
                 pool.remove_writer_and_close_clients(writer_id).await;
             }
             if let Err(e) = res {
-                warn!(error = %e, "ME reader ended");
+                if !idle_close_by_peer {
+                    warn!(error = %e, "ME reader ended");
+                }
             }
             let mut ws = writers_arc.write().await;
             ws.retain(|w| w.id != writer_id);
@@ -253,6 +280,116 @@ impl MePool {
             }
         });
 
+        tokio::spawn(async move {
+            if rpc_proxy_req_every_secs == 0 {
+                return;
+            }
+
+            let interval = Duration::from_secs(rpc_proxy_req_every_secs);
+            let startup_jitter_ms = {
+                let jitter_cap_ms = interval.as_millis() / 2;
+                let effective_jitter_ms = keepalive_jitter_signal
+                    .as_millis()
+                    .min(jitter_cap_ms)
+                    .max(1);
+                rand::rng().random_range(0..=effective_jitter_ms as u64)
+            };
+
+            tokio::select! {
+                _ = cancel_signal.cancelled() => return,
+                _ = tokio::time::sleep(Duration::from_millis(startup_jitter_ms)) => {}
+            }
+
+            loop {
+                let wait = {
+                    let jitter_cap_ms = interval.as_millis() / 2;
+                    let effective_jitter_ms = keepalive_jitter_signal
+                        .as_millis()
+                        .min(jitter_cap_ms)
+                        .max(1);
+                    interval + Duration::from_millis(rand::rng().random_range(0..=effective_jitter_ms as u64))
+                };
+
+                tokio::select! {
+                    _ = cancel_signal.cancelled() => break,
+                    _ = tokio::time::sleep(wait) => {}
+                }
+
+                let Some(pool) = pool_signal.upgrade() else {
+                    break;
+                };
+
+                let Some(meta) = pool.registry.get_last_writer_meta(writer_id).await else {
+                    stats_signal.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
+                    continue;
+                };
+
+                let (conn_id, mut service_rx) = pool.registry.register().await;
+                pool.registry
+                    .bind_writer(conn_id, writer_id, tx_signal.clone(), meta.clone())
+                    .await;
+
+                let payload = build_proxy_req_payload(
+                    conn_id,
+                    meta.client_addr,
+                    meta.our_addr,
+                    &[],
+                    pool.proxy_tag.as_deref(),
+                    meta.proto_flags,
+                );
+
+                if tx_signal.send(WriterCommand::DataAndFlush(payload)).await.is_err() {
+                    stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
+                    let _ = pool.registry.unregister(conn_id).await;
+                    cancel_signal.cancel();
+                    if cleanup_for_signal
+                        .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
+                        .is_ok()
+                    {
+                        pool.remove_writer_and_close_clients(writer_id).await;
+                    }
+                    break;
+                }
+
+                stats_signal.increment_me_rpc_proxy_req_signal_sent_total();
+
+                if matches!(
+                    tokio::time::timeout(
+                        Duration::from_millis(ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS),
+                        service_rx.recv(),
+                    )
+                    .await,
+                    Ok(Some(_))
+                ) {
+                    stats_signal.increment_me_rpc_proxy_req_signal_response_total();
+                }
+
+                let mut close_payload = Vec::with_capacity(12);
+                close_payload.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
+                close_payload.extend_from_slice(&conn_id.to_le_bytes());
+
+                if tx_signal
+                    .send(WriterCommand::DataAndFlush(close_payload))
+                    .await
+                    .is_err()
+                {
+                    stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
+                    let _ = pool.registry.unregister(conn_id).await;
+                    cancel_signal.cancel();
+                    if cleanup_for_signal
+                        .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
+                        .is_ok()
+                    {
+                        pool.remove_writer_and_close_clients(writer_id).await;
+                    }
+                    break;
+                }
+
+                stats_signal.increment_me_rpc_proxy_req_signal_close_sent_total();
+                let _ = pool.registry.unregister(conn_id).await;
+            }
+        });
+
         Ok(())
     }

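Note on the signal task above: each sleep is the configured interval plus a uniform random extra delay whose cap is min(keepalive jitter, interval/2), never below 1 ms, so periodic RPC_PROXY_REQ probes from many writers do not synchronize. A standalone sketch of that wait computation (assuming the rand 0.9 API the diff itself uses):

use std::time::Duration;
use rand::Rng;

// Sketch of the wait used by the RPC_PROXY_REQ signal task: jitter is capped
// at half the interval and floored at 1 ms, then added on top of the base.
fn next_signal_wait(interval: Duration, configured_jitter: Duration) -> Duration {
    let jitter_cap_ms = interval.as_millis() / 2;
    let effective_jitter_ms = configured_jitter.as_millis().min(jitter_cap_ms).max(1);
    interval + Duration::from_millis(rand::rng().random_range(0..=effective_jitter_ms as u64))
}
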
@@ -1,7 +1,7 @@
 use std::collections::{HashMap, HashSet};
 use std::net::SocketAddr;
 use std::sync::atomic::{AtomicU8, AtomicU64, Ordering};
-use std::time::Duration;
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
 
 use tokio::sync::{mpsc, RwLock};
 use tokio::sync::mpsc::error::TrySendError;
@@ -45,12 +45,20 @@ pub struct ConnWriter {
     pub tx: mpsc::Sender<WriterCommand>,
 }
 
+#[derive(Clone, Debug, Default)]
+pub(super) struct WriterActivitySnapshot {
+    pub bound_clients_by_writer: HashMap<u64, usize>,
+    pub active_sessions_by_target_dc: HashMap<i16, usize>,
+}
+
 struct RegistryInner {
     map: HashMap<u64, mpsc::Sender<MeResponse>>,
     writers: HashMap<u64, mpsc::Sender<WriterCommand>>,
     writer_for_conn: HashMap<u64, u64>,
     conns_for_writer: HashMap<u64, HashSet<u64>>,
     meta: HashMap<u64, ConnMeta>,
+    last_meta_for_writer: HashMap<u64, ConnMeta>,
+    writer_idle_since_epoch_secs: HashMap<u64, u64>,
 }
 
 impl RegistryInner {
@@ -61,6 +69,8 @@ impl RegistryInner {
             writer_for_conn: HashMap::new(),
             conns_for_writer: HashMap::new(),
             meta: HashMap::new(),
+            last_meta_for_writer: HashMap::new(),
+            writer_idle_since_epoch_secs: HashMap::new(),
         }
     }
 }
@@ -74,6 +84,13 @@ pub struct ConnRegistry {
 }
 
 impl ConnRegistry {
+    fn now_epoch_secs() -> u64 {
+        SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_secs()
+    }
+
     pub fn new() -> Self {
         let start = rand::random::<u64>() | 1;
         Self {
@@ -121,8 +138,16 @@ impl ConnRegistry {
         inner.map.remove(&id);
         inner.meta.remove(&id);
         if let Some(writer_id) = inner.writer_for_conn.remove(&id) {
-            if let Some(set) = inner.conns_for_writer.get_mut(&writer_id) {
+            let became_empty = if let Some(set) = inner.conns_for_writer.get_mut(&writer_id) {
                 set.remove(&id);
+                set.is_empty()
+            } else {
+                false
+            };
+            if became_empty {
+                inner
+                    .writer_idle_since_epoch_secs
+                    .insert(writer_id, Self::now_epoch_secs());
             }
             return Some(writer_id);
         }
@@ -191,8 +216,10 @@ impl ConnRegistry {
         meta: ConnMeta,
     ) {
         let mut inner = self.inner.write().await;
-        inner.meta.entry(conn_id).or_insert(meta);
+        inner.meta.entry(conn_id).or_insert(meta.clone());
         inner.writer_for_conn.insert(conn_id, writer_id);
+        inner.last_meta_for_writer.insert(writer_id, meta);
+        inner.writer_idle_since_epoch_secs.remove(&writer_id);
         inner.writers.entry(writer_id).or_insert_with(|| tx.clone());
         inner
             .conns_for_writer
@@ -201,6 +228,49 @@ impl ConnRegistry {
             .insert(conn_id);
     }
 
+    pub async fn mark_writer_idle(&self, writer_id: u64) {
+        let mut inner = self.inner.write().await;
+        inner.conns_for_writer.entry(writer_id).or_insert_with(HashSet::new);
+        inner
+            .writer_idle_since_epoch_secs
+            .entry(writer_id)
+            .or_insert(Self::now_epoch_secs());
+    }
+
+    pub async fn get_last_writer_meta(&self, writer_id: u64) -> Option<ConnMeta> {
+        let inner = self.inner.read().await;
+        inner.last_meta_for_writer.get(&writer_id).cloned()
+    }
+
+    pub async fn writer_idle_since_snapshot(&self) -> HashMap<u64, u64> {
+        let inner = self.inner.read().await;
+        inner.writer_idle_since_epoch_secs.clone()
+    }
+
+    pub(super) async fn writer_activity_snapshot(&self) -> WriterActivitySnapshot {
+        let inner = self.inner.read().await;
+        let mut bound_clients_by_writer = HashMap::<u64, usize>::new();
+        let mut active_sessions_by_target_dc = HashMap::<i16, usize>::new();
+
+        for (writer_id, conn_ids) in &inner.conns_for_writer {
+            bound_clients_by_writer.insert(*writer_id, conn_ids.len());
+        }
+        for conn_meta in inner.meta.values() {
+            let dc_u16 = conn_meta.target_dc.unsigned_abs();
+            if dc_u16 == 0 {
+                continue;
+            }
+            if let Ok(dc) = i16::try_from(dc_u16) {
+                *active_sessions_by_target_dc.entry(dc).or_insert(0) += 1;
+            }
+        }
+
+        WriterActivitySnapshot {
+            bound_clients_by_writer,
+            active_sessions_by_target_dc,
+        }
+    }
+
     pub async fn get_writer(&self, conn_id: u64) -> Option<ConnWriter> {
         let inner = self.inner.read().await;
         let writer_id = inner.writer_for_conn.get(&conn_id).cloned()?;
@@ -211,6 +281,8 @@ impl ConnRegistry {
     pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> {
         let mut inner = self.inner.write().await;
         inner.writers.remove(&writer_id);
+        inner.last_meta_for_writer.remove(&writer_id);
+        inner.writer_idle_since_epoch_secs.remove(&writer_id);
         let conns = inner
             .conns_for_writer
             .remove(&writer_id)
@@ -246,3 +318,69 @@ impl ConnRegistry {
             .unwrap_or(true)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::net::{IpAddr, Ipv4Addr, SocketAddr};
+
+    use super::ConnMeta;
+    use super::ConnRegistry;
+
+    #[tokio::test]
+    async fn writer_activity_snapshot_tracks_writer_and_dc_load() {
+        let registry = ConnRegistry::new();
+
+        let (conn_a, _rx_a) = registry.register().await;
+        let (conn_b, _rx_b) = registry.register().await;
+        let (conn_c, _rx_c) = registry.register().await;
+        let (writer_tx_a, _writer_rx_a) = tokio::sync::mpsc::channel(8);
+        let (writer_tx_b, _writer_rx_b) = tokio::sync::mpsc::channel(8);
+
+        let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 443);
+        registry
+            .bind_writer(
+                conn_a,
+                10,
+                writer_tx_a.clone(),
+                ConnMeta {
+                    target_dc: 2,
+                    client_addr: addr,
+                    our_addr: addr,
+                    proto_flags: 0,
+                },
+            )
+            .await;
+        registry
+            .bind_writer(
+                conn_b,
+                10,
+                writer_tx_a,
+                ConnMeta {
+                    target_dc: -2,
+                    client_addr: addr,
+                    our_addr: addr,
+                    proto_flags: 0,
+                },
+            )
+            .await;
+        registry
+            .bind_writer(
+                conn_c,
+                20,
+                writer_tx_b,
+                ConnMeta {
+                    target_dc: 4,
+                    client_addr: addr,
+                    our_addr: addr,
+                    proto_flags: 0,
+                },
+            )
+            .await;
+
+        let snapshot = registry.writer_activity_snapshot().await;
+        assert_eq!(snapshot.bound_clients_by_writer.get(&10), Some(&2));
+        assert_eq!(snapshot.bound_clients_by_writer.get(&20), Some(&1));
+        assert_eq!(snapshot.active_sessions_by_target_dc.get(&2), Some(&2));
+        assert_eq!(snapshot.active_sessions_by_target_dc.get(&4), Some(&1));
+    }
+}

@@ -1,4 +1,5 @@
 use std::cmp::Reverse;
+use std::collections::HashMap;
 use std::net::SocketAddr;
 use std::sync::Arc;
 use std::sync::atomic::Ordering;
@@ -18,6 +19,9 @@ use super::wire::build_proxy_req_payload;
 use rand::seq::SliceRandom;
 use super::registry::ConnMeta;
 
+const IDLE_WRITER_PENALTY_MID_SECS: u64 = 45;
+const IDLE_WRITER_PENALTY_HIGH_SECS: u64 = 55;
+
 impl MePool {
     /// Send RPC_PROXY_REQ. `tag_override`: per-user ad_tag (from access.user_ad_tags); if None, uses pool default.
     pub async fn send_proxy_req(
@@ -152,6 +156,8 @@ impl MePool {
                 return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
             }
         }
+        let writer_idle_since = self.registry.writer_idle_since_snapshot().await;
+        let now_epoch_secs = Self::now_epoch_secs();
 
         if self.me_deterministic_writer_sort.load(Ordering::Relaxed) {
             candidate_indices.sort_by(|lhs, rhs| {
@@ -161,6 +167,11 @@ impl MePool {
                     self.writer_contour_rank_for_selection(left),
                     (left.generation < self.current_generation()) as usize,
                     left.degraded.load(Ordering::Relaxed) as usize,
+                    self.writer_idle_rank_for_selection(
+                        left,
+                        &writer_idle_since,
+                        now_epoch_secs,
+                    ),
                     Reverse(left.tx.capacity()),
                     left.addr,
                     left.id,
@@ -169,6 +180,11 @@ impl MePool {
                     self.writer_contour_rank_for_selection(right),
                     (right.generation < self.current_generation()) as usize,
                     right.degraded.load(Ordering::Relaxed) as usize,
+                    self.writer_idle_rank_for_selection(
+                        right,
+                        &writer_idle_since,
+                        now_epoch_secs,
+                    ),
                     Reverse(right.tx.capacity()),
                     right.addr,
                     right.id,
@@ -184,6 +200,11 @@ impl MePool {
                     self.writer_contour_rank_for_selection(w),
                     stale,
                     degraded as usize,
+                    self.writer_idle_rank_for_selection(
+                        w,
+                        &writer_idle_since,
+                        now_epoch_secs,
+                    ),
                     Reverse(w.tx.capacity()),
                 )
             });
@@ -367,4 +388,23 @@ impl MePool {
             WriterContour::Draining => 2,
         }
     }
+
+    fn writer_idle_rank_for_selection(
+        &self,
+        writer: &super::pool::MeWriter,
+        idle_since_by_writer: &HashMap<u64, u64>,
+        now_epoch_secs: u64,
+    ) -> usize {
+        let Some(idle_since) = idle_since_by_writer.get(&writer.id).copied() else {
+            return 0;
+        };
+        let idle_age_secs = now_epoch_secs.saturating_sub(idle_since);
+        if idle_age_secs >= IDLE_WRITER_PENALTY_HIGH_SECS {
+            2
+        } else if idle_age_secs >= IDLE_WRITER_PENALTY_MID_SECS {
+            1
+        } else {
+            0
+        }
+    }
 }

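Note on the selection hunks above: the idle rank slots into the existing lexicographic sort key, so idleness only breaks ties after contour, staleness, and degradation, and before channel capacity. A minimal sketch of that tuple-ordering idea, with illustrative values rather than the pool's actual fields:

use std::cmp::Reverse;

// Rust compares tuples left to right, so idle_rank only matters when the
// earlier components (contour, stale, degraded) are equal.
fn main() {
    // Key shape: (contour_rank, stale, degraded, idle_rank, Reverse(capacity))
    let busy_fresh = (0usize, 0usize, 0usize, 0usize, Reverse(4usize));
    let long_idle = (0usize, 0usize, 0usize, 2usize, Reverse(16usize));
    // The long-idle writer sorts later despite its larger channel capacity.
    assert!(busy_fresh < long_idle);
}
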
@@ -19,6 +19,7 @@ use crate::config::{UpstreamConfig, UpstreamType};
 use crate::error::{Result, ProxyError};
 use crate::network::dns_overrides::{resolve_socket_addr, split_host_port};
 use crate::protocol::constants::{TG_DATACENTERS_V4, TG_DATACENTERS_V6, TG_DATACENTER_PORT};
+use crate::stats::Stats;
 use crate::transport::socket::{create_outgoing_socket_bound, resolve_interface_ip};
 use crate::transport::socks::{connect_socks4, connect_socks5};
 
@@ -164,6 +165,43 @@ pub enum UpstreamRouteKind {
     Socks5,
 }
 
+#[derive(Debug, Clone)]
+pub struct UpstreamApiDcSnapshot {
+    pub dc: i16,
+    pub latency_ema_ms: Option<f64>,
+    pub ip_preference: IpPreference,
+}
+
+#[derive(Debug, Clone)]
+pub struct UpstreamApiItemSnapshot {
+    pub upstream_id: usize,
+    pub route_kind: UpstreamRouteKind,
+    pub address: String,
+    pub weight: u16,
+    pub scopes: String,
+    pub healthy: bool,
+    pub fails: u32,
+    pub last_check_age_secs: u64,
+    pub effective_latency_ms: Option<f64>,
+    pub dc: Vec<UpstreamApiDcSnapshot>,
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct UpstreamApiSummarySnapshot {
+    pub configured_total: usize,
+    pub healthy_total: usize,
+    pub unhealthy_total: usize,
+    pub direct_total: usize,
+    pub socks4_total: usize,
+    pub socks5_total: usize,
+}
+
+#[derive(Debug, Clone)]
+pub struct UpstreamApiSnapshot {
+    pub summary: UpstreamApiSummarySnapshot,
+    pub upstreams: Vec<UpstreamApiItemSnapshot>,
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct UpstreamEgressInfo {
     pub route_kind: UpstreamRouteKind,
@@ -188,6 +226,8 @@ pub struct UpstreamManager {
     connect_retry_attempts: u32,
     connect_retry_backoff: Duration,
     unhealthy_fail_threshold: u32,
+    connect_failfast_hard_errors: bool,
+    stats: Arc<Stats>,
 }
 
 impl UpstreamManager {
@@ -196,6 +236,8 @@ impl UpstreamManager {
         connect_retry_attempts: u32,
         connect_retry_backoff_ms: u64,
         unhealthy_fail_threshold: u32,
+        connect_failfast_hard_errors: bool,
+        stats: Arc<Stats>,
     ) -> Self {
         let states = configs.into_iter()
             .filter(|c| c.enabled)
@@ -207,9 +249,69 @@ impl UpstreamManager {
             connect_retry_attempts: connect_retry_attempts.max(1),
             connect_retry_backoff: Duration::from_millis(connect_retry_backoff_ms),
             unhealthy_fail_threshold: unhealthy_fail_threshold.max(1),
+            connect_failfast_hard_errors,
+            stats,
         }
     }
 
+    pub fn try_api_snapshot(&self) -> Option<UpstreamApiSnapshot> {
+        let guard = self.upstreams.try_read().ok()?;
+        let now = std::time::Instant::now();
+
+        let mut summary = UpstreamApiSummarySnapshot {
+            configured_total: guard.len(),
+            ..UpstreamApiSummarySnapshot::default()
+        };
+        let mut upstreams = Vec::with_capacity(guard.len());
+
+        for (idx, upstream) in guard.iter().enumerate() {
+            if upstream.healthy {
+                summary.healthy_total += 1;
+            } else {
+                summary.unhealthy_total += 1;
+            }
+
+            let (route_kind, address) = match &upstream.config.upstream_type {
+                UpstreamType::Direct { .. } => {
+                    summary.direct_total += 1;
+                    (UpstreamRouteKind::Direct, "direct".to_string())
+                }
+                UpstreamType::Socks4 { address, .. } => {
+                    summary.socks4_total += 1;
+                    (UpstreamRouteKind::Socks4, address.clone())
+                }
+                UpstreamType::Socks5 { address, .. } => {
+                    summary.socks5_total += 1;
+                    (UpstreamRouteKind::Socks5, address.clone())
+                }
+            };
+
+            let mut dc = Vec::with_capacity(NUM_DCS);
+            for dc_idx in 0..NUM_DCS {
+                dc.push(UpstreamApiDcSnapshot {
+                    dc: (dc_idx + 1) as i16,
+                    latency_ema_ms: upstream.dc_latency[dc_idx].get(),
+                    ip_preference: upstream.dc_ip_pref[dc_idx],
+                });
+            }
+
+            upstreams.push(UpstreamApiItemSnapshot {
+                upstream_id: idx,
+                route_kind,
+                address,
+                weight: upstream.config.weight,
+                scopes: upstream.config.scopes.clone(),
+                healthy: upstream.healthy,
+                fails: upstream.fails,
+                last_check_age_secs: now.saturating_duration_since(upstream.last_check).as_secs(),
+                effective_latency_ms: upstream.effective_latency(None),
+                dc,
+            });
+        }
+
+        Some(UpstreamApiSnapshot { summary, upstreams })
+    }
+
     #[cfg(unix)]
     fn resolve_interface_addrs(name: &str, want_ipv6: bool) -> Vec<IpAddr> {
         use nix::ifaddrs::getifaddrs;
@@ -349,6 +451,34 @@ impl UpstreamManager {
         }
     }
 
+    fn retry_backoff_with_jitter(&self) -> Duration {
+        if self.connect_retry_backoff.is_zero() {
+            return Duration::ZERO;
+        }
+        let base_ms = self.connect_retry_backoff.as_millis() as u64;
+        if base_ms == 0 {
+            return self.connect_retry_backoff;
+        }
+        let jitter_cap_ms = (base_ms / 2).max(1);
+        let jitter_ms = rand::rng().gen_range(0..=jitter_cap_ms);
+        Duration::from_millis(base_ms.saturating_add(jitter_ms))
+    }
+
+    fn is_hard_connect_error(error: &ProxyError) -> bool {
+        match error {
+            ProxyError::Config(_) | ProxyError::ConnectionRefused { .. } => true,
+            ProxyError::Io(ioe) => matches!(
+                ioe.kind(),
+                std::io::ErrorKind::ConnectionRefused
+                    | std::io::ErrorKind::AddrInUse
+                    | std::io::ErrorKind::AddrNotAvailable
+                    | std::io::ErrorKind::InvalidInput
+                    | std::io::ErrorKind::Unsupported
+            ),
+            _ => false,
+        }
+    }
+
     /// Select upstream using latency-weighted random selection.
     async fn select_upstream(&self, dc_idx: Option<i16>, scope: Option<&str>) -> Option<usize> {
         let upstreams = self.upstreams.read().await;
@@ -459,8 +589,12 @@ impl UpstreamManager {
             guard.get(idx).map(|u| u.bind_rr.clone())
         };
 
+        let connect_started_at = Instant::now();
         let mut last_error: Option<ProxyError> = None;
+        let mut attempts_used = 0u32;
         for attempt in 1..=self.connect_retry_attempts {
+            attempts_used = attempt;
+            self.stats.increment_upstream_connect_attempt_total();
             let start = Instant::now();
             match self
                 .connect_via_upstream(&upstream, target, bind_rr.clone())
@@ -468,6 +602,13 @@ impl UpstreamManager {
             {
                 Ok((stream, egress)) => {
                     let rtt_ms = start.elapsed().as_secs_f64() * 1000.0;
+                    self.stats.increment_upstream_connect_success_total();
+                    self.stats
+                        .observe_upstream_connect_attempts_per_request(attempts_used);
+                    self.stats.observe_upstream_connect_duration_ms(
+                        connect_started_at.elapsed().as_millis() as u64,
+                        true,
+                    );
                     let mut guard = self.upstreams.write().await;
                     if let Some(u) = guard.get_mut(idx) {
                         if !u.healthy {
@@ -491,7 +632,13 @@ impl UpstreamManager {
                     return Ok((stream, egress));
                 }
                 Err(e) => {
-                    if attempt < self.connect_retry_attempts {
+                    let hard_error =
+                        self.connect_failfast_hard_errors && Self::is_hard_connect_error(&e);
+                    if hard_error {
+                        self.stats
+                            .increment_upstream_connect_failfast_hard_error_total();
+                    }
+                    if attempt < self.connect_retry_attempts && !hard_error {
                         debug!(
                             attempt,
                             attempts = self.connect_retry_attempts,
@@ -499,21 +646,43 @@ impl UpstreamManager {
                             error = %e,
                             "Upstream connect attempt failed, retrying"
                         );
-                        if !self.connect_retry_backoff.is_zero() {
-                            tokio::time::sleep(self.connect_retry_backoff).await;
+                        let backoff = self.retry_backoff_with_jitter();
+                        if !backoff.is_zero() {
+                            tokio::time::sleep(backoff).await;
                         }
+                    } else if hard_error {
+                        debug!(
+                            attempt,
+                            attempts = self.connect_retry_attempts,
+                            target = %target,
+                            error = %e,
+                            "Upstream connect failed with hard error, failfast is active"
+                        );
                     }
                     last_error = Some(e);
+                    if hard_error {
+                        break;
+                    }
                 }
             }
        }
 
+        self.stats.increment_upstream_connect_fail_total();
+        self.stats
+            .observe_upstream_connect_attempts_per_request(attempts_used);
+        self.stats.observe_upstream_connect_duration_ms(
+            connect_started_at.elapsed().as_millis() as u64,
+            false,
+        );
+
+        let error = last_error.unwrap_or_else(|| {
+            ProxyError::Config("Upstream connect attempts exhausted".to_string())
+        });
+
         let mut guard = self.upstreams.write().await;
         if let Some(u) = guard.get_mut(idx) {
+            // Intermediate attempts are intentionally ignored here.
+            // Health state is degraded only when the entire connect cycle fails.
            u.fails += 1;
            warn!(
                fails = u.fails,
@@ -1364,4 +1533,20 @@ mod tests {
             .contains(&"198.51.100.2:443".parse::<SocketAddr>().unwrap()));
         assert!(dc9.fallback.is_empty());
     }
+
+    #[test]
+    fn hard_connect_error_classification_detects_connection_refused() {
+        let error = ProxyError::ConnectionRefused {
+            addr: "127.0.0.1:443".to_string(),
+        };
+        assert!(UpstreamManager::is_hard_connect_error(&error));
+    }
+
+    #[test]
+    fn hard_connect_error_classification_skips_timeouts() {
+        let error = ProxyError::ConnectionTimeout {
+            addr: "127.0.0.1:443".to_string(),
+        };
+        assert!(!UpstreamManager::is_hard_connect_error(&error));
+    }
 }
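Note on the retry hunks above: the jittered backoff sleeps for the configured base plus up to 50% uniform extra, so the expected delay is roughly 1.25x the base and simultaneous retries decorrelate. A standalone sketch of that computation, written against the rand 0.9 `random_range` name (the hunk itself uses the older `gen_range` spelling):

use std::time::Duration;
use rand::Rng;

// Sketch of retry_backoff_with_jitter: base plus uniform jitter in
// 0..=max(base/2, 1) milliseconds, with saturating addition for safety.
fn backoff_with_jitter(base: Duration) -> Duration {
    if base.is_zero() {
        return Duration::ZERO;
    }
    let base_ms = base.as_millis() as u64;
    let jitter_cap_ms = (base_ms / 2).max(1);
    let jitter_ms = rand::rng().random_range(0..=jitter_cap_ms);
    Duration::from_millis(base_ms.saturating_add(jitter_ms))
}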