Compare commits

..

138 Commits
3.1.3 ... 3.3.0

Author SHA1 Message Date
Alexey
0494f8ac8b Merge pull request #325 from telemt/bump
Update Cargo.toml
2026-03-05 16:40:40 +03:00
Alexey
48ce59900e Update Cargo.toml 2026-03-05 16:40:28 +03:00
Alexey
84e95fd229 ME Pool Init fixes: merge pull request #324 from telemt/flow-fixes
ME Pool Init fixes
2026-03-05 16:35:00 +03:00
Alexey
a80be78345 DC writer floor is below required only in runtime 2026-03-05 16:32:31 +03:00
Alexey
64130dd02e MEP not ready only after 3 attempts 2026-03-05 16:13:40 +03:00
Alexey
d62a6e0417 Shutdown Timer fixes 2026-03-05 16:04:32 +03:00
Alexey
3260746785 Init + Uptime timers 2026-03-05 15:48:09 +03:00
Alexey
8066ea2163 ME Pool Init fixes 2026-03-05 15:31:36 +03:00
Alexey
813f1df63e Performance improvements: merge pull request #323 from telemt/flow-perf
Performance improvements
2026-03-05 14:43:10 +03:00
Alexey
09bdafa718 Performance improvements 2026-03-05 14:39:32 +03:00
Alexey
fb0f75df43 Merge pull request #322 from Dimasssss/patch-3
Update README.md
2026-03-05 14:10:01 +03:00
Alexey
39255df549 Unique IP always in Metrics+API: merge pull request #321 from telemt/flow-iplimit
Unique IP always in Metrics+API
2026-03-05 14:09:40 +03:00
Dimasssss
456495fd62 Update README.md 2026-03-05 13:59:58 +03:00
Alexey
83cadc0bf3 No lock-contention in ip-tracker 2026-03-05 13:52:27 +03:00
Alexey
0b1a8cd3f8 IP Limit fixes 2026-03-05 13:41:41 +03:00
Alexey
565b4ee923 Unique IP always in Metrics+API
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-05 13:21:11 +03:00
Alexey
7a9c1e79c2 Merge pull request #320 from telemt/bump
Update Cargo.toml
2026-03-05 12:47:09 +03:00
Alexey
02c6af4912 Update Cargo.toml 2026-03-05 12:46:57 +03:00
Alexey
8ba4dea59f Merge pull request #319 from telemt/flow-api
New IP Limit + Hot-Reload fixes + API Docs + ME2DC Fallback + ME Init Retries
2026-03-05 12:46:34 +03:00
Alexey
ccfda10713 ME2DC Fallback + ME Init Retries
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-05 12:43:07 +03:00
Alexey
bd1327592e Merge pull request #318 from telemt/readme
Update README.md
2026-03-05 12:40:34 +03:00
Alexey
30b22fe2bf Update README.md 2026-03-05 12:40:04 +03:00
Alexey
651f257a5d Update API.md
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-05 12:30:29 +03:00
Alexey
a9209fd3c7 Hot-Reload fixes
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-05 12:18:09 +03:00
Alexey
4ae4ca8ca8 New IP Limit Method
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-05 02:28:19 +03:00
Alexey
8be1ddc0d8 Merge pull request #315 from telemt/contributing
Update CONTRIBUTING.md
2026-03-04 17:52:17 +03:00
Alexey
b55fa5ec8f Update CONTRIBUTING.md 2026-03-04 17:52:02 +03:00
Alexey
16c6ce850e Merge pull request #313 from badcdd/patch-2
Add new prometheus metrics to zabbix template
2026-03-04 16:46:21 +03:00
badcdd
12251e730f Add new prometheus metrics to zabbix template 2026-03-04 16:24:00 +03:00
Alexey
925b10f9fc Merge pull request #312 from Dimasssss/patch-2
Update README.md
2026-03-04 14:25:13 +03:00
Dimasssss
306b653318 Update README.md 2026-03-04 14:23:48 +03:00
Alexey
8791a52b7e Merge pull request #311 from Dimasssss/patch-6
Правка гайдов
2026-03-04 14:19:48 +03:00
Dimasssss
0d9470a840 Update QUICK_START_GUIDE.en.md 2026-03-04 14:10:46 +03:00
Dimasssss
0d320c20e0 Update QUICK_START_GUIDE.ru.md 2026-03-04 14:10:12 +03:00
Alexey
9b3ba2e1c6 API for UpstreamManager: merge pull request #310 from telemt/flow-api
API for UpstreamManager
2026-03-04 11:46:07 +03:00
Alexey
dbadbf0221 Update config.toml 2026-03-04 11:45:32 +03:00
Alexey
173624c838 Update Cargo.toml 2026-03-04 11:44:50 +03:00
Alexey
de2047adf2 API UpstreamManager
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 11:41:41 +03:00
Alexey
5df2fe9f97 Autodetect IP in API User-links
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 11:04:54 +03:00
Alexey
2510ebaa79 Merge pull request #306 from telemt/flow-api
API + Runtime Stats
2026-03-04 02:56:54 +03:00
Alexey
314f30a434 Update Cargo.toml 2026-03-04 02:53:47 +03:00
Alexey
c86a511638 Update API.md
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 02:53:17 +03:00
Alexey
f1efaf4491 User-links in API
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 02:48:43 +03:00
Alexey
716b4adef2 Runtime Stats in API
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 02:46:47 +03:00
Alexey
5876623bb0 Runtime API Stats
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 02:46:26 +03:00
Alexey
6b9c7f7862 Runtime API in defaults
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 02:46:12 +03:00
Alexey
7ea6387278 API ME Pool Status
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 02:45:32 +03:00
Alexey
4c2bc2f41f Pool Status hooks in ME Registry
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 01:42:24 +03:00
Alexey
c86f35f059 Pool Status in Docs
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 01:41:57 +03:00
Alexey
3492566842 Update mod.rs
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 01:41:43 +03:00
Alexey
349bbbb8fa API Pool Status Model
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 01:41:33 +03:00
Alexey
ead08981e7 API Pool Status pull-up
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 01:41:11 +03:00
Alexey
068cf825b9 API Pool Status
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 01:40:58 +03:00
Alexey
7269dfbdc5 API in defaults+load+reload
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 01:09:32 +03:00
Alexey
533708f885 API in defaults
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 01:08:59 +03:00
Alexey
5e93ce258f API pull-up
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 01:08:42 +03:00
Alexey
1236505502 API Docs V1
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 01:08:19 +03:00
Alexey
f7d451e689 API V1 Drafts
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-04 01:08:05 +03:00
Alexey
e11da6d2ae Merge pull request #305 from telemt/bump
Update Cargo.toml
2026-03-03 23:38:26 +03:00
Alexey
d31b4cd6c8 Update Cargo.toml 2026-03-03 23:38:15 +03:00
Alexey
f4ec6bb303 Upstream Connect + Idle tolerance + Adaptive floor by default + RPC Proxy Req: merge pull request #304 from telemt/flow-connclose
Upstream Connect + Idle tolerance + Adaptive floor by default + RPC Proxy Req
2026-03-03 23:36:25 +03:00
Alexey
a6132bac38 Idle tolerance + Adaptive floor by default + RPC Proxy Req
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 23:16:25 +03:00
Alexey
624870109e Upstream Connect in defaults
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 20:50:31 +03:00
Alexey
cdf829de91 Upstream Connect in Metrics
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 20:50:08 +03:00
Alexey
6ef51dbfb0 Upstream Connect pull-up
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 20:49:53 +03:00
Alexey
af5f0b9692 Upstream Connect in Stats
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 20:49:29 +03:00
Alexey
bd0dcfff15 Upstream Error classifier
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 20:49:09 +03:00
Alexey
ec4e48808e Merge pull request #302 from ivulit/fix/metrics-port-localhost
fix:docker-compose.yml bind metrics port to localhost only
2026-03-03 18:35:50 +03:00
ivulit
c293901669 fix: bind metrics port to localhost only 2026-03-03 17:18:19 +03:00
Alexey
f4e5a08614 Merge pull request #300 from Dimasssss/patch-5
Небольшое обновление гайдов
2026-03-03 16:39:17 +03:00
Dimasssss
430a0ae6b4 Update FAQ.ru.md 2026-03-03 15:20:39 +03:00
Dimasssss
53d93880ad Update QUICK_START_GUIDE.ru.md 2026-03-03 15:16:22 +03:00
Alexey
1706698a83 Update README.md 2026-03-03 04:06:26 +03:00
Alexey
cb0832b803 ME Adaptive Floor: merge pull request #299 from telemt/flow-drift
ME Adaptive Floor
2026-03-03 03:42:12 +03:00
Alexey
c01ca40b6d ME Adaptive Floor in Tests
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 03:39:28 +03:00
Alexey
cfec6dbb3c ME Adaptive Floor pull-up
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 03:38:06 +03:00
Alexey
1fe1acadd4 ME Adaptive Floor in Metrics
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 03:37:24 +03:00
Alexey
225fc3e4ea ME Adaptive Floor Drafts
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 03:37:00 +03:00
Alexey
4a0d88ad43 Update health.rs
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 03:35:57 +03:00
Alexey
58ff0c7971 Update pool.rs
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 03:35:47 +03:00
Alexey
7d39bf1698 Merge pull request #298 from telemt/bump
Update Cargo.toml
2026-03-03 03:28:49 +03:00
Alexey
3b8eea762b Update Cargo.toml 2026-03-03 03:28:37 +03:00
Alexey
07ec84d071 ME Healthcheck + ME Keepalive + ME Pool in Metrics: merge pull request #297 from telemt/flow-drift
ME Healthcheck + ME Keepalive + ME Pool in Metrics
2026-03-03 03:27:44 +03:00
Alexey
235642459a ME Keepalive 8/2
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 03:08:15 +03:00
Alexey
3799fc13c4 ME Pool in Metrics
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 03:04:45 +03:00
Alexey
71261522bd Update pool.rs
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 03:04:07 +03:00
Alexey
762deac511 ME Healthcheck fixes
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-03 03:03:44 +03:00
Alexey
4300720d35 Merge pull request #296 from telemt/bump
Update Cargo.toml
2026-03-02 21:36:12 +03:00
Alexey
b7a8e759eb Update Cargo.toml 2026-03-02 21:36:00 +03:00
Alexey
1a68dc1c2d ME Dual-Trio Pool + ME Pool Shadow Writers: merge pull request #295 from telemt/flow-drift
ME Dual-Trio Pool + ME Pool Shadow Writers
2026-03-02 21:10:55 +03:00
Alexey
a6d22e8a57 ME Pool Shadow Writers
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-02 21:04:06 +03:00
Alexey
9477103f89 Update pool.rs
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-02 20:45:43 +03:00
Alexey
e589891706 ME Dual-Trio Pool Drafts 2026-03-02 20:41:51 +03:00
Alexey
fad4b652c4 Merge pull request #292 from telemt/flow-mep
ME Hardswap Generation stability + Dead-code deletion
2026-03-02 01:23:39 +03:00
Alexey
96bfc223fe Merge pull request #293 from telemt/l7-router
Create XRAY-SINGBOX-ROUTING.ru.md
2026-03-02 01:23:20 +03:00
Alexey
265b9a5f11 Create XRAY-SINGBOX-ROUTING.ru.md 2026-03-02 01:23:09 +03:00
Alexey
74ad9037de Dead-code deletion: has_proxy_tag 2026-03-02 00:54:02 +03:00
Alexey
49f4a7bb22 ME Hardswap Generation stability
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-02 00:39:18 +03:00
Alexey
ac453638b8 Adtag + ME Pool improvements: merge pull request #291 from telemt/flow-adtag
Adtag + ME Pool improvements
2026-03-02 00:22:45 +03:00
Alexey
e7773b2bda Merge branch 'main' into flow-adtag 2026-03-02 00:18:47 +03:00
Alexey
6f1980dfd7 ME Pool improvements
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-02 00:17:58 +03:00
Alexey
427fbef50f Merge pull request #289 from telemt/docs-me-kdf
Docs me kdf
2026-03-01 23:41:09 +03:00
Alexey
08609f4b6d Create MIDDLE-END-KDF.ru.md 2026-03-01 23:40:46 +03:00
Alexey
501d802b8d Create MIDDLE-END-KDF.de.md 2026-03-01 23:39:42 +03:00
Alexey
e8ff39d2ae Merge pull request #288 from telemt/docs-me-kdf
Create MIDDLE-END-KDF.en.md
2026-03-01 23:38:04 +03:00
Alexey
6c1b837d5b Create MIDDLE-END-KDF.en.md 2026-03-01 23:37:49 +03:00
Alexey
b112908c86 Merge pull request #286 from Dimasssss/patch-4
Update QUICK_START_GUIDE.ru.md
2026-03-01 22:32:29 +03:00
Dimasssss
1e400d4cc2 Update QUICK_START_GUIDE.ru.md 2026-03-01 19:05:53 +03:00
Alexey
a11c8b659b Merge pull request #285 from xaosproxy/adtag_per_user
Add per-user ad_tag with global fallback and hot-reload
2026-03-01 16:36:25 +03:00
sintanial
bc432f06e2 Add per-user ad_tag with global fallback and hot-reload
- Per-user ad_tag in [access.user_ad_tags], global fallback in general.ad_tag
- User tag overrides global; if no user tag, general.ad_tag is used
- Both general.ad_tag and user_ad_tags support hot-reload (no restart)
2026-03-01 16:28:55 +03:00
Alexey
338636ede6 Merge pull request #283 from Dimasssss/patch-3
Fix typos and update save instructions in documentation
2026-03-01 15:12:14 +03:00
Dimasssss
c05779208e Update QUICK_START_GUIDE.en.md 2026-03-01 15:05:39 +03:00
Dimasssss
7ba21ec5a8 Update save instructions in QUICK_START_GUIDE.ru.md 2026-03-01 15:05:25 +03:00
Dimasssss
d997c0b216 Fix typos and update save instructions in FAQ.ru.md 2026-03-01 15:03:44 +03:00
Alexey
62cf4f0a1c Merge pull request #278 from Dimasssss/patch-1
Update config.full.toml
2026-03-01 14:48:49 +03:00
Alexey
e710fefed2 Merge pull request #279 from Dimasssss/patch-2
Create FAQ.ru.md
2026-03-01 14:48:36 +03:00
Dimasssss
edef06edb5 Update FAQ.ru.md 2026-03-01 14:45:33 +03:00
Dimasssss
7a0b015e65 Create FAQ.ru.md 2026-03-01 14:04:18 +03:00
Dimasssss
8b2ec35c46 Update config.full.toml 2026-03-01 13:38:50 +03:00
Alexey
d324d84ec7 Merge pull request #276 from telemt/flow-mep
UpstreamManager Health-check for ME Pool over SOCKS
2026-03-01 04:02:59 +03:00
Alexey
47b12f9489 UpstreamManager Health-check for ME Pool over SOCKS
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-01 04:02:32 +03:00
Alexey
a5967d0ca3 Merge pull request #275 from telemt/flow-mep
ME Pool improvements
2026-03-01 03:38:53 +03:00
Alexey
44cdfd4b23 ME Pool improvements
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-01 03:36:00 +03:00
Alexey
25ffcf6081 Merge pull request #273 from ivulit/fix/proxy-protocol-unix-sock
fix: send PROXY protocol header to mask unix socket
2026-03-01 03:19:52 +03:00
Alexey
60322807b6 Merge pull request #271 from An0nX/patch-1
Rewrite configuration as a self-contained deployment guide with hardened anti-censorship defaults
2026-03-01 03:14:13 +03:00
ivulit
ed93b0a030 fix: send PROXY protocol header to mask unix socket
When mask_unix_sock is configured, mask_proxy_protocol was silently
ignored and no PROXY protocol header was sent to the backend. Apply
the same header-building logic as the TCP path in both masking relay
and TLS fetcher (raw and rustls).
2026-03-01 00:14:55 +03:00
Alexey
2370c8d5e4 Merge pull request #268 from radjah/patch-1
Update install.sh
2026-02-28 23:56:20 +03:00
Alexey
a3197b0fe1 Merge pull request #270 from ivulit/fix/proxy-protocol-dst-addr
fix: pass correct dst address to outgoing PROXY protocol header
2026-02-28 23:56:04 +03:00
ivulit
e27ef04c3d fix: pass correct dst address to outgoing PROXY protocol header
Previously handle_bad_client used stream.local_addr() (the ephemeral
socket to the mask backend) as the dst in the outgoing PROXY protocol
header. This is wrong: the dst should be the address telemt is listening
on, or the dst from the incoming PROXY protocol header if one was present.

- handle_bad_client now receives local_addr from the caller
- handle_client_stream resolves local_addr from PROXY protocol info.dst_addr
  or falls back to a synthetic address based on config.server.port
- RunningClientHandler.do_handshake resolves local_addr from stream.local_addr()
  overridden by PROXY protocol info.dst_addr when present, and passes it
  down to handle_tls_client / handle_direct_client
- masking.rs uses the caller-supplied local_addr directly, eliminating the
  stream.local_addr() call
2026-02-28 22:47:24 +03:00
An0nX
cf7e2ebf4b refactor: rewrite telemt config as self-documenting deployment reference
- Reorganize all sections with clear visual block separators
- Move inline comments to dedicated lines above each parameter
- Add Quick Start guide in the file header explaining 7-step deployment
- Add Modes of Operation explanation (Direct vs Middle-Proxy)
- Group related parameters under labeled subsections with separators
- Expand every comment to full plain-English explanation
- Remove all inline comments to prevent TOML parser edge cases
- Tune anti-censorship defaults for maximum DPI resistance:
  fast_mode_min_tls_record=1400, server_hello_delay=50-150ms,
  tls_new_session_tickets=2, tls_full_cert_ttl_secs=0,
  tls_emulation=true, desync_all_full=true, beobachten_minutes=30,
  me_reinit_every_secs=600
2026-02-28 21:36:56 +03:00
Pavel Frolov
685bfafe74 Update install.sh
Попытался привести к единообразию текст.
2026-02-28 19:02:00 +03:00
Alexey
0f6fcf49a7 Merge pull request #267 from Dimasssss/main
QUICK_START_GUIDE.en.md
2026-02-28 17:47:30 +03:00
Dimasssss
036f0e1569 Add files via upload 2026-02-28 17:46:11 +03:00
Dimasssss
291c22583f Update QUICK_START_GUIDE.ru.md 2026-02-28 17:39:12 +03:00
Alexey
ee5b01bb31 Merge pull request #266 from Dimasssss/main
Create QUICK_START_GUIDE.ru.md
2026-02-28 17:21:29 +03:00
Dimasssss
ccacf78890 Create QUICK_START_GUIDE.ru.md 2026-02-28 17:17:50 +03:00
Alexey
42db1191a8 Merge pull request #265 from Dimasssss/main
install.sh
2026-02-28 17:08:15 +03:00
Dimasssss
9ce26d16cb Add files via upload 2026-02-28 17:04:06 +03:00
52 changed files with 12187 additions and 1098 deletions

View File

@@ -1,3 +1,8 @@
# Issues - Rules
## What it is not
- NOT Question and Answer
- NOT Helpdesk
# Pull Requests - Rules
## General
- ONLY signed and verified commits

2
Cargo.lock generated
View File

@@ -2087,7 +2087,7 @@ dependencies = [
[[package]]
name = "telemt"
version = "3.0.13"
version = "3.1.3"
dependencies = [
"aes",
"anyhow",

View File

@@ -1,6 +1,6 @@
[package]
name = "telemt"
version = "3.1.3"
version = "3.3.0"
edition = "2024"
[dependencies]

120
README.md
View File

@@ -1,6 +1,13 @@
# Telemt - MTProxy on Rust + Tokio
**Telemt** is a fast, secure, and feature-rich server written in Rust: it fully implements the official Telegram proxy algo and adds many production-ready improvements such as connection pooling, replay protection, detailed statistics, masking from "prying" eyes
***Löst Probleme, bevor andere überhaupt wissen, dass sie existieren*** / ***It solves problems before others even realize they exist***
**Telemt** is a fast, secure, and feature-rich server written in Rust: it fully implements the official Telegram proxy algo and adds many production-ready improvements such as:
- ME Pool + Reader/Writer + Registry + Refill + Adaptive Floor + Trio-State + Generation Lifecycle
- [Full-covered API w/ management](https://github.com/telemt/telemt/blob/main/docs/API.md)
- Anti-Replay on Sliding Window
- Prometheus-format Metrics
- TLS-Fronting and TCP-Splicing for masking from "prying" eyes
[**Telemt Chat in Telegram**](https://t.me/telemtrs)
@@ -110,115 +117,18 @@ We welcome ideas, architectural feedback, and pull requests.
- Extensive logging via `trace` and `debug` with `RUST_LOG` method
## Quick Start Guide
**This software is designed for Debian-based OS: in addition to Debian, these are Ubuntu, Mint, Kali, MX and many other Linux**
1. Download release
```bash
wget -qO- "https://github.com/telemt/telemt/releases/latest/download/telemt-$(uname -m)-linux-$(ldd --version 2>&1 | grep -iq musl && echo musl || echo gnu).tar.gz" | tar -xz
```
2. Move to Bin Folder
```bash
mv telemt /bin
```
4. Make Executable
```bash
chmod +x /bin/telemt
```
5. Go to [How to use?](#how-to-use) section for for further steps
## How to use?
### Telemt via Systemd
**This instruction "assume" that you:**
- logged in as root or executed `su -` / `sudo su`
- you already have an assembled and executable `telemt` in /bin folder as a result of the [Quick Start Guide](#quick-start-guide) or [Build](#build)
### [Quick Start Guide RU](docs/QUICK_START_GUIDE.ru.md)
### [Quick Start Guide EN](docs/QUICK_START_GUIDE.en.md)
**0. Check port and generate secrets**
The port you have selected for use should be MISSING from the list, when:
```bash
netstat -lnp
```
Generate 16 bytes/32 characters HEX with OpenSSL or another way:
```bash
openssl rand -hex 16
```
OR
```bash
xxd -l 16 -p /dev/urandom
```
OR
```bash
python3 -c 'import os; print(os.urandom(16).hex())'
```
**1. Place your config to /etc/telemt.toml**
Open nano
```bash
nano /etc/telemt.toml
```
paste your config from [Configuration](#configuration) section
then Ctrl+X -> Y -> Enter to save
**2. Create service on /etc/systemd/system/telemt.service**
Open nano
```bash
nano /etc/systemd/system/telemt.service
```
paste this Systemd Module
```bash
[Unit]
Description=Telemt
After=network.target
[Service]
Type=simple
WorkingDirectory=/bin
ExecStart=/bin/telemt /etc/telemt.toml
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
```
then Ctrl+X -> Y -> Enter to save
**3.** In Shell type `systemctl start telemt` - it must start with zero exit-code
**4.** In Shell type `systemctl status telemt` - there you can reach info about current MTProxy status
**5.** In Shell type `systemctl enable telemt` - then telemt will start with system startup, after the network is up
**6.** In Shell type `journalctl -u telemt -n -g "links" --no-pager -o cat | tac` - get the connection links
## Configuration
### Minimal Configuration for First Start
```toml
# === General Settings ===
[general]
# ad_tag = "00000000000000000000000000000000"
[general.modes]
classic = false
secure = false
tls = true
# === Anti-Censorship & Masking ===
[censorship]
tls_domain = "petrovich.ru"
[access.users]
# format: "username" = "32_hex_chars_secret"
hello = "00000000000000000000000000000000"
```
### Advanced
#### Adtag
To use channel advertising and usage statistics from Telegram, get Adtag from [@mtproxybot](https://t.me/mtproxybot), add this parameter to section `[General]`
#### Adtag (per-user)
To use channel advertising and usage statistics from Telegram, get an Adtag from [@mtproxybot](https://t.me/mtproxybot). Set it per user in `[access.user_ad_tags]` (32 hex chars):
```toml
ad_tag = "00000000000000000000000000000000" # Replace zeros to your adtag from @mtproxybot
[access.user_ad_tags]
username1 = "11111111111111111111111111111111" # Replace with your tag from @mtproxybot
username2 = "22222222222222222222222222222222"
```
#### Listening and Announce IPs
To specify listening address and/or address in links, add to section `[[server.listeners]]` of config.toml:

View File

@@ -1,176 +1,669 @@
# Telemt full config with default values.
# Examples are kept in comments after '#'.
# ==============================================================================
#
# TELEMT — Advanced Rust-based Telegram MTProto Proxy
# Full Configuration Reference
#
# This file is both a working config and a complete documentation.
# Every parameter is explained. Read it top to bottom before deploying.
#
# Quick Start:
# 1. Set [server].port to your desired port (443 recommended)
# 2. Generate a secret: openssl rand -hex 16
# 3. Put it in [access.users] under a name you choose
# 4. Set [censorship].tls_domain to a popular unblocked HTTPS site
# 5. Set your public IP in [general].middle_proxy_nat_ip
# and [general.links].public_host
# 6. Set announce IP in [[server.listeners]]
# 7. Run Telemt. It prints a tg:// link. Send it to your users.
#
# Modes of Operation:
# Direct Mode (use_middle_proxy = false)
# Connects straight to Telegram DCs via TCP. Simple, fast, low overhead.
# No ad_tag support. No CDN DC support (203, etc).
#
# Middle-Proxy Mode (use_middle_proxy = true)
# Connects to Telegram Middle-End servers via RPC protocol.
# Required for ad_tag monetization and CDN support.
# Requires proxy_secret_path and a valid public IP.
#
# ==============================================================================
# Top-level legacy field.
show_link = [] # example: "*" or ["alice", "bob"]
# default_dc = 2 # example: default DC for unmapped non-standard DCs
# ==============================================================================
# LEGACY TOP-LEVEL FIELDS
# ==============================================================================
# Deprecated. Use [general.links].show instead.
# Accepts "*" for all users, or an array like ["alice", "bob"].
show_link = ["0"]
# Fallback Datacenter index (1-5) when a client requests an unknown DC ID.
# DC 2 is Amsterdam (Europe), closest for most CIS users.
# default_dc = 2
# ==============================================================================
# GENERAL SETTINGS
# ==============================================================================
[general]
# ------------------------------------------------------------------------------
# Core Protocol
# ------------------------------------------------------------------------------
# Coalesce the MTProto handshake and first data payload into a single TCP packet.
# Significantly reduces connection latency. No reason to disable.
fast_mode = true
use_middle_proxy = false
# ad_tag = "00000000000000000000000000000000" # example
# proxy_secret_path = "proxy-secret" # example custom path
# middle_proxy_nat_ip = "203.0.113.10" # example public NAT IP override
# How the proxy connects to Telegram servers.
# false = Direct TCP to Telegram DCs (simple, low overhead)
# true = Middle-End RPC protocol (required for ad_tag and CDN DCs)
use_middle_proxy = true
# 32-char hex Ad-Tag from @MTProxybot for sponsored channel injection.
# Only works when use_middle_proxy = true.
# Obtain yours: message @MTProxybot on Telegram, register your proxy.
# ad_tag = "00000000000000000000000000000000"
# ------------------------------------------------------------------------------
# Middle-End Authentication
# ------------------------------------------------------------------------------
# Path to the Telegram infrastructure AES key file.
# Auto-downloaded from https://core.telegram.org/getProxySecret on first run.
# This key authenticates your proxy with Middle-End servers.
proxy_secret_path = "proxy-secret"
# ------------------------------------------------------------------------------
# Public IP Configuration (Critical for Middle-Proxy Mode)
# ------------------------------------------------------------------------------
# Your server's PUBLIC IPv4 address.
# Middle-End servers need this for the cryptographic Key Derivation Function.
# If your server has a direct public IP, set it here.
# If behind NAT (AWS, Docker, etc.), this MUST be your external IP.
# If omitted, Telemt uses STUN to auto-detect (see middle_proxy_nat_probe).
# middle_proxy_nat_ip = "203.0.113.10"
# Auto-detect public IP via STUN servers defined in [network].
# Set to false if you hardcoded middle_proxy_nat_ip above.
# Set to true if you want automatic detection.
middle_proxy_nat_probe = true
# middle_proxy_nat_stun = "stun.l.google.com:19302" # example
# middle_proxy_nat_stun_servers = [] # example: ["stun1.l.google.com:19302", "stun2.l.google.com:19302"]
# ------------------------------------------------------------------------------
# Middle-End Connection Pool
# ------------------------------------------------------------------------------
# Number of persistent multiplexed RPC connections to ME servers.
# All client traffic is routed through these "fat pipes".
# 8 handles thousands of concurrent users comfortably.
middle_proxy_pool_size = 8
# Legacy field. Connections kept initialized but idle as warm standby.
middle_proxy_warm_standby = 16
# ------------------------------------------------------------------------------
# Middle-End Keepalive
# Telegram ME servers aggressively kill idle TCP connections.
# These settings send periodic RPC_PING frames to keep pipes alive.
# ------------------------------------------------------------------------------
me_keepalive_enabled = true
# Base interval between pings in seconds.
me_keepalive_interval_secs = 25
# Random jitter added to interval to prevent all connections pinging simultaneously.
me_keepalive_jitter_secs = 5
# Randomize ping payload bytes to prevent DPI from fingerprinting ping patterns.
me_keepalive_payload_random = true
# ------------------------------------------------------------------------------
# Client-Side Limits
# ------------------------------------------------------------------------------
# Max buffered ciphertext per client (bytes) when upstream is slow.
# Acts as backpressure to prevent memory exhaustion. 256KB is safe.
crypto_pending_buffer = 262144
# Maximum single MTProto frame size from client. 16MB is protocol standard.
max_client_frame = 16777216
desync_all_full = false
# ------------------------------------------------------------------------------
# Crypto Desynchronization Logging
# Desync errors usually mean DPI/GFW is tampering with connections.
# ------------------------------------------------------------------------------
# true = full forensics (trace ID, IP hash, hex dumps) for EVERY desync event
# false = deduplicated logging, one entry per time window (prevents log spam)
# Set true if you are actively debugging DPI interference.
desync_all_full = true
# ------------------------------------------------------------------------------
# Beobachten — Built-in Honeypot / Active Probe Tracker
# Tracks IPs that fail handshakes or behave like TLS scanners.
# Output file can be fed into fail2ban or iptables for auto-blocking.
# ------------------------------------------------------------------------------
beobachten = true
beobachten_minutes = 10
# How long (minutes) to remember a suspicious IP before expiring it.
beobachten_minutes = 30
# How often (seconds) to flush tracker state to disk.
beobachten_flush_secs = 15
# File path for the tracker output.
beobachten_file = "cache/beobachten.txt"
# ------------------------------------------------------------------------------
# Hardswap — Zero-Downtime ME Pool Rotation
# When Telegram updates ME server IPs, Hardswap creates a completely new pool,
# waits until it is fully ready, migrates traffic, then kills the old pool.
# Users experience zero interruption.
# ------------------------------------------------------------------------------
hardswap = true
# ------------------------------------------------------------------------------
# ME Pool Warmup Staggering
# When creating a new pool, connections are opened one by one with delays
# to avoid a burst of SYN packets that could trigger ISP flood protection.
# ------------------------------------------------------------------------------
me_warmup_stagger_enabled = true
# Delay between each connection creation (milliseconds).
me_warmup_step_delay_ms = 500
# Random jitter added to the delay (milliseconds).
me_warmup_step_jitter_ms = 300
# ------------------------------------------------------------------------------
# ME Reconnect Backoff
# If an ME server drops the connection, Telemt retries with this strategy.
# ------------------------------------------------------------------------------
# Max simultaneous reconnect attempts per DC.
me_reconnect_max_concurrent_per_dc = 8
# Exponential backoff base (milliseconds).
me_reconnect_backoff_base_ms = 500
# Backoff ceiling (milliseconds). Will never wait longer than this.
me_reconnect_backoff_cap_ms = 30000
# Number of instant retries before switching to exponential backoff.
me_reconnect_fast_retry_count = 12
# ------------------------------------------------------------------------------
# NAT Mismatch Behavior
# If STUN-detected IP differs from local interface IP (you are behind NAT).
# false = abort ME mode (safe default)
# true = force ME mode anyway (use if you know your NAT setup is correct)
# ------------------------------------------------------------------------------
stun_iface_mismatch_ignore = false
unknown_dc_log_path = "unknown-dc.txt" # to disable: set to null
log_level = "normal" # debug | verbose | normal | silent
# ------------------------------------------------------------------------------
# Logging
# ------------------------------------------------------------------------------
# File to log unknown DC requests (DC IDs outside standard 1-5).
unknown_dc_log_path = "unknown-dc.txt"
# Verbosity: "debug" | "verbose" | "normal" | "silent"
log_level = "normal"
# Disable ANSI color codes in log output (useful for file logging).
disable_colors = false
fast_mode_min_tls_record = 0
# ------------------------------------------------------------------------------
# FakeTLS Record Sizing
# Buffer small MTProto packets into larger TLS records to mimic real HTTPS.
# Real HTTPS servers send records close to MTU size (~1400 bytes).
# A stream of tiny TLS records is a strong DPI signal.
# Set to 0 to disable. Set to 1400 for realistic HTTPS emulation.
# ------------------------------------------------------------------------------
fast_mode_min_tls_record = 1400
# ------------------------------------------------------------------------------
# Periodic Updates
# ------------------------------------------------------------------------------
# How often (seconds) to re-fetch ME server lists and proxy secrets
# from core.telegram.org. Keeps your proxy in sync with Telegram infrastructure.
update_every = 300
me_reinit_every_secs = 900
# How often (seconds) to force a Hardswap even if the ME map is unchanged.
# Shorter intervals mean shorter-lived TCP flows, harder for DPI to profile.
me_reinit_every_secs = 600
# ------------------------------------------------------------------------------
# Hardswap Warmup Tuning
# Fine-grained control over how the new pool is warmed up before traffic switch.
# ------------------------------------------------------------------------------
me_hardswap_warmup_delay_min_ms = 1000
me_hardswap_warmup_delay_max_ms = 2000
me_hardswap_warmup_extra_passes = 3
me_hardswap_warmup_pass_backoff_base_ms = 500
# ------------------------------------------------------------------------------
# Config Update Debouncing
# Telegram sometimes pushes transient/broken configs. Debouncing requires
# N consecutive identical fetches before applying a change.
# ------------------------------------------------------------------------------
# ME server list must be identical for this many fetches before applying.
me_config_stable_snapshots = 2
# Minimum seconds between config applications.
me_config_apply_cooldown_secs = 300
# Proxy secret must be identical for this many fetches before applying.
proxy_secret_stable_snapshots = 2
# ------------------------------------------------------------------------------
# Proxy Secret Rotation
# ------------------------------------------------------------------------------
# Apply newly downloaded secrets at runtime without restart.
proxy_secret_rotate_runtime = true
# Maximum acceptable secret length (bytes). Rejects abnormally large secrets.
proxy_secret_len_max = 256
# ------------------------------------------------------------------------------
# Hardswap Drain Settings
# Controls graceful shutdown of old ME connections during pool rotation.
# ------------------------------------------------------------------------------
# Seconds to keep old connections alive for in-flight data before force-closing.
me_pool_drain_ttl_secs = 90
# Minimum ratio of healthy connections in new pool before draining old pool.
# 0.8 = at least 80% of new pool must be ready.
me_pool_min_fresh_ratio = 0.8
# Maximum seconds to wait for drain to complete before force-killing.
me_reinit_drain_timeout_secs = 120
# Legacy compatibility fields used when update_every is omitted.
proxy_secret_auto_reload_secs = 3600
proxy_config_auto_reload_secs = 3600
# ------------------------------------------------------------------------------
# NTP Clock Check
# MTProto uses timestamps. Clock drift > 30 seconds breaks handshakes.
# Telemt checks on startup and warns if out of sync.
# ------------------------------------------------------------------------------
ntp_check = true
ntp_servers = ["pool.ntp.org"] # example: ["pool.ntp.org", "time.cloudflare.com"]
ntp_servers = ["pool.ntp.org"]
# ------------------------------------------------------------------------------
# Auto-Degradation
# If ME servers become completely unreachable (ISP blocking),
# automatically fall back to Direct Mode so users stay connected.
# ------------------------------------------------------------------------------
auto_degradation_enabled = true
# Number of DC groups that must be unreachable before triggering fallback.
degradation_min_unavailable_dc_groups = 2
# ==============================================================================
# ALLOWED CLIENT PROTOCOLS
# Only enable what you need. In censored regions, TLS-only is safest.
# ==============================================================================
[general.modes]
# Classic MTProto. Unobfuscated length prefixes. Trivially detected by DPI.
# No reason to enable unless you have ancient clients.
classic = false
# Obfuscated MTProto with randomized padding. Better than classic, but
# still detectable by statistical analysis of packet sizes.
secure = false
# FakeTLS (ee-secrets). Wraps MTProto in TLS 1.3 framing.
# To DPI, it looks like a normal HTTPS connection.
# This should be the ONLY enabled mode in censored environments.
tls = true
# ==============================================================================
# STARTUP LINK GENERATION
# Controls what tg:// invite links are printed to console on startup.
# ==============================================================================
[general.links]
show ="*" # example: "*" or ["alice", "bob"]
# public_host = "proxy.example.com" # example explicit host/IP for tg:// links
# public_port = 443 # example explicit port for tg:// links
# Which users to generate links for.
# "*" = all users, or an array like ["alice", "bob"].
show = "*"
# IP or domain to embed in the tg:// link.
# If omitted, Telemt uses STUN to auto-detect.
# Set this to your server's public IP or domain for reliable links.
# public_host = "proxy.example.com"
# Port to embed in the tg:// link.
# If omitted, uses [server].port.
# public_port = 443
# ==============================================================================
# NETWORK & IP RESOLUTION
# ==============================================================================
[network]
# Enable IPv4 for outbound connections to Telegram.
ipv4 = true
ipv6 = false # set true to enable IPv6
prefer = 4 # 4 or 6
# Enable IPv6 for outbound connections to Telegram.
ipv6 = false
# Prefer IPv4 (4) or IPv6 (6) when both are available.
prefer = 4
# Experimental: use both IPv4 and IPv6 ME servers simultaneously.
# May improve reliability but doubles connection count.
multipath = false
# STUN servers for external IP discovery.
# Used for Middle-Proxy KDF (if nat_probe=true) and link generation.
stun_servers = [
"stun.l.google.com:5349",
"stun1.l.google.com:3478",
"stun.gmx.net:3478",
"stun.l.google.com:19302",
"stun.1und1.de:3478",
"stun1.l.google.com:19302",
"stun2.l.google.com:19302",
"stun3.l.google.com:19302",
"stun4.l.google.com:19302",
"stun.services.mozilla.com:3478",
"stun.stunprotocol.org:3478",
"stun.nextcloud.com:3478",
"stun.voip.eutelia.it:3478",
"stun.l.google.com:5349",
"stun1.l.google.com:3478",
"stun.gmx.net:3478",
"stun.l.google.com:19302"
]
# If UDP STUN is blocked, attempt TCP-based STUN as fallback.
stun_tcp_fallback = true
http_ip_detect_urls = ["https://ifconfig.me/ip", "https://api.ipify.org"]
# If all STUN fails, use HTTP APIs to discover public IP.
http_ip_detect_urls = [
"https://ifconfig.me/ip",
"https://api.ipify.org"
]
# Cache discovered public IP to this file to survive restarts.
cache_public_ip_path = "cache/public_ip.txt"
# ==============================================================================
# SERVER BINDING & METRICS
# ==============================================================================
[server]
# TCP port to listen on.
# 443 is recommended (looks like normal HTTPS traffic).
port = 443
# IPv4 bind address. "0.0.0.0" = all interfaces.
listen_addr_ipv4 = "0.0.0.0"
# IPv6 bind address. "::" = all interfaces.
listen_addr_ipv6 = "::"
# listen_unix_sock = "/var/run/telemt.sock" # example
# listen_unix_sock_perm = "0660" # example unix socket mode
# listen_tcp = true # example explicit override (auto-detected when omitted)
# Unix socket listener (for reverse proxy setups with Nginx/HAProxy).
# listen_unix_sock = "/var/run/telemt.sock"
# listen_unix_sock_perm = "0660"
# Enable PROXY protocol header parsing.
# Set true ONLY if Telemt is behind HAProxy/Nginx that injects PROXY headers.
# If enabled without a proxy in front, clients will fail to connect.
proxy_protocol = false
# metrics_port = 9090 # example
metrics_whitelist = ["127.0.0.1/32", "::1/128"]
# Example explicit listeners (default: omitted, auto-generated from listen_addr_*):
# Prometheus metrics HTTP endpoint port.
# Uncomment to enable. Access at http://your-server:9090/metrics
# metrics_port = 9090
# IP ranges allowed to access the metrics endpoint.
metrics_whitelist = [
"127.0.0.1/32",
"::1/128"
]
# ------------------------------------------------------------------------------
# Listener Overrides
# Define explicit listeners with specific bind IPs and announce IPs.
# The announce IP is what gets embedded in tg:// links and sent to ME servers.
# You MUST set announce to your server's public IP for ME mode to work.
# ------------------------------------------------------------------------------
# [[server.listeners]]
# ip = "0.0.0.0"
# announce = "proxy-v4.example.com"
# # announce_ip = "203.0.113.10" # deprecated alias
# proxy_protocol = false
# reuse_allow = false
#
# [[server.listeners]]
# ip = "::"
# announce = "proxy-v6.example.com"
# proxy_protocol = false
# announce = "203.0.113.10"
# reuse_allow = false
# ==============================================================================
# TIMEOUTS (seconds unless noted)
# ==============================================================================
[timeouts]
# Maximum time for client to complete FakeTLS + MTProto handshake.
client_handshake = 15
# Maximum time to establish TCP connection to upstream Telegram DC.
tg_connect = 10
# TCP keepalive interval for client connections.
client_keepalive = 60
# Maximum client inactivity before dropping the connection.
client_ack = 300
# Instant retry count for a single ME endpoint before giving up on it.
me_one_retry = 3
# Timeout (milliseconds) for a single ME endpoint connection attempt.
me_one_timeout_ms = 1500
# ==============================================================================
# ANTI-CENSORSHIP / FAKETLS / MASKING
# This is where Telemt becomes invisible to Deep Packet Inspection.
# ==============================================================================
[censorship]
tls_domain = "petrovich.ru"
# tls_domains = ["example.com", "cdn.example.net"] # Additional domains for EE links
# ------------------------------------------------------------------------------
# TLS Domain Fronting
# The SNI (Server Name Indication) your proxy presents to connecting clients.
# Must be a popular, unblocked HTTPS website in your target country.
# DPI sees traffic to this domain. Choose carefully.
# Good choices: major CDNs, banks, government sites, search engines.
# Bad choices: obscure sites, already-blocked domains.
# ------------------------------------------------------------------------------
tls_domain = "www.google.com"
# ------------------------------------------------------------------------------
# Active Probe Masking
# When someone connects but fails the MTProto handshake (wrong secret),
# they might be an ISP active prober testing if this is a proxy.
#
# mask = false: drop the connection (prober knows something is here)
# mask = true: transparently proxy them to mask_host (prober sees a real website)
#
# With mask enabled, your server is indistinguishable from a real web server
# to anyone who doesn't have the correct secret.
# ------------------------------------------------------------------------------
mask = true
# mask_host = "www.google.com" # example, defaults to tls_domain when both mask_host/mask_unix_sock are unset
# mask_unix_sock = "/var/run/nginx.sock" # example, mutually exclusive with mask_host
# The real web server to forward failed handshakes to.
# If omitted, defaults to tls_domain.
# mask_host = "www.google.com"
# Port on the mask host to connect to.
mask_port = 443
# mask_proxy_protocol = 0 # Send PROXY protocol header to mask_host: 0 = off, 1 = v1 (text), 2 = v2 (binary)
fake_cert_len = 2048 # if tls_emulation=false and default value is used, loader may randomize this value at runtime
# Inject PROXY protocol header when forwarding to mask host.
# 0 = disabled, 1 = v1, 2 = v2. Leave disabled unless mask_host expects it.
# mask_proxy_protocol = 0
# ------------------------------------------------------------------------------
# TLS Certificate Emulation
# ------------------------------------------------------------------------------
# Size (bytes) of the locally generated fake TLS certificate.
# Only used when tls_emulation is disabled.
fake_cert_len = 2048
# KILLER FEATURE: Real-Time TLS Emulation.
# Telemt connects to tls_domain, fetches its actual TLS 1.3 certificate chain,
# and exactly replicates the byte sizes of ServerHello and Certificate records.
# Defeats DPI that uses TLS record length heuristics to detect proxies.
# Strongly recommended in censored environments.
tls_emulation = true
# Directory to cache fetched TLS certificates.
tls_front_dir = "tlsfront"
server_hello_delay_min_ms = 0
server_hello_delay_max_ms = 0
tls_new_session_tickets = 0
tls_full_cert_ttl_secs = 90
# ------------------------------------------------------------------------------
# ServerHello Timing
# Real web servers take 30-150ms to respond to ClientHello due to network
# latency and crypto processing. A proxy responding in <1ms is suspicious.
# These settings add realistic delay to mimic genuine server behavior.
# ------------------------------------------------------------------------------
# Minimum delay before sending ServerHello (milliseconds).
server_hello_delay_min_ms = 50
# Maximum delay before sending ServerHello (milliseconds).
server_hello_delay_max_ms = 150
# ------------------------------------------------------------------------------
# TLS Session Tickets
# Real TLS 1.3 servers send 1-2 NewSessionTicket messages after handshake.
# A server that sends zero tickets is anomalous and may trigger DPI flags.
# Set this to match your tls_domain's behavior (usually 2).
# ------------------------------------------------------------------------------
# tls_new_session_tickets = 0
# ------------------------------------------------------------------------------
# Full Certificate Frequency
# When tls_emulation is enabled, this controls how often (per client IP)
# to send the complete emulated certificate chain.
#
# > 0: Subsequent connections within TTL seconds get a smaller cached version.
# Saves bandwidth but creates a detectable size difference between
# first and repeat connections.
#
# = 0: Every connection gets the full certificate. More bandwidth but
# perfectly consistent behavior, no anomalies for DPI to detect.
# ------------------------------------------------------------------------------
tls_full_cert_ttl_secs = 0
# ------------------------------------------------------------------------------
# ALPN Enforcement
# Ensure ServerHello responds with the exact ALPN protocol the client requested.
# Mismatched ALPN (e.g., client asks h2, server says http/1.1) is a DPI red flag.
# ------------------------------------------------------------------------------
alpn_enforce = true
# ==============================================================================
# ACCESS CONTROL & USERS
# ==============================================================================
[access]
# ------------------------------------------------------------------------------
# Replay Attack Protection
# DPI can record a legitimate user's handshake and replay it later to probe
# whether the server is a proxy. Telemt remembers recent handshake nonces
# and rejects duplicates.
# ------------------------------------------------------------------------------
# Number of nonce slots in the replay detection buffer.
replay_check_len = 65536
# How long (seconds) to remember nonces before expiring them.
replay_window_secs = 1800
# Allow clients with incorrect system clocks to connect.
# false = reject clients with significant time skew (more secure)
# true = accept anyone regardless of clock (more permissive)
ignore_time_skew = false
# ------------------------------------------------------------------------------
# User Secrets
# Each user needs a unique 32-character hex string as their secret.
# Generate with: openssl rand -hex 16
#
# This secret is embedded in the tg:// link. Anyone with it can connect.
# Format: username = "hex_secret"
# ------------------------------------------------------------------------------
[access.users]
# format: "username" = "32_hex_chars_secret"
hello = "00000000000000000000000000000000"
# alice = "11111111111111111111111111111111" # example
# alice = "0123456789abcdef0123456789abcdef"
# bob = "fedcba9876543210fedcba9876543210"
# ------------------------------------------------------------------------------
# Per-User Connection Limits
# Limits concurrent TCP connections per user to prevent secret sharing.
# Uncomment and set for each user as needed.
# ------------------------------------------------------------------------------
[access.user_max_tcp_conns]
# alice = 100 # example
# alice = 100
# bob = 50
# ------------------------------------------------------------------------------
# Per-User Expiration Dates
# Automatically revoke access after the specified date (ISO 8601 format).
# ------------------------------------------------------------------------------
[access.user_expirations]
# alice = "2078-01-01T00:00:00Z" # example
# alice = "2025-12-31T23:59:59Z"
# bob = "2026-06-15T00:00:00Z"
# ------------------------------------------------------------------------------
# Per-User Data Quotas
# Maximum total bytes transferred per user. Connection refused after limit.
# ------------------------------------------------------------------------------
[access.user_data_quota]
# hello = 10737418240 # example bytes
# alice = 10737418240 # example bytes
# alice = 107374182400
# bob = 53687091200
# ------------------------------------------------------------------------------
# Per-User Unique IP Limits
# Maximum number of different IP addresses that can use this secret
# at the same time. Highly effective against secret leaking/sharing.
# Set to 1 for single-device, 2-3 for phone+desktop, etc.
# ------------------------------------------------------------------------------
[access.user_max_unique_ips]
# hello = 10 # example
# alice = 100 # example
# alice = 3
# bob = 2
# ==============================================================================
# UPSTREAM ROUTING
# Controls how Telemt connects to Telegram servers (or ME servers).
# If omitted entirely, uses the OS default route.
# ==============================================================================
# ------------------------------------------------------------------------------
# Direct upstream: use the server's own network interface.
# You can optionally bind to a specific interface or local IP.
# ------------------------------------------------------------------------------
# Default behavior if [[upstreams]] is omitted: loader injects one direct upstream.
# Example explicit upstreams:
# [[upstreams]]
# type = "direct"
# interface = "eth0"
@@ -178,28 +671,27 @@ hello = "00000000000000000000000000000000"
# weight = 1
# enabled = true
# scopes = "*"
#
# [[upstreams]]
# type = "socks4"
# address = "198.51.100.20:1080"
# interface = "eth0"
# user_id = "telemt"
# weight = 1
# enabled = true
# scopes = "*"
#
# ------------------------------------------------------------------------------
# SOCKS5 upstream: route Telegram traffic through a SOCKS5 proxy.
# Useful if your server's IP is blocked from reaching Telegram DCs.
# ------------------------------------------------------------------------------
# [[upstreams]]
# type = "socks5"
# address = "198.51.100.30:1080"
# interface = "eth0"
# username = "proxy-user"
# password = "proxy-pass"
# weight = 1
# enabled = true
# scopes = "*"
# === DC Address Overrides ===
# ==============================================================================
# DATACENTER OVERRIDES
# Force specific DC IDs to route to specific IP:Port combinations.
# DC 203 (CDN) is auto-injected by Telemt if not specified here.
# ==============================================================================
# [dc_overrides]
# "201" = "149.154.175.50:443" # example
# "202" = ["149.154.167.51:443", "149.154.175.100:443"] # example
# "203" = "91.105.192.100:443" # loader auto-adds this one when omitted
# "201" = "149.154.175.50:443"
# "202" = ["149.154.167.51:443", "149.154.175.100:443"]

View File

@@ -5,7 +5,9 @@
# === General Settings ===
[general]
use_middle_proxy = false
# Global ad_tag fallback when user has no per-user tag in [access.user_ad_tags]
# ad_tag = "00000000000000000000000000000000"
# Per-user ad_tag in [access.user_ad_tags] (32 hex from @MTProxybot)
# === Log Level ===
# Log level: debug | verbose | normal | silent
@@ -32,6 +34,13 @@ port = 443
# metrics_port = 9090
# metrics_whitelist = ["127.0.0.1", "::1", "0.0.0.0/0"]
[server.api]
enabled = true
listen = "0.0.0.0:9091"
whitelist = ["127.0.0.0/8"]
minimal_runtime_enabled = false
minimal_runtime_cache_ttl_ms = 1000
# Listen on multiple interfaces/IPs - IPv4
[[server.listeners]]
ip = "0.0.0.0"

View File

@@ -6,7 +6,7 @@ services:
restart: unless-stopped
ports:
- "443:443"
- "9090:9090"
- "127.0.0.1:9090:9090"
# Allow caching 'proxy-secret' in read-only container
working_dir: /run/telemt
volumes:

548
docs/API.md Normal file
View File

@@ -0,0 +1,548 @@
# Telemt Control API
## Purpose
Control-plane HTTP API for runtime visibility and user/config management.
Data-plane MTProto traffic is out of scope.
## Runtime Configuration
API runtime is configured in `[server.api]`.
| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `enabled` | `bool` | `false` | Enables REST API listener. |
| `listen` | `string` (`IP:PORT`) | `127.0.0.1:9091` | API bind address. |
| `whitelist` | `CIDR[]` | `127.0.0.1/32, ::1/128` | Source IP allowlist. Empty list means allow all. |
| `auth_header` | `string` | `""` | Exact value for `Authorization` header. Empty disables header auth. |
| `request_body_limit_bytes` | `usize` | `65536` | Maximum request body size. Must be `> 0`. |
| `minimal_runtime_enabled` | `bool` | `false` | Enables runtime snapshot endpoints requiring ME pool read-lock aggregation. |
| `minimal_runtime_cache_ttl_ms` | `u64` | `1000` | Cache TTL for minimal snapshots. `0` disables cache; valid range is `[0, 60000]`. |
| `read_only` | `bool` | `false` | Disables mutating endpoints. |
`server.admin_api` is accepted as an alias for backward compatibility.
Runtime validation for API config:
- `server.api.listen` must be a valid `IP:PORT`.
- `server.api.request_body_limit_bytes` must be `> 0`.
- `server.api.minimal_runtime_cache_ttl_ms` must be within `[0, 60000]`.
## Protocol Contract
| Item | Value |
| --- | --- |
| Transport | HTTP/1.1 |
| Content type | `application/json; charset=utf-8` |
| Prefix | `/v1` |
| Optimistic concurrency | `If-Match: <revision>` on mutating requests (optional) |
| Revision format | SHA-256 hex of current `config.toml` content |
### Success Envelope
```json
{
"ok": true,
"data": {},
"revision": "sha256-hex"
}
```
### Error Envelope
```json
{
"ok": false,
"error": {
"code": "machine_code",
"message": "human-readable"
},
"request_id": 1
}
```
## Request Processing Order
Requests are processed in this order:
1. `api_enabled` gate (`503 api_disabled` if disabled).
2. Source IP whitelist gate (`403 forbidden`).
3. `Authorization` header gate when configured (`401 unauthorized`).
4. Route and method matching (`404 not_found` or `405 method_not_allowed`).
5. `read_only` gate for mutating routes (`403 read_only`).
6. Request body read/limit/JSON decode (`413 payload_too_large`, `400 bad_request`).
7. Business validation and config write path.
Notes:
- Whitelist is evaluated against the direct TCP peer IP (`SocketAddr::ip`), without `X-Forwarded-For` support.
- `Authorization` check is exact string equality against configured `auth_header`.
## Endpoint Matrix
| Method | Path | Body | Success | `data` contract |
| --- | --- | --- | --- | --- |
| `GET` | `/v1/health` | none | `200` | `HealthData` |
| `GET` | `/v1/stats/summary` | none | `200` | `SummaryData` |
| `GET` | `/v1/stats/zero/all` | none | `200` | `ZeroAllData` |
| `GET` | `/v1/stats/upstreams` | none | `200` | `UpstreamsData` |
| `GET` | `/v1/stats/minimal/all` | none | `200` | `MinimalAllData` |
| `GET` | `/v1/stats/me-writers` | none | `200` | `MeWritersData` |
| `GET` | `/v1/stats/dcs` | none | `200` | `DcStatusData` |
| `GET` | `/v1/stats/users` | none | `200` | `UserInfo[]` |
| `GET` | `/v1/users` | none | `200` | `UserInfo[]` |
| `POST` | `/v1/users` | `CreateUserRequest` | `201` | `CreateUserResponse` |
| `GET` | `/v1/users/{username}` | none | `200` | `UserInfo` |
| `PATCH` | `/v1/users/{username}` | `PatchUserRequest` | `200` | `UserInfo` |
| `DELETE` | `/v1/users/{username}` | none | `200` | `string` (deleted username) |
| `POST` | `/v1/users/{username}/rotate-secret` | `RotateSecretRequest` or empty body | `404` | `ErrorResponse` (`not_found`, current runtime behavior) |
## Common Error Codes
| HTTP | `error.code` | Trigger |
| --- | --- | --- |
| `400` | `bad_request` | Invalid JSON, validation failures, malformed request body. |
| `401` | `unauthorized` | Missing/invalid `Authorization` when `auth_header` is configured. |
| `403` | `forbidden` | Source IP is not allowed by whitelist. |
| `403` | `read_only` | Mutating endpoint called while `read_only=true`. |
| `404` | `not_found` | Unknown route, unknown user, or unsupported sub-route (including current `rotate-secret` route). |
| `405` | `method_not_allowed` | Unsupported method for `/v1/users/{username}` route shape. |
| `409` | `revision_conflict` | `If-Match` revision mismatch. |
| `409` | `user_exists` | User already exists on create. |
| `409` | `last_user_forbidden` | Attempt to delete last configured user. |
| `413` | `payload_too_large` | Body exceeds `request_body_limit_bytes`. |
| `500` | `internal_error` | Internal error (I/O, serialization, config load/save). |
| `503` | `api_disabled` | API disabled in config. |
## Routing and Method Edge Cases
| Case | Behavior |
| --- | --- |
| Path matching | Exact match on `req.uri().path()`. Query string does not affect route matching. |
| Trailing slash | Not normalized. Example: `/v1/users/` is `404`. |
| Username route with extra slash | `/v1/users/{username}/...` is not treated as user route and returns `404`. |
| `PUT /v1/users/{username}` | `405 method_not_allowed`. |
| `POST /v1/users/{username}` | `404 not_found`. |
| `POST /v1/users/{username}/rotate-secret` | `404 not_found` in current release due route matcher limitation. |
## Body and JSON Semantics
- Request body is read only for mutating routes that define a body contract.
- Body size limit is enforced during streaming read (`413 payload_too_large`).
- Invalid transport body frame returns `400 bad_request` (`Invalid request body`).
- Invalid JSON returns `400 bad_request` (`Invalid JSON body`).
- `Content-Type` is not required for JSON parsing.
- Unknown JSON fields are ignored by deserialization.
- `PATCH` updates only provided fields and does not support explicit clearing of optional fields.
- `If-Match` supports both quoted and unquoted values; surrounding whitespace is trimmed.
## Request Contracts
### `CreateUserRequest`
| Field | Type | Required | Description |
| --- | --- | --- | --- |
| `username` | `string` | yes | `[A-Za-z0-9_.-]`, length `1..64`. |
| `secret` | `string` | no | Exactly 32 hex chars. If missing, generated automatically. |
| `user_ad_tag` | `string` | no | Exactly 32 hex chars. |
| `max_tcp_conns` | `usize` | no | Per-user concurrent TCP limit. |
| `expiration_rfc3339` | `string` | no | RFC3339 expiration timestamp. |
| `data_quota_bytes` | `u64` | no | Per-user traffic quota. |
| `max_unique_ips` | `usize` | no | Per-user unique source IP limit. |
### `PatchUserRequest`
| Field | Type | Required | Description |
| --- | --- | --- | --- |
| `secret` | `string` | no | Exactly 32 hex chars. |
| `user_ad_tag` | `string` | no | Exactly 32 hex chars. |
| `max_tcp_conns` | `usize` | no | Per-user concurrent TCP limit. |
| `expiration_rfc3339` | `string` | no | RFC3339 expiration timestamp. |
| `data_quota_bytes` | `u64` | no | Per-user traffic quota. |
| `max_unique_ips` | `usize` | no | Per-user unique source IP limit. |
### `RotateSecretRequest`
| Field | Type | Required | Description |
| --- | --- | --- | --- |
| `secret` | `string` | no | Exactly 32 hex chars. If missing, generated automatically. |
Note: the request contract is defined, but the corresponding route currently returns `404` (see routing edge cases).
## Response Data Contracts
### `HealthData`
| Field | Type | Description |
| --- | --- | --- |
| `status` | `string` | Always `"ok"`. |
| `read_only` | `bool` | Mirrors current API `read_only` mode. |
### `SummaryData`
| Field | Type | Description |
| --- | --- | --- |
| `uptime_seconds` | `f64` | Process uptime in seconds. |
| `connections_total` | `u64` | Total accepted client connections. |
| `connections_bad_total` | `u64` | Failed/invalid client connections. |
| `handshake_timeouts_total` | `u64` | Handshake timeout count. |
| `configured_users` | `usize` | Number of configured users in config. |
### `ZeroAllData`
| Field | Type | Description |
| --- | --- | --- |
| `generated_at_epoch_secs` | `u64` | Snapshot time (Unix epoch seconds). |
| `core` | `ZeroCoreData` | Core counters and telemetry policy snapshot. |
| `upstream` | `ZeroUpstreamData` | Upstream connect counters/histogram buckets. |
| `middle_proxy` | `ZeroMiddleProxyData` | ME protocol/health counters. |
| `pool` | `ZeroPoolData` | ME pool lifecycle counters. |
| `desync` | `ZeroDesyncData` | Frame desync counters. |
#### `ZeroCoreData`
| Field | Type | Description |
| --- | --- | --- |
| `uptime_seconds` | `f64` | Process uptime. |
| `connections_total` | `u64` | Total accepted connections. |
| `connections_bad_total` | `u64` | Failed/invalid connections. |
| `handshake_timeouts_total` | `u64` | Handshake timeouts. |
| `configured_users` | `usize` | Configured user count. |
| `telemetry_core_enabled` | `bool` | Core telemetry toggle. |
| `telemetry_user_enabled` | `bool` | User telemetry toggle. |
| `telemetry_me_level` | `string` | ME telemetry level (`off|normal|verbose`). |
#### `ZeroUpstreamData`
| Field | Type | Description |
| --- | --- | --- |
| `connect_attempt_total` | `u64` | Total upstream connect attempts. |
| `connect_success_total` | `u64` | Successful upstream connects. |
| `connect_fail_total` | `u64` | Failed upstream connects. |
| `connect_failfast_hard_error_total` | `u64` | Fail-fast hard errors. |
| `connect_attempts_bucket_1` | `u64` | Connect attempts resolved in 1 try. |
| `connect_attempts_bucket_2` | `u64` | Connect attempts resolved in 2 tries. |
| `connect_attempts_bucket_3_4` | `u64` | Connect attempts resolved in 3-4 tries. |
| `connect_attempts_bucket_gt_4` | `u64` | Connect attempts requiring more than 4 tries. |
| `connect_duration_success_bucket_le_100ms` | `u64` | Successful connects <=100 ms. |
| `connect_duration_success_bucket_101_500ms` | `u64` | Successful connects 101-500 ms. |
| `connect_duration_success_bucket_501_1000ms` | `u64` | Successful connects 501-1000 ms. |
| `connect_duration_success_bucket_gt_1000ms` | `u64` | Successful connects >1000 ms. |
| `connect_duration_fail_bucket_le_100ms` | `u64` | Failed connects <=100 ms. |
| `connect_duration_fail_bucket_101_500ms` | `u64` | Failed connects 101-500 ms. |
| `connect_duration_fail_bucket_501_1000ms` | `u64` | Failed connects 501-1000 ms. |
| `connect_duration_fail_bucket_gt_1000ms` | `u64` | Failed connects >1000 ms. |
### `UpstreamsData`
| Field | Type | Description |
| --- | --- | --- |
| `enabled` | `bool` | Runtime upstream snapshot availability according to API config. |
| `reason` | `string?` | `feature_disabled` or `source_unavailable` when runtime snapshot is unavailable. |
| `generated_at_epoch_secs` | `u64` | Snapshot generation time. |
| `zero` | `ZeroUpstreamData` | Always available zero-cost upstream counters block. |
| `summary` | `UpstreamSummaryData?` | Runtime upstream aggregate view, null when unavailable. |
| `upstreams` | `UpstreamStatus[]?` | Per-upstream runtime status rows, null when unavailable. |
#### `UpstreamSummaryData`
| Field | Type | Description |
| --- | --- | --- |
| `configured_total` | `usize` | Total configured upstream entries. |
| `healthy_total` | `usize` | Upstreams currently marked healthy. |
| `unhealthy_total` | `usize` | Upstreams currently marked unhealthy. |
| `direct_total` | `usize` | Number of direct upstream entries. |
| `socks4_total` | `usize` | Number of SOCKS4 upstream entries. |
| `socks5_total` | `usize` | Number of SOCKS5 upstream entries. |
#### `UpstreamStatus`
| Field | Type | Description |
| --- | --- | --- |
| `upstream_id` | `usize` | Runtime upstream index. |
| `route_kind` | `string` | Upstream route kind: `direct`, `socks4`, `socks5`. |
| `address` | `string` | Upstream address (`direct` for direct route kind). Authentication fields are intentionally omitted. |
| `weight` | `u16` | Selection weight. |
| `scopes` | `string` | Configured scope selector string. |
| `healthy` | `bool` | Current health flag. |
| `fails` | `u32` | Consecutive fail counter. |
| `last_check_age_secs` | `u64` | Seconds since the last health-check update. |
| `effective_latency_ms` | `f64?` | Effective upstream latency used by selector. |
| `dc` | `UpstreamDcStatus[]` | Per-DC latency/IP preference snapshot. |
#### `UpstreamDcStatus`
| Field | Type | Description |
| --- | --- | --- |
| `dc` | `i16` | Telegram DC id. |
| `latency_ema_ms` | `f64?` | Per-DC latency EMA value. |
| `ip_preference` | `string` | Per-DC IP family preference: `unknown`, `prefer_v4`, `prefer_v6`, `both_work`, `unavailable`. |
#### `ZeroMiddleProxyData`
| Field | Type | Description |
| --- | --- | --- |
| `keepalive_sent_total` | `u64` | ME keepalive packets sent. |
| `keepalive_failed_total` | `u64` | ME keepalive send failures. |
| `keepalive_pong_total` | `u64` | Keepalive pong responses received. |
| `keepalive_timeout_total` | `u64` | Keepalive timeout events. |
| `rpc_proxy_req_signal_sent_total` | `u64` | RPC proxy activity signals sent. |
| `rpc_proxy_req_signal_failed_total` | `u64` | RPC proxy activity signal failures. |
| `rpc_proxy_req_signal_skipped_no_meta_total` | `u64` | Signals skipped due to missing metadata. |
| `rpc_proxy_req_signal_response_total` | `u64` | RPC proxy signal responses received. |
| `rpc_proxy_req_signal_close_sent_total` | `u64` | RPC proxy close signals sent. |
| `reconnect_attempt_total` | `u64` | ME reconnect attempts. |
| `reconnect_success_total` | `u64` | Successful reconnects. |
| `handshake_reject_total` | `u64` | ME handshake rejects. |
| `handshake_error_codes` | `ZeroCodeCount[]` | Handshake rejects grouped by code. |
| `reader_eof_total` | `u64` | ME reader EOF events. |
| `idle_close_by_peer_total` | `u64` | Idle closes initiated by peer. |
| `route_drop_no_conn_total` | `u64` | Route drops due to missing bound connection. |
| `route_drop_channel_closed_total` | `u64` | Route drops due to closed channel. |
| `route_drop_queue_full_total` | `u64` | Route drops due to full queue (total). |
| `route_drop_queue_full_base_total` | `u64` | Route drops in base queue mode. |
| `route_drop_queue_full_high_total` | `u64` | Route drops in high queue mode. |
| `socks_kdf_strict_reject_total` | `u64` | SOCKS KDF strict rejects. |
| `socks_kdf_compat_fallback_total` | `u64` | SOCKS KDF compat fallbacks. |
| `endpoint_quarantine_total` | `u64` | Endpoint quarantine activations. |
| `kdf_drift_total` | `u64` | KDF drift detections. |
| `kdf_port_only_drift_total` | `u64` | KDF port-only drift detections. |
| `hardswap_pending_reuse_total` | `u64` | Pending hardswap reused events. |
| `hardswap_pending_ttl_expired_total` | `u64` | Pending hardswap TTL expiry events. |
| `single_endpoint_outage_enter_total` | `u64` | Entered single-endpoint outage mode. |
| `single_endpoint_outage_exit_total` | `u64` | Exited single-endpoint outage mode. |
| `single_endpoint_outage_reconnect_attempt_total` | `u64` | Reconnect attempts in outage mode. |
| `single_endpoint_outage_reconnect_success_total` | `u64` | Reconnect successes in outage mode. |
| `single_endpoint_quarantine_bypass_total` | `u64` | Quarantine bypasses in outage mode. |
| `single_endpoint_shadow_rotate_total` | `u64` | Shadow writer rotations. |
| `single_endpoint_shadow_rotate_skipped_quarantine_total` | `u64` | Shadow rotations skipped because of quarantine. |
| `floor_mode_switch_total` | `u64` | Total floor mode switches. |
| `floor_mode_switch_static_to_adaptive_total` | `u64` | Static -> adaptive switches. |
| `floor_mode_switch_adaptive_to_static_total` | `u64` | Adaptive -> static switches. |
#### `ZeroCodeCount`
| Field | Type | Description |
| --- | --- | --- |
| `code` | `i32` | Handshake error code. |
| `total` | `u64` | Events with this code. |
#### `ZeroPoolData`
| Field | Type | Description |
| --- | --- | --- |
| `pool_swap_total` | `u64` | Pool swap count. |
| `pool_drain_active` | `u64` | Current active draining pools. |
| `pool_force_close_total` | `u64` | Forced pool closes by timeout. |
| `pool_stale_pick_total` | `u64` | Stale writer picks for binding. |
| `writer_removed_total` | `u64` | Writer removals total. |
| `writer_removed_unexpected_total` | `u64` | Unexpected writer removals. |
| `refill_triggered_total` | `u64` | Refill triggers. |
| `refill_skipped_inflight_total` | `u64` | Refill skipped because refill already in-flight. |
| `refill_failed_total` | `u64` | Refill failures. |
| `writer_restored_same_endpoint_total` | `u64` | Restores on same endpoint. |
| `writer_restored_fallback_total` | `u64` | Restores on fallback endpoint. |
#### `ZeroDesyncData`
| Field | Type | Description |
| --- | --- | --- |
| `secure_padding_invalid_total` | `u64` | Invalid secure padding events. |
| `desync_total` | `u64` | Desync events total. |
| `desync_full_logged_total` | `u64` | Fully logged desync events. |
| `desync_suppressed_total` | `u64` | Suppressed desync logs. |
| `desync_frames_bucket_0` | `u64` | Desync frames bucket 0. |
| `desync_frames_bucket_1_2` | `u64` | Desync frames bucket 1-2. |
| `desync_frames_bucket_3_10` | `u64` | Desync frames bucket 3-10. |
| `desync_frames_bucket_gt_10` | `u64` | Desync frames bucket >10. |
### `MinimalAllData`
| Field | Type | Description |
| --- | --- | --- |
| `enabled` | `bool` | Whether minimal runtime snapshots are enabled by config. |
| `reason` | `string?` | `feature_disabled` or `source_unavailable` when applicable. |
| `generated_at_epoch_secs` | `u64` | Snapshot generation time. |
| `data` | `MinimalAllPayload?` | Null when disabled; fallback payload when source unavailable. |
#### `MinimalAllPayload`
| Field | Type | Description |
| --- | --- | --- |
| `me_writers` | `MeWritersData` | ME writer status block. |
| `dcs` | `DcStatusData` | DC aggregate status block. |
| `me_runtime` | `MinimalMeRuntimeData?` | Runtime ME control snapshot. |
| `network_path` | `MinimalDcPathData[]` | Active IP path selection per DC. |
#### `MinimalMeRuntimeData`
| Field | Type | Description |
| --- | --- | --- |
| `active_generation` | `u64` | Active pool generation. |
| `warm_generation` | `u64` | Warm pool generation. |
| `pending_hardswap_generation` | `u64` | Pending hardswap generation. |
| `pending_hardswap_age_secs` | `u64?` | Pending hardswap age in seconds. |
| `hardswap_enabled` | `bool` | Hardswap mode toggle. |
| `floor_mode` | `string` | Writer floor mode. |
| `adaptive_floor_idle_secs` | `u64` | Idle threshold for adaptive floor. |
| `adaptive_floor_min_writers_single_endpoint` | `u8` | Minimum writers for single-endpoint DC in adaptive mode. |
| `adaptive_floor_recover_grace_secs` | `u64` | Grace period for floor recovery. |
| `me_keepalive_enabled` | `bool` | ME keepalive toggle. |
| `me_keepalive_interval_secs` | `u64` | Keepalive period. |
| `me_keepalive_jitter_secs` | `u64` | Keepalive jitter. |
| `me_keepalive_payload_random` | `bool` | Randomized keepalive payload toggle. |
| `rpc_proxy_req_every_secs` | `u64` | Period for RPC proxy request signal. |
| `me_reconnect_max_concurrent_per_dc` | `u32` | Reconnect concurrency per DC. |
| `me_reconnect_backoff_base_ms` | `u64` | Base reconnect backoff. |
| `me_reconnect_backoff_cap_ms` | `u64` | Max reconnect backoff. |
| `me_reconnect_fast_retry_count` | `u32` | Fast retry attempts before normal backoff. |
| `me_pool_drain_ttl_secs` | `u64` | Pool drain TTL. |
| `me_pool_force_close_secs` | `u64` | Hard close timeout for draining writers. |
| `me_pool_min_fresh_ratio` | `f32` | Minimum fresh ratio before swap. |
| `me_bind_stale_mode` | `string` | Stale writer bind policy. |
| `me_bind_stale_ttl_secs` | `u64` | Stale writer TTL. |
| `me_single_endpoint_shadow_writers` | `u8` | Shadow writers for single-endpoint DCs. |
| `me_single_endpoint_outage_mode_enabled` | `bool` | Outage mode toggle for single-endpoint DCs. |
| `me_single_endpoint_outage_disable_quarantine` | `bool` | Quarantine behavior in outage mode. |
| `me_single_endpoint_outage_backoff_min_ms` | `u64` | Outage mode min reconnect backoff. |
| `me_single_endpoint_outage_backoff_max_ms` | `u64` | Outage mode max reconnect backoff. |
| `me_single_endpoint_shadow_rotate_every_secs` | `u64` | Shadow rotation interval. |
| `me_deterministic_writer_sort` | `bool` | Deterministic writer ordering toggle. |
| `me_socks_kdf_policy` | `string` | Current SOCKS KDF policy mode. |
| `quarantined_endpoints_total` | `usize` | Total quarantined endpoints. |
| `quarantined_endpoints` | `MinimalQuarantineData[]` | Quarantine details. |
#### `MinimalQuarantineData`
| Field | Type | Description |
| --- | --- | --- |
| `endpoint` | `string` | Endpoint (`ip:port`). |
| `remaining_ms` | `u64` | Remaining quarantine duration. |
#### `MinimalDcPathData`
| Field | Type | Description |
| --- | --- | --- |
| `dc` | `i16` | Telegram DC identifier. |
| `ip_preference` | `string?` | Runtime IP family preference. |
| `selected_addr_v4` | `string?` | Selected IPv4 endpoint for this DC. |
| `selected_addr_v6` | `string?` | Selected IPv6 endpoint for this DC. |
### `MeWritersData`
| Field | Type | Description |
| --- | --- | --- |
| `middle_proxy_enabled` | `bool` | `false` when minimal runtime is disabled or source unavailable. |
| `reason` | `string?` | `feature_disabled` or `source_unavailable` when not fully available. |
| `generated_at_epoch_secs` | `u64` | Snapshot generation time. |
| `summary` | `MeWritersSummary` | Coverage/availability summary. |
| `writers` | `MeWriterStatus[]` | Per-writer statuses. |
#### `MeWritersSummary`
| Field | Type | Description |
| --- | --- | --- |
| `configured_dc_groups` | `usize` | Number of configured DC groups. |
| `configured_endpoints` | `usize` | Total configured ME endpoints. |
| `available_endpoints` | `usize` | Endpoints currently available. |
| `available_pct` | `f64` | `available_endpoints / configured_endpoints * 100`. |
| `required_writers` | `usize` | Required writers based on current floor policy. |
| `alive_writers` | `usize` | Writers currently alive. |
| `coverage_pct` | `f64` | `alive_writers / required_writers * 100`. |
#### `MeWriterStatus`
| Field | Type | Description |
| --- | --- | --- |
| `writer_id` | `u64` | Runtime writer identifier. |
| `dc` | `i16?` | DC id if mapped. |
| `endpoint` | `string` | Endpoint (`ip:port`). |
| `generation` | `u64` | Pool generation owning this writer. |
| `state` | `string` | Writer state (`warm`, `active`, `draining`). |
| `draining` | `bool` | Draining flag. |
| `degraded` | `bool` | Degraded flag. |
| `bound_clients` | `usize` | Number of currently bound clients. |
| `idle_for_secs` | `u64?` | Idle age in seconds if idle. |
| `rtt_ema_ms` | `f64?` | RTT exponential moving average. |
### `DcStatusData`
| Field | Type | Description |
| --- | --- | --- |
| `middle_proxy_enabled` | `bool` | `false` when minimal runtime is disabled or source unavailable. |
| `reason` | `string?` | `feature_disabled` or `source_unavailable` when not fully available. |
| `generated_at_epoch_secs` | `u64` | Snapshot generation time. |
| `dcs` | `DcStatus[]` | Per-DC status rows. |
#### `DcStatus`
| Field | Type | Description |
| --- | --- | --- |
| `dc` | `i16` | Telegram DC id. |
| `endpoints` | `string[]` | Endpoints in this DC (`ip:port`). |
| `available_endpoints` | `usize` | Endpoints currently available in this DC. |
| `available_pct` | `f64` | `available_endpoints / endpoints_total * 100`. |
| `required_writers` | `usize` | Required writer count for this DC. |
| `alive_writers` | `usize` | Alive writers in this DC. |
| `coverage_pct` | `f64` | `alive_writers / required_writers * 100`. |
| `rtt_ms` | `f64?` | Aggregated RTT for DC. |
| `load` | `usize` | Active client sessions bound to this DC. |
### `UserInfo`
| Field | Type | Description |
| --- | --- | --- |
| `username` | `string` | Username. |
| `user_ad_tag` | `string?` | Optional ad tag (32 hex chars). |
| `max_tcp_conns` | `usize?` | Optional max concurrent TCP limit. |
| `expiration_rfc3339` | `string?` | Optional expiration timestamp. |
| `data_quota_bytes` | `u64?` | Optional data quota. |
| `max_unique_ips` | `usize?` | Optional unique IP limit. |
| `current_connections` | `u64` | Current live connections. |
| `active_unique_ips` | `usize` | Current active unique source IPs. |
| `total_octets` | `u64` | Total traffic octets for this user. |
| `links` | `UserLinks` | Active connection links derived from current config. |
#### `UserLinks`
| Field | Type | Description |
| --- | --- | --- |
| `classic` | `string[]` | Active `tg://proxy` links for classic mode. |
| `secure` | `string[]` | Active `tg://proxy` links for secure/DD mode. |
| `tls` | `string[]` | Active `tg://proxy` links for EE-TLS mode (for each host+TLS domain). |
Link generation uses active config and enabled modes:
- `[general.links].public_host/public_port` have priority.
- If `public_host` is not set, startup-detected public IPs are used (`IPv4`, `IPv6`, or both when available).
- Fallback host sources: listener `announce`, `announce_ip`, explicit listener `ip`.
- Legacy fallback: `listen_addr_ipv4` and `listen_addr_ipv6` when routable.
- Startup-detected IPs are fixed for process lifetime and refreshed on restart.
- User rows are sorted by `username` in ascending lexical order.
### `CreateUserResponse`
| Field | Type | Description |
| --- | --- | --- |
| `user` | `UserInfo` | Created or updated user view. |
| `secret` | `string` | Effective user secret. |
## Mutation Semantics
| Endpoint | Notes |
| --- | --- |
| `POST /v1/users` | Creates user and validates resulting config before atomic save. |
| `PATCH /v1/users/{username}` | Partial update of provided fields only. Missing fields remain unchanged. |
| `POST /v1/users/{username}/rotate-secret` | Currently returns `404` in runtime route matcher; request schema is reserved for intended behavior. |
| `DELETE /v1/users/{username}` | Deletes user and related optional settings. Last user deletion is blocked. |
All mutating endpoints:
- Respect `read_only` mode.
- Accept optional `If-Match` for optimistic concurrency.
- Return new `revision` after successful write.
- Use process-local mutation lock + atomic write (`tmp + rename`) for config persistence.
## Runtime State Matrix
| Endpoint | `minimal_runtime_enabled=false` | `minimal_runtime_enabled=true` + source unavailable | `minimal_runtime_enabled=true` + source available |
| --- | --- | --- | --- |
| `/v1/stats/minimal/all` | `enabled=false`, `reason=feature_disabled`, `data=null` | `enabled=true`, `reason=source_unavailable`, fallback `data` with disabled ME blocks | `enabled=true`, `reason` omitted, full payload |
| `/v1/stats/me-writers` | `middle_proxy_enabled=false`, `reason=feature_disabled` | `middle_proxy_enabled=false`, `reason=source_unavailable` | `middle_proxy_enabled=true`, runtime snapshot |
| `/v1/stats/dcs` | `middle_proxy_enabled=false`, `reason=feature_disabled` | `middle_proxy_enabled=false`, `reason=source_unavailable` | `middle_proxy_enabled=true`, runtime snapshot |
| `/v1/stats/upstreams` | `enabled=false`, `reason=feature_disabled`, `summary/upstreams` omitted, `zero` still present | `enabled=true`, `reason=source_unavailable`, `summary/upstreams` omitted, `zero` present | `enabled=true`, `reason` omitted, `summary/upstreams` present, `zero` present |
`source_unavailable` conditions:
- ME endpoints: ME pool is absent (for example direct-only mode or failed ME initialization).
- Upstreams endpoint: non-blocking upstream snapshot lock is unavailable at request time.
## Serialization Rules
- Success responses always include `revision`.
- Error responses never include `revision`; they include `request_id`.
- Optional fields with `skip_serializing_if` are omitted when absent.
- Nullable payload fields may still be `null` where contract uses `?` (for example `UserInfo` option fields).
- For `/v1/stats/upstreams`, authentication details of SOCKS upstreams are intentionally omitted.
## Operational Notes
| Topic | Details |
| --- | --- |
| API startup | API listener is spawned only when `[server.api].enabled=true`. |
| `listen` port `0` | API spawn is skipped when parsed listen port is `0` (treated as disabled bind target). |
| Bind failure | Failed API bind logs warning and API task exits (no auto-retry loop). |
| ME runtime status endpoints | `/v1/stats/me-writers`, `/v1/stats/dcs`, `/v1/stats/minimal/all` require `[server.api].minimal_runtime_enabled=true`; otherwise they return disabled payload with `reason=feature_disabled`. |
| Upstream runtime endpoint | `/v1/stats/upstreams` always returns `zero`, but runtime fields (`summary`, `upstreams`) require `[server.api].minimal_runtime_enabled=true`. |
| Restart requirements | `server.api` changes are restart-required for predictable behavior. |
| Hot-reload nuance | A pure `server.api`-only config change may not propagate through watcher broadcast; a mixed change (with hot fields) may propagate API flags while still warning that restart is required. |
| Runtime apply path | Successful writes are picked up by existing config watcher/hot-reload path. |
| Exposure | Built-in TLS/mTLS is not provided. Use loopback bind + reverse proxy if needed. |
| Pagination | User list currently has no pagination/filtering. |
| Serialization side effect | Config comments/manual formatting are not preserved on write. |
## Known Limitations (Current Release)
- `POST /v1/users/{username}/rotate-secret` is currently unreachable in route matcher and returns `404`.
- API runtime controls under `server.api` are documented as restart-required; hot-reload behavior for these fields is not strictly uniform in all change combinations.

65
docs/FAQ.ru.md Normal file
View File

@@ -0,0 +1,65 @@
## Как настроить канал "спонсор прокси"
1. Зайти в бота @MTProxybot.
2. Ввести команду `/newproxy`
3. Отправить IP и порт сервера. Например: 1.2.3.4:443
4. Открыть конфиг `nano /etc/telemt.toml`.
5. Скопировать и отправить боту секрет пользователя из раздела [access.users].
6. Скопировать полученный tag у бота. Например 1234567890abcdef1234567890abcdef.
> [!WARNING]
> Ссылка, которую выдает бот, не будет работать. Не копируйте и не используйте её!
7. Раскомментировать параметр ad_tag и вписать tag, полученный у бота.
8. Раскомментировать/добавить параметр use_middle_proxy = true.
Пример конфига:
```toml
[general]
ad_tag = "1234567890abcdef1234567890abcdef"
use_middle_proxy = true
```
9. Сохранить конфиг. Ctrl+S -> Ctrl+X.
10. Перезапустить telemt `systemctl restart telemt`.
11. В боте отправить команду /myproxies и выбрать добавленный сервер.
12. Нажать кнопку "Set promotion".
13. Отправить **публичную ссылку** на канал. Приватный канал добавить нельзя!
14. Подождать примерно 1 час, пока информация обновится на серверах Telegram.
> [!WARNING]
> У вас не будет отображаться "спонсор прокси" если вы уже подписаны на канал.
## Сколько человек может пользоваться 1 ссылкой
По умолчанию 1 ссылкой может пользоваться сколько угодно человек.
Вы можете ограничить число IP, использующих прокси.
```toml
[access.user_max_unique_ips]
hello = 1
```
Этот параметр ограничивает, сколько уникальных IP может использовать 1 ссылку одновременно. Если один пользователь отключится, второй сможет подключиться. Также с одного IP может сидеть несколько пользователей.
## Как сделать несколько разных ссылок
1. Сгенерируйте нужное число секретов `openssl rand -hex 16`
2. Открыть конфиг `nano /etc/telemt.toml`
3. Добавить новых пользователей.
```toml
[access.users]
user1 = "00000000000000000000000000000001"
user2 = "00000000000000000000000000000002"
user3 = "00000000000000000000000000000003"
```
4. Сохранить конфиг. Ctrl+S -> Ctrl+X. Перезапускать telemt не нужно.
5. Получить ссылки через `journalctl -u telemt -n -g "links" --no-pager -o cat | tac`
## Как посмотреть метрики
1. Открыть конфиг `nano /etc/telemt.toml`
2. Добавить следующие параметры
```toml
[server]
metrics_port = 9090
metrics_whitelist = ["127.0.0.1/32", "::1/128", "0.0.0.0/0"]
```
3. Сохранить конфиг. Ctrl+S -> Ctrl+X.
4. Метрики доступны по адресу SERVER_IP:9090/metrics.
> [!WARNING]
> "0.0.0.0/0" в metrics_whitelist открывает доступ с любого IP. Замените на свой ip. Например "1.2.3.4"

40
docs/MIDDLE-END-KDF.de.md Normal file
View File

@@ -0,0 +1,40 @@
# Middle-End Proxy
## KDF-Adressierung — Implementierungs-FAQ
### Benötigt die C-Referenzimplementierung sowohl externe IP-Adresse als auch Port für die KDF?
Ja.
In der C-Referenzimplementierung werden **sowohl IP-Adresse als auch Port in die KDF einbezogen** — auf beiden Seiten der Verbindung.
In `aes_create_keys()` enthält der KDF-Input:
- `server_ip + client_port`
- `client_ip + server_port`
- sowie Secret / Nonces
Für IPv6:
- IPv4-Felder werden auf 0 gesetzt
- IPv6-Adressen werden ergänzt
Die **Ports bleiben weiterhin Bestandteil der KDF**.
> Wenn sich externe IP oder Port (z. B. durch NAT, SOCKS oder Proxy) von den erwarteten Werten unterscheiden, entstehen unterschiedliche Schlüssel — der Handshake schlägt fehl.
---
### Kann der Port aus der KDF ausgeschlossen werden (z. B. durch Port = 0)?
**Nein!**
Die C-Referenzimplementierung enthält **keine Möglichkeit, den Port zu ignorieren**:
- `client_port` und `server_port` sind fester Bestandteil der KDF
- Es werden immer reale Socket-Ports übergeben:
- `c->our_port`
- `c->remote_port`
Falls ein Port den Wert `0` hat, wird er dennoch als `0` in die KDF übernommen.
Eine „Port-Ignore“-Logik existiert nicht.

41
docs/MIDDLE-END-KDF.en.md Normal file
View File

@@ -0,0 +1,41 @@
# Middle-End Proxy
## KDF Addressing — Implementation FAQ
### Does the C-implementation require both external IP address and port for the KDF?
**Yes!**
In the C reference implementation, **both IP address and port are included in the KDF input** from both sides of the connection.
Inside `aes_create_keys()`, the KDF input explicitly contains:
- `server_ip + client_port`
- `client_ip + server_port`
- followed by shared secret / nonces
For IPv6:
- IPv4 fields are zeroed
- IPv6 addresses are inserted
However, **client_port and server_port remain part of the KDF regardless of IP version**.
> If externally observed IP or port (e.g. due to NAT, SOCKS, or proxy traversal) differs from what the peer expects, the derived keys will not match and the handshake will fail.
---
### Can port be excluded from KDF (e.g. by using port = 0)?
**No!**
The C-implementation provides **no mechanism to ignore the port**:
- `client_port` and `server_port` are explicitly included in the KDF input
- Real socket ports are always passed:
- `c->our_port`
- `c->remote_port`
If a port is `0`, it is still incorporated into the KDF as `0`.
There is **no conditional logic to exclude ports**

41
docs/MIDDLE-END-KDF.ru.md Normal file
View File

@@ -0,0 +1,41 @@
# Middle-End Proxy
## KDF Addressing — FAQ по реализации
### Требует ли C-референсная реализация KDF внешний IP и порт?
**Да**
В C-референсе **в KDF участвуют и IP-адрес, и порт**с обеих сторон соединения.
В `aes_create_keys()` в строку KDF входят:
- `server_ip + client_port`
- `client_ip + server_port`
- далее secret / nonces
Для IPv6:
- IPv4-поля заполняются нулями
- добавляются IPv6-адреса
Однако **порты client_port и server_port всё равно участвуют в KDF**.
> Если внешний IP или порт (например, из-за NAT, SOCKS или прокси) не совпадает с ожидаемым другой стороной — ключи расходятся и handshake ломается.
---
### Можно ли исключить порт из KDF (например, установив порт = 0)?
**Нет.**
В C-референсе **нет механики отключения порта**.
- `client_port` и `server_port` явно включены в KDF
- Передаются реальные порты сокета:
- `c->our_port`
- `c->remote_port`
Если порт равен `0`, он всё равно попадёт в KDF как `0`.
Отдельной логики «игнорировать порт» не предусмотрено.

View File

@@ -0,0 +1,153 @@
# Telemt via Systemd
## Installation
This software is designed for Debian-based OS: in addition to Debian, these are Ubuntu, Mint, Kali, MX and many other Linux
**1. Download**
```bash
wget -qO- "https://github.com/telemt/telemt/releases/latest/download/telemt-$(uname -m)-linux-$(ldd --version 2>&1 | grep -iq musl && echo musl || echo gnu).tar.gz" | tar -xz
```
**2. Move to the Bin folder**
```bash
mv telemt /bin
```
**3. Make the file executable**
```bash
chmod +x /bin/telemt
```
## How to use?
**This guide "assumes" that you:**
- logged in as root or executed `su -` / `sudo su`
- Already have the "telemt" executable file in the /bin folder. Read the **[Installation](#Installation)** section.
---
**0. Check port and generate secrets**
The port you have selected for use should be MISSING from the list, when:
```bash
netstat -lnp
```
Generate 16 bytes/32 characters HEX with OpenSSL or another way:
```bash
openssl rand -hex 16
```
OR
```bash
xxd -l 16 -p /dev/urandom
```
OR
```bash
python3 -c 'import os; print(os.urandom(16).hex())'
```
Save the obtained result somewhere. You will need it later!
---
**1. Place your config to /etc/telemt.toml**
Open nano
```bash
nano /etc/telemt.toml
```
paste your config
```toml
# === General Settings ===
[general]
# ad_tag = "00000000000000000000000000000000"
use_middle_proxy = false
[general.modes]
classic = false
secure = false
tls = true
# === Anti-Censorship & Masking ===
[censorship]
tls_domain = "petrovich.ru"
[access.users]
# format: "username" = "32_hex_chars_secret"
hello = "00000000000000000000000000000000"
```
then Ctrl+S -> Ctrl+X to save
> [!WARNING]
> Replace the value of the hello parameter with the value you obtained in step 0.
> Replace the value of the tls_domain parameter with another website.
---
**2. Create service on /etc/systemd/system/telemt.service**
Open nano
```bash
nano /etc/systemd/system/telemt.service
```
paste this Systemd Module
```bash
[Unit]
Description=Telemt
After=network.target
[Service]
Type=simple
WorkingDirectory=/bin
ExecStart=/bin/telemt /etc/telemt.toml
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
```
then Ctrl+S -> Ctrl+X to save
**3.** To start it, enter the command `systemctl start telemt`
**4.** To get status information, enter `systemctl status telemt`
**5.** For automatic startup at system boot, enter `systemctl enable telemt`
**6.** To get the links, enter `journalctl -u telemt -n -g "links" --no-pager -o cat | tac`
---
# Telemt via Docker Compose
**1. Edit `config.toml` in repo root (at least: port, users secrets, tls_domain)**
**2. Start container:**
```bash
docker compose up -d --build
```
**3. Check logs:**
```bash
docker compose logs -f telemt
```
**4. Stop:**
```bash
docker compose down
```
> [!NOTE]
> - `docker-compose.yml` maps `./config.toml` to `/app/config.toml` (read-only)
> - By default it publishes `443:443` and runs with dropped capabilities (only `NET_BIND_SERVICE` is added)
> - If you really need host networking (usually only for some IPv6 setups) uncomment `network_mode: host`
**Run without Compose**
```bash
docker build -t telemt:local .
docker run --name telemt --restart unless-stopped \
-p 443:443 \
-e RUST_LOG=info \
-v "$PWD/config.toml:/app/config.toml:ro" \
--read-only \
--cap-drop ALL --cap-add NET_BIND_SERVICE \
--ulimit nofile=65536:65536 \
telemt:local
```

View File

@@ -0,0 +1,155 @@
# Telemt через Systemd
## Установка
Это программное обеспечение разработано для ОС на базе Debian: помимо Debian, это Ubuntu, Mint, Kali, MX и многие другие Linux
**1. Скачать**
```bash
wget -qO- "https://github.com/telemt/telemt/releases/latest/download/telemt-$(uname -m)-linux-$(ldd --version 2>&1 | grep -iq musl && echo musl || echo gnu).tar.gz" | tar -xz
```
**2. Переместить в папку Bin**
```bash
mv telemt /bin
```
**3. Сделать файл исполняемым**
```bash
chmod +x /bin/telemt
```
## Как правильно использовать?
**Эта инструкция "предполагает", что вы:**
- Авторизовались как пользователь root или выполнил `su -` / `sudo su`
- У вас уже есть исполняемый файл "telemt" в папке /bin. Читайте раздел **[Установка](#установка)**
---
**0. Проверьте порт и сгенерируйте секреты**
Порт, который вы выбрали для использования, должен отсутствовать в списке:
```bash
netstat -lnp
```
Сгенерируйте 16 bytes/32 символа в шестнадцатеричном формате с помощью OpenSSL или другим способом:
```bash
openssl rand -hex 16
```
ИЛИ
```bash
xxd -l 16 -p /dev/urandom
```
ИЛИ
```bash
python3 -c 'import os; print(os.urandom(16).hex())'
```
Полученный результат сохраняем где-нибудь. Он понадобиться вам дальше!
---
**1. Поместите свою конфигурацию в файл /etc/telemt.toml**
Открываем nano
```bash
nano /etc/telemt.toml
```
Вставьте свою конфигурацию
```toml
# === General Settings ===
[general]
# ad_tag = "00000000000000000000000000000000"
use_middle_proxy = false
[general.modes]
classic = false
secure = false
tls = true
# === Anti-Censorship & Masking ===
[censorship]
tls_domain = "petrovich.ru"
[access.users]
# format: "username" = "32_hex_chars_secret"
hello = "00000000000000000000000000000000"
```
Затем нажмите Ctrl+S -> Ctrl+X, чтобы сохранить
> [!WARNING]
> Замените значение параметра hello на значение, которое вы получили в пункте 0.
> Так же замените значение параметра tls_domain на другой сайт.
---
**2. Создайте службу в /etc/systemd/system/telemt.service**
Открываем nano
```bash
nano /etc/systemd/system/telemt.service
```
Вставьте этот модуль Systemd
```bash
[Unit]
Description=Telemt
After=network.target
[Service]
Type=simple
WorkingDirectory=/bin
ExecStart=/bin/telemt /etc/telemt.toml
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
```
Затем нажмите Ctrl+S -> Ctrl+X, чтобы сохранить
**3.** Для запуска введите команду `systemctl start telemt`
**4.** Для получения информации о статусе введите `systemctl status telemt`
**5.** Для автоматического запуска при запуске системы в введите `systemctl enable telemt`
**6.** Для получения ссылки введите `journalctl -u telemt -n -g "links" --no-pager -o cat | tac`
> [!WARNING]
> Рабочую ссылку может выдать только команда из 6 пункта. Не пытайтесь делать ее самостоятельно или копировать откуда-либо!
---
# Telemt через Docker Compose
**1. Отредактируйте `config.toml` в корневом каталоге репозитория (как минимум: порт, пользовательские секреты, tls_domain)**
**2. Запустите контейнер:**
```bash
docker compose up -d --build
```
**3. Проверьте логи:**
```bash
docker compose logs -f telemt
```
**4. Остановите контейнер:**
```bash
docker compose down
```
> [!NOTE]
> - В `docker-compose.yml` файл `./config.toml` монтируется в `/app/config.toml` (доступно только для чтения)
> - По умолчанию публикуются порты 443:443, а контейнер запускается со сброшенными привилегиями (добавлена только `NET_BIND_SERVICE`)
> - Если вам действительно нужна сеть хоста (обычно это требуется только для некоторых конфигураций IPv6), раскомментируйте `network_mode: host`
**Запуск в Docker Compose**
```bash
docker build -t telemt:local .
docker run --name telemt --restart unless-stopped \
-p 443:443 \
-e RUST_LOG=info \
-v "$PWD/config.toml:/app/config.toml:ro" \
--read-only \
--cap-drop ALL --cap-add NET_BIND_SERVICE \
--ulimit nofile=65536:65536 \
telemt:local
```

View File

@@ -0,0 +1,321 @@
# SNI-маршрутизация в xray-core / sing-box + TLS-fronting
## Термины (в контексте этого кейса)
- **TLS-fronting домен** — домен, который фигурирует в TLS ClientHello как **SNI** (например, `petrovich.ru`): он используется как "маска" на L7 и как ключ маршрутизации в прокси-роутере.
- **xray-core / sing-box** — локальный или удалённый L7/TLS-роутер (прокси), который:
1) принимает входящее TCP/TLS-соединение,
2) читает TLS ClientHello,
3) извлекает SNI,
4) по SNI выбирает outbound/апстрим,
5) устанавливает новое TCP-соединение к целевому хосту уже **от себя**.
- **SNI (Server Name Indication)** — поле в TLS ClientHello, где клиент Telegram сообщает доменное имя для "маскировки"
- **DNS-resolve на стороне L7-роутера** — если выходной адрес задан доменом (или роутер решил "всё равно идти по SNI"), то DNS резолвится **на стороне xray/sing-box**, а не на стороне Telegram-клиента
---
## Ключевая идея: куда на самом деле идёт соединение решает не то, что вы указали клиенту, а то как L7-роутер трактует SNI
Механика:
1) Telegram-клиенту вы можете указать **IP/домен telemt**,как "сервер".
2) Между клиентом и telemt стоит xray-core/sing-box, который принимает TCP, читает TLS ClientHello и видит **SNI=petrovich.ru**
3) Дальше роутер говорит: "Вижу SNI - направить на апстрим/маршрут N"
4) И устанавливает исходящее соединение не "по тому IP, который пользователь подразумевал", а **по домену из SNI** (или по сопоставлению SNI→outbound), используя для определния его IP собственный DNS-кеш или резолвер
5) `petrovich.ru` по A-записи указывает **не на IP telemt**, а значит при L7-маршрутизации трафик уйдёт на "оригинальный" сайт за этим доменом, а не в telemt: Telegram-клиент, естественно, не сможет получить ожидаемое поведение, потому что ответить с handshake на той стороне некому
---
## Схема №1 "Как это НЕ работает"
```text
Telegram Client
|
| (указан IP/домен telemt)
v
telemt instance
````
Ожидание: "я указал telemt -> значит трафик попадёт в telemt" - **нет!**
---
## Схема №2. "Как это реально работает с TLS/L7-роутером и SNI"
```text
Telegram Client
|
| 1) TCP/TLS connection:
| - ClientHello:
| - SNI=petrovich.ru
v
xray-core / sing-box / любой L7 router
|
| 2) читает ClientHello -> вытаскивает SNI
| 3) выбирает маршрут по SNI
| 4) делает DNS для petrovich.ru
| 5) подключается к полученному IP по TLS с этим SNI
v
"Оригинальный" сайт, A-запись которого не на telemt
|
X не telemt -> Telegram-клиент не коннектится как ожидалось
```
---
## Почему указанный в клиенте IP/домен telemt "не спасает"
Потому что в таком режиме xray/sing-box выступает как **точка терминации TCP/TLS**, можно сказать - TLS-инспектор на уровне ClientHello, это означает:
* TCP-сессия от Telegram-клиента заканчивается на xray/sing-box
* Дальше создаётся **новая** TCP-сессия "от имени" xray/sing-box к апстриму
* Выбор апстрима делается правилами роутинга, а в TLS-сценариях самый удобный и распространённый ключ — **SNI**
То есть, "куда идти дальше" определяется логикой L7-роутера:
* либо правилами вида `if SNI == petrovich.ru -> outbound X`,
* либо более "автоматическим" поведением: `подключаться к тому хосту, который указан в SNI`,
* плюс кэш DNS и собственные резолверы роутера
---
## Что именно извлекается из TLS ClientHello и почему этого достаточно
TLS ClientHello отправляется **в начале** TLS-сессии и, в классическом TLS без ECH, содержит SNI в открытом виде.
Упрощённо:
```text
ClientHello:
- supported_versions
- cipher_suites
- extensions:
- server_name: petrovich.ru <-- SNI
- alpn: h2/http1.1/...
- ...
```
Роутеру не нужно расшифровывать трафик и завершать TLS "как сервер" — часто достаточно просто прочитать первые пакеты и распарсить ClientHello, чтобы получить SNI и принять решение
---
## Типовой алгоритм SNI-роутинга
1. Принять входящий TCP.
2. Подождать первые байты.
3. Определить протокол:
* если видим TLS ClientHello → парсим SNI/ALPN
4. Применить route rules:
* match по `server_name` / `domain` / `tls.sni`
5. Выбрать outbound:
* direct / proxy / specific upstream / detour
6. Установить исходящее соединение:
* либо на фиксированный IP:порт,
* либо на домен через DNS-resolve на стороне роутера
7. Начать проксирование данных между входом и выходом
---
## Почему "A-запись фронтинг-домена не на telemt" ломает кейс
### Ситуация
* В ClientHello: `SNI = petrovich.ru`
* DNS: `petrovich.ru -> 203.0.113.77` - "оригинальный" сайт
* telemt живёт на: `198.51.100.10`
### Что делает роутер
* Видит SNI `petrovich.ru`
* Либо:
* (а) напрямую коннектится к `petrovich.ru:443`, резолвя A-запись в `203.0.113.77`,
* либо:
* (б) выбирает outbound, который указывает на `petrovich.ru` как destination,
* либо:
* (в) делает sniffing/override destination по SNI
В итоге исходящий коннект идёт на `203.0.113.77:443`, а не на telemt!
Другой сервер, другой протокол, другая логика, где telemt не участвует
---
## "Где именно происходит подмена destination на SNI"
Это зависит от конфигурации, но типовые варианты:
### Вариант A: outbound задан доменом (и он совпадает с SNI)
Правило по SNI выбирает outbound, у которого destination задан доменом фронтинга,
тогда DNS резолвится на стороне роутера и вы уходите на "оригинальный" хост
### Вариант B: destination override / sniffing
Роутер "снифает" SNI и **перезаписывает** destination на домен из SNI (даже если вход изначально был на IP telemt),
это особенно коварно: пользователь видит "я подключаюсь к IP telemt", но роутер после sniffing решает иначе
### Вариант C: split DNS / кеш / независимый резолвер
Даже если клиент "где-то" резолвит иначе, это не важно: конечный DNS для исходящего коннекта — на стороне xray/sing-box,
который может иметь:
* свой DoH/DoT,
* свой кеш,
* свои правила fake-ip / system resolver,
* и, как следствие, своя "карта" **домен/SNI -> IP**
---
## Признаки того, что трафик "утёк на оригинал", а не попал в telemt
* На стороне telemt отсутствуют входящие соединения/логи
* На стороне роутера видно, что destination — домен фронтинга, а IP соответствует публичному сайту
* TLS-метрики/сертификат на выходе соответствует "оригинальному" сайту в записах трафика
* Telegram-клиент получает неожиданный тип ответов/ошибку handshaking/timeout в debug-режиме
---
## Best-practice решение для этого кейса: свой домен фронтинга + заглушка на telemt + Let's Encrypt
### Цель
Сделать так, чтобы:
* SNI (фронтинг-домен) **резолвился в IP telemt**,
* на IP telemt реально был TLS-сервис с валидным сертификатом под этот домен,
* даже если кто-то "попробует открыть домен как сайт", он увидит нормальную заглушку, а не "пустоту"
### Что это даёт
* xray/sing-box, маршрутизируя по SNI, будет неизбежно приходить на telemt, потому что DNS(SNI-домен) → IP telemt
* Внешний вид будет правдоподобным: обычный домен с обычным сертификатом
* Устойчивость: меньше сюрпризов от DNS-кеша/перерезолва/"умных" правил роутера
---
## Рекомендуемая схема (целевое состояние)
```text
Telegram Client
|
| TLS ClientHello: SNI = hello.example.com
v
xray-core / sing-box
|
| Route by SNI -> outbound -> connect to hello.example.com:443
| DNS(hello.example.com) = IP telemt
v
telemt instance (IP telemt)
|
| TLS cert for hello.example.com (Let's Encrypt)
| + сайт-заглушка / health endpoint
v
OK
```
---
## Практический чеклист (минимальный)
1. Купить/иметь домен: `hello.example.com`
2. В DNS:
* `A hello.example.com -> <IP telemt>`
* (опционально) AAAA, если используете IPv6 и он стабилен
3. На telemt-хосте:
* поднять TLS endpoint на 443 с валидным сертификатом LE под `hello.example.com`
* отдать "заглушку" (например, статический сайт), чтобы домен выглядел как обычный веб-сервис
4. В xray/sing-box правилах:
* маршрутизировать нужный трафик по SNI = `hello.example.com` в "правильный" outbound (к telemt)
* избегать конфигураций, где destination override уводит на чужой домен
5. Важно:
* если вы используете кеш DNS на роутере — сбросить/обновить его после смены A-записи
---
## Пояснение про сайт-заглушку
Для эмуляции TLS, telemt имеет подсистему TLS-F в `src/tls_front`:
- её модуль - fetcher, собирает TLS-профили, чтоб максимально поведенчески корректно повторять TLS конкретно указанного сайта
Когда вы указываете сайт, который не отвечает по TLS:
- fetcher не может собрать TLS-профиль и происходит fallback на `fake_cert_len` - примитивный алгоритм,
- он забивает служебную информацию TLS рандомными байтами,
- простые системы DPI не распознают это
- однако, продвинутые системы, такие как nEdge или Fraud Control в сетях мобильной связи легко заблокируют или замедлят такой трафик
Создав сайт-заглушку с Let's Encrypt сертификатом, вы даёте TLS-F возможность получить данные сертификата и корректно его "повторять" в дальнейшем
---
## Вариант конфиг-подхода: "SNI строго привязываем к telemt - фиксированный IP"
Чтобы полностью исключить зависимость от DNS если вам это нужно, можно сделать outbound, который ходит на **фиксированный IP telemt**, но при этом выставляет SNI/Host как `hello.example.com`.
Идея:
* destination: `IP:443`
* SNI: `hello.example.com`
* сертификат на telemt именно под `hello.example.com`
Так вы получаете:
* TLS выглядит корректно, ведь SNI совпадает с сертификатом,
* а routing никогда не уйдёт на "оригинал", потому что A-запись указывает на telemt и контроллируется вами!
Но в вашем описании проблема как раз в том, что роутер "сам решает по SNI и резолвит домен", поэтому самый универсальный вариант — сделать так, чтобы DNS всегда приводил в telemt
---
## Пример логики правил на псевдоконфиге L7-роутера
```text
if inbound is TLS and sni == "hello.example.com":
route -> outbound "telemt"
else:
route -> outbound "default"
```
Outbound `telemt`:
* destination: `hello.example.com:443`
* TLS enabled
* SNI: `hello.example.com`
---
## Отдельно: что может неожиданно сломать даже "правильный" DNS
* **Кеширование DNS** на xray/sing-box или на системном резолвере, особенно при смене A-записи
* **Split-horizon DNS**: разные ответы внутри/снаружи, попытки подмены/терминирования в других точках
* **IPv6**: если есть AAAA и он указывает не туда, роутер может предпочесть IPv6: помните, что поддержка v6 нестабильна и не рекомендуется в prod
* **DoH/DoT** на роутере: он может резолвить не тем резолвером, которым вы проверяли
Минимальная гигиена:
* контролировать A/AAAA,
* держать TTL разумным,
* проверять, каким резолвером пользуется именно роутер,
* при необходимости отключить/ограничить destination override
---
## Итог
В режиме TLS-fronting с xray-core/sing-box как L7/TLS-роутером **SNI становится приоритетным "source-of-truth" для маршрутизации**
Если фронтинг-домен по DNS указывает не на IP telemt, роутер честно уводит трафик на "оригинальный" сайт, потому что он строит исходящее соединение "по SNI"
Надёжное решение для этого кейса:
* использовать **свой домен** для фронтинга,
* направить его **A/AAAA** на IP telemt,
* поднять на telemt **TLS-сервис с Lets Encrypt сертификатом** под этот домен,
* (желательно) держать **сайт-заглушку**, чтобы 443 выглядел как обычный HTTPS

73
install.sh Normal file
View File

@@ -0,0 +1,73 @@
sudo bash -c '
set -e
# --- Проверка на существующую установку ---
if systemctl list-unit-files | grep -q telemt.service; then
# --- РЕЖИМ ОБНОВЛЕНИЯ ---
echo "--- Обнаружена существующая установка Telemt. Запускаю обновление... ---"
echo "[*] Остановка службы telemt..."
systemctl stop telemt || true # Игнорируем ошибку, если служба уже остановлена
echo "[1/2] Скачивание последней версии Telemt..."
wget -qO- "https://github.com/telemt/telemt/releases/latest/download/telemt-$(uname -m)-linux-$(ldd --version 2>&1 | grep -iq musl && echo musl || echo gnu).tar.gz" | tar -xz
echo "[1/2] Замена исполняемого файла в /usr/local/bin..."
mv telemt /usr/local/bin/telemt
chmod +x /usr/local/bin/telemt
echo "[2/2] Запуск службы..."
systemctl start telemt
echo "--- Обновление Telemt успешно завершено! ---"
echo
echo "Для проверки статуса службы выполните:"
echo " systemctl status telemt"
else
# --- РЕЖИМ НОВОЙ УСТАНОВКИ ---
echo "--- Начало автоматической установки Telemt ---"
# Шаг 1: Скачивание и установка бинарного файла
echo "[1/5] Скачивание последней версии Telemt..."
wget -qO- "https://github.com/telemt/telemt/releases/latest/download/telemt-$(uname -m)-linux-$(ldd --version 2>&1 | grep -iq musl && echo musl || echo gnu).tar.gz" | tar -xz
echo "[1/5] Перемещение исполняемого файла в /usr/local/bin и установка прав..."
mv telemt /usr/local/bin/telemt
chmod +x /usr/local/bin/telemt
# Шаг 2: Генерация секрета
echo "[2/5] Генерация секретного ключа..."
SECRET=$(openssl rand -hex 16)
# Шаг 3: Создание файла конфигурации
echo "[3/5] Создание файла конфигурации /etc/telemt.toml..."
printf "# === General Settings ===\n[general]\n[general.modes]\nclassic = false\nsecure = false\ntls = true\n\n# === Anti-Censorship & Masking ===\n[censorship]\n# !!! ВАЖНО: Замените на ваш домен или домен, который вы хотите использовать для маскировки !!!\ntls_domain = \"petrovich.ru\"\n\n[access.users]\nhello = \"%s\"\n" "$SECRET" > /etc/telemt.toml
# Шаг 4: Создание службы Systemd
echo "[4/5] Создание службы systemd..."
printf "[Unit]\nDescription=Telemt Proxy\nAfter=network.target\n\n[Service]\nType=simple\nExecStart=/usr/local/bin/telemt /etc/telemt.toml\nRestart=on-failure\nRestartSec=5\nLimitNOFILE=65536\n\n[Install]\nWantedBy=multi-user.target\n" > /etc/systemd/system/telemt.service
# Шаг 5: Запуск службы
echo "[5/5] Перезагрузка systemd, запуск и включение службы telemt..."
systemctl daemon-reload
systemctl start telemt
systemctl enable telemt
echo "--- Установка и запуск Telemt успешно завершены! ---"
echo
echo "ВАЖНАЯ ИНФОРМАЦИЯ:"
echo "==================="
echo "1. Вам НЕОБХОДИМО отредактировать файл /etc/telemt.toml и заменить '\''petrovich.ru'\'' на другой домен"
echo " с помощью команды:"
echo " nano /etc/telemt.toml"
echo " После редактирования файла перезапустите службу командой:"
echo " sudo systemctl restart telemt"
echo
echo "2. Для проверки статуса службы выполните команду:"
echo " systemctl status telemt"
echo
echo "3. Для получения ссылок на подключение выполните команду:"
echo " journalctl -u telemt -n -g '\''links'\'' --no-pager -o cat | tac"
fi
'

107
src/api/config_store.rs Normal file
View File

@@ -0,0 +1,107 @@
use std::io::Write;
use std::path::{Path, PathBuf};
use hyper::header::IF_MATCH;
use sha2::{Digest, Sha256};
use crate::config::ProxyConfig;
use super::model::ApiFailure;
pub(super) fn parse_if_match(headers: &hyper::HeaderMap) -> Option<String> {
headers
.get(IF_MATCH)
.and_then(|value| value.to_str().ok())
.map(str::trim)
.filter(|value| !value.is_empty())
.map(|value| value.trim_matches('"').to_string())
}
pub(super) async fn ensure_expected_revision(
config_path: &Path,
expected_revision: Option<&str>,
) -> Result<(), ApiFailure> {
let Some(expected) = expected_revision else {
return Ok(());
};
let current = current_revision(config_path).await?;
if current != expected {
return Err(ApiFailure::new(
hyper::StatusCode::CONFLICT,
"revision_conflict",
"Config revision mismatch",
));
}
Ok(())
}
pub(super) async fn current_revision(config_path: &Path) -> Result<String, ApiFailure> {
let content = tokio::fs::read_to_string(config_path)
.await
.map_err(|e| ApiFailure::internal(format!("failed to read config: {}", e)))?;
Ok(compute_revision(&content))
}
pub(super) fn compute_revision(content: &str) -> String {
let mut hasher = Sha256::new();
hasher.update(content.as_bytes());
hex::encode(hasher.finalize())
}
pub(super) async fn load_config_from_disk(config_path: &Path) -> Result<ProxyConfig, ApiFailure> {
let config_path = config_path.to_path_buf();
tokio::task::spawn_blocking(move || ProxyConfig::load(config_path))
.await
.map_err(|e| ApiFailure::internal(format!("failed to join config loader: {}", e)))?
.map_err(|e| ApiFailure::internal(format!("failed to load config: {}", e)))
}
pub(super) async fn save_config_to_disk(
config_path: &Path,
cfg: &ProxyConfig,
) -> Result<String, ApiFailure> {
let serialized = toml::to_string_pretty(cfg)
.map_err(|e| ApiFailure::internal(format!("failed to serialize config: {}", e)))?;
write_atomic(config_path.to_path_buf(), serialized.clone()).await?;
Ok(compute_revision(&serialized))
}
async fn write_atomic(path: PathBuf, contents: String) -> Result<(), ApiFailure> {
tokio::task::spawn_blocking(move || write_atomic_sync(&path, &contents))
.await
.map_err(|e| ApiFailure::internal(format!("failed to join writer: {}", e)))?
.map_err(|e| ApiFailure::internal(format!("failed to write config: {}", e)))
}
fn write_atomic_sync(path: &Path, contents: &str) -> std::io::Result<()> {
let parent = path.parent().unwrap_or_else(|| Path::new("."));
std::fs::create_dir_all(parent)?;
let tmp_name = format!(
".{}.tmp-{}",
path.file_name()
.and_then(|s| s.to_str())
.unwrap_or("config.toml"),
rand::random::<u64>()
);
let tmp_path = parent.join(tmp_name);
let write_result = (|| {
let mut file = std::fs::OpenOptions::new()
.create_new(true)
.write(true)
.open(&tmp_path)?;
file.write_all(contents.as_bytes())?;
file.sync_all()?;
std::fs::rename(&tmp_path, path)?;
if let Ok(dir) = std::fs::File::open(parent) {
let _ = dir.sync_all();
}
Ok(())
})();
if write_result.is_err() {
let _ = std::fs::remove_file(&tmp_path);
}
write_result
}

443
src/api/mod.rs Normal file
View File

@@ -0,0 +1,443 @@
use std::convert::Infallible;
use std::net::{IpAddr, SocketAddr};
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use http_body_util::{BodyExt, Full};
use hyper::body::{Bytes, Incoming};
use hyper::header::AUTHORIZATION;
use hyper::server::conn::http1;
use hyper::service::service_fn;
use hyper::{Method, Request, Response, StatusCode};
use serde::Serialize;
use serde::de::DeserializeOwned;
use tokio::net::TcpListener;
use tokio::sync::{Mutex, watch};
use tracing::{debug, info, warn};
use crate::config::ProxyConfig;
use crate::ip_tracker::UserIpTracker;
use crate::stats::Stats;
use crate::transport::middle_proxy::MePool;
use crate::transport::UpstreamManager;
mod config_store;
mod model;
mod runtime_stats;
mod users;
use config_store::{current_revision, parse_if_match};
use model::{
ApiFailure, CreateUserRequest, ErrorBody, ErrorResponse, HealthData, PatchUserRequest,
RotateSecretRequest, SuccessResponse, SummaryData,
};
use runtime_stats::{
MinimalCacheEntry, build_dcs_data, build_me_writers_data, build_minimal_all_data,
build_upstreams_data, build_zero_all_data,
};
use users::{create_user, delete_user, patch_user, rotate_secret, users_from_config};
#[derive(Clone)]
pub(super) struct ApiShared {
pub(super) stats: Arc<Stats>,
pub(super) ip_tracker: Arc<UserIpTracker>,
pub(super) me_pool: Option<Arc<MePool>>,
pub(super) upstream_manager: Arc<UpstreamManager>,
pub(super) config_path: PathBuf,
pub(super) startup_detected_ip_v4: Option<IpAddr>,
pub(super) startup_detected_ip_v6: Option<IpAddr>,
pub(super) mutation_lock: Arc<Mutex<()>>,
pub(super) minimal_cache: Arc<Mutex<Option<MinimalCacheEntry>>>,
pub(super) request_id: Arc<AtomicU64>,
}
impl ApiShared {
fn next_request_id(&self) -> u64 {
self.request_id.fetch_add(1, Ordering::Relaxed)
}
}
pub async fn serve(
listen: SocketAddr,
stats: Arc<Stats>,
ip_tracker: Arc<UserIpTracker>,
me_pool: Option<Arc<MePool>>,
upstream_manager: Arc<UpstreamManager>,
config_rx: watch::Receiver<Arc<ProxyConfig>>,
config_path: PathBuf,
startup_detected_ip_v4: Option<IpAddr>,
startup_detected_ip_v6: Option<IpAddr>,
) {
let listener = match TcpListener::bind(listen).await {
Ok(listener) => listener,
Err(error) => {
warn!(
error = %error,
listen = %listen,
"Failed to bind API listener"
);
return;
}
};
info!("API endpoint: http://{}/v1/*", listen);
let shared = Arc::new(ApiShared {
stats,
ip_tracker,
me_pool,
upstream_manager,
config_path,
startup_detected_ip_v4,
startup_detected_ip_v6,
mutation_lock: Arc::new(Mutex::new(())),
minimal_cache: Arc::new(Mutex::new(None)),
request_id: Arc::new(AtomicU64::new(1)),
});
loop {
let (stream, peer) = match listener.accept().await {
Ok(v) => v,
Err(error) => {
warn!(error = %error, "API accept error");
continue;
}
};
let shared_conn = shared.clone();
let config_rx_conn = config_rx.clone();
tokio::spawn(async move {
let svc = service_fn(move |req: Request<Incoming>| {
let shared_req = shared_conn.clone();
let config_rx_req = config_rx_conn.clone();
async move { handle(req, peer, shared_req, config_rx_req).await }
});
if let Err(error) = http1::Builder::new()
.serve_connection(hyper_util::rt::TokioIo::new(stream), svc)
.await
{
debug!(error = %error, "API connection error");
}
});
}
}
async fn handle(
req: Request<Incoming>,
peer: SocketAddr,
shared: Arc<ApiShared>,
config_rx: watch::Receiver<Arc<ProxyConfig>>,
) -> Result<Response<Full<Bytes>>, Infallible> {
let request_id = shared.next_request_id();
let cfg = config_rx.borrow().clone();
let api_cfg = &cfg.server.api;
if !api_cfg.enabled {
return Ok(error_response(
request_id,
ApiFailure::new(
StatusCode::SERVICE_UNAVAILABLE,
"api_disabled",
"API is disabled",
),
));
}
if !api_cfg.whitelist.is_empty()
&& !api_cfg
.whitelist
.iter()
.any(|net| net.contains(peer.ip()))
{
return Ok(error_response(
request_id,
ApiFailure::new(StatusCode::FORBIDDEN, "forbidden", "Source IP is not allowed"),
));
}
if !api_cfg.auth_header.is_empty() {
let auth_ok = req
.headers()
.get(AUTHORIZATION)
.and_then(|v| v.to_str().ok())
.map(|v| v == api_cfg.auth_header)
.unwrap_or(false);
if !auth_ok {
return Ok(error_response(
request_id,
ApiFailure::new(
StatusCode::UNAUTHORIZED,
"unauthorized",
"Missing or invalid Authorization header",
),
));
}
}
let method = req.method().clone();
let path = req.uri().path().to_string();
let body_limit = api_cfg.request_body_limit_bytes;
let result: Result<Response<Full<Bytes>>, ApiFailure> = async {
match (method.as_str(), path.as_str()) {
("GET", "/v1/health") => {
let revision = current_revision(&shared.config_path).await?;
let data = HealthData {
status: "ok",
read_only: api_cfg.read_only,
};
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/stats/summary") => {
let revision = current_revision(&shared.config_path).await?;
let data = SummaryData {
uptime_seconds: shared.stats.uptime_secs(),
connections_total: shared.stats.get_connects_all(),
connections_bad_total: shared.stats.get_connects_bad(),
handshake_timeouts_total: shared.stats.get_handshake_timeouts(),
configured_users: cfg.access.users.len(),
};
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/stats/zero/all") => {
let revision = current_revision(&shared.config_path).await?;
let data = build_zero_all_data(&shared.stats, cfg.access.users.len());
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/stats/upstreams") => {
let revision = current_revision(&shared.config_path).await?;
let data = build_upstreams_data(shared.as_ref(), api_cfg);
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/stats/minimal/all") => {
let revision = current_revision(&shared.config_path).await?;
let data = build_minimal_all_data(shared.as_ref(), api_cfg).await;
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/stats/me-writers") => {
let revision = current_revision(&shared.config_path).await?;
let data = build_me_writers_data(shared.as_ref(), api_cfg).await;
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/stats/dcs") => {
let revision = current_revision(&shared.config_path).await?;
let data = build_dcs_data(shared.as_ref(), api_cfg).await;
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/stats/users") | ("GET", "/v1/users") => {
let revision = current_revision(&shared.config_path).await?;
let users = users_from_config(
&cfg,
&shared.stats,
&shared.ip_tracker,
shared.startup_detected_ip_v4,
shared.startup_detected_ip_v6,
)
.await;
Ok(success_response(StatusCode::OK, users, revision))
}
("POST", "/v1/users") => {
if api_cfg.read_only {
return Ok(error_response(
request_id,
ApiFailure::new(
StatusCode::FORBIDDEN,
"read_only",
"API runs in read-only mode",
),
));
}
let expected_revision = parse_if_match(req.headers());
let body = read_json::<CreateUserRequest>(req.into_body(), body_limit).await?;
let (data, revision) = create_user(body, expected_revision, &shared).await?;
Ok(success_response(StatusCode::CREATED, data, revision))
}
_ => {
if let Some(user) = path.strip_prefix("/v1/users/")
&& !user.is_empty()
&& !user.contains('/')
{
if method == Method::GET {
let revision = current_revision(&shared.config_path).await?;
let users = users_from_config(
&cfg,
&shared.stats,
&shared.ip_tracker,
shared.startup_detected_ip_v4,
shared.startup_detected_ip_v6,
)
.await;
if let Some(user_info) = users.into_iter().find(|entry| entry.username == user)
{
return Ok(success_response(StatusCode::OK, user_info, revision));
}
return Ok(error_response(
request_id,
ApiFailure::new(StatusCode::NOT_FOUND, "not_found", "User not found"),
));
}
if method == Method::PATCH {
if api_cfg.read_only {
return Ok(error_response(
request_id,
ApiFailure::new(
StatusCode::FORBIDDEN,
"read_only",
"API runs in read-only mode",
),
));
}
let expected_revision = parse_if_match(req.headers());
let body = read_json::<PatchUserRequest>(req.into_body(), body_limit).await?;
let (data, revision) =
patch_user(user, body, expected_revision, &shared).await?;
return Ok(success_response(StatusCode::OK, data, revision));
}
if method == Method::DELETE {
if api_cfg.read_only {
return Ok(error_response(
request_id,
ApiFailure::new(
StatusCode::FORBIDDEN,
"read_only",
"API runs in read-only mode",
),
));
}
let expected_revision = parse_if_match(req.headers());
let (deleted_user, revision) =
delete_user(user, expected_revision, &shared).await?;
return Ok(success_response(StatusCode::OK, deleted_user, revision));
}
if method == Method::POST
&& let Some(base_user) = user.strip_suffix("/rotate-secret")
&& !base_user.is_empty()
&& !base_user.contains('/')
{
if api_cfg.read_only {
return Ok(error_response(
request_id,
ApiFailure::new(
StatusCode::FORBIDDEN,
"read_only",
"API runs in read-only mode",
),
));
}
let expected_revision = parse_if_match(req.headers());
let body =
read_optional_json::<RotateSecretRequest>(req.into_body(), body_limit)
.await?;
let (data, revision) =
rotate_secret(base_user, body.unwrap_or_default(), expected_revision, &shared)
.await?;
return Ok(success_response(StatusCode::OK, data, revision));
}
if method == Method::POST {
return Ok(error_response(
request_id,
ApiFailure::new(StatusCode::NOT_FOUND, "not_found", "Route not found"),
));
}
return Ok(error_response(
request_id,
ApiFailure::new(
StatusCode::METHOD_NOT_ALLOWED,
"method_not_allowed",
"Unsupported HTTP method for this route",
),
));
}
Ok(error_response(
request_id,
ApiFailure::new(StatusCode::NOT_FOUND, "not_found", "Route not found"),
))
}
}
}
.await;
match result {
Ok(resp) => Ok(resp),
Err(error) => Ok(error_response(request_id, error)),
}
}
fn success_response<T: Serialize>(
status: StatusCode,
data: T,
revision: String,
) -> Response<Full<Bytes>> {
let payload = SuccessResponse {
ok: true,
data,
revision,
};
let body = serde_json::to_vec(&payload).unwrap_or_else(|_| b"{\"ok\":false}".to_vec());
Response::builder()
.status(status)
.header("content-type", "application/json; charset=utf-8")
.body(Full::new(Bytes::from(body)))
.unwrap()
}
fn error_response(request_id: u64, failure: ApiFailure) -> Response<Full<Bytes>> {
let payload = ErrorResponse {
ok: false,
error: ErrorBody {
code: failure.code,
message: failure.message,
},
request_id,
};
let body = serde_json::to_vec(&payload).unwrap_or_else(|_| {
format!(
"{{\"ok\":false,\"error\":{{\"code\":\"internal_error\",\"message\":\"serialization failed\"}},\"request_id\":{}}}",
request_id
)
.into_bytes()
});
Response::builder()
.status(failure.status)
.header("content-type", "application/json; charset=utf-8")
.body(Full::new(Bytes::from(body)))
.unwrap()
}
async fn read_json<T: DeserializeOwned>(body: Incoming, limit: usize) -> Result<T, ApiFailure> {
let bytes = read_body_with_limit(body, limit).await?;
serde_json::from_slice(&bytes).map_err(|_| ApiFailure::bad_request("Invalid JSON body"))
}
async fn read_optional_json<T: DeserializeOwned>(
body: Incoming,
limit: usize,
) -> Result<Option<T>, ApiFailure> {
let bytes = read_body_with_limit(body, limit).await?;
if bytes.is_empty() {
return Ok(None);
}
serde_json::from_slice(&bytes)
.map(Some)
.map_err(|_| ApiFailure::bad_request("Invalid JSON body"))
}
async fn read_body_with_limit(body: Incoming, limit: usize) -> Result<Vec<u8>, ApiFailure> {
let mut collected = Vec::new();
let mut body = body;
while let Some(frame_result) = body.frame().await {
let frame = frame_result.map_err(|_| ApiFailure::bad_request("Invalid request body"))?;
if let Some(chunk) = frame.data_ref() {
if collected.len().saturating_add(chunk.len()) > limit {
return Err(ApiFailure::new(
StatusCode::PAYLOAD_TOO_LARGE,
"payload_too_large",
format!("Body exceeds {} bytes", limit),
));
}
collected.extend_from_slice(chunk);
}
}
Ok(collected)
}

444
src/api/model.rs Normal file
View File

@@ -0,0 +1,444 @@
use std::net::IpAddr;
use chrono::{DateTime, Utc};
use hyper::StatusCode;
use rand::Rng;
use serde::{Deserialize, Serialize};
const MAX_USERNAME_LEN: usize = 64;
#[derive(Debug)]
pub(super) struct ApiFailure {
pub(super) status: StatusCode,
pub(super) code: &'static str,
pub(super) message: String,
}
impl ApiFailure {
pub(super) fn new(status: StatusCode, code: &'static str, message: impl Into<String>) -> Self {
Self {
status,
code,
message: message.into(),
}
}
pub(super) fn internal(message: impl Into<String>) -> Self {
Self::new(StatusCode::INTERNAL_SERVER_ERROR, "internal_error", message)
}
pub(super) fn bad_request(message: impl Into<String>) -> Self {
Self::new(StatusCode::BAD_REQUEST, "bad_request", message)
}
}
#[derive(Serialize)]
pub(super) struct ErrorBody {
pub(super) code: &'static str,
pub(super) message: String,
}
#[derive(Serialize)]
pub(super) struct ErrorResponse {
pub(super) ok: bool,
pub(super) error: ErrorBody,
pub(super) request_id: u64,
}
#[derive(Serialize)]
pub(super) struct SuccessResponse<T> {
pub(super) ok: bool,
pub(super) data: T,
pub(super) revision: String,
}
#[derive(Serialize)]
pub(super) struct HealthData {
pub(super) status: &'static str,
pub(super) read_only: bool,
}
#[derive(Serialize)]
pub(super) struct SummaryData {
pub(super) uptime_seconds: f64,
pub(super) connections_total: u64,
pub(super) connections_bad_total: u64,
pub(super) handshake_timeouts_total: u64,
pub(super) configured_users: usize,
}
#[derive(Serialize, Clone)]
pub(super) struct ZeroCodeCount {
pub(super) code: i32,
pub(super) total: u64,
}
#[derive(Serialize, Clone)]
pub(super) struct ZeroCoreData {
pub(super) uptime_seconds: f64,
pub(super) connections_total: u64,
pub(super) connections_bad_total: u64,
pub(super) handshake_timeouts_total: u64,
pub(super) configured_users: usize,
pub(super) telemetry_core_enabled: bool,
pub(super) telemetry_user_enabled: bool,
pub(super) telemetry_me_level: String,
}
#[derive(Serialize, Clone)]
pub(super) struct ZeroUpstreamData {
pub(super) connect_attempt_total: u64,
pub(super) connect_success_total: u64,
pub(super) connect_fail_total: u64,
pub(super) connect_failfast_hard_error_total: u64,
pub(super) connect_attempts_bucket_1: u64,
pub(super) connect_attempts_bucket_2: u64,
pub(super) connect_attempts_bucket_3_4: u64,
pub(super) connect_attempts_bucket_gt_4: u64,
pub(super) connect_duration_success_bucket_le_100ms: u64,
pub(super) connect_duration_success_bucket_101_500ms: u64,
pub(super) connect_duration_success_bucket_501_1000ms: u64,
pub(super) connect_duration_success_bucket_gt_1000ms: u64,
pub(super) connect_duration_fail_bucket_le_100ms: u64,
pub(super) connect_duration_fail_bucket_101_500ms: u64,
pub(super) connect_duration_fail_bucket_501_1000ms: u64,
pub(super) connect_duration_fail_bucket_gt_1000ms: u64,
}
#[derive(Serialize, Clone)]
pub(super) struct UpstreamDcStatus {
pub(super) dc: i16,
pub(super) latency_ema_ms: Option<f64>,
pub(super) ip_preference: &'static str,
}
#[derive(Serialize, Clone)]
pub(super) struct UpstreamStatus {
pub(super) upstream_id: usize,
pub(super) route_kind: &'static str,
pub(super) address: String,
pub(super) weight: u16,
pub(super) scopes: String,
pub(super) healthy: bool,
pub(super) fails: u32,
pub(super) last_check_age_secs: u64,
pub(super) effective_latency_ms: Option<f64>,
pub(super) dc: Vec<UpstreamDcStatus>,
}
#[derive(Serialize, Clone)]
pub(super) struct UpstreamSummaryData {
pub(super) configured_total: usize,
pub(super) healthy_total: usize,
pub(super) unhealthy_total: usize,
pub(super) direct_total: usize,
pub(super) socks4_total: usize,
pub(super) socks5_total: usize,
}
#[derive(Serialize, Clone)]
pub(super) struct UpstreamsData {
pub(super) enabled: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) reason: Option<&'static str>,
pub(super) generated_at_epoch_secs: u64,
pub(super) zero: ZeroUpstreamData,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) summary: Option<UpstreamSummaryData>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) upstreams: Option<Vec<UpstreamStatus>>,
}
#[derive(Serialize, Clone)]
pub(super) struct ZeroMiddleProxyData {
pub(super) keepalive_sent_total: u64,
pub(super) keepalive_failed_total: u64,
pub(super) keepalive_pong_total: u64,
pub(super) keepalive_timeout_total: u64,
pub(super) rpc_proxy_req_signal_sent_total: u64,
pub(super) rpc_proxy_req_signal_failed_total: u64,
pub(super) rpc_proxy_req_signal_skipped_no_meta_total: u64,
pub(super) rpc_proxy_req_signal_response_total: u64,
pub(super) rpc_proxy_req_signal_close_sent_total: u64,
pub(super) reconnect_attempt_total: u64,
pub(super) reconnect_success_total: u64,
pub(super) handshake_reject_total: u64,
pub(super) handshake_error_codes: Vec<ZeroCodeCount>,
pub(super) reader_eof_total: u64,
pub(super) idle_close_by_peer_total: u64,
pub(super) route_drop_no_conn_total: u64,
pub(super) route_drop_channel_closed_total: u64,
pub(super) route_drop_queue_full_total: u64,
pub(super) route_drop_queue_full_base_total: u64,
pub(super) route_drop_queue_full_high_total: u64,
pub(super) socks_kdf_strict_reject_total: u64,
pub(super) socks_kdf_compat_fallback_total: u64,
pub(super) endpoint_quarantine_total: u64,
pub(super) kdf_drift_total: u64,
pub(super) kdf_port_only_drift_total: u64,
pub(super) hardswap_pending_reuse_total: u64,
pub(super) hardswap_pending_ttl_expired_total: u64,
pub(super) single_endpoint_outage_enter_total: u64,
pub(super) single_endpoint_outage_exit_total: u64,
pub(super) single_endpoint_outage_reconnect_attempt_total: u64,
pub(super) single_endpoint_outage_reconnect_success_total: u64,
pub(super) single_endpoint_quarantine_bypass_total: u64,
pub(super) single_endpoint_shadow_rotate_total: u64,
pub(super) single_endpoint_shadow_rotate_skipped_quarantine_total: u64,
pub(super) floor_mode_switch_total: u64,
pub(super) floor_mode_switch_static_to_adaptive_total: u64,
pub(super) floor_mode_switch_adaptive_to_static_total: u64,
}
#[derive(Serialize, Clone)]
pub(super) struct ZeroPoolData {
pub(super) pool_swap_total: u64,
pub(super) pool_drain_active: u64,
pub(super) pool_force_close_total: u64,
pub(super) pool_stale_pick_total: u64,
pub(super) writer_removed_total: u64,
pub(super) writer_removed_unexpected_total: u64,
pub(super) refill_triggered_total: u64,
pub(super) refill_skipped_inflight_total: u64,
pub(super) refill_failed_total: u64,
pub(super) writer_restored_same_endpoint_total: u64,
pub(super) writer_restored_fallback_total: u64,
}
#[derive(Serialize, Clone)]
pub(super) struct ZeroDesyncData {
pub(super) secure_padding_invalid_total: u64,
pub(super) desync_total: u64,
pub(super) desync_full_logged_total: u64,
pub(super) desync_suppressed_total: u64,
pub(super) desync_frames_bucket_0: u64,
pub(super) desync_frames_bucket_1_2: u64,
pub(super) desync_frames_bucket_3_10: u64,
pub(super) desync_frames_bucket_gt_10: u64,
}
#[derive(Serialize, Clone)]
pub(super) struct ZeroAllData {
pub(super) generated_at_epoch_secs: u64,
pub(super) core: ZeroCoreData,
pub(super) upstream: ZeroUpstreamData,
pub(super) middle_proxy: ZeroMiddleProxyData,
pub(super) pool: ZeroPoolData,
pub(super) desync: ZeroDesyncData,
}
#[derive(Serialize, Clone)]
pub(super) struct MeWritersSummary {
pub(super) configured_dc_groups: usize,
pub(super) configured_endpoints: usize,
pub(super) available_endpoints: usize,
pub(super) available_pct: f64,
pub(super) required_writers: usize,
pub(super) alive_writers: usize,
pub(super) coverage_pct: f64,
}
#[derive(Serialize, Clone)]
pub(super) struct MeWriterStatus {
pub(super) writer_id: u64,
pub(super) dc: Option<i16>,
pub(super) endpoint: String,
pub(super) generation: u64,
pub(super) state: &'static str,
pub(super) draining: bool,
pub(super) degraded: bool,
pub(super) bound_clients: usize,
pub(super) idle_for_secs: Option<u64>,
pub(super) rtt_ema_ms: Option<f64>,
}
#[derive(Serialize, Clone)]
pub(super) struct MeWritersData {
pub(super) middle_proxy_enabled: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) reason: Option<&'static str>,
pub(super) generated_at_epoch_secs: u64,
pub(super) summary: MeWritersSummary,
pub(super) writers: Vec<MeWriterStatus>,
}
#[derive(Serialize, Clone)]
pub(super) struct DcStatus {
pub(super) dc: i16,
pub(super) endpoints: Vec<String>,
pub(super) available_endpoints: usize,
pub(super) available_pct: f64,
pub(super) required_writers: usize,
pub(super) alive_writers: usize,
pub(super) coverage_pct: f64,
pub(super) rtt_ms: Option<f64>,
pub(super) load: usize,
}
#[derive(Serialize, Clone)]
pub(super) struct DcStatusData {
pub(super) middle_proxy_enabled: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) reason: Option<&'static str>,
pub(super) generated_at_epoch_secs: u64,
pub(super) dcs: Vec<DcStatus>,
}
#[derive(Serialize, Clone)]
pub(super) struct MinimalQuarantineData {
pub(super) endpoint: String,
pub(super) remaining_ms: u64,
}
#[derive(Serialize, Clone)]
pub(super) struct MinimalDcPathData {
pub(super) dc: i16,
pub(super) ip_preference: Option<&'static str>,
pub(super) selected_addr_v4: Option<String>,
pub(super) selected_addr_v6: Option<String>,
}
#[derive(Serialize, Clone)]
pub(super) struct MinimalMeRuntimeData {
pub(super) active_generation: u64,
pub(super) warm_generation: u64,
pub(super) pending_hardswap_generation: u64,
pub(super) pending_hardswap_age_secs: Option<u64>,
pub(super) hardswap_enabled: bool,
pub(super) floor_mode: &'static str,
pub(super) adaptive_floor_idle_secs: u64,
pub(super) adaptive_floor_min_writers_single_endpoint: u8,
pub(super) adaptive_floor_recover_grace_secs: u64,
pub(super) me_keepalive_enabled: bool,
pub(super) me_keepalive_interval_secs: u64,
pub(super) me_keepalive_jitter_secs: u64,
pub(super) me_keepalive_payload_random: bool,
pub(super) rpc_proxy_req_every_secs: u64,
pub(super) me_reconnect_max_concurrent_per_dc: u32,
pub(super) me_reconnect_backoff_base_ms: u64,
pub(super) me_reconnect_backoff_cap_ms: u64,
pub(super) me_reconnect_fast_retry_count: u32,
pub(super) me_pool_drain_ttl_secs: u64,
pub(super) me_pool_force_close_secs: u64,
pub(super) me_pool_min_fresh_ratio: f32,
pub(super) me_bind_stale_mode: &'static str,
pub(super) me_bind_stale_ttl_secs: u64,
pub(super) me_single_endpoint_shadow_writers: u8,
pub(super) me_single_endpoint_outage_mode_enabled: bool,
pub(super) me_single_endpoint_outage_disable_quarantine: bool,
pub(super) me_single_endpoint_outage_backoff_min_ms: u64,
pub(super) me_single_endpoint_outage_backoff_max_ms: u64,
pub(super) me_single_endpoint_shadow_rotate_every_secs: u64,
pub(super) me_deterministic_writer_sort: bool,
pub(super) me_socks_kdf_policy: &'static str,
pub(super) quarantined_endpoints_total: usize,
pub(super) quarantined_endpoints: Vec<MinimalQuarantineData>,
}
#[derive(Serialize, Clone)]
pub(super) struct MinimalAllPayload {
pub(super) me_writers: MeWritersData,
pub(super) dcs: DcStatusData,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) me_runtime: Option<MinimalMeRuntimeData>,
pub(super) network_path: Vec<MinimalDcPathData>,
}
#[derive(Serialize, Clone)]
pub(super) struct MinimalAllData {
pub(super) enabled: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) reason: Option<&'static str>,
pub(super) generated_at_epoch_secs: u64,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) data: Option<MinimalAllPayload>,
}
#[derive(Serialize)]
pub(super) struct UserLinks {
pub(super) classic: Vec<String>,
pub(super) secure: Vec<String>,
pub(super) tls: Vec<String>,
}
#[derive(Serialize)]
pub(super) struct UserInfo {
pub(super) username: String,
pub(super) user_ad_tag: Option<String>,
pub(super) max_tcp_conns: Option<usize>,
pub(super) expiration_rfc3339: Option<String>,
pub(super) data_quota_bytes: Option<u64>,
pub(super) max_unique_ips: Option<usize>,
pub(super) current_connections: u64,
pub(super) active_unique_ips: usize,
pub(super) active_unique_ips_list: Vec<IpAddr>,
pub(super) recent_unique_ips: usize,
pub(super) recent_unique_ips_list: Vec<IpAddr>,
pub(super) total_octets: u64,
pub(super) links: UserLinks,
}
#[derive(Serialize)]
pub(super) struct CreateUserResponse {
pub(super) user: UserInfo,
pub(super) secret: String,
}
#[derive(Deserialize)]
pub(super) struct CreateUserRequest {
pub(super) username: String,
pub(super) secret: Option<String>,
pub(super) user_ad_tag: Option<String>,
pub(super) max_tcp_conns: Option<usize>,
pub(super) expiration_rfc3339: Option<String>,
pub(super) data_quota_bytes: Option<u64>,
pub(super) max_unique_ips: Option<usize>,
}
#[derive(Deserialize)]
pub(super) struct PatchUserRequest {
pub(super) secret: Option<String>,
pub(super) user_ad_tag: Option<String>,
pub(super) max_tcp_conns: Option<usize>,
pub(super) expiration_rfc3339: Option<String>,
pub(super) data_quota_bytes: Option<u64>,
pub(super) max_unique_ips: Option<usize>,
}
#[derive(Default, Deserialize)]
pub(super) struct RotateSecretRequest {
pub(super) secret: Option<String>,
}
pub(super) fn parse_optional_expiration(
value: Option<&str>,
) -> Result<Option<DateTime<Utc>>, ApiFailure> {
let Some(raw) = value else {
return Ok(None);
};
let parsed = DateTime::parse_from_rfc3339(raw)
.map_err(|_| ApiFailure::bad_request("expiration_rfc3339 must be valid RFC3339"))?;
Ok(Some(parsed.with_timezone(&Utc)))
}
pub(super) fn is_valid_user_secret(secret: &str) -> bool {
secret.len() == 32 && secret.chars().all(|c| c.is_ascii_hexdigit())
}
pub(super) fn is_valid_ad_tag(tag: &str) -> bool {
tag.len() == 32 && tag.chars().all(|c| c.is_ascii_hexdigit())
}
pub(super) fn is_valid_username(user: &str) -> bool {
!user.is_empty()
&& user.len() <= MAX_USERNAME_LEN
&& user
.chars()
.all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-' | '.'))
}
pub(super) fn random_user_secret() -> String {
let mut bytes = [0u8; 16];
rand::rng().fill(&mut bytes);
hex::encode(bytes)
}

484
src/api/runtime_stats.rs Normal file
View File

@@ -0,0 +1,484 @@
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use crate::config::ApiConfig;
use crate::stats::Stats;
use crate::transport::upstream::IpPreference;
use crate::transport::UpstreamRouteKind;
use super::ApiShared;
use super::model::{
DcStatus, DcStatusData, MeWriterStatus, MeWritersData, MeWritersSummary, MinimalAllData,
MinimalAllPayload, MinimalDcPathData, MinimalMeRuntimeData, MinimalQuarantineData,
UpstreamDcStatus, UpstreamStatus, UpstreamSummaryData, UpstreamsData, ZeroAllData,
ZeroCodeCount, ZeroCoreData, ZeroDesyncData, ZeroMiddleProxyData, ZeroPoolData,
ZeroUpstreamData,
};
const FEATURE_DISABLED_REASON: &str = "feature_disabled";
const SOURCE_UNAVAILABLE_REASON: &str = "source_unavailable";
#[derive(Clone)]
pub(crate) struct MinimalCacheEntry {
pub(super) expires_at: Instant,
pub(super) payload: MinimalAllPayload,
pub(super) generated_at_epoch_secs: u64,
}
pub(super) fn build_zero_all_data(stats: &Stats, configured_users: usize) -> ZeroAllData {
let telemetry = stats.telemetry_policy();
let handshake_error_codes = stats
.get_me_handshake_error_code_counts()
.into_iter()
.map(|(code, total)| ZeroCodeCount { code, total })
.collect();
ZeroAllData {
generated_at_epoch_secs: now_epoch_secs(),
core: ZeroCoreData {
uptime_seconds: stats.uptime_secs(),
connections_total: stats.get_connects_all(),
connections_bad_total: stats.get_connects_bad(),
handshake_timeouts_total: stats.get_handshake_timeouts(),
configured_users,
telemetry_core_enabled: telemetry.core_enabled,
telemetry_user_enabled: telemetry.user_enabled,
telemetry_me_level: telemetry.me_level.to_string(),
},
upstream: build_zero_upstream_data(stats),
middle_proxy: ZeroMiddleProxyData {
keepalive_sent_total: stats.get_me_keepalive_sent(),
keepalive_failed_total: stats.get_me_keepalive_failed(),
keepalive_pong_total: stats.get_me_keepalive_pong(),
keepalive_timeout_total: stats.get_me_keepalive_timeout(),
rpc_proxy_req_signal_sent_total: stats.get_me_rpc_proxy_req_signal_sent_total(),
rpc_proxy_req_signal_failed_total: stats.get_me_rpc_proxy_req_signal_failed_total(),
rpc_proxy_req_signal_skipped_no_meta_total: stats
.get_me_rpc_proxy_req_signal_skipped_no_meta_total(),
rpc_proxy_req_signal_response_total: stats.get_me_rpc_proxy_req_signal_response_total(),
rpc_proxy_req_signal_close_sent_total: stats
.get_me_rpc_proxy_req_signal_close_sent_total(),
reconnect_attempt_total: stats.get_me_reconnect_attempts(),
reconnect_success_total: stats.get_me_reconnect_success(),
handshake_reject_total: stats.get_me_handshake_reject_total(),
handshake_error_codes,
reader_eof_total: stats.get_me_reader_eof_total(),
idle_close_by_peer_total: stats.get_me_idle_close_by_peer_total(),
route_drop_no_conn_total: stats.get_me_route_drop_no_conn(),
route_drop_channel_closed_total: stats.get_me_route_drop_channel_closed(),
route_drop_queue_full_total: stats.get_me_route_drop_queue_full(),
route_drop_queue_full_base_total: stats.get_me_route_drop_queue_full_base(),
route_drop_queue_full_high_total: stats.get_me_route_drop_queue_full_high(),
socks_kdf_strict_reject_total: stats.get_me_socks_kdf_strict_reject(),
socks_kdf_compat_fallback_total: stats.get_me_socks_kdf_compat_fallback(),
endpoint_quarantine_total: stats.get_me_endpoint_quarantine_total(),
kdf_drift_total: stats.get_me_kdf_drift_total(),
kdf_port_only_drift_total: stats.get_me_kdf_port_only_drift_total(),
hardswap_pending_reuse_total: stats.get_me_hardswap_pending_reuse_total(),
hardswap_pending_ttl_expired_total: stats.get_me_hardswap_pending_ttl_expired_total(),
single_endpoint_outage_enter_total: stats.get_me_single_endpoint_outage_enter_total(),
single_endpoint_outage_exit_total: stats.get_me_single_endpoint_outage_exit_total(),
single_endpoint_outage_reconnect_attempt_total: stats
.get_me_single_endpoint_outage_reconnect_attempt_total(),
single_endpoint_outage_reconnect_success_total: stats
.get_me_single_endpoint_outage_reconnect_success_total(),
single_endpoint_quarantine_bypass_total: stats
.get_me_single_endpoint_quarantine_bypass_total(),
single_endpoint_shadow_rotate_total: stats.get_me_single_endpoint_shadow_rotate_total(),
single_endpoint_shadow_rotate_skipped_quarantine_total: stats
.get_me_single_endpoint_shadow_rotate_skipped_quarantine_total(),
floor_mode_switch_total: stats.get_me_floor_mode_switch_total(),
floor_mode_switch_static_to_adaptive_total: stats
.get_me_floor_mode_switch_static_to_adaptive_total(),
floor_mode_switch_adaptive_to_static_total: stats
.get_me_floor_mode_switch_adaptive_to_static_total(),
},
pool: ZeroPoolData {
pool_swap_total: stats.get_pool_swap_total(),
pool_drain_active: stats.get_pool_drain_active(),
pool_force_close_total: stats.get_pool_force_close_total(),
pool_stale_pick_total: stats.get_pool_stale_pick_total(),
writer_removed_total: stats.get_me_writer_removed_total(),
writer_removed_unexpected_total: stats.get_me_writer_removed_unexpected_total(),
refill_triggered_total: stats.get_me_refill_triggered_total(),
refill_skipped_inflight_total: stats.get_me_refill_skipped_inflight_total(),
refill_failed_total: stats.get_me_refill_failed_total(),
writer_restored_same_endpoint_total: stats.get_me_writer_restored_same_endpoint_total(),
writer_restored_fallback_total: stats.get_me_writer_restored_fallback_total(),
},
desync: ZeroDesyncData {
secure_padding_invalid_total: stats.get_secure_padding_invalid(),
desync_total: stats.get_desync_total(),
desync_full_logged_total: stats.get_desync_full_logged(),
desync_suppressed_total: stats.get_desync_suppressed(),
desync_frames_bucket_0: stats.get_desync_frames_bucket_0(),
desync_frames_bucket_1_2: stats.get_desync_frames_bucket_1_2(),
desync_frames_bucket_3_10: stats.get_desync_frames_bucket_3_10(),
desync_frames_bucket_gt_10: stats.get_desync_frames_bucket_gt_10(),
},
}
}
fn build_zero_upstream_data(stats: &Stats) -> ZeroUpstreamData {
ZeroUpstreamData {
connect_attempt_total: stats.get_upstream_connect_attempt_total(),
connect_success_total: stats.get_upstream_connect_success_total(),
connect_fail_total: stats.get_upstream_connect_fail_total(),
connect_failfast_hard_error_total: stats.get_upstream_connect_failfast_hard_error_total(),
connect_attempts_bucket_1: stats.get_upstream_connect_attempts_bucket_1(),
connect_attempts_bucket_2: stats.get_upstream_connect_attempts_bucket_2(),
connect_attempts_bucket_3_4: stats.get_upstream_connect_attempts_bucket_3_4(),
connect_attempts_bucket_gt_4: stats.get_upstream_connect_attempts_bucket_gt_4(),
connect_duration_success_bucket_le_100ms: stats
.get_upstream_connect_duration_success_bucket_le_100ms(),
connect_duration_success_bucket_101_500ms: stats
.get_upstream_connect_duration_success_bucket_101_500ms(),
connect_duration_success_bucket_501_1000ms: stats
.get_upstream_connect_duration_success_bucket_501_1000ms(),
connect_duration_success_bucket_gt_1000ms: stats
.get_upstream_connect_duration_success_bucket_gt_1000ms(),
connect_duration_fail_bucket_le_100ms: stats.get_upstream_connect_duration_fail_bucket_le_100ms(),
connect_duration_fail_bucket_101_500ms: stats
.get_upstream_connect_duration_fail_bucket_101_500ms(),
connect_duration_fail_bucket_501_1000ms: stats
.get_upstream_connect_duration_fail_bucket_501_1000ms(),
connect_duration_fail_bucket_gt_1000ms: stats
.get_upstream_connect_duration_fail_bucket_gt_1000ms(),
}
}
pub(super) fn build_upstreams_data(shared: &ApiShared, api_cfg: &ApiConfig) -> UpstreamsData {
let generated_at_epoch_secs = now_epoch_secs();
let zero = build_zero_upstream_data(&shared.stats);
if !api_cfg.minimal_runtime_enabled {
return UpstreamsData {
enabled: false,
reason: Some(FEATURE_DISABLED_REASON),
generated_at_epoch_secs,
zero,
summary: None,
upstreams: None,
};
}
let Some(snapshot) = shared.upstream_manager.try_api_snapshot() else {
return UpstreamsData {
enabled: true,
reason: Some(SOURCE_UNAVAILABLE_REASON),
generated_at_epoch_secs,
zero,
summary: None,
upstreams: None,
};
};
let summary = UpstreamSummaryData {
configured_total: snapshot.summary.configured_total,
healthy_total: snapshot.summary.healthy_total,
unhealthy_total: snapshot.summary.unhealthy_total,
direct_total: snapshot.summary.direct_total,
socks4_total: snapshot.summary.socks4_total,
socks5_total: snapshot.summary.socks5_total,
};
let upstreams = snapshot
.upstreams
.into_iter()
.map(|upstream| UpstreamStatus {
upstream_id: upstream.upstream_id,
route_kind: map_route_kind(upstream.route_kind),
address: upstream.address,
weight: upstream.weight,
scopes: upstream.scopes,
healthy: upstream.healthy,
fails: upstream.fails,
last_check_age_secs: upstream.last_check_age_secs,
effective_latency_ms: upstream.effective_latency_ms,
dc: upstream
.dc
.into_iter()
.map(|dc| UpstreamDcStatus {
dc: dc.dc,
latency_ema_ms: dc.latency_ema_ms,
ip_preference: map_ip_preference(dc.ip_preference),
})
.collect(),
})
.collect();
UpstreamsData {
enabled: true,
reason: None,
generated_at_epoch_secs,
zero,
summary: Some(summary),
upstreams: Some(upstreams),
}
}
pub(super) async fn build_minimal_all_data(
shared: &ApiShared,
api_cfg: &ApiConfig,
) -> MinimalAllData {
let now = now_epoch_secs();
if !api_cfg.minimal_runtime_enabled {
return MinimalAllData {
enabled: false,
reason: Some(FEATURE_DISABLED_REASON),
generated_at_epoch_secs: now,
data: None,
};
}
let Some((generated_at_epoch_secs, payload)) =
get_minimal_payload_cached(shared, api_cfg.minimal_runtime_cache_ttl_ms).await
else {
return MinimalAllData {
enabled: true,
reason: Some(SOURCE_UNAVAILABLE_REASON),
generated_at_epoch_secs: now,
data: Some(MinimalAllPayload {
me_writers: disabled_me_writers(now, SOURCE_UNAVAILABLE_REASON),
dcs: disabled_dcs(now, SOURCE_UNAVAILABLE_REASON),
me_runtime: None,
network_path: Vec::new(),
}),
};
};
MinimalAllData {
enabled: true,
reason: None,
generated_at_epoch_secs,
data: Some(payload),
}
}
pub(super) async fn build_me_writers_data(
shared: &ApiShared,
api_cfg: &ApiConfig,
) -> MeWritersData {
let now = now_epoch_secs();
if !api_cfg.minimal_runtime_enabled {
return disabled_me_writers(now, FEATURE_DISABLED_REASON);
}
let Some((_, payload)) =
get_minimal_payload_cached(shared, api_cfg.minimal_runtime_cache_ttl_ms).await
else {
return disabled_me_writers(now, SOURCE_UNAVAILABLE_REASON);
};
payload.me_writers
}
pub(super) async fn build_dcs_data(shared: &ApiShared, api_cfg: &ApiConfig) -> DcStatusData {
let now = now_epoch_secs();
if !api_cfg.minimal_runtime_enabled {
return disabled_dcs(now, FEATURE_DISABLED_REASON);
}
let Some((_, payload)) =
get_minimal_payload_cached(shared, api_cfg.minimal_runtime_cache_ttl_ms).await
else {
return disabled_dcs(now, SOURCE_UNAVAILABLE_REASON);
};
payload.dcs
}
async fn get_minimal_payload_cached(
shared: &ApiShared,
cache_ttl_ms: u64,
) -> Option<(u64, MinimalAllPayload)> {
if cache_ttl_ms > 0 {
let now = Instant::now();
let cached = shared.minimal_cache.lock().await.clone();
if let Some(entry) = cached
&& now < entry.expires_at
{
return Some((entry.generated_at_epoch_secs, entry.payload));
}
}
let pool = shared.me_pool.as_ref()?;
let status = pool.api_status_snapshot().await;
let runtime = pool.api_runtime_snapshot().await;
let generated_at_epoch_secs = status.generated_at_epoch_secs;
let me_writers = MeWritersData {
middle_proxy_enabled: true,
reason: None,
generated_at_epoch_secs,
summary: MeWritersSummary {
configured_dc_groups: status.configured_dc_groups,
configured_endpoints: status.configured_endpoints,
available_endpoints: status.available_endpoints,
available_pct: status.available_pct,
required_writers: status.required_writers,
alive_writers: status.alive_writers,
coverage_pct: status.coverage_pct,
},
writers: status
.writers
.into_iter()
.map(|entry| MeWriterStatus {
writer_id: entry.writer_id,
dc: entry.dc,
endpoint: entry.endpoint.to_string(),
generation: entry.generation,
state: entry.state,
draining: entry.draining,
degraded: entry.degraded,
bound_clients: entry.bound_clients,
idle_for_secs: entry.idle_for_secs,
rtt_ema_ms: entry.rtt_ema_ms,
})
.collect(),
};
let dcs = DcStatusData {
middle_proxy_enabled: true,
reason: None,
generated_at_epoch_secs,
dcs: status
.dcs
.into_iter()
.map(|entry| DcStatus {
dc: entry.dc,
endpoints: entry
.endpoints
.into_iter()
.map(|value| value.to_string())
.collect(),
available_endpoints: entry.available_endpoints,
available_pct: entry.available_pct,
required_writers: entry.required_writers,
alive_writers: entry.alive_writers,
coverage_pct: entry.coverage_pct,
rtt_ms: entry.rtt_ms,
load: entry.load,
})
.collect(),
};
let me_runtime = MinimalMeRuntimeData {
active_generation: runtime.active_generation,
warm_generation: runtime.warm_generation,
pending_hardswap_generation: runtime.pending_hardswap_generation,
pending_hardswap_age_secs: runtime.pending_hardswap_age_secs,
hardswap_enabled: runtime.hardswap_enabled,
floor_mode: runtime.floor_mode,
adaptive_floor_idle_secs: runtime.adaptive_floor_idle_secs,
adaptive_floor_min_writers_single_endpoint: runtime
.adaptive_floor_min_writers_single_endpoint,
adaptive_floor_recover_grace_secs: runtime.adaptive_floor_recover_grace_secs,
me_keepalive_enabled: runtime.me_keepalive_enabled,
me_keepalive_interval_secs: runtime.me_keepalive_interval_secs,
me_keepalive_jitter_secs: runtime.me_keepalive_jitter_secs,
me_keepalive_payload_random: runtime.me_keepalive_payload_random,
rpc_proxy_req_every_secs: runtime.rpc_proxy_req_every_secs,
me_reconnect_max_concurrent_per_dc: runtime.me_reconnect_max_concurrent_per_dc,
me_reconnect_backoff_base_ms: runtime.me_reconnect_backoff_base_ms,
me_reconnect_backoff_cap_ms: runtime.me_reconnect_backoff_cap_ms,
me_reconnect_fast_retry_count: runtime.me_reconnect_fast_retry_count,
me_pool_drain_ttl_secs: runtime.me_pool_drain_ttl_secs,
me_pool_force_close_secs: runtime.me_pool_force_close_secs,
me_pool_min_fresh_ratio: runtime.me_pool_min_fresh_ratio,
me_bind_stale_mode: runtime.me_bind_stale_mode,
me_bind_stale_ttl_secs: runtime.me_bind_stale_ttl_secs,
me_single_endpoint_shadow_writers: runtime.me_single_endpoint_shadow_writers,
me_single_endpoint_outage_mode_enabled: runtime.me_single_endpoint_outage_mode_enabled,
me_single_endpoint_outage_disable_quarantine: runtime
.me_single_endpoint_outage_disable_quarantine,
me_single_endpoint_outage_backoff_min_ms: runtime.me_single_endpoint_outage_backoff_min_ms,
me_single_endpoint_outage_backoff_max_ms: runtime.me_single_endpoint_outage_backoff_max_ms,
me_single_endpoint_shadow_rotate_every_secs: runtime
.me_single_endpoint_shadow_rotate_every_secs,
me_deterministic_writer_sort: runtime.me_deterministic_writer_sort,
me_socks_kdf_policy: runtime.me_socks_kdf_policy,
quarantined_endpoints_total: runtime.quarantined_endpoints.len(),
quarantined_endpoints: runtime
.quarantined_endpoints
.into_iter()
.map(|entry| MinimalQuarantineData {
endpoint: entry.endpoint.to_string(),
remaining_ms: entry.remaining_ms,
})
.collect(),
};
let network_path = runtime
.network_path
.into_iter()
.map(|entry| MinimalDcPathData {
dc: entry.dc,
ip_preference: entry.ip_preference,
selected_addr_v4: entry.selected_addr_v4.map(|value| value.to_string()),
selected_addr_v6: entry.selected_addr_v6.map(|value| value.to_string()),
})
.collect();
let payload = MinimalAllPayload {
me_writers,
dcs,
me_runtime: Some(me_runtime),
network_path,
};
if cache_ttl_ms > 0 {
let entry = MinimalCacheEntry {
expires_at: Instant::now() + Duration::from_millis(cache_ttl_ms),
payload: payload.clone(),
generated_at_epoch_secs,
};
*shared.minimal_cache.lock().await = Some(entry);
}
Some((generated_at_epoch_secs, payload))
}
fn disabled_me_writers(now_epoch_secs: u64, reason: &'static str) -> MeWritersData {
MeWritersData {
middle_proxy_enabled: false,
reason: Some(reason),
generated_at_epoch_secs: now_epoch_secs,
summary: MeWritersSummary {
configured_dc_groups: 0,
configured_endpoints: 0,
available_endpoints: 0,
available_pct: 0.0,
required_writers: 0,
alive_writers: 0,
coverage_pct: 0.0,
},
writers: Vec::new(),
}
}
fn disabled_dcs(now_epoch_secs: u64, reason: &'static str) -> DcStatusData {
DcStatusData {
middle_proxy_enabled: false,
reason: Some(reason),
generated_at_epoch_secs: now_epoch_secs,
dcs: Vec::new(),
}
}
fn map_route_kind(value: UpstreamRouteKind) -> &'static str {
match value {
UpstreamRouteKind::Direct => "direct",
UpstreamRouteKind::Socks4 => "socks4",
UpstreamRouteKind::Socks5 => "socks5",
}
}
fn map_ip_preference(value: IpPreference) -> &'static str {
match value {
IpPreference::Unknown => "unknown",
IpPreference::PreferV6 => "prefer_v6",
IpPreference::PreferV4 => "prefer_v4",
IpPreference::BothWork => "both_work",
IpPreference::Unavailable => "unavailable",
}
}
fn now_epoch_secs() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs()
}

499
src/api/users.rs Normal file
View File

@@ -0,0 +1,499 @@
use std::net::IpAddr;
use hyper::StatusCode;
use crate::config::ProxyConfig;
use crate::ip_tracker::UserIpTracker;
use crate::stats::Stats;
use super::ApiShared;
use super::config_store::{
ensure_expected_revision, load_config_from_disk, save_config_to_disk,
};
use super::model::{
ApiFailure, CreateUserRequest, CreateUserResponse, PatchUserRequest, RotateSecretRequest,
UserInfo, UserLinks, is_valid_ad_tag, is_valid_user_secret, is_valid_username,
parse_optional_expiration, random_user_secret,
};
pub(super) async fn create_user(
body: CreateUserRequest,
expected_revision: Option<String>,
shared: &ApiShared,
) -> Result<(CreateUserResponse, String), ApiFailure> {
if !is_valid_username(&body.username) {
return Err(ApiFailure::bad_request(
"username must match [A-Za-z0-9_.-] and be 1..64 chars",
));
}
let secret = match body.secret {
Some(secret) => {
if !is_valid_user_secret(&secret) {
return Err(ApiFailure::bad_request(
"secret must be exactly 32 hex characters",
));
}
secret
}
None => random_user_secret(),
};
if let Some(ad_tag) = body.user_ad_tag.as_ref() && !is_valid_ad_tag(ad_tag) {
return Err(ApiFailure::bad_request(
"user_ad_tag must be exactly 32 hex characters",
));
}
let expiration = parse_optional_expiration(body.expiration_rfc3339.as_deref())?;
let _guard = shared.mutation_lock.lock().await;
let mut cfg = load_config_from_disk(&shared.config_path).await?;
ensure_expected_revision(&shared.config_path, expected_revision.as_deref()).await?;
if cfg.access.users.contains_key(&body.username) {
return Err(ApiFailure::new(
StatusCode::CONFLICT,
"user_exists",
"User already exists",
));
}
cfg.access.users.insert(body.username.clone(), secret.clone());
if let Some(ad_tag) = body.user_ad_tag {
cfg.access.user_ad_tags.insert(body.username.clone(), ad_tag);
}
if let Some(limit) = body.max_tcp_conns {
cfg.access.user_max_tcp_conns.insert(body.username.clone(), limit);
}
if let Some(expiration) = expiration {
cfg.access
.user_expirations
.insert(body.username.clone(), expiration);
}
if let Some(quota) = body.data_quota_bytes {
cfg.access.user_data_quota.insert(body.username.clone(), quota);
}
let updated_limit = body.max_unique_ips;
if let Some(limit) = updated_limit {
cfg.access
.user_max_unique_ips
.insert(body.username.clone(), limit);
}
cfg.validate()
.map_err(|e| ApiFailure::bad_request(format!("config validation failed: {}", e)))?;
let revision = save_config_to_disk(&shared.config_path, &cfg).await?;
drop(_guard);
if let Some(limit) = updated_limit {
shared.ip_tracker.set_user_limit(&body.username, limit).await;
}
let users = users_from_config(
&cfg,
&shared.stats,
&shared.ip_tracker,
shared.startup_detected_ip_v4,
shared.startup_detected_ip_v6,
)
.await;
let user = users
.into_iter()
.find(|entry| entry.username == body.username)
.unwrap_or(UserInfo {
username: body.username.clone(),
user_ad_tag: None,
max_tcp_conns: None,
expiration_rfc3339: None,
data_quota_bytes: None,
max_unique_ips: updated_limit,
current_connections: 0,
active_unique_ips: 0,
active_unique_ips_list: Vec::new(),
recent_unique_ips: 0,
recent_unique_ips_list: Vec::new(),
total_octets: 0,
links: build_user_links(
&cfg,
&secret,
shared.startup_detected_ip_v4,
shared.startup_detected_ip_v6,
),
});
Ok((CreateUserResponse { user, secret }, revision))
}
pub(super) async fn patch_user(
user: &str,
body: PatchUserRequest,
expected_revision: Option<String>,
shared: &ApiShared,
) -> Result<(UserInfo, String), ApiFailure> {
if let Some(secret) = body.secret.as_ref() && !is_valid_user_secret(secret) {
return Err(ApiFailure::bad_request(
"secret must be exactly 32 hex characters",
));
}
if let Some(ad_tag) = body.user_ad_tag.as_ref() && !is_valid_ad_tag(ad_tag) {
return Err(ApiFailure::bad_request(
"user_ad_tag must be exactly 32 hex characters",
));
}
let expiration = parse_optional_expiration(body.expiration_rfc3339.as_deref())?;
let _guard = shared.mutation_lock.lock().await;
let mut cfg = load_config_from_disk(&shared.config_path).await?;
ensure_expected_revision(&shared.config_path, expected_revision.as_deref()).await?;
if !cfg.access.users.contains_key(user) {
return Err(ApiFailure::new(
StatusCode::NOT_FOUND,
"not_found",
"User not found",
));
}
if let Some(secret) = body.secret {
cfg.access.users.insert(user.to_string(), secret);
}
if let Some(ad_tag) = body.user_ad_tag {
cfg.access.user_ad_tags.insert(user.to_string(), ad_tag);
}
if let Some(limit) = body.max_tcp_conns {
cfg.access.user_max_tcp_conns.insert(user.to_string(), limit);
}
if let Some(expiration) = expiration {
cfg.access.user_expirations.insert(user.to_string(), expiration);
}
if let Some(quota) = body.data_quota_bytes {
cfg.access.user_data_quota.insert(user.to_string(), quota);
}
let mut updated_limit = None;
if let Some(limit) = body.max_unique_ips {
cfg.access.user_max_unique_ips.insert(user.to_string(), limit);
updated_limit = Some(limit);
}
cfg.validate()
.map_err(|e| ApiFailure::bad_request(format!("config validation failed: {}", e)))?;
let revision = save_config_to_disk(&shared.config_path, &cfg).await?;
drop(_guard);
if let Some(limit) = updated_limit {
shared.ip_tracker.set_user_limit(user, limit).await;
}
let users = users_from_config(
&cfg,
&shared.stats,
&shared.ip_tracker,
shared.startup_detected_ip_v4,
shared.startup_detected_ip_v6,
)
.await;
let user_info = users
.into_iter()
.find(|entry| entry.username == user)
.ok_or_else(|| ApiFailure::internal("failed to build updated user view"))?;
Ok((user_info, revision))
}
pub(super) async fn rotate_secret(
user: &str,
body: RotateSecretRequest,
expected_revision: Option<String>,
shared: &ApiShared,
) -> Result<(CreateUserResponse, String), ApiFailure> {
let secret = body.secret.unwrap_or_else(random_user_secret);
if !is_valid_user_secret(&secret) {
return Err(ApiFailure::bad_request(
"secret must be exactly 32 hex characters",
));
}
let _guard = shared.mutation_lock.lock().await;
let mut cfg = load_config_from_disk(&shared.config_path).await?;
ensure_expected_revision(&shared.config_path, expected_revision.as_deref()).await?;
if !cfg.access.users.contains_key(user) {
return Err(ApiFailure::new(
StatusCode::NOT_FOUND,
"not_found",
"User not found",
));
}
cfg.access.users.insert(user.to_string(), secret.clone());
cfg.validate()
.map_err(|e| ApiFailure::bad_request(format!("config validation failed: {}", e)))?;
let revision = save_config_to_disk(&shared.config_path, &cfg).await?;
drop(_guard);
let users = users_from_config(
&cfg,
&shared.stats,
&shared.ip_tracker,
shared.startup_detected_ip_v4,
shared.startup_detected_ip_v6,
)
.await;
let user_info = users
.into_iter()
.find(|entry| entry.username == user)
.ok_or_else(|| ApiFailure::internal("failed to build updated user view"))?;
Ok((
CreateUserResponse {
user: user_info,
secret,
},
revision,
))
}
pub(super) async fn delete_user(
user: &str,
expected_revision: Option<String>,
shared: &ApiShared,
) -> Result<(String, String), ApiFailure> {
let _guard = shared.mutation_lock.lock().await;
let mut cfg = load_config_from_disk(&shared.config_path).await?;
ensure_expected_revision(&shared.config_path, expected_revision.as_deref()).await?;
if !cfg.access.users.contains_key(user) {
return Err(ApiFailure::new(
StatusCode::NOT_FOUND,
"not_found",
"User not found",
));
}
if cfg.access.users.len() <= 1 {
return Err(ApiFailure::new(
StatusCode::CONFLICT,
"last_user_forbidden",
"Cannot delete the last configured user",
));
}
cfg.access.users.remove(user);
cfg.access.user_ad_tags.remove(user);
cfg.access.user_max_tcp_conns.remove(user);
cfg.access.user_expirations.remove(user);
cfg.access.user_data_quota.remove(user);
cfg.access.user_max_unique_ips.remove(user);
cfg.validate()
.map_err(|e| ApiFailure::bad_request(format!("config validation failed: {}", e)))?;
let revision = save_config_to_disk(&shared.config_path, &cfg).await?;
drop(_guard);
shared.ip_tracker.remove_user_limit(user).await;
shared.ip_tracker.clear_user_ips(user).await;
Ok((user.to_string(), revision))
}
pub(super) async fn users_from_config(
cfg: &ProxyConfig,
stats: &Stats,
ip_tracker: &UserIpTracker,
startup_detected_ip_v4: Option<IpAddr>,
startup_detected_ip_v6: Option<IpAddr>,
) -> Vec<UserInfo> {
let mut names = cfg.access.users.keys().cloned().collect::<Vec<_>>();
names.sort();
let active_ip_lists = ip_tracker.get_active_ips_for_users(&names).await;
let recent_ip_lists = ip_tracker.get_recent_ips_for_users(&names).await;
let mut users = Vec::with_capacity(names.len());
for username in names {
let active_ip_list = active_ip_lists
.get(&username)
.cloned()
.unwrap_or_else(Vec::new);
let recent_ip_list = recent_ip_lists
.get(&username)
.cloned()
.unwrap_or_else(Vec::new);
let links = cfg
.access
.users
.get(&username)
.map(|secret| {
build_user_links(
cfg,
secret,
startup_detected_ip_v4,
startup_detected_ip_v6,
)
})
.unwrap_or(UserLinks {
classic: Vec::new(),
secure: Vec::new(),
tls: Vec::new(),
});
users.push(UserInfo {
user_ad_tag: cfg.access.user_ad_tags.get(&username).cloned(),
max_tcp_conns: cfg.access.user_max_tcp_conns.get(&username).copied(),
expiration_rfc3339: cfg
.access
.user_expirations
.get(&username)
.map(chrono::DateTime::<chrono::Utc>::to_rfc3339),
data_quota_bytes: cfg.access.user_data_quota.get(&username).copied(),
max_unique_ips: cfg.access.user_max_unique_ips.get(&username).copied(),
current_connections: stats.get_user_curr_connects(&username),
active_unique_ips: active_ip_list.len(),
active_unique_ips_list: active_ip_list,
recent_unique_ips: recent_ip_list.len(),
recent_unique_ips_list: recent_ip_list,
total_octets: stats.get_user_total_octets(&username),
links,
username,
});
}
users
}
fn build_user_links(
cfg: &ProxyConfig,
secret: &str,
startup_detected_ip_v4: Option<IpAddr>,
startup_detected_ip_v6: Option<IpAddr>,
) -> UserLinks {
let hosts = resolve_link_hosts(cfg, startup_detected_ip_v4, startup_detected_ip_v6);
let port = cfg.general.links.public_port.unwrap_or(cfg.server.port);
let tls_domains = resolve_tls_domains(cfg);
let mut classic = Vec::new();
let mut secure = Vec::new();
let mut tls = Vec::new();
for host in &hosts {
if cfg.general.modes.classic {
classic.push(format!(
"tg://proxy?server={}&port={}&secret={}",
host, port, secret
));
}
if cfg.general.modes.secure {
secure.push(format!(
"tg://proxy?server={}&port={}&secret=dd{}",
host, port, secret
));
}
if cfg.general.modes.tls {
for domain in &tls_domains {
let domain_hex = hex::encode(domain);
tls.push(format!(
"tg://proxy?server={}&port={}&secret=ee{}{}",
host, port, secret, domain_hex
));
}
}
}
UserLinks {
classic,
secure,
tls,
}
}
fn resolve_link_hosts(
cfg: &ProxyConfig,
startup_detected_ip_v4: Option<IpAddr>,
startup_detected_ip_v6: Option<IpAddr>,
) -> Vec<String> {
if let Some(host) = cfg
.general
.links
.public_host
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
{
return vec![host.to_string()];
}
let mut startup_hosts = Vec::new();
if let Some(ip) = startup_detected_ip_v4 {
push_unique_host(&mut startup_hosts, &ip.to_string());
}
if let Some(ip) = startup_detected_ip_v6 {
push_unique_host(&mut startup_hosts, &ip.to_string());
}
if !startup_hosts.is_empty() {
return startup_hosts;
}
let mut hosts = Vec::new();
for listener in &cfg.server.listeners {
if let Some(host) = listener
.announce
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
{
push_unique_host(&mut hosts, host);
continue;
}
if let Some(ip) = listener.announce_ip {
if !ip.is_unspecified() {
push_unique_host(&mut hosts, &ip.to_string());
}
continue;
}
if !listener.ip.is_unspecified() {
push_unique_host(&mut hosts, &listener.ip.to_string());
}
}
if hosts.is_empty() {
if let Some(host) = cfg.server.listen_addr_ipv4.as_deref() {
push_host_from_legacy_listen(&mut hosts, host);
}
if let Some(host) = cfg.server.listen_addr_ipv6.as_deref() {
push_host_from_legacy_listen(&mut hosts, host);
}
}
hosts
}
fn push_host_from_legacy_listen(hosts: &mut Vec<String>, raw: &str) {
let candidate = raw.trim();
if candidate.is_empty() {
return;
}
match candidate.parse::<IpAddr>() {
Ok(ip) if ip.is_unspecified() => {}
Ok(ip) => push_unique_host(hosts, &ip.to_string()),
Err(_) => push_unique_host(hosts, candidate),
}
}
fn push_unique_host(hosts: &mut Vec<String>, candidate: &str) {
if !hosts.iter().any(|existing| existing == candidate) {
hosts.push(candidate.to_string());
}
}
fn resolve_tls_domains(cfg: &ProxyConfig) -> Vec<&str> {
let mut domains = Vec::with_capacity(1 + cfg.censorship.tls_domains.len());
let primary = cfg.censorship.tls_domain.as_str();
if !primary.is_empty() {
domains.push(primary);
}
for domain in &cfg.censorship.tls_domains {
let value = domain.as_str();
if value.is_empty() || domains.contains(&value) {
continue;
}
domains.push(value);
}
domains
}

View File

@@ -8,8 +8,13 @@ const DEFAULT_STUN_TCP_FALLBACK: bool = true;
const DEFAULT_MIDDLE_PROXY_WARM_STANDBY: usize = 16;
const DEFAULT_ME_RECONNECT_MAX_CONCURRENT_PER_DC: u32 = 8;
const DEFAULT_ME_RECONNECT_FAST_RETRY_COUNT: u32 = 16;
const DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS: u32 = 3;
const DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD: u32 = 4;
const DEFAULT_ME_SINGLE_ENDPOINT_SHADOW_WRITERS: u8 = 2;
const DEFAULT_ME_ADAPTIVE_FLOOR_IDLE_SECS: u64 = 90;
const DEFAULT_ME_ADAPTIVE_FLOOR_MIN_WRITERS_SINGLE_ENDPOINT: u8 = 1;
const DEFAULT_ME_ADAPTIVE_FLOOR_RECOVER_GRACE_SECS: u64 = 180;
const DEFAULT_USER_MAX_UNIQUE_IPS_WINDOW_SECS: u64 = 30;
const DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS: u32 = 2;
const DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD: u32 = 5;
const DEFAULT_LISTEN_ADDR_IPV6: &str = "::";
const DEFAULT_ACCESS_USER: &str = "default";
const DEFAULT_ACCESS_SECRET: &str = "00000000000000000000000000000000";
@@ -88,6 +93,26 @@ pub(crate) fn default_metrics_whitelist() -> Vec<IpNetwork> {
]
}
pub(crate) fn default_api_listen() -> String {
"127.0.0.1:9091".to_string()
}
pub(crate) fn default_api_whitelist() -> Vec<IpNetwork> {
default_metrics_whitelist()
}
pub(crate) fn default_api_request_body_limit_bytes() -> usize {
64 * 1024
}
pub(crate) fn default_api_minimal_runtime_enabled() -> bool {
false
}
pub(crate) fn default_api_minimal_runtime_cache_ttl_ms() -> u64 {
1000
}
pub(crate) fn default_prefer_4() -> u8 {
4
}
@@ -104,6 +129,10 @@ pub(crate) fn default_unknown_dc_log_path() -> Option<String> {
Some("unknown-dc.txt".to_string())
}
pub(crate) fn default_unknown_dc_file_log_enabled() -> bool {
false
}
pub(crate) fn default_pool_size() -> usize {
8
}
@@ -112,6 +141,14 @@ pub(crate) fn default_proxy_secret_path() -> Option<String> {
Some("proxy-secret".to_string())
}
pub(crate) fn default_proxy_config_v4_cache_path() -> Option<String> {
Some("cache/proxy-config-v4.txt".to_string())
}
pub(crate) fn default_proxy_config_v6_cache_path() -> Option<String> {
Some("cache/proxy-config-v6.txt".to_string())
}
pub(crate) fn default_middle_proxy_nat_stun() -> Option<String> {
None
}
@@ -128,12 +165,20 @@ pub(crate) fn default_middle_proxy_warm_standby() -> usize {
DEFAULT_MIDDLE_PROXY_WARM_STANDBY
}
pub(crate) fn default_me_init_retry_attempts() -> u32 {
0
}
pub(crate) fn default_me2dc_fallback() -> bool {
true
}
pub(crate) fn default_keepalive_interval() -> u64 {
25
8
}
pub(crate) fn default_keepalive_jitter() -> u64 {
5
2
}
pub(crate) fn default_warmup_step_delay_ms() -> u64 {
@@ -160,18 +205,62 @@ pub(crate) fn default_me_reconnect_fast_retry_count() -> u32 {
DEFAULT_ME_RECONNECT_FAST_RETRY_COUNT
}
pub(crate) fn default_me_single_endpoint_shadow_writers() -> u8 {
DEFAULT_ME_SINGLE_ENDPOINT_SHADOW_WRITERS
}
pub(crate) fn default_me_single_endpoint_outage_mode_enabled() -> bool {
true
}
pub(crate) fn default_me_single_endpoint_outage_disable_quarantine() -> bool {
true
}
pub(crate) fn default_me_single_endpoint_outage_backoff_min_ms() -> u64 {
250
}
pub(crate) fn default_me_single_endpoint_outage_backoff_max_ms() -> u64 {
3000
}
pub(crate) fn default_me_single_endpoint_shadow_rotate_every_secs() -> u64 {
900
}
pub(crate) fn default_me_adaptive_floor_idle_secs() -> u64 {
DEFAULT_ME_ADAPTIVE_FLOOR_IDLE_SECS
}
pub(crate) fn default_me_adaptive_floor_min_writers_single_endpoint() -> u8 {
DEFAULT_ME_ADAPTIVE_FLOOR_MIN_WRITERS_SINGLE_ENDPOINT
}
pub(crate) fn default_me_adaptive_floor_recover_grace_secs() -> u64 {
DEFAULT_ME_ADAPTIVE_FLOOR_RECOVER_GRACE_SECS
}
pub(crate) fn default_upstream_connect_retry_attempts() -> u32 {
DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS
}
pub(crate) fn default_upstream_connect_retry_backoff_ms() -> u64 {
250
100
}
pub(crate) fn default_upstream_unhealthy_fail_threshold() -> u32 {
DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD
}
pub(crate) fn default_upstream_connect_failfast_hard_errors() -> bool {
false
}
pub(crate) fn default_rpc_proxy_req_every() -> u64 {
0
}
pub(crate) fn default_crypto_pending_buffer() -> usize {
256 * 1024
}
@@ -196,6 +285,18 @@ pub(crate) fn default_me_route_backpressure_high_watermark_pct() -> u8 {
80
}
pub(crate) fn default_me_route_no_writer_wait_ms() -> u64 {
250
}
pub(crate) fn default_me_route_inline_recovery_attempts() -> u32 {
3
}
pub(crate) fn default_me_route_inline_recovery_wait_ms() -> u64 {
3000
}
pub(crate) fn default_beobachten_minutes() -> u64 {
10
}
@@ -277,6 +378,18 @@ pub(crate) fn default_me_reinit_every_secs() -> u64 {
15 * 60
}
pub(crate) fn default_me_reinit_singleflight() -> bool {
true
}
pub(crate) fn default_me_reinit_trigger_channel() -> usize {
64
}
pub(crate) fn default_me_reinit_coalesce_window_ms() -> u64 {
200
}
pub(crate) fn default_me_hardswap_warmup_delay_min_ms() -> u64 {
1000
}
@@ -301,6 +414,18 @@ pub(crate) fn default_me_config_apply_cooldown_secs() -> u64 {
300
}
pub(crate) fn default_me_snapshot_require_http_2xx() -> bool {
true
}
pub(crate) fn default_me_snapshot_reject_empty_map() -> bool {
true
}
pub(crate) fn default_me_snapshot_min_proxy_for_lines() -> u32 {
1
}
pub(crate) fn default_proxy_secret_stable_snapshots() -> u8 {
2
}
@@ -309,6 +434,10 @@ pub(crate) fn default_proxy_secret_rotate_runtime() -> bool {
true
}
pub(crate) fn default_me_secret_atomic_snapshot() -> bool {
true
}
pub(crate) fn default_proxy_secret_len_max() -> usize {
256
}
@@ -321,10 +450,18 @@ pub(crate) fn default_me_pool_drain_ttl_secs() -> u64 {
90
}
pub(crate) fn default_me_bind_stale_ttl_secs() -> u64 {
default_me_pool_drain_ttl_secs()
}
pub(crate) fn default_me_pool_min_fresh_ratio() -> f32 {
0.8
}
pub(crate) fn default_me_deterministic_writer_sort() -> bool {
true
}
pub(crate) fn default_hardswap() -> bool {
true
}
@@ -360,6 +497,10 @@ pub(crate) fn default_access_users() -> HashMap<String, String> {
)])
}
pub(crate) fn default_user_max_unique_ips_window_secs() -> u64 {
DEFAULT_USER_MAX_UNIQUE_IPS_WINDOW_SECS
}
// Custom deserializer helpers
#[derive(Deserialize)]

View File

@@ -4,24 +4,22 @@
//!
//! # What can be reloaded without restart
//!
//! | Section | Field | Effect |
//! |-----------|-------------------------------|-----------------------------------|
//! | `general` | `log_level` | Filter updated via `log_level_tx` |
//! | `general` | `ad_tag` | Passed on next connection |
//! | `general` | `middle_proxy_pool_size` | Passed on next connection |
//! | `general` | `me_keepalive_*` | Passed on next connection |
//! | `general` | `desync_all_full` | Applied immediately |
//! | `general` | `update_every` | Applied to ME updater immediately |
//! | `general` | `hardswap` | Applied on next ME map update |
//! | `general` | `me_pool_drain_ttl_secs` | Applied on next ME map update |
//! | `general` | `me_pool_min_fresh_ratio` | Applied on next ME map update |
//! | `general` | `me_reinit_drain_timeout_secs`| Applied on next ME map update |
//! | `general` | `telemetry` / `me_*_policy` | Applied immediately |
//! | `network` | `dns_overrides` | Applied immediately |
//! | `access` | All user/quota fields | Effective immediately |
//! | Section | Field | Effect |
//! |-----------|--------------------------------|------------------------------------------------|
//! | `general` | `log_level` | Filter updated via `log_level_tx` |
//! | `access` | `user_ad_tags` | Passed on next connection |
//! | `general` | `ad_tag` | Passed on next connection (fallback per-user) |
//! | `general` | `desync_all_full` | Applied immediately |
//! | `general` | `update_every` | Applied to ME updater immediately |
//! | `general` | `me_reinit_*` | Applied to ME reinit scheduler immediately |
//! | `general` | `hardswap` / `me_*_reinit` | Applied on next ME map update |
//! | `general` | `telemetry` / `me_*_policy` | Applied immediately |
//! | `network` | `dns_overrides` | Applied immediately |
//! | `access` | All user/quota fields | Effective immediately |
//!
//! Fields that require re-binding sockets (`server.port`, `censorship.*`,
//! `network.*`, `use_middle_proxy`) are **not** applied; a warning is emitted.
//! Non-hot changes are never mixed into the runtime config snapshot.
use std::net::IpAddr;
use std::path::PathBuf;
@@ -31,7 +29,7 @@ use notify::{EventKind, RecursiveMode, Watcher, recommended_watcher};
use tokio::sync::{mpsc, watch};
use tracing::{error, info, warn};
use crate::config::{LogLevel, MeSocksKdfPolicy, MeTelemetryLevel};
use crate::config::{LogLevel, MeBindStaleMode, MeFloorMode, MeSocksKdfPolicy, MeTelemetryLevel};
use super::load::ProxyConfig;
// ── Hot fields ────────────────────────────────────────────────────────────────
@@ -42,25 +40,56 @@ pub struct HotFields {
pub log_level: LogLevel,
pub ad_tag: Option<String>,
pub dns_overrides: Vec<String>,
pub middle_proxy_pool_size: usize,
pub desync_all_full: bool,
pub update_every_secs: u64,
pub me_reinit_every_secs: u64,
pub me_reinit_singleflight: bool,
pub me_reinit_coalesce_window_ms: u64,
pub hardswap: bool,
pub me_pool_drain_ttl_secs: u64,
pub me_pool_min_fresh_ratio: f32,
pub me_reinit_drain_timeout_secs: u64,
pub me_keepalive_enabled: bool,
pub me_keepalive_interval_secs: u64,
pub me_keepalive_jitter_secs: u64,
pub me_keepalive_payload_random: bool,
pub me_hardswap_warmup_delay_min_ms: u64,
pub me_hardswap_warmup_delay_max_ms: u64,
pub me_hardswap_warmup_extra_passes: u8,
pub me_hardswap_warmup_pass_backoff_base_ms: u64,
pub me_bind_stale_mode: MeBindStaleMode,
pub me_bind_stale_ttl_secs: u64,
pub me_secret_atomic_snapshot: bool,
pub me_deterministic_writer_sort: bool,
pub me_single_endpoint_shadow_writers: u8,
pub me_single_endpoint_outage_mode_enabled: bool,
pub me_single_endpoint_outage_disable_quarantine: bool,
pub me_single_endpoint_outage_backoff_min_ms: u64,
pub me_single_endpoint_outage_backoff_max_ms: u64,
pub me_single_endpoint_shadow_rotate_every_secs: u64,
pub me_config_stable_snapshots: u8,
pub me_config_apply_cooldown_secs: u64,
pub me_snapshot_require_http_2xx: bool,
pub me_snapshot_reject_empty_map: bool,
pub me_snapshot_min_proxy_for_lines: u32,
pub proxy_secret_stable_snapshots: u8,
pub proxy_secret_rotate_runtime: bool,
pub proxy_secret_len_max: usize,
pub telemetry_core_enabled: bool,
pub telemetry_user_enabled: bool,
pub telemetry_me_level: MeTelemetryLevel,
pub me_socks_kdf_policy: MeSocksKdfPolicy,
pub me_floor_mode: MeFloorMode,
pub me_adaptive_floor_idle_secs: u64,
pub me_adaptive_floor_min_writers_single_endpoint: u8,
pub me_adaptive_floor_recover_grace_secs: u64,
pub me_route_backpressure_base_timeout_ms: u64,
pub me_route_backpressure_high_timeout_ms: u64,
pub me_route_backpressure_high_watermark_pct: u8,
pub access: crate::config::AccessConfig,
pub users: std::collections::HashMap<String, String>,
pub user_ad_tags: std::collections::HashMap<String, String>,
pub user_max_tcp_conns: std::collections::HashMap<String, usize>,
pub user_expirations: std::collections::HashMap<String, chrono::DateTime<chrono::Utc>>,
pub user_data_quota: std::collections::HashMap<String, u64>,
pub user_max_unique_ips: std::collections::HashMap<String, usize>,
pub user_max_unique_ips_mode: crate::config::UserMaxUniqueIpsMode,
pub user_max_unique_ips_window_secs: u64,
}
impl HotFields {
@@ -69,62 +98,342 @@ impl HotFields {
log_level: cfg.general.log_level.clone(),
ad_tag: cfg.general.ad_tag.clone(),
dns_overrides: cfg.network.dns_overrides.clone(),
middle_proxy_pool_size: cfg.general.middle_proxy_pool_size,
desync_all_full: cfg.general.desync_all_full,
update_every_secs: cfg.general.effective_update_every_secs(),
me_reinit_every_secs: cfg.general.me_reinit_every_secs,
me_reinit_singleflight: cfg.general.me_reinit_singleflight,
me_reinit_coalesce_window_ms: cfg.general.me_reinit_coalesce_window_ms,
hardswap: cfg.general.hardswap,
me_pool_drain_ttl_secs: cfg.general.me_pool_drain_ttl_secs,
me_pool_min_fresh_ratio: cfg.general.me_pool_min_fresh_ratio,
me_reinit_drain_timeout_secs: cfg.general.me_reinit_drain_timeout_secs,
me_keepalive_enabled: cfg.general.me_keepalive_enabled,
me_keepalive_interval_secs: cfg.general.me_keepalive_interval_secs,
me_keepalive_jitter_secs: cfg.general.me_keepalive_jitter_secs,
me_keepalive_payload_random: cfg.general.me_keepalive_payload_random,
me_hardswap_warmup_delay_min_ms: cfg.general.me_hardswap_warmup_delay_min_ms,
me_hardswap_warmup_delay_max_ms: cfg.general.me_hardswap_warmup_delay_max_ms,
me_hardswap_warmup_extra_passes: cfg.general.me_hardswap_warmup_extra_passes,
me_hardswap_warmup_pass_backoff_base_ms: cfg
.general
.me_hardswap_warmup_pass_backoff_base_ms,
me_bind_stale_mode: cfg.general.me_bind_stale_mode,
me_bind_stale_ttl_secs: cfg.general.me_bind_stale_ttl_secs,
me_secret_atomic_snapshot: cfg.general.me_secret_atomic_snapshot,
me_deterministic_writer_sort: cfg.general.me_deterministic_writer_sort,
me_single_endpoint_shadow_writers: cfg.general.me_single_endpoint_shadow_writers,
me_single_endpoint_outage_mode_enabled: cfg
.general
.me_single_endpoint_outage_mode_enabled,
me_single_endpoint_outage_disable_quarantine: cfg
.general
.me_single_endpoint_outage_disable_quarantine,
me_single_endpoint_outage_backoff_min_ms: cfg
.general
.me_single_endpoint_outage_backoff_min_ms,
me_single_endpoint_outage_backoff_max_ms: cfg
.general
.me_single_endpoint_outage_backoff_max_ms,
me_single_endpoint_shadow_rotate_every_secs: cfg
.general
.me_single_endpoint_shadow_rotate_every_secs,
me_config_stable_snapshots: cfg.general.me_config_stable_snapshots,
me_config_apply_cooldown_secs: cfg.general.me_config_apply_cooldown_secs,
me_snapshot_require_http_2xx: cfg.general.me_snapshot_require_http_2xx,
me_snapshot_reject_empty_map: cfg.general.me_snapshot_reject_empty_map,
me_snapshot_min_proxy_for_lines: cfg.general.me_snapshot_min_proxy_for_lines,
proxy_secret_stable_snapshots: cfg.general.proxy_secret_stable_snapshots,
proxy_secret_rotate_runtime: cfg.general.proxy_secret_rotate_runtime,
proxy_secret_len_max: cfg.general.proxy_secret_len_max,
telemetry_core_enabled: cfg.general.telemetry.core_enabled,
telemetry_user_enabled: cfg.general.telemetry.user_enabled,
telemetry_me_level: cfg.general.telemetry.me_level,
me_socks_kdf_policy: cfg.general.me_socks_kdf_policy,
me_floor_mode: cfg.general.me_floor_mode,
me_adaptive_floor_idle_secs: cfg.general.me_adaptive_floor_idle_secs,
me_adaptive_floor_min_writers_single_endpoint: cfg
.general
.me_adaptive_floor_min_writers_single_endpoint,
me_adaptive_floor_recover_grace_secs: cfg
.general
.me_adaptive_floor_recover_grace_secs,
me_route_backpressure_base_timeout_ms: cfg.general.me_route_backpressure_base_timeout_ms,
me_route_backpressure_high_timeout_ms: cfg.general.me_route_backpressure_high_timeout_ms,
me_route_backpressure_high_watermark_pct: cfg.general.me_route_backpressure_high_watermark_pct,
access: cfg.access.clone(),
users: cfg.access.users.clone(),
user_ad_tags: cfg.access.user_ad_tags.clone(),
user_max_tcp_conns: cfg.access.user_max_tcp_conns.clone(),
user_expirations: cfg.access.user_expirations.clone(),
user_data_quota: cfg.access.user_data_quota.clone(),
user_max_unique_ips: cfg.access.user_max_unique_ips.clone(),
user_max_unique_ips_mode: cfg.access.user_max_unique_ips_mode,
user_max_unique_ips_window_secs: cfg.access.user_max_unique_ips_window_secs,
}
}
}
// ── Helpers ───────────────────────────────────────────────────────────────────
fn canonicalize_json(value: &mut serde_json::Value) {
match value {
serde_json::Value::Object(map) => {
let mut pairs: Vec<(String, serde_json::Value)> =
std::mem::take(map).into_iter().collect();
pairs.sort_by(|a, b| a.0.cmp(&b.0));
for (_, item) in pairs.iter_mut() {
canonicalize_json(item);
}
for (key, item) in pairs {
map.insert(key, item);
}
}
serde_json::Value::Array(items) => {
for item in items {
canonicalize_json(item);
}
}
_ => {}
}
}
fn config_equal(lhs: &ProxyConfig, rhs: &ProxyConfig) -> bool {
let mut left = match serde_json::to_value(lhs) {
Ok(value) => value,
Err(_) => return false,
};
let mut right = match serde_json::to_value(rhs) {
Ok(value) => value,
Err(_) => return false,
};
canonicalize_json(&mut left);
canonicalize_json(&mut right);
left == right
}
fn listeners_equal(
lhs: &[crate::config::ListenerConfig],
rhs: &[crate::config::ListenerConfig],
) -> bool {
if lhs.len() != rhs.len() {
return false;
}
lhs.iter().zip(rhs.iter()).all(|(a, b)| {
a.ip == b.ip
&& a.announce == b.announce
&& a.announce_ip == b.announce_ip
&& a.proxy_protocol == b.proxy_protocol
&& a.reuse_allow == b.reuse_allow
})
}
fn overlay_hot_fields(old: &ProxyConfig, new: &ProxyConfig) -> ProxyConfig {
let mut cfg = old.clone();
cfg.general.log_level = new.general.log_level.clone();
cfg.general.ad_tag = new.general.ad_tag.clone();
cfg.network.dns_overrides = new.network.dns_overrides.clone();
cfg.general.desync_all_full = new.general.desync_all_full;
cfg.general.update_every = new.general.update_every;
cfg.general.proxy_secret_auto_reload_secs = new.general.proxy_secret_auto_reload_secs;
cfg.general.proxy_config_auto_reload_secs = new.general.proxy_config_auto_reload_secs;
cfg.general.me_reinit_every_secs = new.general.me_reinit_every_secs;
cfg.general.me_reinit_singleflight = new.general.me_reinit_singleflight;
cfg.general.me_reinit_coalesce_window_ms = new.general.me_reinit_coalesce_window_ms;
cfg.general.hardswap = new.general.hardswap;
cfg.general.me_pool_drain_ttl_secs = new.general.me_pool_drain_ttl_secs;
cfg.general.me_pool_min_fresh_ratio = new.general.me_pool_min_fresh_ratio;
cfg.general.me_reinit_drain_timeout_secs = new.general.me_reinit_drain_timeout_secs;
cfg.general.me_hardswap_warmup_delay_min_ms = new.general.me_hardswap_warmup_delay_min_ms;
cfg.general.me_hardswap_warmup_delay_max_ms = new.general.me_hardswap_warmup_delay_max_ms;
cfg.general.me_hardswap_warmup_extra_passes = new.general.me_hardswap_warmup_extra_passes;
cfg.general.me_hardswap_warmup_pass_backoff_base_ms =
new.general.me_hardswap_warmup_pass_backoff_base_ms;
cfg.general.me_bind_stale_mode = new.general.me_bind_stale_mode;
cfg.general.me_bind_stale_ttl_secs = new.general.me_bind_stale_ttl_secs;
cfg.general.me_secret_atomic_snapshot = new.general.me_secret_atomic_snapshot;
cfg.general.me_deterministic_writer_sort = new.general.me_deterministic_writer_sort;
cfg.general.me_single_endpoint_shadow_writers = new.general.me_single_endpoint_shadow_writers;
cfg.general.me_single_endpoint_outage_mode_enabled =
new.general.me_single_endpoint_outage_mode_enabled;
cfg.general.me_single_endpoint_outage_disable_quarantine =
new.general.me_single_endpoint_outage_disable_quarantine;
cfg.general.me_single_endpoint_outage_backoff_min_ms =
new.general.me_single_endpoint_outage_backoff_min_ms;
cfg.general.me_single_endpoint_outage_backoff_max_ms =
new.general.me_single_endpoint_outage_backoff_max_ms;
cfg.general.me_single_endpoint_shadow_rotate_every_secs =
new.general.me_single_endpoint_shadow_rotate_every_secs;
cfg.general.me_config_stable_snapshots = new.general.me_config_stable_snapshots;
cfg.general.me_config_apply_cooldown_secs = new.general.me_config_apply_cooldown_secs;
cfg.general.me_snapshot_require_http_2xx = new.general.me_snapshot_require_http_2xx;
cfg.general.me_snapshot_reject_empty_map = new.general.me_snapshot_reject_empty_map;
cfg.general.me_snapshot_min_proxy_for_lines = new.general.me_snapshot_min_proxy_for_lines;
cfg.general.proxy_secret_stable_snapshots = new.general.proxy_secret_stable_snapshots;
cfg.general.proxy_secret_rotate_runtime = new.general.proxy_secret_rotate_runtime;
cfg.general.proxy_secret_len_max = new.general.proxy_secret_len_max;
cfg.general.telemetry = new.general.telemetry.clone();
cfg.general.me_socks_kdf_policy = new.general.me_socks_kdf_policy;
cfg.general.me_floor_mode = new.general.me_floor_mode;
cfg.general.me_adaptive_floor_idle_secs = new.general.me_adaptive_floor_idle_secs;
cfg.general.me_adaptive_floor_min_writers_single_endpoint =
new.general.me_adaptive_floor_min_writers_single_endpoint;
cfg.general.me_adaptive_floor_recover_grace_secs =
new.general.me_adaptive_floor_recover_grace_secs;
cfg.general.me_route_backpressure_base_timeout_ms =
new.general.me_route_backpressure_base_timeout_ms;
cfg.general.me_route_backpressure_high_timeout_ms =
new.general.me_route_backpressure_high_timeout_ms;
cfg.general.me_route_backpressure_high_watermark_pct =
new.general.me_route_backpressure_high_watermark_pct;
cfg.access.users = new.access.users.clone();
cfg.access.user_ad_tags = new.access.user_ad_tags.clone();
cfg.access.user_max_tcp_conns = new.access.user_max_tcp_conns.clone();
cfg.access.user_expirations = new.access.user_expirations.clone();
cfg.access.user_data_quota = new.access.user_data_quota.clone();
cfg.access.user_max_unique_ips = new.access.user_max_unique_ips.clone();
cfg.access.user_max_unique_ips_mode = new.access.user_max_unique_ips_mode;
cfg.access.user_max_unique_ips_window_secs = new.access.user_max_unique_ips_window_secs;
cfg
}
/// Warn if any non-hot fields changed (require restart).
fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig) {
fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: bool) {
let mut warned = false;
if old.server.port != new.server.port {
warned = true;
warn!(
"config reload: server.port changed ({} → {}); restart required",
old.server.port, new.server.port
);
}
if old.server.api.enabled != new.server.api.enabled
|| old.server.api.listen != new.server.api.listen
|| old.server.api.whitelist != new.server.api.whitelist
|| old.server.api.auth_header != new.server.api.auth_header
|| old.server.api.request_body_limit_bytes != new.server.api.request_body_limit_bytes
|| old.server.api.minimal_runtime_enabled != new.server.api.minimal_runtime_enabled
|| old.server.api.minimal_runtime_cache_ttl_ms
!= new.server.api.minimal_runtime_cache_ttl_ms
|| old.server.api.read_only != new.server.api.read_only
{
warned = true;
warn!("config reload: server.api changed; restart required");
}
if old.server.proxy_protocol != new.server.proxy_protocol
|| !listeners_equal(&old.server.listeners, &new.server.listeners)
|| old.server.listen_addr_ipv4 != new.server.listen_addr_ipv4
|| old.server.listen_addr_ipv6 != new.server.listen_addr_ipv6
|| old.server.listen_tcp != new.server.listen_tcp
|| old.server.listen_unix_sock != new.server.listen_unix_sock
|| old.server.listen_unix_sock_perm != new.server.listen_unix_sock_perm
{
warned = true;
warn!("config reload: server listener settings changed; restart required");
}
if old.censorship.tls_domain != new.censorship.tls_domain
|| old.censorship.tls_domains != new.censorship.tls_domains
|| old.censorship.mask != new.censorship.mask
|| old.censorship.mask_host != new.censorship.mask_host
|| old.censorship.mask_port != new.censorship.mask_port
|| old.censorship.mask_unix_sock != new.censorship.mask_unix_sock
|| old.censorship.fake_cert_len != new.censorship.fake_cert_len
|| old.censorship.tls_emulation != new.censorship.tls_emulation
|| old.censorship.tls_front_dir != new.censorship.tls_front_dir
|| old.censorship.server_hello_delay_min_ms != new.censorship.server_hello_delay_min_ms
|| old.censorship.server_hello_delay_max_ms != new.censorship.server_hello_delay_max_ms
|| old.censorship.tls_new_session_tickets != new.censorship.tls_new_session_tickets
|| old.censorship.tls_full_cert_ttl_secs != new.censorship.tls_full_cert_ttl_secs
|| old.censorship.alpn_enforce != new.censorship.alpn_enforce
|| old.censorship.mask_proxy_protocol != new.censorship.mask_proxy_protocol
{
warned = true;
warn!("config reload: censorship settings changed; restart required");
}
if old.censorship.tls_domain != new.censorship.tls_domain {
warned = true;
warn!(
"config reload: censorship.tls_domain changed ('{}' → '{}'); restart required",
old.censorship.tls_domain, new.censorship.tls_domain
);
}
if old.network.ipv4 != new.network.ipv4 || old.network.ipv6 != new.network.ipv6 {
warned = true;
warn!("config reload: network.ipv4/ipv6 changed; restart required");
}
if old.network.prefer != new.network.prefer
|| old.network.multipath != new.network.multipath
|| old.network.stun_use != new.network.stun_use
|| old.network.stun_servers != new.network.stun_servers
|| old.network.stun_tcp_fallback != new.network.stun_tcp_fallback
|| old.network.http_ip_detect_urls != new.network.http_ip_detect_urls
|| old.network.cache_public_ip_path != new.network.cache_public_ip_path
{
warned = true;
warn!("config reload: non-hot network settings changed; restart required");
}
if old.general.use_middle_proxy != new.general.use_middle_proxy {
warned = true;
warn!("config reload: use_middle_proxy changed; restart required");
}
if old.general.stun_nat_probe_concurrency != new.general.stun_nat_probe_concurrency {
warned = true;
warn!("config reload: general.stun_nat_probe_concurrency changed; restart required");
}
if old.general.middle_proxy_pool_size != new.general.middle_proxy_pool_size {
warned = true;
warn!("config reload: general.middle_proxy_pool_size changed; restart required");
}
if old.general.me_route_no_writer_mode != new.general.me_route_no_writer_mode
|| old.general.me_route_no_writer_wait_ms != new.general.me_route_no_writer_wait_ms
|| old.general.me_route_inline_recovery_attempts
!= new.general.me_route_inline_recovery_attempts
|| old.general.me_route_inline_recovery_wait_ms
!= new.general.me_route_inline_recovery_wait_ms
{
warned = true;
warn!("config reload: general.me_route_no_writer_* changed; restart required");
}
if old.general.unknown_dc_log_path != new.general.unknown_dc_log_path
|| old.general.unknown_dc_file_log_enabled != new.general.unknown_dc_file_log_enabled
{
warned = true;
warn!("config reload: general.unknown_dc_* changed; restart required");
}
if old.general.me_init_retry_attempts != new.general.me_init_retry_attempts {
warned = true;
warn!("config reload: general.me_init_retry_attempts changed; restart required");
}
if old.general.me2dc_fallback != new.general.me2dc_fallback {
warned = true;
warn!("config reload: general.me2dc_fallback changed; restart required");
}
if old.general.proxy_config_v4_cache_path != new.general.proxy_config_v4_cache_path
|| old.general.proxy_config_v6_cache_path != new.general.proxy_config_v6_cache_path
{
warned = true;
warn!("config reload: general.proxy_config_*_cache_path changed; restart required");
}
if old.general.me_keepalive_enabled != new.general.me_keepalive_enabled
|| old.general.me_keepalive_interval_secs != new.general.me_keepalive_interval_secs
|| old.general.me_keepalive_jitter_secs != new.general.me_keepalive_jitter_secs
|| old.general.me_keepalive_payload_random != new.general.me_keepalive_payload_random
{
warned = true;
warn!("config reload: general.me_keepalive_* changed; restart required");
}
if old.general.upstream_connect_retry_attempts != new.general.upstream_connect_retry_attempts
|| old.general.upstream_connect_retry_backoff_ms
!= new.general.upstream_connect_retry_backoff_ms
|| old.general.upstream_unhealthy_fail_threshold
!= new.general.upstream_unhealthy_fail_threshold
|| old.general.upstream_connect_failfast_hard_errors
!= new.general.upstream_connect_failfast_hard_errors
|| old.general.rpc_proxy_req_every != new.general.rpc_proxy_req_every
{
warned = true;
warn!("config reload: general.upstream_* changed; restart required");
}
if non_hot_changed && !warned {
warn!("config reload: one or more non-hot fields changed; restart required");
}
}
/// Resolve the public host for link generation — mirrors the logic in main.rs.
@@ -207,14 +516,17 @@ fn log_changes(
log_tx.send(new_hot.log_level.clone()).ok();
}
if old_hot.ad_tag != new_hot.ad_tag {
if old_hot.user_ad_tags != new_hot.user_ad_tags {
info!(
"config reload: ad_tag: {} → {}",
old_hot.ad_tag.as_deref().unwrap_or("none"),
new_hot.ad_tag.as_deref().unwrap_or("none"),
"config reload: user_ad_tags updated ({} entries)",
new_hot.user_ad_tags.len(),
);
}
if old_hot.ad_tag != new_hot.ad_tag {
info!("config reload: general.ad_tag updated (applied on next connection)");
}
if old_hot.dns_overrides != new_hot.dns_overrides {
info!(
"config reload: network.dns_overrides updated ({} entries)",
@@ -222,13 +534,6 @@ fn log_changes(
);
}
if old_hot.middle_proxy_pool_size != new_hot.middle_proxy_pool_size {
info!(
"config reload: middle_proxy_pool_size: {} → {}",
old_hot.middle_proxy_pool_size, new_hot.middle_proxy_pool_size,
);
}
if old_hot.desync_all_full != new_hot.desync_all_full {
info!(
"config reload: desync_all_full: {} → {}",
@@ -242,6 +547,17 @@ fn log_changes(
old_hot.update_every_secs, new_hot.update_every_secs,
);
}
if old_hot.me_reinit_every_secs != new_hot.me_reinit_every_secs
|| old_hot.me_reinit_singleflight != new_hot.me_reinit_singleflight
|| old_hot.me_reinit_coalesce_window_ms != new_hot.me_reinit_coalesce_window_ms
{
info!(
"config reload: me_reinit: interval={}s singleflight={} coalesce={}ms",
new_hot.me_reinit_every_secs,
new_hot.me_reinit_singleflight,
new_hot.me_reinit_coalesce_window_ms
);
}
if old_hot.hardswap != new_hot.hardswap {
info!(
@@ -270,18 +586,84 @@ fn log_changes(
old_hot.me_reinit_drain_timeout_secs, new_hot.me_reinit_drain_timeout_secs,
);
}
if old_hot.me_keepalive_enabled != new_hot.me_keepalive_enabled
|| old_hot.me_keepalive_interval_secs != new_hot.me_keepalive_interval_secs
|| old_hot.me_keepalive_jitter_secs != new_hot.me_keepalive_jitter_secs
|| old_hot.me_keepalive_payload_random != new_hot.me_keepalive_payload_random
if old_hot.me_hardswap_warmup_delay_min_ms != new_hot.me_hardswap_warmup_delay_min_ms
|| old_hot.me_hardswap_warmup_delay_max_ms != new_hot.me_hardswap_warmup_delay_max_ms
|| old_hot.me_hardswap_warmup_extra_passes != new_hot.me_hardswap_warmup_extra_passes
|| old_hot.me_hardswap_warmup_pass_backoff_base_ms
!= new_hot.me_hardswap_warmup_pass_backoff_base_ms
{
info!(
"config reload: me_keepalive: enabled={} interval={}s jitter={}s random_payload={}",
new_hot.me_keepalive_enabled,
new_hot.me_keepalive_interval_secs,
new_hot.me_keepalive_jitter_secs,
new_hot.me_keepalive_payload_random,
"config reload: me_hardswap_warmup: min={}ms max={}ms extra_passes={} pass_backoff={}ms",
new_hot.me_hardswap_warmup_delay_min_ms,
new_hot.me_hardswap_warmup_delay_max_ms,
new_hot.me_hardswap_warmup_extra_passes,
new_hot.me_hardswap_warmup_pass_backoff_base_ms
);
}
if old_hot.me_bind_stale_mode != new_hot.me_bind_stale_mode
|| old_hot.me_bind_stale_ttl_secs != new_hot.me_bind_stale_ttl_secs
{
info!(
"config reload: me_bind_stale: mode={:?} ttl={}s",
new_hot.me_bind_stale_mode,
new_hot.me_bind_stale_ttl_secs
);
}
if old_hot.me_secret_atomic_snapshot != new_hot.me_secret_atomic_snapshot
|| old_hot.me_deterministic_writer_sort != new_hot.me_deterministic_writer_sort
{
info!(
"config reload: me_runtime_flags: secret_atomic_snapshot={} deterministic_sort={}",
new_hot.me_secret_atomic_snapshot,
new_hot.me_deterministic_writer_sort
);
}
if old_hot.me_single_endpoint_shadow_writers != new_hot.me_single_endpoint_shadow_writers
|| old_hot.me_single_endpoint_outage_mode_enabled
!= new_hot.me_single_endpoint_outage_mode_enabled
|| old_hot.me_single_endpoint_outage_disable_quarantine
!= new_hot.me_single_endpoint_outage_disable_quarantine
|| old_hot.me_single_endpoint_outage_backoff_min_ms
!= new_hot.me_single_endpoint_outage_backoff_min_ms
|| old_hot.me_single_endpoint_outage_backoff_max_ms
!= new_hot.me_single_endpoint_outage_backoff_max_ms
|| old_hot.me_single_endpoint_shadow_rotate_every_secs
!= new_hot.me_single_endpoint_shadow_rotate_every_secs
{
info!(
"config reload: me_single_endpoint: shadow={} outage_enabled={} disable_quarantine={} backoff=[{}..{}]ms rotate={}s",
new_hot.me_single_endpoint_shadow_writers,
new_hot.me_single_endpoint_outage_mode_enabled,
new_hot.me_single_endpoint_outage_disable_quarantine,
new_hot.me_single_endpoint_outage_backoff_min_ms,
new_hot.me_single_endpoint_outage_backoff_max_ms,
new_hot.me_single_endpoint_shadow_rotate_every_secs
);
}
if old_hot.me_config_stable_snapshots != new_hot.me_config_stable_snapshots
|| old_hot.me_config_apply_cooldown_secs != new_hot.me_config_apply_cooldown_secs
|| old_hot.me_snapshot_require_http_2xx != new_hot.me_snapshot_require_http_2xx
|| old_hot.me_snapshot_reject_empty_map != new_hot.me_snapshot_reject_empty_map
|| old_hot.me_snapshot_min_proxy_for_lines != new_hot.me_snapshot_min_proxy_for_lines
{
info!(
"config reload: me_snapshot_guard: stable={} cooldown={}s require_2xx={} reject_empty={} min_proxy_for={}",
new_hot.me_config_stable_snapshots,
new_hot.me_config_apply_cooldown_secs,
new_hot.me_snapshot_require_http_2xx,
new_hot.me_snapshot_reject_empty_map,
new_hot.me_snapshot_min_proxy_for_lines
);
}
if old_hot.proxy_secret_stable_snapshots != new_hot.proxy_secret_stable_snapshots
|| old_hot.proxy_secret_rotate_runtime != new_hot.proxy_secret_rotate_runtime
|| old_hot.proxy_secret_len_max != new_hot.proxy_secret_len_max
{
info!(
"config reload: proxy_secret_runtime: stable={} rotate={} len_max={}",
new_hot.proxy_secret_stable_snapshots,
new_hot.proxy_secret_rotate_runtime,
new_hot.proxy_secret_len_max
);
}
@@ -305,6 +687,22 @@ fn log_changes(
);
}
if old_hot.me_floor_mode != new_hot.me_floor_mode
|| old_hot.me_adaptive_floor_idle_secs != new_hot.me_adaptive_floor_idle_secs
|| old_hot.me_adaptive_floor_min_writers_single_endpoint
!= new_hot.me_adaptive_floor_min_writers_single_endpoint
|| old_hot.me_adaptive_floor_recover_grace_secs
!= new_hot.me_adaptive_floor_recover_grace_secs
{
info!(
"config reload: me_floor: mode={:?} idle={}s min_single={} recover_grace={}s",
new_hot.me_floor_mode,
new_hot.me_adaptive_floor_idle_secs,
new_hot.me_adaptive_floor_min_writers_single_endpoint,
new_hot.me_adaptive_floor_recover_grace_secs,
);
}
if old_hot.me_route_backpressure_base_timeout_ms
!= new_hot.me_route_backpressure_base_timeout_ms
|| old_hot.me_route_backpressure_high_timeout_ms
@@ -320,21 +718,21 @@ fn log_changes(
);
}
if old_hot.access.users != new_hot.access.users {
let mut added: Vec<&String> = new_hot.access.users.keys()
.filter(|u| !old_hot.access.users.contains_key(*u))
if old_hot.users != new_hot.users {
let mut added: Vec<&String> = new_hot.users.keys()
.filter(|u| !old_hot.users.contains_key(*u))
.collect();
added.sort();
let mut removed: Vec<&String> = old_hot.access.users.keys()
.filter(|u| !new_hot.access.users.contains_key(*u))
let mut removed: Vec<&String> = old_hot.users.keys()
.filter(|u| !new_hot.users.contains_key(*u))
.collect();
removed.sort();
let mut changed: Vec<&String> = new_hot.access.users.keys()
let mut changed: Vec<&String> = new_hot.users.keys()
.filter(|u| {
old_hot.access.users.get(*u)
.map(|s| s != &new_hot.access.users[*u])
old_hot.users.get(*u)
.map(|s| s != &new_hot.users[*u])
.unwrap_or(false)
})
.collect();
@@ -348,7 +746,7 @@ fn log_changes(
let host = resolve_link_host(new_cfg, detected_ip_v4, detected_ip_v6);
let port = new_cfg.general.links.public_port.unwrap_or(new_cfg.server.port);
for user in &added {
if let Some(secret) = new_hot.access.users.get(*user) {
if let Some(secret) = new_hot.users.get(*user) {
print_user_links(user, secret, &host, port, new_cfg);
}
}
@@ -367,28 +765,38 @@ fn log_changes(
}
}
if old_hot.access.user_max_tcp_conns != new_hot.access.user_max_tcp_conns {
if old_hot.user_max_tcp_conns != new_hot.user_max_tcp_conns {
info!(
"config reload: user_max_tcp_conns updated ({} entries)",
new_hot.access.user_max_tcp_conns.len()
new_hot.user_max_tcp_conns.len()
);
}
if old_hot.access.user_expirations != new_hot.access.user_expirations {
if old_hot.user_expirations != new_hot.user_expirations {
info!(
"config reload: user_expirations updated ({} entries)",
new_hot.access.user_expirations.len()
new_hot.user_expirations.len()
);
}
if old_hot.access.user_data_quota != new_hot.access.user_data_quota {
if old_hot.user_data_quota != new_hot.user_data_quota {
info!(
"config reload: user_data_quota updated ({} entries)",
new_hot.access.user_data_quota.len()
new_hot.user_data_quota.len()
);
}
if old_hot.access.user_max_unique_ips != new_hot.access.user_max_unique_ips {
if old_hot.user_max_unique_ips != new_hot.user_max_unique_ips {
info!(
"config reload: user_max_unique_ips updated ({} entries)",
new_hot.access.user_max_unique_ips.len()
new_hot.user_max_unique_ips.len()
);
}
if old_hot.user_max_unique_ips_mode != new_hot.user_max_unique_ips_mode
|| old_hot.user_max_unique_ips_window_secs
!= new_hot.user_max_unique_ips_window_secs
{
info!(
"config reload: user_max_unique_ips policy mode={:?} window={}s",
new_hot.user_max_unique_ips_mode,
new_hot.user_max_unique_ips_window_secs
);
}
}
@@ -415,15 +823,22 @@ fn reload_config(
}
let old_cfg = config_tx.borrow().clone();
let applied_cfg = overlay_hot_fields(&old_cfg, &new_cfg);
let old_hot = HotFields::from_config(&old_cfg);
let new_hot = HotFields::from_config(&new_cfg);
let applied_hot = HotFields::from_config(&applied_cfg);
let non_hot_changed = !config_equal(&applied_cfg, &new_cfg);
let hot_changed = old_hot != applied_hot;
if old_hot == new_hot {
if non_hot_changed {
warn_non_hot_changes(&old_cfg, &new_cfg, non_hot_changed);
}
if !hot_changed {
return;
}
if old_hot.dns_overrides != new_hot.dns_overrides
&& let Err(e) = crate::network::dns_overrides::install_entries(&new_hot.dns_overrides)
if old_hot.dns_overrides != applied_hot.dns_overrides
&& let Err(e) = crate::network::dns_overrides::install_entries(&applied_hot.dns_overrides)
{
error!(
"config reload: invalid network.dns_overrides: {}; keeping old config",
@@ -432,9 +847,15 @@ fn reload_config(
return;
}
warn_non_hot_changes(&old_cfg, &new_cfg);
log_changes(&old_hot, &new_hot, &new_cfg, log_tx, detected_ip_v4, detected_ip_v6);
config_tx.send(Arc::new(new_cfg)).ok();
log_changes(
&old_hot,
&applied_hot,
&applied_cfg,
log_tx,
detected_ip_v4,
detected_ip_v6,
);
config_tx.send(Arc::new(applied_cfg)).ok();
}
// ── Public API ────────────────────────────────────────────────────────────────
@@ -560,3 +981,80 @@ pub fn spawn_config_watcher(
(config_rx, log_rx)
}
#[cfg(test)]
mod tests {
use super::*;
fn sample_config() -> ProxyConfig {
ProxyConfig::default()
}
#[test]
fn overlay_applies_hot_and_preserves_non_hot() {
let old = sample_config();
let mut new = old.clone();
new.general.hardswap = !old.general.hardswap;
new.server.port = old.server.port.saturating_add(1);
let applied = overlay_hot_fields(&old, &new);
assert_eq!(applied.general.hardswap, new.general.hardswap);
assert_eq!(applied.server.port, old.server.port);
}
#[test]
fn non_hot_only_change_does_not_change_hot_snapshot() {
let old = sample_config();
let mut new = old.clone();
new.server.port = old.server.port.saturating_add(1);
let applied = overlay_hot_fields(&old, &new);
assert_eq!(HotFields::from_config(&old), HotFields::from_config(&applied));
assert_eq!(applied.server.port, old.server.port);
}
#[test]
fn bind_stale_mode_is_hot() {
let old = sample_config();
let mut new = old.clone();
new.general.me_bind_stale_mode = match old.general.me_bind_stale_mode {
MeBindStaleMode::Never => MeBindStaleMode::Ttl,
MeBindStaleMode::Ttl => MeBindStaleMode::Always,
MeBindStaleMode::Always => MeBindStaleMode::Never,
};
let applied = overlay_hot_fields(&old, &new);
assert_eq!(
applied.general.me_bind_stale_mode,
new.general.me_bind_stale_mode
);
assert_ne!(HotFields::from_config(&old), HotFields::from_config(&applied));
}
#[test]
fn keepalive_is_not_hot() {
let old = sample_config();
let mut new = old.clone();
new.general.me_keepalive_interval_secs = old.general.me_keepalive_interval_secs + 5;
let applied = overlay_hot_fields(&old, &new);
assert_eq!(
applied.general.me_keepalive_interval_secs,
old.general.me_keepalive_interval_secs
);
assert_eq!(HotFields::from_config(&old), HotFields::from_config(&applied));
}
#[test]
fn mixed_hot_and_non_hot_change_applies_only_hot_subset() {
let old = sample_config();
let mut new = old.clone();
new.general.hardswap = !old.general.hardswap;
new.general.use_middle_proxy = !old.general.use_middle_proxy;
let applied = overlay_hot_fields(&old, &new);
assert_eq!(applied.general.hardswap, new.general.hardswap);
assert_eq!(applied.general.use_middle_proxy, old.general.use_middle_proxy);
assert!(!config_equal(&applied, &new));
}
}

View File

@@ -1,7 +1,7 @@
#![allow(deprecated)]
use std::collections::HashMap;
use std::net::IpAddr;
use std::net::{IpAddr, SocketAddr};
use std::path::Path;
use rand::Rng;
@@ -203,6 +203,22 @@ impl ProxyConfig {
sanitize_ad_tag(&mut config.general.ad_tag);
if let Some(path) = &config.general.proxy_config_v4_cache_path
&& path.trim().is_empty()
{
return Err(ProxyError::Config(
"general.proxy_config_v4_cache_path cannot be empty when provided".to_string(),
));
}
if let Some(path) = &config.general.proxy_config_v6_cache_path
&& path.trim().is_empty()
{
return Err(ProxyError::Config(
"general.proxy_config_v6_cache_path cannot be empty when provided".to_string(),
));
}
if let Some(update_every) = config.general.update_every {
if update_every == 0 {
return Err(ProxyError::Config(
@@ -237,6 +253,12 @@ impl ProxyConfig {
));
}
if config.general.me_init_retry_attempts > 1_000_000 {
return Err(ProxyError::Config(
"general.me_init_retry_attempts must be within [0, 1000000]".to_string(),
));
}
if config.general.upstream_connect_retry_attempts == 0 {
return Err(ProxyError::Config(
"general.upstream_connect_retry_attempts must be > 0".to_string(),
@@ -249,12 +271,61 @@ impl ProxyConfig {
));
}
if config.general.rpc_proxy_req_every != 0
&& !(10..=300).contains(&config.general.rpc_proxy_req_every)
{
return Err(ProxyError::Config(
"general.rpc_proxy_req_every must be 0 or within [10, 300]".to_string(),
));
}
if config.access.user_max_unique_ips_window_secs == 0 {
return Err(ProxyError::Config(
"access.user_max_unique_ips_window_secs must be > 0".to_string(),
));
}
if config.general.me_reinit_every_secs == 0 {
return Err(ProxyError::Config(
"general.me_reinit_every_secs must be > 0".to_string(),
));
}
if config.general.me_single_endpoint_shadow_writers > 32 {
return Err(ProxyError::Config(
"general.me_single_endpoint_shadow_writers must be within [0, 32]".to_string(),
));
}
if config.general.me_adaptive_floor_min_writers_single_endpoint == 0
|| config.general.me_adaptive_floor_min_writers_single_endpoint > 32
{
return Err(ProxyError::Config(
"general.me_adaptive_floor_min_writers_single_endpoint must be within [1, 32]"
.to_string(),
));
}
if config.general.me_single_endpoint_outage_backoff_min_ms == 0 {
return Err(ProxyError::Config(
"general.me_single_endpoint_outage_backoff_min_ms must be > 0".to_string(),
));
}
if config.general.me_single_endpoint_outage_backoff_max_ms == 0 {
return Err(ProxyError::Config(
"general.me_single_endpoint_outage_backoff_max_ms must be > 0".to_string(),
));
}
if config.general.me_single_endpoint_outage_backoff_min_ms
> config.general.me_single_endpoint_outage_backoff_max_ms
{
return Err(ProxyError::Config(
"general.me_single_endpoint_outage_backoff_min_ms must be <= general.me_single_endpoint_outage_backoff_max_ms".to_string(),
));
}
if config.general.beobachten_minutes == 0 {
return Err(ProxyError::Config(
"general.beobachten_minutes must be > 0".to_string(),
@@ -305,12 +376,24 @@ impl ProxyConfig {
));
}
if config.general.me_snapshot_min_proxy_for_lines == 0 {
return Err(ProxyError::Config(
"general.me_snapshot_min_proxy_for_lines must be > 0".to_string(),
));
}
if config.general.proxy_secret_stable_snapshots == 0 {
return Err(ProxyError::Config(
"general.proxy_secret_stable_snapshots must be > 0".to_string(),
));
}
if config.general.me_reinit_trigger_channel == 0 {
return Err(ProxyError::Config(
"general.me_reinit_trigger_channel must be > 0".to_string(),
));
}
if !(32..=4096).contains(&config.general.proxy_secret_len_max) {
return Err(ProxyError::Config(
"general.proxy_secret_len_max must be within [32, 4096]".to_string(),
@@ -343,6 +426,42 @@ impl ProxyConfig {
));
}
if !(10..=5000).contains(&config.general.me_route_no_writer_wait_ms) {
return Err(ProxyError::Config(
"general.me_route_no_writer_wait_ms must be within [10, 5000]".to_string(),
));
}
if config.general.me_route_inline_recovery_attempts == 0 {
return Err(ProxyError::Config(
"general.me_route_inline_recovery_attempts must be > 0".to_string(),
));
}
if !(10..=30000).contains(&config.general.me_route_inline_recovery_wait_ms) {
return Err(ProxyError::Config(
"general.me_route_inline_recovery_wait_ms must be within [10, 30000]".to_string(),
));
}
if config.server.api.request_body_limit_bytes == 0 {
return Err(ProxyError::Config(
"server.api.request_body_limit_bytes must be > 0".to_string(),
));
}
if config.server.api.minimal_runtime_cache_ttl_ms > 60_000 {
return Err(ProxyError::Config(
"server.api.minimal_runtime_cache_ttl_ms must be within [0, 60000]".to_string(),
));
}
if config.server.api.listen.parse::<SocketAddr>().is_err() {
return Err(ProxyError::Config(
"server.api.listen must be in IP:PORT format".to_string(),
));
}
if config.general.effective_me_pool_force_close_secs() > 0
&& config.general.effective_me_pool_force_close_secs()
< config.general.me_pool_drain_ttl_secs
@@ -532,15 +651,16 @@ impl ProxyConfig {
)));
}
if let Some(tag) = &self.general.ad_tag {
for (user, tag) in &self.access.user_ad_tags {
let zeros = "00000000000000000000000000000000";
if !is_valid_ad_tag(tag) {
return Err(ProxyError::Config(
"general.ad_tag must be exactly 32 hex characters".to_string(),
));
return Err(ProxyError::Config(format!(
"access.user_ad_tags['{}'] must be exactly 32 hex characters",
user
)));
}
if tag == zeros {
warn!("ad_tag is all zeros; register a valid proxy tag via @MTProxybot to enable sponsored channel");
warn!(user = %user, "user ad_tag is all zeros; register a valid proxy tag via @MTProxybot to enable sponsored channel");
}
}
@@ -579,6 +699,59 @@ mod tests {
cfg.general.me_reconnect_fast_retry_count,
default_me_reconnect_fast_retry_count()
);
assert_eq!(
cfg.general.me_init_retry_attempts,
default_me_init_retry_attempts()
);
assert_eq!(
cfg.general.me2dc_fallback,
default_me2dc_fallback()
);
assert_eq!(
cfg.general.proxy_config_v4_cache_path,
default_proxy_config_v4_cache_path()
);
assert_eq!(
cfg.general.proxy_config_v6_cache_path,
default_proxy_config_v6_cache_path()
);
assert_eq!(
cfg.general.me_single_endpoint_shadow_writers,
default_me_single_endpoint_shadow_writers()
);
assert_eq!(
cfg.general.me_single_endpoint_outage_mode_enabled,
default_me_single_endpoint_outage_mode_enabled()
);
assert_eq!(
cfg.general.me_single_endpoint_outage_disable_quarantine,
default_me_single_endpoint_outage_disable_quarantine()
);
assert_eq!(
cfg.general.me_single_endpoint_outage_backoff_min_ms,
default_me_single_endpoint_outage_backoff_min_ms()
);
assert_eq!(
cfg.general.me_single_endpoint_outage_backoff_max_ms,
default_me_single_endpoint_outage_backoff_max_ms()
);
assert_eq!(
cfg.general.me_single_endpoint_shadow_rotate_every_secs,
default_me_single_endpoint_shadow_rotate_every_secs()
);
assert_eq!(cfg.general.me_floor_mode, MeFloorMode::default());
assert_eq!(
cfg.general.me_adaptive_floor_idle_secs,
default_me_adaptive_floor_idle_secs()
);
assert_eq!(
cfg.general.me_adaptive_floor_min_writers_single_endpoint,
default_me_adaptive_floor_min_writers_single_endpoint()
);
assert_eq!(
cfg.general.me_adaptive_floor_recover_grace_secs,
default_me_adaptive_floor_recover_grace_secs()
);
assert_eq!(
cfg.general.upstream_connect_retry_attempts,
default_upstream_connect_retry_attempts()
@@ -591,10 +764,40 @@ mod tests {
cfg.general.upstream_unhealthy_fail_threshold,
default_upstream_unhealthy_fail_threshold()
);
assert_eq!(
cfg.general.upstream_connect_failfast_hard_errors,
default_upstream_connect_failfast_hard_errors()
);
assert_eq!(
cfg.general.rpc_proxy_req_every,
default_rpc_proxy_req_every()
);
assert_eq!(cfg.general.update_every, default_update_every());
assert_eq!(cfg.server.listen_addr_ipv4, default_listen_addr_ipv4());
assert_eq!(cfg.server.listen_addr_ipv6, default_listen_addr_ipv6_opt());
assert_eq!(cfg.server.api.listen, default_api_listen());
assert_eq!(cfg.server.api.whitelist, default_api_whitelist());
assert_eq!(
cfg.server.api.request_body_limit_bytes,
default_api_request_body_limit_bytes()
);
assert_eq!(
cfg.server.api.minimal_runtime_enabled,
default_api_minimal_runtime_enabled()
);
assert_eq!(
cfg.server.api.minimal_runtime_cache_ttl_ms,
default_api_minimal_runtime_cache_ttl_ms()
);
assert_eq!(cfg.access.users, default_access_users());
assert_eq!(
cfg.access.user_max_unique_ips_mode,
UserMaxUniqueIpsMode::default()
);
assert_eq!(
cfg.access.user_max_unique_ips_window_secs,
default_user_max_unique_ips_window_secs()
);
}
#[test]
@@ -617,6 +820,56 @@ mod tests {
general.me_reconnect_fast_retry_count,
default_me_reconnect_fast_retry_count()
);
assert_eq!(
general.me_init_retry_attempts,
default_me_init_retry_attempts()
);
assert_eq!(general.me2dc_fallback, default_me2dc_fallback());
assert_eq!(
general.proxy_config_v4_cache_path,
default_proxy_config_v4_cache_path()
);
assert_eq!(
general.proxy_config_v6_cache_path,
default_proxy_config_v6_cache_path()
);
assert_eq!(
general.me_single_endpoint_shadow_writers,
default_me_single_endpoint_shadow_writers()
);
assert_eq!(
general.me_single_endpoint_outage_mode_enabled,
default_me_single_endpoint_outage_mode_enabled()
);
assert_eq!(
general.me_single_endpoint_outage_disable_quarantine,
default_me_single_endpoint_outage_disable_quarantine()
);
assert_eq!(
general.me_single_endpoint_outage_backoff_min_ms,
default_me_single_endpoint_outage_backoff_min_ms()
);
assert_eq!(
general.me_single_endpoint_outage_backoff_max_ms,
default_me_single_endpoint_outage_backoff_max_ms()
);
assert_eq!(
general.me_single_endpoint_shadow_rotate_every_secs,
default_me_single_endpoint_shadow_rotate_every_secs()
);
assert_eq!(general.me_floor_mode, MeFloorMode::default());
assert_eq!(
general.me_adaptive_floor_idle_secs,
default_me_adaptive_floor_idle_secs()
);
assert_eq!(
general.me_adaptive_floor_min_writers_single_endpoint,
default_me_adaptive_floor_min_writers_single_endpoint()
);
assert_eq!(
general.me_adaptive_floor_recover_grace_secs,
default_me_adaptive_floor_recover_grace_secs()
);
assert_eq!(
general.upstream_connect_retry_attempts,
default_upstream_connect_retry_attempts()
@@ -629,10 +882,29 @@ mod tests {
general.upstream_unhealthy_fail_threshold,
default_upstream_unhealthy_fail_threshold()
);
assert_eq!(
general.upstream_connect_failfast_hard_errors,
default_upstream_connect_failfast_hard_errors()
);
assert_eq!(general.rpc_proxy_req_every, default_rpc_proxy_req_every());
assert_eq!(general.update_every, default_update_every());
let server = ServerConfig::default();
assert_eq!(server.listen_addr_ipv6, Some(default_listen_addr_ipv6()));
assert_eq!(server.api.listen, default_api_listen());
assert_eq!(server.api.whitelist, default_api_whitelist());
assert_eq!(
server.api.request_body_limit_bytes,
default_api_request_body_limit_bytes()
);
assert_eq!(
server.api.minimal_runtime_enabled,
default_api_minimal_runtime_enabled()
);
assert_eq!(
server.api.minimal_runtime_cache_ttl_ms,
default_api_minimal_runtime_cache_ttl_ms()
);
let access = AccessConfig::default();
assert_eq!(access.users, default_access_users());
@@ -801,6 +1073,93 @@ mod tests {
let _ = std::fs::remove_file(path);
}
#[test]
fn me_single_endpoint_outage_backoff_range_is_validated() {
let toml = r#"
[general]
me_single_endpoint_outage_backoff_min_ms = 4000
me_single_endpoint_outage_backoff_max_ms = 3000
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_me_single_endpoint_outage_backoff_range_test.toml");
std::fs::write(&path, toml).unwrap();
let err = ProxyConfig::load(&path).unwrap_err().to_string();
assert!(err.contains(
"general.me_single_endpoint_outage_backoff_min_ms must be <= general.me_single_endpoint_outage_backoff_max_ms"
));
let _ = std::fs::remove_file(path);
}
#[test]
fn me_single_endpoint_shadow_writers_too_large_is_rejected() {
let toml = r#"
[general]
me_single_endpoint_shadow_writers = 33
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_me_single_endpoint_shadow_writers_limit_test.toml");
std::fs::write(&path, toml).unwrap();
let err = ProxyConfig::load(&path).unwrap_err().to_string();
assert!(err.contains("general.me_single_endpoint_shadow_writers must be within [0, 32]"));
let _ = std::fs::remove_file(path);
}
#[test]
fn me_adaptive_floor_min_writers_out_of_range_is_rejected() {
let toml = r#"
[general]
me_adaptive_floor_min_writers_single_endpoint = 0
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_me_adaptive_floor_min_writers_out_of_range_test.toml");
std::fs::write(&path, toml).unwrap();
let err = ProxyConfig::load(&path).unwrap_err().to_string();
assert!(
err.contains(
"general.me_adaptive_floor_min_writers_single_endpoint must be within [1, 32]"
)
);
let _ = std::fs::remove_file(path);
}
#[test]
fn me_floor_mode_adaptive_is_parsed() {
let toml = r#"
[general]
me_floor_mode = "adaptive"
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_me_floor_mode_adaptive_test.toml");
std::fs::write(&path, toml).unwrap();
let cfg = ProxyConfig::load(&path).unwrap();
assert_eq!(cfg.general.me_floor_mode, MeFloorMode::Adaptive);
let _ = std::fs::remove_file(path);
}
#[test]
fn upstream_connect_retry_attempts_zero_is_rejected() {
let toml = r#"
@@ -841,6 +1200,141 @@ mod tests {
let _ = std::fs::remove_file(path);
}
#[test]
fn rpc_proxy_req_every_out_of_range_is_rejected() {
let toml = r#"
[general]
rpc_proxy_req_every = 9
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_rpc_proxy_req_every_out_of_range_test.toml");
std::fs::write(&path, toml).unwrap();
let err = ProxyConfig::load(&path).unwrap_err().to_string();
assert!(err.contains("general.rpc_proxy_req_every must be 0 or within [10, 300]"));
let _ = std::fs::remove_file(path);
}
#[test]
fn rpc_proxy_req_every_zero_and_valid_range_are_accepted() {
let toml_zero = r#"
[general]
rpc_proxy_req_every = 0
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path_zero = dir.join("telemt_rpc_proxy_req_every_zero_ok_test.toml");
std::fs::write(&path_zero, toml_zero).unwrap();
let cfg_zero = ProxyConfig::load(&path_zero).unwrap();
assert_eq!(cfg_zero.general.rpc_proxy_req_every, 0);
let _ = std::fs::remove_file(path_zero);
let toml_valid = r#"
[general]
rpc_proxy_req_every = 40
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let path_valid = dir.join("telemt_rpc_proxy_req_every_valid_ok_test.toml");
std::fs::write(&path_valid, toml_valid).unwrap();
let cfg_valid = ProxyConfig::load(&path_valid).unwrap();
assert_eq!(cfg_valid.general.rpc_proxy_req_every, 40);
let _ = std::fs::remove_file(path_valid);
}
#[test]
fn me_route_no_writer_wait_ms_out_of_range_is_rejected() {
let toml = r#"
[general]
me_route_no_writer_wait_ms = 5
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_me_route_no_writer_wait_ms_out_of_range_test.toml");
std::fs::write(&path, toml).unwrap();
let err = ProxyConfig::load(&path).unwrap_err().to_string();
assert!(err.contains("general.me_route_no_writer_wait_ms must be within [10, 5000]"));
let _ = std::fs::remove_file(path);
}
#[test]
fn me_route_no_writer_mode_is_parsed() {
let toml = r#"
[general]
me_route_no_writer_mode = "inline_recovery_legacy"
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_me_route_no_writer_mode_parse_test.toml");
std::fs::write(&path, toml).unwrap();
let cfg = ProxyConfig::load(&path).unwrap();
assert_eq!(
cfg.general.me_route_no_writer_mode,
crate::config::MeRouteNoWriterMode::InlineRecoveryLegacy
);
let _ = std::fs::remove_file(path);
}
#[test]
fn proxy_config_cache_paths_empty_are_rejected() {
let toml = r#"
[general]
proxy_config_v4_cache_path = " "
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_proxy_config_v4_cache_path_empty_test.toml");
std::fs::write(&path, toml).unwrap();
let err = ProxyConfig::load(&path).unwrap_err().to_string();
assert!(err.contains("general.proxy_config_v4_cache_path cannot be empty"));
let _ = std::fs::remove_file(path);
let toml_v6 = r#"
[general]
proxy_config_v6_cache_path = ""
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let path_v6 = dir.join("telemt_proxy_config_v6_cache_path_empty_test.toml");
std::fs::write(&path_v6, toml_v6).unwrap();
let err_v6 = ProxyConfig::load(&path_v6).unwrap_err().to_string();
assert!(err_v6.contains("general.proxy_config_v6_cache_path cannot be empty"));
let _ = std::fs::remove_file(path_v6);
}
#[test]
fn me_hardswap_warmup_defaults_are_set() {
let toml = r#"
@@ -1036,6 +1530,28 @@ mod tests {
let _ = std::fs::remove_file(path);
}
#[test]
fn api_minimal_runtime_cache_ttl_out_of_range_is_rejected() {
let toml = r#"
[server.api]
enabled = true
listen = "127.0.0.1:9091"
minimal_runtime_cache_ttl_ms = 70000
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_api_minimal_runtime_cache_ttl_invalid_test.toml");
std::fs::write(&path, toml).unwrap();
let err = ProxyConfig::load(&path).unwrap_err().to_string();
assert!(err.contains("server.api.minimal_runtime_cache_ttl_ms must be within [0, 60000]"));
let _ = std::fs::remove_file(path);
}
#[test]
fn force_close_bumped_when_below_drain_ttl() {
let toml = r#"
@@ -1100,6 +1616,27 @@ mod tests {
let _ = std::fs::remove_file(path);
}
#[test]
fn invalid_user_ad_tag_reports_access_user_ad_tags_key() {
let toml = r#"
[censorship]
tls_domain = "example.com"
[access.users]
alice = "00000000000000000000000000000000"
[access.user_ad_tags]
alice = "not_hex"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_invalid_user_ad_tag_message_test.toml");
std::fs::write(&path, toml).unwrap();
let cfg = ProxyConfig::load(&path).unwrap();
let err = cfg.validate().unwrap_err().to_string();
assert!(err.contains("access.user_ad_tags['alice'] must be exactly 32 hex characters"));
let _ = std::fs::remove_file(path);
}
#[test]
fn invalid_dns_override_is_rejected() {
let toml = r#"

View File

@@ -130,6 +130,97 @@ impl MeSocksKdfPolicy {
}
}
/// Stale ME writer bind policy during drain window.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum MeBindStaleMode {
Never,
#[default]
Ttl,
Always,
}
impl MeBindStaleMode {
pub fn as_u8(self) -> u8 {
match self {
MeBindStaleMode::Never => 0,
MeBindStaleMode::Ttl => 1,
MeBindStaleMode::Always => 2,
}
}
pub fn from_u8(raw: u8) -> Self {
match raw {
0 => MeBindStaleMode::Never,
2 => MeBindStaleMode::Always,
_ => MeBindStaleMode::Ttl,
}
}
}
/// Middle-End writer floor policy mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum MeFloorMode {
Static,
#[default]
Adaptive,
}
impl MeFloorMode {
pub fn as_u8(self) -> u8 {
match self {
MeFloorMode::Static => 0,
MeFloorMode::Adaptive => 1,
}
}
pub fn from_u8(raw: u8) -> Self {
match raw {
1 => MeFloorMode::Adaptive,
_ => MeFloorMode::Static,
}
}
}
/// Middle-End route behavior when no writer is immediately available.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MeRouteNoWriterMode {
#[default]
AsyncRecoveryFailfast,
InlineRecoveryLegacy,
}
impl MeRouteNoWriterMode {
pub fn as_u8(self) -> u8 {
match self {
MeRouteNoWriterMode::AsyncRecoveryFailfast => 0,
MeRouteNoWriterMode::InlineRecoveryLegacy => 1,
}
}
pub fn from_u8(raw: u8) -> Self {
match raw {
1 => MeRouteNoWriterMode::InlineRecoveryLegacy,
_ => MeRouteNoWriterMode::AsyncRecoveryFailfast,
}
}
}
/// Per-user unique source IP limit mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum UserMaxUniqueIpsMode {
/// Count only currently active source IPs.
#[default]
ActiveWindow,
/// Count source IPs seen within the recent time window.
TimeWindow,
/// Enforce both active and recent-window limits at the same time.
Combined,
}
/// Telemetry controls for hot-path counters and ME diagnostics.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TelemetryConfig {
@@ -247,14 +338,23 @@ pub struct GeneralConfig {
#[serde(default = "default_true")]
pub use_middle_proxy: bool,
#[serde(default)]
pub ad_tag: Option<String>,
/// Path to proxy-secret binary file (auto-downloaded if absent).
/// Infrastructure secret from https://core.telegram.org/getProxySecret.
#[serde(default = "default_proxy_secret_path")]
pub proxy_secret_path: Option<String>,
/// Optional path to cache raw getProxyConfig (IPv4) snapshot for startup fallback.
#[serde(default = "default_proxy_config_v4_cache_path")]
pub proxy_config_v4_cache_path: Option<String>,
/// Optional path to cache raw getProxyConfigV6 snapshot for startup fallback.
#[serde(default = "default_proxy_config_v6_cache_path")]
pub proxy_config_v6_cache_path: Option<String>,
/// Global ad_tag (32 hex chars from @MTProxybot). Fallback when user has no per-user tag in access.user_ad_tags.
#[serde(default)]
pub ad_tag: Option<String>,
/// Public IP override for middle-proxy NAT environments.
/// When set, this IP is used in ME key derivation and RPC_PROXY_REQ "our_addr".
#[serde(default)]
@@ -286,6 +386,15 @@ pub struct GeneralConfig {
#[serde(default = "default_middle_proxy_warm_standby")]
pub middle_proxy_warm_standby: usize,
/// Startup retries for Middle-End pool initialization before ME→Direct fallback.
/// 0 means unlimited retries.
#[serde(default = "default_me_init_retry_attempts")]
pub me_init_retry_attempts: u32,
/// Allow fallback from Middle-End mode to direct DC when ME startup cannot be initialized.
#[serde(default = "default_me2dc_fallback")]
pub me2dc_fallback: bool,
/// Enable ME keepalive padding frames.
#[serde(default = "default_true")]
pub me_keepalive_enabled: bool,
@@ -302,6 +411,11 @@ pub struct GeneralConfig {
#[serde(default = "default_true")]
pub me_keepalive_payload_random: bool,
/// Interval in seconds for service RPC_PROXY_REQ activity signals to ME.
/// 0 disables service activity signals.
#[serde(default = "default_rpc_proxy_req_every")]
pub rpc_proxy_req_every: u64,
/// Max pending ciphertext buffer per client writer (bytes).
/// Controls FakeTLS backpressure vs throughput.
#[serde(default = "default_crypto_pending_buffer")]
@@ -365,6 +479,47 @@ pub struct GeneralConfig {
#[serde(default = "default_me_reconnect_fast_retry_count")]
pub me_reconnect_fast_retry_count: u32,
/// Number of additional reserve writers for DC groups with exactly one endpoint.
#[serde(default = "default_me_single_endpoint_shadow_writers")]
pub me_single_endpoint_shadow_writers: u8,
/// Enable aggressive outage recovery mode for single-endpoint DC groups.
#[serde(default = "default_me_single_endpoint_outage_mode_enabled")]
pub me_single_endpoint_outage_mode_enabled: bool,
/// Ignore endpoint quarantine while in single-endpoint outage mode.
#[serde(default = "default_me_single_endpoint_outage_disable_quarantine")]
pub me_single_endpoint_outage_disable_quarantine: bool,
/// Minimum reconnect backoff in ms for single-endpoint outage mode.
#[serde(default = "default_me_single_endpoint_outage_backoff_min_ms")]
pub me_single_endpoint_outage_backoff_min_ms: u64,
/// Maximum reconnect backoff in ms for single-endpoint outage mode.
#[serde(default = "default_me_single_endpoint_outage_backoff_max_ms")]
pub me_single_endpoint_outage_backoff_max_ms: u64,
/// Periodic shadow writer rotation interval in seconds for single-endpoint DC groups.
/// Set to 0 to disable periodic shadow rotation.
#[serde(default = "default_me_single_endpoint_shadow_rotate_every_secs")]
pub me_single_endpoint_shadow_rotate_every_secs: u64,
/// Floor policy mode for ME writer targets.
#[serde(default)]
pub me_floor_mode: MeFloorMode,
/// Idle time in seconds before adaptive floor can reduce single-endpoint writer target.
#[serde(default = "default_me_adaptive_floor_idle_secs")]
pub me_adaptive_floor_idle_secs: u64,
/// Minimum writer target for single-endpoint DC groups in adaptive floor mode.
#[serde(default = "default_me_adaptive_floor_min_writers_single_endpoint")]
pub me_adaptive_floor_min_writers_single_endpoint: u8,
/// Grace period in seconds to hold static floor after activity in adaptive mode.
#[serde(default = "default_me_adaptive_floor_recover_grace_secs")]
pub me_adaptive_floor_recover_grace_secs: u64,
/// Connect attempts for the selected upstream before returning error/fallback.
#[serde(default = "default_upstream_connect_retry_attempts")]
pub upstream_connect_retry_attempts: u32,
@@ -377,6 +532,10 @@ pub struct GeneralConfig {
#[serde(default = "default_upstream_unhealthy_fail_threshold")]
pub upstream_unhealthy_fail_threshold: u32,
/// Skip additional retries for hard non-transient upstream connect errors.
#[serde(default = "default_upstream_connect_failfast_hard_errors")]
pub upstream_connect_failfast_hard_errors: bool,
/// Ignore STUN/interface IP mismatch (keep using Middle Proxy even if NAT detected).
#[serde(default)]
pub stun_iface_mismatch_ignore: bool,
@@ -385,6 +544,10 @@ pub struct GeneralConfig {
#[serde(default = "default_unknown_dc_log_path")]
pub unknown_dc_log_path: Option<String>,
/// Enable unknown-DC file logging.
#[serde(default = "default_unknown_dc_file_log_enabled")]
pub unknown_dc_file_log_enabled: bool,
#[serde(default)]
pub log_level: LogLevel,
@@ -412,6 +575,22 @@ pub struct GeneralConfig {
#[serde(default = "default_me_route_backpressure_high_watermark_pct")]
pub me_route_backpressure_high_watermark_pct: u8,
/// ME route behavior when no writer is immediately available.
#[serde(default)]
pub me_route_no_writer_mode: MeRouteNoWriterMode,
/// Maximum wait time in milliseconds for async-recovery failfast mode.
#[serde(default = "default_me_route_no_writer_wait_ms")]
pub me_route_no_writer_wait_ms: u64,
/// Number of inline recovery attempts in legacy mode.
#[serde(default = "default_me_route_inline_recovery_attempts")]
pub me_route_inline_recovery_attempts: u32,
/// Maximum wait time in milliseconds for inline recovery in legacy mode.
#[serde(default = "default_me_route_inline_recovery_wait_ms")]
pub me_route_inline_recovery_wait_ms: u64,
/// [general.links] — proxy link generation overrides.
#[serde(default)]
pub links: LinksConfig,
@@ -453,6 +632,18 @@ pub struct GeneralConfig {
#[serde(default = "default_me_config_apply_cooldown_secs")]
pub me_config_apply_cooldown_secs: u64,
/// Ensure getProxyConfig snapshots are applied only for 2xx HTTP responses.
#[serde(default = "default_me_snapshot_require_http_2xx")]
pub me_snapshot_require_http_2xx: bool,
/// Reject empty getProxyConfig snapshots instead of marking them applied.
#[serde(default = "default_me_snapshot_reject_empty_map")]
pub me_snapshot_reject_empty_map: bool,
/// Minimum parsed `proxy_for` rows required to accept a snapshot.
#[serde(default = "default_me_snapshot_min_proxy_for_lines")]
pub me_snapshot_min_proxy_for_lines: u32,
/// Number of identical getProxySecret snapshots required before runtime secret rotation.
#[serde(default = "default_proxy_secret_stable_snapshots")]
pub proxy_secret_stable_snapshots: u8,
@@ -461,6 +652,10 @@ pub struct GeneralConfig {
#[serde(default = "default_proxy_secret_rotate_runtime")]
pub proxy_secret_rotate_runtime: bool,
/// Keep key-selector and secret bytes from one snapshot during ME handshake.
#[serde(default = "default_me_secret_atomic_snapshot")]
pub me_secret_atomic_snapshot: bool,
/// Maximum allowed proxy-secret length in bytes for startup and runtime refresh.
#[serde(default = "default_proxy_secret_len_max")]
pub proxy_secret_len_max: usize,
@@ -470,6 +665,14 @@ pub struct GeneralConfig {
#[serde(default = "default_me_pool_drain_ttl_secs")]
pub me_pool_drain_ttl_secs: u64,
/// Policy for new binds on stale draining writers.
#[serde(default)]
pub me_bind_stale_mode: MeBindStaleMode,
/// TTL for stale bind allowance when `me_bind_stale_mode = \"ttl\"`.
#[serde(default = "default_me_bind_stale_ttl_secs")]
pub me_bind_stale_ttl_secs: u64,
/// Minimum desired-DC coverage ratio required before draining stale writers.
/// Range: 0.0..=1.0.
#[serde(default = "default_me_pool_min_fresh_ratio")]
@@ -490,6 +693,22 @@ pub struct GeneralConfig {
#[serde(default = "default_proxy_config_reload_secs")]
pub proxy_config_auto_reload_secs: u64,
/// Serialize ME reinit cycles across all trigger sources.
#[serde(default = "default_me_reinit_singleflight")]
pub me_reinit_singleflight: bool,
/// Trigger queue capacity for reinit scheduler.
#[serde(default = "default_me_reinit_trigger_channel")]
pub me_reinit_trigger_channel: usize,
/// Trigger coalescing window before starting a reinit cycle.
#[serde(default = "default_me_reinit_coalesce_window_ms")]
pub me_reinit_coalesce_window_ms: u64,
/// Deterministic candidate sort for ME writer binding path.
#[serde(default = "default_me_deterministic_writer_sort")]
pub me_deterministic_writer_sort: bool,
/// Enable NTP drift check at startup.
#[serde(default = "default_ntp_check")]
pub ntp_check: bool,
@@ -516,6 +735,8 @@ impl Default for GeneralConfig {
use_middle_proxy: default_true(),
ad_tag: None,
proxy_secret_path: default_proxy_secret_path(),
proxy_config_v4_cache_path: default_proxy_config_v4_cache_path(),
proxy_config_v6_cache_path: default_proxy_config_v6_cache_path(),
middle_proxy_nat_ip: None,
middle_proxy_nat_probe: default_true(),
middle_proxy_nat_stun: default_middle_proxy_nat_stun(),
@@ -523,10 +744,13 @@ impl Default for GeneralConfig {
stun_nat_probe_concurrency: default_stun_nat_probe_concurrency(),
middle_proxy_pool_size: default_pool_size(),
middle_proxy_warm_standby: default_middle_proxy_warm_standby(),
me_init_retry_attempts: default_me_init_retry_attempts(),
me2dc_fallback: default_me2dc_fallback(),
me_keepalive_enabled: default_true(),
me_keepalive_interval_secs: default_keepalive_interval(),
me_keepalive_jitter_secs: default_keepalive_jitter(),
me_keepalive_payload_random: default_true(),
rpc_proxy_req_every: default_rpc_proxy_req_every(),
me_warmup_stagger_enabled: default_true(),
me_warmup_step_delay_ms: default_warmup_step_delay_ms(),
me_warmup_step_jitter_ms: default_warmup_step_jitter_ms(),
@@ -534,11 +758,23 @@ impl Default for GeneralConfig {
me_reconnect_backoff_base_ms: default_reconnect_backoff_base_ms(),
me_reconnect_backoff_cap_ms: default_reconnect_backoff_cap_ms(),
me_reconnect_fast_retry_count: default_me_reconnect_fast_retry_count(),
me_single_endpoint_shadow_writers: default_me_single_endpoint_shadow_writers(),
me_single_endpoint_outage_mode_enabled: default_me_single_endpoint_outage_mode_enabled(),
me_single_endpoint_outage_disable_quarantine: default_me_single_endpoint_outage_disable_quarantine(),
me_single_endpoint_outage_backoff_min_ms: default_me_single_endpoint_outage_backoff_min_ms(),
me_single_endpoint_outage_backoff_max_ms: default_me_single_endpoint_outage_backoff_max_ms(),
me_single_endpoint_shadow_rotate_every_secs: default_me_single_endpoint_shadow_rotate_every_secs(),
me_floor_mode: MeFloorMode::default(),
me_adaptive_floor_idle_secs: default_me_adaptive_floor_idle_secs(),
me_adaptive_floor_min_writers_single_endpoint: default_me_adaptive_floor_min_writers_single_endpoint(),
me_adaptive_floor_recover_grace_secs: default_me_adaptive_floor_recover_grace_secs(),
upstream_connect_retry_attempts: default_upstream_connect_retry_attempts(),
upstream_connect_retry_backoff_ms: default_upstream_connect_retry_backoff_ms(),
upstream_unhealthy_fail_threshold: default_upstream_unhealthy_fail_threshold(),
upstream_connect_failfast_hard_errors: default_upstream_connect_failfast_hard_errors(),
stun_iface_mismatch_ignore: false,
unknown_dc_log_path: default_unknown_dc_log_path(),
unknown_dc_file_log_enabled: default_unknown_dc_file_log_enabled(),
log_level: LogLevel::Normal,
disable_colors: false,
telemetry: TelemetryConfig::default(),
@@ -546,6 +782,10 @@ impl Default for GeneralConfig {
me_route_backpressure_base_timeout_ms: default_me_route_backpressure_base_timeout_ms(),
me_route_backpressure_high_timeout_ms: default_me_route_backpressure_high_timeout_ms(),
me_route_backpressure_high_watermark_pct: default_me_route_backpressure_high_watermark_pct(),
me_route_no_writer_mode: MeRouteNoWriterMode::default(),
me_route_no_writer_wait_ms: default_me_route_no_writer_wait_ms(),
me_route_inline_recovery_attempts: default_me_route_inline_recovery_attempts(),
me_route_inline_recovery_wait_ms: default_me_route_inline_recovery_wait_ms(),
links: LinksConfig::default(),
crypto_pending_buffer: default_crypto_pending_buffer(),
max_client_frame: default_max_client_frame(),
@@ -564,14 +804,24 @@ impl Default for GeneralConfig {
me_hardswap_warmup_pass_backoff_base_ms: default_me_hardswap_warmup_pass_backoff_base_ms(),
me_config_stable_snapshots: default_me_config_stable_snapshots(),
me_config_apply_cooldown_secs: default_me_config_apply_cooldown_secs(),
me_snapshot_require_http_2xx: default_me_snapshot_require_http_2xx(),
me_snapshot_reject_empty_map: default_me_snapshot_reject_empty_map(),
me_snapshot_min_proxy_for_lines: default_me_snapshot_min_proxy_for_lines(),
proxy_secret_stable_snapshots: default_proxy_secret_stable_snapshots(),
proxy_secret_rotate_runtime: default_proxy_secret_rotate_runtime(),
me_secret_atomic_snapshot: default_me_secret_atomic_snapshot(),
proxy_secret_len_max: default_proxy_secret_len_max(),
me_pool_drain_ttl_secs: default_me_pool_drain_ttl_secs(),
me_bind_stale_mode: MeBindStaleMode::default(),
me_bind_stale_ttl_secs: default_me_bind_stale_ttl_secs(),
me_pool_min_fresh_ratio: default_me_pool_min_fresh_ratio(),
me_reinit_drain_timeout_secs: default_me_reinit_drain_timeout_secs(),
proxy_secret_auto_reload_secs: default_proxy_secret_reload_secs(),
proxy_config_auto_reload_secs: default_proxy_config_reload_secs(),
me_reinit_singleflight: default_me_reinit_singleflight(),
me_reinit_trigger_channel: default_me_reinit_trigger_channel(),
me_reinit_coalesce_window_ms: default_me_reinit_coalesce_window_ms(),
me_deterministic_writer_sort: default_me_deterministic_writer_sort(),
ntp_check: default_ntp_check(),
ntp_servers: default_ntp_servers(),
auto_degradation_enabled: default_true(),
@@ -627,6 +877,58 @@ impl Default for LinksConfig {
}
}
/// API settings for control-plane endpoints.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ApiConfig {
/// Enable or disable REST API.
#[serde(default)]
pub enabled: bool,
/// Listen address for API in `IP:PORT` format.
#[serde(default = "default_api_listen")]
pub listen: String,
/// CIDR whitelist allowed to access API.
#[serde(default = "default_api_whitelist")]
pub whitelist: Vec<IpNetwork>,
/// Optional static value for `Authorization` header validation.
/// Empty string disables header auth.
#[serde(default)]
pub auth_header: String,
/// Maximum accepted HTTP request body size in bytes.
#[serde(default = "default_api_request_body_limit_bytes")]
pub request_body_limit_bytes: usize,
/// Enable runtime snapshots that require read-lock aggregation on API request path.
#[serde(default = "default_api_minimal_runtime_enabled")]
pub minimal_runtime_enabled: bool,
/// Cache TTL for minimal runtime snapshots in milliseconds (0 disables caching).
#[serde(default = "default_api_minimal_runtime_cache_ttl_ms")]
pub minimal_runtime_cache_ttl_ms: u64,
/// Read-only mode: mutating endpoints are rejected.
#[serde(default)]
pub read_only: bool,
}
impl Default for ApiConfig {
fn default() -> Self {
Self {
enabled: false,
listen: default_api_listen(),
whitelist: default_api_whitelist(),
auth_header: String::new(),
request_body_limit_bytes: default_api_request_body_limit_bytes(),
minimal_runtime_enabled: default_api_minimal_runtime_enabled(),
minimal_runtime_cache_ttl_ms: default_api_minimal_runtime_cache_ttl_ms(),
read_only: false,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerConfig {
#[serde(default = "default_port")]
@@ -662,6 +964,9 @@ pub struct ServerConfig {
#[serde(default = "default_metrics_whitelist")]
pub metrics_whitelist: Vec<IpNetwork>,
#[serde(default, alias = "admin_api")]
pub api: ApiConfig,
#[serde(default)]
pub listeners: Vec<ListenerConfig>,
}
@@ -678,6 +983,7 @@ impl Default for ServerConfig {
proxy_protocol: false,
metrics_port: None,
metrics_whitelist: default_metrics_whitelist(),
api: ApiConfig::default(),
listeners: Vec::new(),
}
}
@@ -807,6 +1113,10 @@ pub struct AccessConfig {
#[serde(default = "default_access_users")]
pub users: HashMap<String, String>,
/// Per-user ad_tag (32 hex chars from @MTProxybot).
#[serde(default)]
pub user_ad_tags: HashMap<String, String>,
#[serde(default)]
pub user_max_tcp_conns: HashMap<String, usize>,
@@ -819,6 +1129,12 @@ pub struct AccessConfig {
#[serde(default)]
pub user_max_unique_ips: HashMap<String, usize>,
#[serde(default)]
pub user_max_unique_ips_mode: UserMaxUniqueIpsMode,
#[serde(default = "default_user_max_unique_ips_window_secs")]
pub user_max_unique_ips_window_secs: u64,
#[serde(default = "default_replay_check_len")]
pub replay_check_len: usize,
@@ -833,10 +1149,13 @@ impl Default for AccessConfig {
fn default() -> Self {
Self {
users: default_access_users(),
user_ad_tags: HashMap::new(),
user_max_tcp_conns: HashMap::new(),
user_expirations: HashMap::new(),
user_data_quota: HashMap::new(),
user_max_unique_ips: HashMap::new(),
user_max_unique_ips_mode: UserMaxUniqueIpsMode::default(),
user_max_unique_ips_window_secs: default_user_max_unique_ips_window_secs(),
replay_check_len: default_replay_check_len(),
replay_window_secs: default_replay_window_secs(),
ignore_time_skew: false,

View File

@@ -1,252 +1,278 @@
// src/ip_tracker.rs
// IP address tracking and limiting for users
// IP address tracking and per-user unique IP limiting.
#![allow(dead_code)]
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::net::IpAddr;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::RwLock;
/// Трекер уникальных IP-адресов для каждого пользователя MTProxy
///
/// Предоставляет thread-safe механизм для:
/// - Отслеживания активных IP-адресов каждого пользователя
/// - Ограничения количества уникальных IP на пользователя
/// - Автоматической очистки при отключении клиентов
use crate::config::UserMaxUniqueIpsMode;
#[derive(Debug, Clone)]
pub struct UserIpTracker {
/// Маппинг: Имя пользователя -> Множество активных IP-адресов
active_ips: Arc<RwLock<HashMap<String, HashSet<IpAddr>>>>,
/// Маппинг: Имя пользователя -> Максимально разрешенное количество уникальных IP
active_ips: Arc<RwLock<HashMap<String, HashMap<IpAddr, usize>>>>,
recent_ips: Arc<RwLock<HashMap<String, HashMap<IpAddr, Instant>>>>,
max_ips: Arc<RwLock<HashMap<String, usize>>>,
limit_mode: Arc<RwLock<UserMaxUniqueIpsMode>>,
limit_window: Arc<RwLock<Duration>>,
}
impl UserIpTracker {
/// Создать новый пустой трекер
pub fn new() -> Self {
Self {
active_ips: Arc::new(RwLock::new(HashMap::new())),
recent_ips: Arc::new(RwLock::new(HashMap::new())),
max_ips: Arc::new(RwLock::new(HashMap::new())),
limit_mode: Arc::new(RwLock::new(UserMaxUniqueIpsMode::ActiveWindow)),
limit_window: Arc::new(RwLock::new(Duration::from_secs(30))),
}
}
/// Установить лимит уникальных IP для конкретного пользователя
///
/// # Arguments
/// * `username` - Имя пользователя
/// * `max_ips` - Максимальное количество одновременно активных IP-адресов
pub async fn set_limit_policy(&self, mode: UserMaxUniqueIpsMode, window_secs: u64) {
{
let mut current_mode = self.limit_mode.write().await;
*current_mode = mode;
}
let mut current_window = self.limit_window.write().await;
*current_window = Duration::from_secs(window_secs.max(1));
}
pub async fn set_user_limit(&self, username: &str, max_ips: usize) {
let mut limits = self.max_ips.write().await;
limits.insert(username.to_string(), max_ips);
}
/// Загрузить лимиты из конфигурации
///
/// # Arguments
/// * `limits` - HashMap с лимитами из config.toml
pub async fn load_limits(&self, limits: &HashMap<String, usize>) {
let mut max_ips = self.max_ips.write().await;
for (user, limit) in limits {
max_ips.insert(user.clone(), *limit);
}
pub async fn remove_user_limit(&self, username: &str) {
let mut limits = self.max_ips.write().await;
limits.remove(username);
}
pub async fn load_limits(&self, limits: &HashMap<String, usize>) {
let mut max_ips = self.max_ips.write().await;
max_ips.clone_from(limits);
}
fn prune_recent(user_recent: &mut HashMap<IpAddr, Instant>, now: Instant, window: Duration) {
if user_recent.is_empty() {
return;
}
user_recent.retain(|_, seen_at| now.duration_since(*seen_at) <= window);
}
/// Проверить, может ли пользователь подключиться с данного IP-адреса
/// и добавить IP в список активных, если проверка успешна
///
/// # Arguments
/// * `username` - Имя пользователя
/// * `ip` - IP-адрес клиента
///
/// # Returns
/// * `Ok(())` - Подключение разрешено, IP добавлен в активные
/// * `Err(String)` - Подключение отклонено с описанием причины
pub async fn check_and_add(&self, username: &str, ip: IpAddr) -> Result<(), String> {
// Получаем лимит для пользователя
let max_ips = self.max_ips.read().await;
let limit = match max_ips.get(username) {
Some(limit) => *limit,
None => {
// Если лимит не задан - разрешаем безлимитный доступ
drop(max_ips);
let mut active_ips = self.active_ips.write().await;
let user_ips = active_ips
.entry(username.to_string())
.or_insert_with(HashSet::new);
user_ips.insert(ip);
return Ok(());
}
let limit = {
let max_ips = self.max_ips.read().await;
max_ips.get(username).copied()
};
drop(max_ips);
let mode = *self.limit_mode.read().await;
let window = *self.limit_window.read().await;
let now = Instant::now();
// Проверяем и обновляем активные IP
let mut active_ips = self.active_ips.write().await;
let user_ips = active_ips
let user_active = active_ips
.entry(username.to_string())
.or_insert_with(HashSet::new);
.or_insert_with(HashMap::new);
// Если IP уже есть в списке - это повторное подключение, разрешаем
if user_ips.contains(&ip) {
let mut recent_ips = self.recent_ips.write().await;
let user_recent = recent_ips
.entry(username.to_string())
.or_insert_with(HashMap::new);
Self::prune_recent(user_recent, now, window);
if let Some(count) = user_active.get_mut(&ip) {
*count = count.saturating_add(1);
user_recent.insert(ip, now);
return Ok(());
}
// Проверяем, не превышен ли лимит
if user_ips.len() >= limit {
return Err(format!(
"IP limit reached for user '{}': {}/{} unique IPs already connected",
username,
user_ips.len(),
limit
));
if let Some(limit) = limit {
let active_limit_reached = user_active.len() >= limit;
let recent_limit_reached = user_recent.len() >= limit;
let deny = match mode {
UserMaxUniqueIpsMode::ActiveWindow => active_limit_reached,
UserMaxUniqueIpsMode::TimeWindow => recent_limit_reached,
UserMaxUniqueIpsMode::Combined => active_limit_reached || recent_limit_reached,
};
if deny {
return Err(format!(
"IP limit reached for user '{}': active={}/{} recent={}/{} mode={:?}",
username,
user_active.len(),
limit,
user_recent.len(),
limit,
mode
));
}
}
// Лимит не превышен - добавляем новый IP
user_ips.insert(ip);
user_active.insert(ip, 1);
user_recent.insert(ip, now);
Ok(())
}
/// Удалить IP-адрес из списка активных при отключении клиента
///
/// # Arguments
/// * `username` - Имя пользователя
/// * `ip` - IP-адрес отключившегося клиента
pub async fn remove_ip(&self, username: &str, ip: IpAddr) {
let mut active_ips = self.active_ips.write().await;
if let Some(user_ips) = active_ips.get_mut(username) {
user_ips.remove(&ip);
// Если у пользователя не осталось активных IP - удаляем запись
// для экономии памяти
if let Some(count) = user_ips.get_mut(&ip) {
if *count > 1 {
*count -= 1;
} else {
user_ips.remove(&ip);
}
}
if user_ips.is_empty() {
active_ips.remove(username);
}
}
}
/// Получить текущее количество активных IP-адресов для пользователя
///
/// # Arguments
/// * `username` - Имя пользователя
///
/// # Returns
/// Количество уникальных активных IP-адресов
pub async fn get_active_ip_count(&self, username: &str) -> usize {
let active_ips = self.active_ips.read().await;
active_ips
.get(username)
.map(|ips| ips.len())
.unwrap_or(0)
pub async fn get_recent_counts_for_users(&self, users: &[String]) -> HashMap<String, usize> {
let window = *self.limit_window.read().await;
let now = Instant::now();
let recent_ips = self.recent_ips.read().await;
let mut counts = HashMap::with_capacity(users.len());
for user in users {
let count = if let Some(user_recent) = recent_ips.get(user) {
user_recent
.values()
.filter(|seen_at| now.duration_since(**seen_at) <= window)
.count()
} else {
0
};
counts.insert(user.clone(), count);
}
counts
}
pub async fn get_active_ips_for_users(&self, users: &[String]) -> HashMap<String, Vec<IpAddr>> {
let active_ips = self.active_ips.read().await;
let mut out = HashMap::with_capacity(users.len());
for user in users {
let mut ips = active_ips
.get(user)
.map(|per_ip| per_ip.keys().copied().collect::<Vec<_>>())
.unwrap_or_else(Vec::new);
ips.sort();
out.insert(user.clone(), ips);
}
out
}
pub async fn get_recent_ips_for_users(&self, users: &[String]) -> HashMap<String, Vec<IpAddr>> {
let window = *self.limit_window.read().await;
let now = Instant::now();
let recent_ips = self.recent_ips.read().await;
let mut out = HashMap::with_capacity(users.len());
for user in users {
let mut ips = if let Some(user_recent) = recent_ips.get(user) {
user_recent
.iter()
.filter(|(_, seen_at)| now.duration_since(**seen_at) <= window)
.map(|(ip, _)| *ip)
.collect::<Vec<_>>()
} else {
Vec::new()
};
ips.sort();
out.insert(user.clone(), ips);
}
out
}
pub async fn get_active_ip_count(&self, username: &str) -> usize {
let active_ips = self.active_ips.read().await;
active_ips.get(username).map(|ips| ips.len()).unwrap_or(0)
}
/// Получить список всех активных IP-адресов для пользователя
///
/// # Arguments
/// * `username` - Имя пользователя
///
/// # Returns
/// Вектор с активными IP-адресами
pub async fn get_active_ips(&self, username: &str) -> Vec<IpAddr> {
let active_ips = self.active_ips.read().await;
active_ips
.get(username)
.map(|ips| ips.iter().copied().collect())
.map(|ips| ips.keys().copied().collect())
.unwrap_or_else(Vec::new)
}
/// Получить статистику по всем пользователям
///
/// # Returns
/// Вектор кортежей: (имя_пользователя, количество_активных_IP, лимит)
pub async fn get_stats(&self) -> Vec<(String, usize, usize)> {
let active_ips = self.active_ips.read().await;
let max_ips = self.max_ips.read().await;
let mut stats = Vec::new();
// Собираем статистику по пользователям с активными подключениями
for (username, user_ips) in active_ips.iter() {
let limit = max_ips.get(username).copied().unwrap_or(0);
stats.push((username.clone(), user_ips.len(), limit));
}
stats.sort_by(|a, b| a.0.cmp(&b.0)); // Сортируем по имени пользователя
stats.sort_by(|a, b| a.0.cmp(&b.0));
stats
}
/// Очистить все активные IP для пользователя (при необходимости)
///
/// # Arguments
/// * `username` - Имя пользователя
pub async fn clear_user_ips(&self, username: &str) {
let mut active_ips = self.active_ips.write().await;
active_ips.remove(username);
drop(active_ips);
let mut recent_ips = self.recent_ips.write().await;
recent_ips.remove(username);
}
/// Очистить всю статистику (использовать с осторожностью!)
pub async fn clear_all(&self) {
let mut active_ips = self.active_ips.write().await;
active_ips.clear();
drop(active_ips);
let mut recent_ips = self.recent_ips.write().await;
recent_ips.clear();
}
/// Проверить, подключен ли пользователь с данного IP
///
/// # Arguments
/// * `username` - Имя пользователя
/// * `ip` - IP-адрес для проверки
///
/// # Returns
/// `true` если IP активен, `false` если нет
pub async fn is_ip_active(&self, username: &str, ip: IpAddr) -> bool {
let active_ips = self.active_ips.read().await;
active_ips
.get(username)
.map(|ips| ips.contains(&ip))
.map(|ips| ips.contains_key(&ip))
.unwrap_or(false)
}
/// Получить лимит для пользователя
///
/// # Arguments
/// * `username` - Имя пользователя
///
/// # Returns
/// Лимит IP-адресов или None, если лимит не установлен
pub async fn get_user_limit(&self, username: &str) -> Option<usize> {
let max_ips = self.max_ips.read().await;
max_ips.get(username).copied()
}
/// Форматировать статистику в читаемый текст
///
/// # Returns
/// Строка со статистикой для логов или мониторинга
pub async fn format_stats(&self) -> String {
let stats = self.get_stats().await;
if stats.is_empty() {
return String::from("No active users");
}
let mut output = String::from("User IP Statistics:\n");
output.push_str("==================\n");
for (username, active_count, limit) in stats {
output.push_str(&format!(
"User: {:<20} Active IPs: {}/{}\n",
username,
active_count,
if limit > 0 { limit.to_string() } else { "unlimited".to_string() }
if limit > 0 {
limit.to_string()
} else {
"unlimited".to_string()
}
));
let ips = self.get_active_ips(&username).await;
for ip in ips {
output.push_str(&format!(" └─ {}\n", ip));
output.push_str(&format!(" - {}\n", ip));
}
}
output
}
}
@@ -257,10 +283,6 @@ impl Default for UserIpTracker {
}
}
// ============================================================================
// ТЕСТЫ
// ============================================================================
#[cfg(test)]
mod tests {
use super::*;
@@ -283,17 +305,33 @@ mod tests {
let ip2 = test_ipv4(192, 168, 1, 2);
let ip3 = test_ipv4(192, 168, 1, 3);
// Первые два IP должны быть приняты
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
assert!(tracker.check_and_add("test_user", ip2).await.is_ok());
// Третий IP должен быть отклонен
assert!(tracker.check_and_add("test_user", ip3).await.is_err());
// Проверяем счетчик
assert_eq!(tracker.get_active_ip_count("test_user").await, 2);
}
#[tokio::test]
async fn test_active_window_rejects_new_ip_and_keeps_existing_session() {
let tracker = UserIpTracker::new();
tracker.set_user_limit("test_user", 1).await;
tracker
.set_limit_policy(UserMaxUniqueIpsMode::ActiveWindow, 30)
.await;
let ip1 = test_ipv4(10, 10, 10, 1);
let ip2 = test_ipv4(10, 10, 10, 2);
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
assert!(tracker.is_ip_active("test_user", ip1).await);
assert!(tracker.check_and_add("test_user", ip2).await.is_err());
// Existing session remains active; only new unique IP is denied.
assert!(tracker.is_ip_active("test_user", ip1).await);
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
}
#[tokio::test]
async fn test_reconnection_from_same_ip() {
let tracker = UserIpTracker::new();
@@ -301,16 +339,29 @@ mod tests {
let ip1 = test_ipv4(192, 168, 1, 1);
// Первое подключение
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
// Повторное подключение с того же IP должно пройти
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
// Счетчик не должен увеличиться
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
}
#[tokio::test]
async fn test_same_ip_disconnect_keeps_active_while_other_session_alive() {
let tracker = UserIpTracker::new();
tracker.set_user_limit("test_user", 2).await;
let ip1 = test_ipv4(192, 168, 1, 1);
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
tracker.remove_ip("test_user", ip1).await;
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
tracker.remove_ip("test_user", ip1).await;
assert_eq!(tracker.get_active_ip_count("test_user").await, 0);
}
#[tokio::test]
async fn test_ip_removal() {
let tracker = UserIpTracker::new();
@@ -320,36 +371,28 @@ mod tests {
let ip2 = test_ipv4(192, 168, 1, 2);
let ip3 = test_ipv4(192, 168, 1, 3);
// Добавляем два IP
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
assert!(tracker.check_and_add("test_user", ip2).await.is_ok());
// Третий не должен пройти
assert!(tracker.check_and_add("test_user", ip3).await.is_err());
// Удаляем первый IP
tracker.remove_ip("test_user", ip1).await;
// Теперь третий должен пройти
assert!(tracker.check_and_add("test_user", ip3).await.is_ok());
assert_eq!(tracker.get_active_ip_count("test_user").await, 2);
}
#[tokio::test]
async fn test_no_limit() {
let tracker = UserIpTracker::new();
// Не устанавливаем лимит для test_user
let ip1 = test_ipv4(192, 168, 1, 1);
let ip2 = test_ipv4(192, 168, 1, 2);
let ip3 = test_ipv4(192, 168, 1, 3);
// Без лимита все IP должны проходить
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
assert!(tracker.check_and_add("test_user", ip2).await.is_ok());
assert!(tracker.check_and_add("test_user", ip3).await.is_ok());
assert_eq!(tracker.get_active_ip_count("test_user").await, 3);
}
@@ -362,11 +405,9 @@ mod tests {
let ip1 = test_ipv4(192, 168, 1, 1);
let ip2 = test_ipv4(192, 168, 1, 2);
// user1 может использовать 2 IP
assert!(tracker.check_and_add("user1", ip1).await.is_ok());
assert!(tracker.check_and_add("user1", ip2).await.is_ok());
// user2 может использовать только 1 IP
assert!(tracker.check_and_add("user2", ip1).await.is_ok());
assert!(tracker.check_and_add("user2", ip2).await.is_err());
}
@@ -379,10 +420,9 @@ mod tests {
let ipv4 = test_ipv4(192, 168, 1, 1);
let ipv6 = test_ipv6();
// Должны работать оба типа адресов
assert!(tracker.check_and_add("test_user", ipv4).await.is_ok());
assert!(tracker.check_and_add("test_user", ipv6).await.is_ok());
assert_eq!(tracker.get_active_ip_count("test_user").await, 2);
}
@@ -417,8 +457,7 @@ mod tests {
let stats = tracker.get_stats().await;
assert_eq!(stats.len(), 2);
// Проверяем наличие обоих пользователей в статистике
assert!(stats.iter().any(|(name, _, _)| name == "user1"));
assert!(stats.iter().any(|(name, _, _)| name == "user2"));
}
@@ -427,10 +466,10 @@ mod tests {
async fn test_clear_user_ips() {
let tracker = UserIpTracker::new();
let ip1 = test_ipv4(192, 168, 1, 1);
tracker.check_and_add("test_user", ip1).await.unwrap();
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
tracker.clear_user_ips("test_user").await;
assert_eq!(tracker.get_active_ip_count("test_user").await, 0);
}
@@ -440,9 +479,9 @@ mod tests {
let tracker = UserIpTracker::new();
let ip1 = test_ipv4(192, 168, 1, 1);
let ip2 = test_ipv4(192, 168, 1, 2);
tracker.check_and_add("test_user", ip1).await.unwrap();
assert!(tracker.is_ip_active("test_user", ip1).await);
assert!(!tracker.is_ip_active("test_user", ip2).await);
}
@@ -450,15 +489,85 @@ mod tests {
#[tokio::test]
async fn test_load_limits_from_config() {
let tracker = UserIpTracker::new();
let mut config_limits = HashMap::new();
config_limits.insert("user1".to_string(), 5);
config_limits.insert("user2".to_string(), 3);
tracker.load_limits(&config_limits).await;
assert_eq!(tracker.get_user_limit("user1").await, Some(5));
assert_eq!(tracker.get_user_limit("user2").await, Some(3));
assert_eq!(tracker.get_user_limit("user3").await, None);
}
#[tokio::test]
async fn test_load_limits_replaces_previous_map() {
let tracker = UserIpTracker::new();
let mut first = HashMap::new();
first.insert("user1".to_string(), 2);
first.insert("user2".to_string(), 3);
tracker.load_limits(&first).await;
let mut second = HashMap::new();
second.insert("user2".to_string(), 5);
tracker.load_limits(&second).await;
assert_eq!(tracker.get_user_limit("user1").await, None);
assert_eq!(tracker.get_user_limit("user2").await, Some(5));
}
#[tokio::test]
async fn test_time_window_mode_blocks_recent_ip_churn() {
let tracker = UserIpTracker::new();
tracker.set_user_limit("test_user", 1).await;
tracker
.set_limit_policy(UserMaxUniqueIpsMode::TimeWindow, 30)
.await;
let ip1 = test_ipv4(10, 0, 0, 1);
let ip2 = test_ipv4(10, 0, 0, 2);
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
tracker.remove_ip("test_user", ip1).await;
assert!(tracker.check_and_add("test_user", ip2).await.is_err());
}
#[tokio::test]
async fn test_combined_mode_enforces_active_and_recent_limits() {
let tracker = UserIpTracker::new();
tracker.set_user_limit("test_user", 1).await;
tracker
.set_limit_policy(UserMaxUniqueIpsMode::Combined, 30)
.await;
let ip1 = test_ipv4(10, 0, 1, 1);
let ip2 = test_ipv4(10, 0, 1, 2);
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
assert!(tracker.check_and_add("test_user", ip2).await.is_err());
tracker.remove_ip("test_user", ip1).await;
assert!(tracker.check_and_add("test_user", ip2).await.is_err());
}
#[tokio::test]
async fn test_time_window_expires() {
let tracker = UserIpTracker::new();
tracker.set_user_limit("test_user", 1).await;
tracker
.set_limit_policy(UserMaxUniqueIpsMode::TimeWindow, 1)
.await;
let ip1 = test_ipv4(10, 1, 0, 1);
let ip2 = test_ipv4(10, 1, 0, 2);
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
tracker.remove_ip("test_user", ip1).await;
assert!(tracker.check_and_add("test_user", ip2).await.is_err());
tokio::time::sleep(Duration::from_millis(1100)).await;
assert!(tracker.check_and_add("test_user", ip2).await.is_ok());
}
}

View File

@@ -4,17 +4,18 @@
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use std::time::{Duration, Instant};
use rand::Rng;
use tokio::net::TcpListener;
use tokio::signal;
use tokio::sync::Semaphore;
use tokio::sync::{Semaphore, mpsc};
use tracing::{debug, error, info, warn};
use tracing_subscriber::{EnvFilter, fmt, prelude::*, reload};
#[cfg(unix)]
use tokio::net::UnixListener;
mod cli;
mod api;
mod config;
mod crypto;
mod error;
@@ -40,8 +41,9 @@ use crate::stats::telemetry::TelemetryPolicy;
use crate::stats::{ReplayChecker, Stats};
use crate::stream::BufferPool;
use crate::transport::middle_proxy::{
MePool, fetch_proxy_config, run_me_ping, MePingFamily, MePingSample, format_sample_line,
format_me_route,
MePool, ProxyConfigData, fetch_proxy_config_with_raw, format_me_route, format_sample_line,
load_proxy_config_cache, run_me_ping, save_proxy_config_cache, MePingFamily, MePingSample,
MeReinitTrigger,
};
use crate::transport::{ListenOptions, UpstreamManager, create_listener, find_listener_processes};
use crate::tls_front::TlsFrontCache;
@@ -171,8 +173,191 @@ async fn write_beobachten_snapshot(path: &str, payload: &str) -> std::io::Result
tokio::fs::write(path, payload).await
}
fn unit_label(value: u64, singular: &'static str, plural: &'static str) -> &'static str {
if value == 1 { singular } else { plural }
}
fn format_uptime(total_secs: u64) -> String {
const SECS_PER_MINUTE: u64 = 60;
const SECS_PER_HOUR: u64 = 60 * SECS_PER_MINUTE;
const SECS_PER_DAY: u64 = 24 * SECS_PER_HOUR;
const SECS_PER_MONTH: u64 = 30 * SECS_PER_DAY;
const SECS_PER_YEAR: u64 = 12 * SECS_PER_MONTH;
let mut remaining = total_secs;
let years = remaining / SECS_PER_YEAR;
remaining %= SECS_PER_YEAR;
let months = remaining / SECS_PER_MONTH;
remaining %= SECS_PER_MONTH;
let days = remaining / SECS_PER_DAY;
remaining %= SECS_PER_DAY;
let hours = remaining / SECS_PER_HOUR;
remaining %= SECS_PER_HOUR;
let minutes = remaining / SECS_PER_MINUTE;
let seconds = remaining % SECS_PER_MINUTE;
let mut parts = Vec::new();
if total_secs > SECS_PER_YEAR {
parts.push(format!(
"{} {}",
years,
unit_label(years, "year", "years")
));
}
if total_secs > SECS_PER_MONTH {
parts.push(format!(
"{} {}",
months,
unit_label(months, "month", "months")
));
}
if total_secs > SECS_PER_DAY {
parts.push(format!(
"{} {}",
days,
unit_label(days, "day", "days")
));
}
if total_secs > SECS_PER_HOUR {
parts.push(format!(
"{} {}",
hours,
unit_label(hours, "hour", "hours")
));
}
if total_secs > SECS_PER_MINUTE {
parts.push(format!(
"{} {}",
minutes,
unit_label(minutes, "minute", "minutes")
));
}
parts.push(format!(
"{} {}",
seconds,
unit_label(seconds, "second", "seconds")
));
format!("{} / {} seconds", parts.join(", "), total_secs)
}
async fn load_startup_proxy_config_snapshot(
url: &str,
cache_path: Option<&str>,
me2dc_fallback: bool,
label: &'static str,
) -> Option<ProxyConfigData> {
loop {
match fetch_proxy_config_with_raw(url).await {
Ok((cfg, raw)) => {
if !cfg.map.is_empty() {
if let Some(path) = cache_path
&& let Err(e) = save_proxy_config_cache(path, &raw).await
{
warn!(error = %e, path, snapshot = label, "Failed to store startup proxy-config cache");
}
return Some(cfg);
}
warn!(snapshot = label, url, "Startup proxy-config is empty; trying disk cache");
if let Some(path) = cache_path {
match load_proxy_config_cache(path).await {
Ok(cached) if !cached.map.is_empty() => {
info!(
snapshot = label,
path,
proxy_for_lines = cached.proxy_for_lines,
"Loaded startup proxy-config from disk cache"
);
return Some(cached);
}
Ok(_) => {
warn!(
snapshot = label,
path,
"Startup proxy-config cache is empty; ignoring cache file"
);
}
Err(cache_err) => {
debug!(
snapshot = label,
path,
error = %cache_err,
"Startup proxy-config cache unavailable"
);
}
}
}
if me2dc_fallback {
error!(
snapshot = label,
"Startup proxy-config unavailable and no saved config found; falling back to direct mode"
);
return None;
}
warn!(
snapshot = label,
retry_in_secs = 2,
"Startup proxy-config unavailable and no saved config found; retrying because me2dc_fallback=false"
);
tokio::time::sleep(Duration::from_secs(2)).await;
}
Err(fetch_err) => {
if let Some(path) = cache_path {
match load_proxy_config_cache(path).await {
Ok(cached) if !cached.map.is_empty() => {
info!(
snapshot = label,
path,
proxy_for_lines = cached.proxy_for_lines,
"Loaded startup proxy-config from disk cache"
);
return Some(cached);
}
Ok(_) => {
warn!(
snapshot = label,
path,
"Startup proxy-config cache is empty; ignoring cache file"
);
}
Err(cache_err) => {
debug!(
snapshot = label,
path,
error = %cache_err,
"Startup proxy-config cache unavailable"
);
}
}
}
if me2dc_fallback {
error!(
snapshot = label,
error = %fetch_err,
"Startup proxy-config unavailable and no cached data; falling back to direct mode"
);
return None;
}
warn!(
snapshot = label,
error = %fetch_err,
retry_in_secs = 2,
"Startup proxy-config unavailable; retrying because me2dc_fallback=false"
);
tokio::time::sleep(Duration::from_secs(2)).await;
}
}
}
}
#[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
let process_started_at = Instant::now();
let (config_path, cli_silent, cli_log_level) = parse_cli();
let mut config = match ProxyConfig::load(&config_path) {
@@ -261,11 +446,16 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
warn!("Using default tls_domain. Consider setting a custom domain.");
}
let stats = Arc::new(Stats::new());
stats.apply_telemetry_policy(TelemetryPolicy::from_config(&config.general.telemetry));
let upstream_manager = Arc::new(UpstreamManager::new(
config.upstreams.clone(),
config.general.upstream_connect_retry_attempts,
config.general.upstream_connect_retry_backoff_ms,
config.general.upstream_unhealthy_fail_threshold,
config.general.upstream_connect_failfast_hard_errors,
stats.clone(),
));
let mut tls_domains = Vec::with_capacity(1 + config.censorship.tls_domains.len());
@@ -410,15 +600,19 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
log_probe_result(&probe, &decision);
let prefer_ipv6 = decision.prefer_ipv6();
let mut use_middle_proxy = config.general.use_middle_proxy && (decision.ipv4_me || decision.ipv6_me);
let stats = Arc::new(Stats::new());
stats.apply_telemetry_policy(TelemetryPolicy::from_config(&config.general.telemetry));
let mut use_middle_proxy = config.general.use_middle_proxy;
let beobachten = Arc::new(BeobachtenStore::new());
let rng = Arc::new(SecureRandom::new());
// IP Tracker initialization
let ip_tracker = Arc::new(UserIpTracker::new());
ip_tracker.load_limits(&config.access.user_max_unique_ips).await;
ip_tracker
.set_limit_policy(
config.access.user_max_unique_ips_mode,
config.access.user_max_unique_ips_window_secs,
)
.await;
if !config.access.user_max_unique_ips.is_empty() {
info!("IP limits configured for {} users", config.access.user_max_unique_ips.len());
@@ -433,9 +627,18 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
// Connection concurrency limit
let max_connections = Arc::new(Semaphore::new(10_000));
let me2dc_fallback = config.general.me2dc_fallback;
let me_init_retry_attempts = config.general.me_init_retry_attempts;
let me_init_warn_after_attempts: u32 = 3;
if use_middle_proxy && !decision.ipv4_me && !decision.ipv6_me {
warn!("No usable IP family for Middle Proxy detected; falling back to direct DC");
use_middle_proxy = false;
if me2dc_fallback {
warn!("No usable IP family for Middle Proxy detected; falling back to direct DC");
use_middle_proxy = false;
} else {
warn!(
"No usable IP family for Middle Proxy detected; me2dc_fallback=false, ME init retries stay active"
);
}
}
// =====================================================================
@@ -448,7 +651,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
info!("Middle-proxy STUN probing disabled by network.stun_use=false");
}
// ad_tag (proxy_tag) for advertising
// Global ad_tag (pool default). Used when user has no per-user tag in access.user_ad_tags.
let proxy_tag = config
.general
.ad_tag
@@ -465,13 +668,35 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
// proxy-secret is from: https://core.telegram.org/getProxySecret
// =============================================================
let proxy_secret_path = config.general.proxy_secret_path.as_deref();
match crate::transport::middle_proxy::fetch_proxy_secret(
proxy_secret_path,
config.general.proxy_secret_len_max,
)
.await
{
Ok(proxy_secret) => {
let pool_size = config.general.middle_proxy_pool_size.max(1);
let proxy_secret = loop {
match crate::transport::middle_proxy::fetch_proxy_secret(
proxy_secret_path,
config.general.proxy_secret_len_max,
)
.await
{
Ok(proxy_secret) => break Some(proxy_secret),
Err(e) => {
if me2dc_fallback {
error!(
error = %e,
"ME startup failed: proxy-secret is unavailable and no saved secret found; falling back to direct mode"
);
break None;
}
warn!(
error = %e,
retry_in_secs = 2,
"ME startup failed: proxy-secret is unavailable and no saved secret found; retrying because me2dc_fallback=false"
);
tokio::time::sleep(Duration::from_secs(2)).await;
}
}
};
match proxy_secret {
Some(proxy_secret) => {
info!(
secret_len = proxy_secret.len(),
key_sig = format_args!(
@@ -490,103 +715,153 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
"Proxy-secret loaded"
);
// Load ME config (v4/v6) + default DC
let mut cfg_v4 = fetch_proxy_config(
let cfg_v4 = load_startup_proxy_config_snapshot(
"https://core.telegram.org/getProxyConfig",
config.general.proxy_config_v4_cache_path.as_deref(),
me2dc_fallback,
"getProxyConfig",
)
.await
.unwrap_or_default();
let mut cfg_v6 = fetch_proxy_config(
.await;
let cfg_v6 = load_startup_proxy_config_snapshot(
"https://core.telegram.org/getProxyConfigV6",
config.general.proxy_config_v6_cache_path.as_deref(),
me2dc_fallback,
"getProxyConfigV6",
)
.await
.unwrap_or_default();
.await;
if cfg_v4.map.is_empty() {
cfg_v4.map = crate::protocol::constants::TG_MIDDLE_PROXIES_V4.clone();
}
if cfg_v6.map.is_empty() {
cfg_v6.map = crate::protocol::constants::TG_MIDDLE_PROXIES_V6.clone();
}
if let (Some(cfg_v4), Some(cfg_v6)) = (cfg_v4, cfg_v6) {
let pool = MePool::new(
proxy_tag.clone(),
proxy_secret,
config.general.middle_proxy_nat_ip,
me_nat_probe,
None,
config.network.stun_servers.clone(),
config.general.stun_nat_probe_concurrency,
probe.detected_ipv6,
config.timeouts.me_one_retry,
config.timeouts.me_one_timeout_ms,
cfg_v4.map.clone(),
cfg_v6.map.clone(),
cfg_v4.default_dc.or(cfg_v6.default_dc),
decision.clone(),
Some(upstream_manager.clone()),
rng.clone(),
stats.clone(),
config.general.me_keepalive_enabled,
config.general.me_keepalive_interval_secs,
config.general.me_keepalive_jitter_secs,
config.general.me_keepalive_payload_random,
config.general.rpc_proxy_req_every,
config.general.me_warmup_stagger_enabled,
config.general.me_warmup_step_delay_ms,
config.general.me_warmup_step_jitter_ms,
config.general.me_reconnect_max_concurrent_per_dc,
config.general.me_reconnect_backoff_base_ms,
config.general.me_reconnect_backoff_cap_ms,
config.general.me_reconnect_fast_retry_count,
config.general.me_single_endpoint_shadow_writers,
config.general.me_single_endpoint_outage_mode_enabled,
config.general.me_single_endpoint_outage_disable_quarantine,
config.general.me_single_endpoint_outage_backoff_min_ms,
config.general.me_single_endpoint_outage_backoff_max_ms,
config.general.me_single_endpoint_shadow_rotate_every_secs,
config.general.me_floor_mode,
config.general.me_adaptive_floor_idle_secs,
config.general.me_adaptive_floor_min_writers_single_endpoint,
config.general.me_adaptive_floor_recover_grace_secs,
config.general.hardswap,
config.general.me_pool_drain_ttl_secs,
config.general.effective_me_pool_force_close_secs(),
config.general.me_pool_min_fresh_ratio,
config.general.me_hardswap_warmup_delay_min_ms,
config.general.me_hardswap_warmup_delay_max_ms,
config.general.me_hardswap_warmup_extra_passes,
config.general.me_hardswap_warmup_pass_backoff_base_ms,
config.general.me_bind_stale_mode,
config.general.me_bind_stale_ttl_secs,
config.general.me_secret_atomic_snapshot,
config.general.me_deterministic_writer_sort,
config.general.me_socks_kdf_policy,
config.general.me_route_backpressure_base_timeout_ms,
config.general.me_route_backpressure_high_timeout_ms,
config.general.me_route_backpressure_high_watermark_pct,
config.general.me_route_no_writer_mode,
config.general.me_route_no_writer_wait_ms,
config.general.me_route_inline_recovery_attempts,
config.general.me_route_inline_recovery_wait_ms,
);
let pool = MePool::new(
proxy_tag,
proxy_secret,
config.general.middle_proxy_nat_ip,
me_nat_probe,
None,
config.network.stun_servers.clone(),
config.general.stun_nat_probe_concurrency,
probe.detected_ipv6,
config.timeouts.me_one_retry,
config.timeouts.me_one_timeout_ms,
cfg_v4.map.clone(),
cfg_v6.map.clone(),
cfg_v4.default_dc.or(cfg_v6.default_dc),
decision.clone(),
Some(upstream_manager.clone()),
rng.clone(),
stats.clone(),
config.general.me_keepalive_enabled,
config.general.me_keepalive_interval_secs,
config.general.me_keepalive_jitter_secs,
config.general.me_keepalive_payload_random,
config.general.me_warmup_stagger_enabled,
config.general.me_warmup_step_delay_ms,
config.general.me_warmup_step_jitter_ms,
config.general.me_reconnect_max_concurrent_per_dc,
config.general.me_reconnect_backoff_base_ms,
config.general.me_reconnect_backoff_cap_ms,
config.general.me_reconnect_fast_retry_count,
config.general.hardswap,
config.general.me_pool_drain_ttl_secs,
config.general.effective_me_pool_force_close_secs(),
config.general.me_pool_min_fresh_ratio,
config.general.me_hardswap_warmup_delay_min_ms,
config.general.me_hardswap_warmup_delay_max_ms,
config.general.me_hardswap_warmup_extra_passes,
config.general.me_hardswap_warmup_pass_backoff_base_ms,
config.general.me_socks_kdf_policy,
config.general.me_route_backpressure_base_timeout_ms,
config.general.me_route_backpressure_high_timeout_ms,
config.general.me_route_backpressure_high_watermark_pct,
);
let mut init_attempt: u32 = 0;
loop {
init_attempt = init_attempt.saturating_add(1);
match pool.init(pool_size, &rng).await {
Ok(()) => {
info!(
attempt = init_attempt,
"Middle-End pool initialized successfully"
);
let pool_size = config.general.middle_proxy_pool_size.max(1);
loop {
match pool.init(pool_size, &rng).await {
Ok(()) => {
info!("Middle-End pool initialized successfully");
// Phase 4: Start health monitor
let pool_clone = pool.clone();
let rng_clone = rng.clone();
let min_conns = pool_size;
tokio::spawn(async move {
crate::transport::middle_proxy::me_health_monitor(
pool_clone, rng_clone, min_conns,
)
.await;
});
// Phase 4: Start health monitor
let pool_clone = pool.clone();
let rng_clone = rng.clone();
let min_conns = pool_size;
tokio::spawn(async move {
crate::transport::middle_proxy::me_health_monitor(
pool_clone, rng_clone, min_conns,
)
.await;
});
break Some(pool);
}
Err(e) => {
let retries_limited = me2dc_fallback && me_init_retry_attempts > 0;
if retries_limited && init_attempt >= me_init_retry_attempts {
error!(
error = %e,
attempt = init_attempt,
retry_limit = me_init_retry_attempts,
"ME pool init retries exhausted; falling back to direct mode"
);
break None;
}
break Some(pool);
}
Err(e) => {
warn!(
error = %e,
retry_in_secs = 2,
"ME pool is not ready yet; retrying startup initialization"
);
pool.reset_stun_state();
tokio::time::sleep(Duration::from_secs(2)).await;
let retry_limit = if !me2dc_fallback || me_init_retry_attempts == 0 {
String::from("unlimited")
} else {
me_init_retry_attempts.to_string()
};
if init_attempt >= me_init_warn_after_attempts {
warn!(
error = %e,
attempt = init_attempt,
retry_limit = retry_limit,
me2dc_fallback = me2dc_fallback,
retry_in_secs = 2,
"ME pool is not ready yet; retrying startup initialization"
);
} else {
info!(
error = %e,
attempt = init_attempt,
retry_limit = retry_limit,
me2dc_fallback = me2dc_fallback,
retry_in_secs = 2,
"ME pool startup warmup: retrying initialization"
);
}
pool.reset_stun_state();
tokio::time::sleep(Duration::from_secs(2)).await;
}
}
}
} else {
None
}
}
Err(e) => {
error!(error = %e, "Failed to fetch proxy-secret. Falling back to direct mode.");
None
}
None => None,
}
} else {
None
@@ -767,15 +1042,30 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
}
}
let initialized_secs = process_started_at.elapsed().as_secs();
let second_suffix = if initialized_secs == 1 { "" } else { "s" };
info!("===================== Telegram Startup =====================");
info!(
" DC/ME Initialized in {} second{}",
initialized_secs, second_suffix
);
info!("============================================================");
if let Some(ref pool) = me_pool {
pool.set_runtime_ready(true);
}
// Background tasks
let um_clone = upstream_manager.clone();
let decision_clone = decision.clone();
let dc_overrides_for_health = config.dc_overrides.clone();
tokio::spawn(async move {
um_clone
.run_health_checks(
prefer_ipv6,
decision_clone.ipv4_dc,
decision_clone.ipv6_dc,
dc_overrides_for_health,
)
.await;
});
@@ -826,6 +1116,51 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
}
});
let ip_tracker_policy = ip_tracker.clone();
let mut config_rx_ip_limits = config_rx.clone();
tokio::spawn(async move {
let mut prev_limits = config_rx_ip_limits
.borrow()
.access
.user_max_unique_ips
.clone();
let mut prev_mode = config_rx_ip_limits
.borrow()
.access
.user_max_unique_ips_mode;
let mut prev_window = config_rx_ip_limits
.borrow()
.access
.user_max_unique_ips_window_secs;
loop {
if config_rx_ip_limits.changed().await.is_err() {
break;
}
let cfg = config_rx_ip_limits.borrow_and_update().clone();
if prev_limits != cfg.access.user_max_unique_ips {
ip_tracker_policy
.load_limits(&cfg.access.user_max_unique_ips)
.await;
prev_limits = cfg.access.user_max_unique_ips.clone();
}
if prev_mode != cfg.access.user_max_unique_ips_mode
|| prev_window != cfg.access.user_max_unique_ips_window_secs
{
ip_tracker_policy
.set_limit_policy(
cfg.access.user_max_unique_ips_mode,
cfg.access.user_max_unique_ips_window_secs,
)
.await;
prev_mode = cfg.access.user_max_unique_ips_mode;
prev_window = cfg.access.user_max_unique_ips_window_secs;
}
}
});
let beobachten_writer = beobachten.clone();
let config_rx_beobachten = config_rx.clone();
tokio::spawn(async move {
@@ -847,26 +1182,43 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
});
if let Some(ref pool) = me_pool {
let pool_clone = pool.clone();
let rng_clone = rng.clone();
let config_rx_clone = config_rx.clone();
let reinit_trigger_capacity = config
.general
.me_reinit_trigger_channel
.max(1);
let (reinit_tx, reinit_rx) = mpsc::channel::<MeReinitTrigger>(reinit_trigger_capacity);
let pool_clone_sched = pool.clone();
let rng_clone_sched = rng.clone();
let config_rx_clone_sched = config_rx.clone();
tokio::spawn(async move {
crate::transport::middle_proxy::me_config_updater(
pool_clone,
rng_clone,
config_rx_clone,
crate::transport::middle_proxy::me_reinit_scheduler(
pool_clone_sched,
rng_clone_sched,
config_rx_clone_sched,
reinit_rx,
)
.await;
});
let pool_clone = pool.clone();
let config_rx_clone = config_rx.clone();
let reinit_tx_updater = reinit_tx.clone();
tokio::spawn(async move {
crate::transport::middle_proxy::me_config_updater(
pool_clone,
config_rx_clone,
reinit_tx_updater,
)
.await;
});
let pool_clone_rot = pool.clone();
let rng_clone_rot = rng.clone();
let config_rx_clone_rot = config_rx.clone();
let reinit_tx_rotation = reinit_tx.clone();
tokio::spawn(async move {
crate::transport::middle_proxy::me_rotation_task(
pool_clone_rot,
rng_clone_rot,
config_rx_clone_rot,
reinit_tx_rotation,
)
.await;
});
@@ -1115,6 +1467,44 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
});
}
if config.server.api.enabled {
let listen = match config.server.api.listen.parse::<SocketAddr>() {
Ok(listen) => listen,
Err(error) => {
warn!(
error = %error,
listen = %config.server.api.listen,
"Invalid server.api.listen; API is disabled"
);
SocketAddr::from(([127, 0, 0, 1], 0))
}
};
if listen.port() != 0 {
let stats = stats.clone();
let ip_tracker_api = ip_tracker.clone();
let me_pool_api = me_pool.clone();
let upstream_manager_api = upstream_manager.clone();
let config_rx_api = config_rx.clone();
let config_path_api = std::path::PathBuf::from(&config_path);
let startup_detected_ip_v4 = detected_ip_v4;
let startup_detected_ip_v6 = detected_ip_v6;
tokio::spawn(async move {
api::serve(
listen,
stats,
ip_tracker_api,
me_pool_api,
upstream_manager_api,
config_rx_api,
config_path_api,
startup_detected_ip_v4,
startup_detected_ip_v6,
)
.await;
});
}
}
for (listener, listener_proxy_protocol) in listeners {
let mut config_rx: tokio::sync::watch::Receiver<Arc<ProxyConfig>> = config_rx.clone();
let stats = stats.clone();
@@ -1218,7 +1608,36 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
}
match signal::ctrl_c().await {
Ok(()) => info!("Shutting down..."),
Ok(()) => {
let shutdown_started_at = Instant::now();
info!("Shutting down...");
let uptime_secs = process_started_at.elapsed().as_secs();
info!("Uptime: {}", format_uptime(uptime_secs));
if let Some(pool) = &me_pool {
match tokio::time::timeout(
Duration::from_secs(2),
pool.shutdown_send_close_conn_all(),
)
.await
{
Ok(total) => {
info!(
close_conn_sent = total,
"ME shutdown: RPC_CLOSE_CONN broadcast completed"
);
}
Err(_) => {
warn!("ME shutdown: RPC_CLOSE_CONN broadcast timed out");
}
}
}
let shutdown_secs = shutdown_started_at.elapsed().as_secs();
info!(
"Shutdown completed successfully in {} {}.",
shutdown_secs,
unit_label(shutdown_secs, "second", "seconds")
);
}
Err(e) => error!("Signal error: {}", e),
}

View File

@@ -202,6 +202,195 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
}
);
let _ = writeln!(
out,
"# HELP telemt_upstream_connect_attempt_total Upstream connect attempts across all requests"
);
let _ = writeln!(out, "# TYPE telemt_upstream_connect_attempt_total counter");
let _ = writeln!(
out,
"telemt_upstream_connect_attempt_total {}",
if core_enabled {
stats.get_upstream_connect_attempt_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_upstream_connect_success_total Successful upstream connect request cycles"
);
let _ = writeln!(out, "# TYPE telemt_upstream_connect_success_total counter");
let _ = writeln!(
out,
"telemt_upstream_connect_success_total {}",
if core_enabled {
stats.get_upstream_connect_success_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_upstream_connect_fail_total Failed upstream connect request cycles"
);
let _ = writeln!(out, "# TYPE telemt_upstream_connect_fail_total counter");
let _ = writeln!(
out,
"telemt_upstream_connect_fail_total {}",
if core_enabled {
stats.get_upstream_connect_fail_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_upstream_connect_failfast_hard_error_total Hard errors that triggered upstream connect failfast"
);
let _ = writeln!(
out,
"# TYPE telemt_upstream_connect_failfast_hard_error_total counter"
);
let _ = writeln!(
out,
"telemt_upstream_connect_failfast_hard_error_total {}",
if core_enabled {
stats.get_upstream_connect_failfast_hard_error_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_upstream_connect_attempts_per_request Histogram-like buckets for attempts per upstream connect request cycle"
);
let _ = writeln!(out, "# TYPE telemt_upstream_connect_attempts_per_request counter");
let _ = writeln!(
out,
"telemt_upstream_connect_attempts_per_request{{bucket=\"1\"}} {}",
if core_enabled {
stats.get_upstream_connect_attempts_bucket_1()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_upstream_connect_attempts_per_request{{bucket=\"2\"}} {}",
if core_enabled {
stats.get_upstream_connect_attempts_bucket_2()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_upstream_connect_attempts_per_request{{bucket=\"3_4\"}} {}",
if core_enabled {
stats.get_upstream_connect_attempts_bucket_3_4()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_upstream_connect_attempts_per_request{{bucket=\"gt_4\"}} {}",
if core_enabled {
stats.get_upstream_connect_attempts_bucket_gt_4()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_upstream_connect_duration_success_total Histogram-like buckets of successful upstream connect cycle duration"
);
let _ = writeln!(out, "# TYPE telemt_upstream_connect_duration_success_total counter");
let _ = writeln!(
out,
"telemt_upstream_connect_duration_success_total{{bucket=\"le_100ms\"}} {}",
if core_enabled {
stats.get_upstream_connect_duration_success_bucket_le_100ms()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_upstream_connect_duration_success_total{{bucket=\"101_500ms\"}} {}",
if core_enabled {
stats.get_upstream_connect_duration_success_bucket_101_500ms()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_upstream_connect_duration_success_total{{bucket=\"501_1000ms\"}} {}",
if core_enabled {
stats.get_upstream_connect_duration_success_bucket_501_1000ms()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_upstream_connect_duration_success_total{{bucket=\"gt_1000ms\"}} {}",
if core_enabled {
stats.get_upstream_connect_duration_success_bucket_gt_1000ms()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_upstream_connect_duration_fail_total Histogram-like buckets of failed upstream connect cycle duration"
);
let _ = writeln!(out, "# TYPE telemt_upstream_connect_duration_fail_total counter");
let _ = writeln!(
out,
"telemt_upstream_connect_duration_fail_total{{bucket=\"le_100ms\"}} {}",
if core_enabled {
stats.get_upstream_connect_duration_fail_bucket_le_100ms()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_upstream_connect_duration_fail_total{{bucket=\"101_500ms\"}} {}",
if core_enabled {
stats.get_upstream_connect_duration_fail_bucket_101_500ms()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_upstream_connect_duration_fail_total{{bucket=\"501_1000ms\"}} {}",
if core_enabled {
stats.get_upstream_connect_duration_fail_bucket_501_1000ms()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_upstream_connect_duration_fail_total{{bucket=\"gt_1000ms\"}} {}",
if core_enabled {
stats.get_upstream_connect_duration_fail_bucket_gt_1000ms()
} else {
0
}
);
let _ = writeln!(out, "# HELP telemt_me_keepalive_sent_total ME keepalive frames sent");
let _ = writeln!(out, "# TYPE telemt_me_keepalive_sent_total counter");
let _ = writeln!(
@@ -250,6 +439,93 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
}
);
let _ = writeln!(
out,
"# HELP telemt_me_rpc_proxy_req_signal_sent_total Service RPC_PROXY_REQ activity signals sent"
);
let _ = writeln!(out, "# TYPE telemt_me_rpc_proxy_req_signal_sent_total counter");
let _ = writeln!(
out,
"telemt_me_rpc_proxy_req_signal_sent_total {}",
if me_allows_normal {
stats.get_me_rpc_proxy_req_signal_sent_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_rpc_proxy_req_signal_failed_total Service RPC_PROXY_REQ activity signal failures"
);
let _ = writeln!(
out,
"# TYPE telemt_me_rpc_proxy_req_signal_failed_total counter"
);
let _ = writeln!(
out,
"telemt_me_rpc_proxy_req_signal_failed_total {}",
if me_allows_normal {
stats.get_me_rpc_proxy_req_signal_failed_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_rpc_proxy_req_signal_skipped_no_meta_total Service RPC_PROXY_REQ skipped due to missing writer metadata"
);
let _ = writeln!(
out,
"# TYPE telemt_me_rpc_proxy_req_signal_skipped_no_meta_total counter"
);
let _ = writeln!(
out,
"telemt_me_rpc_proxy_req_signal_skipped_no_meta_total {}",
if me_allows_normal {
stats.get_me_rpc_proxy_req_signal_skipped_no_meta_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_rpc_proxy_req_signal_response_total Service RPC_PROXY_REQ responses observed"
);
let _ = writeln!(
out,
"# TYPE telemt_me_rpc_proxy_req_signal_response_total counter"
);
let _ = writeln!(
out,
"telemt_me_rpc_proxy_req_signal_response_total {}",
if me_allows_normal {
stats.get_me_rpc_proxy_req_signal_response_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_rpc_proxy_req_signal_close_sent_total Service RPC_CLOSE_EXT sent after activity signals"
);
let _ = writeln!(
out,
"# TYPE telemt_me_rpc_proxy_req_signal_close_sent_total counter"
);
let _ = writeln!(
out,
"telemt_me_rpc_proxy_req_signal_close_sent_total {}",
if me_allows_normal {
stats.get_me_rpc_proxy_req_signal_close_sent_total()
} else {
0
}
);
let _ = writeln!(out, "# HELP telemt_me_reconnect_attempts_total ME reconnect attempts");
let _ = writeln!(out, "# TYPE telemt_me_reconnect_attempts_total counter");
let _ = writeln!(
@@ -274,6 +550,58 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
}
);
let _ = writeln!(out, "# HELP telemt_me_handshake_reject_total ME handshake rejects from upstream");
let _ = writeln!(out, "# TYPE telemt_me_handshake_reject_total counter");
let _ = writeln!(
out,
"telemt_me_handshake_reject_total {}",
if me_allows_normal {
stats.get_me_handshake_reject_total()
} else {
0
}
);
let _ = writeln!(out, "# HELP telemt_me_handshake_error_code_total ME handshake reject errors by code");
let _ = writeln!(out, "# TYPE telemt_me_handshake_error_code_total counter");
if me_allows_normal {
for (error_code, count) in stats.get_me_handshake_error_code_counts() {
let _ = writeln!(
out,
"telemt_me_handshake_error_code_total{{error_code=\"{}\"}} {}",
error_code,
count
);
}
}
let _ = writeln!(out, "# HELP telemt_me_reader_eof_total ME reader EOF terminations");
let _ = writeln!(out, "# TYPE telemt_me_reader_eof_total counter");
let _ = writeln!(
out,
"telemt_me_reader_eof_total {}",
if me_allows_normal {
stats.get_me_reader_eof_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_idle_close_by_peer_total ME idle writers closed by peer"
);
let _ = writeln!(out, "# TYPE telemt_me_idle_close_by_peer_total counter");
let _ = writeln!(
out,
"telemt_me_idle_close_by_peer_total {}",
if me_allows_normal {
stats.get_me_idle_close_by_peer_total()
} else {
0
}
);
let _ = writeln!(out, "# HELP telemt_me_crc_mismatch_total ME CRC mismatches");
let _ = writeln!(out, "# TYPE telemt_me_crc_mismatch_total counter");
let _ = writeln!(
@@ -385,6 +713,262 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
}
);
let _ = writeln!(
out,
"# HELP telemt_me_endpoint_quarantine_total ME endpoint quarantines due to rapid flaps"
);
let _ = writeln!(out, "# TYPE telemt_me_endpoint_quarantine_total counter");
let _ = writeln!(
out,
"telemt_me_endpoint_quarantine_total {}",
if me_allows_normal {
stats.get_me_endpoint_quarantine_total()
} else {
0
}
);
let _ = writeln!(out, "# HELP telemt_me_kdf_drift_total ME KDF input drift detections");
let _ = writeln!(out, "# TYPE telemt_me_kdf_drift_total counter");
let _ = writeln!(
out,
"telemt_me_kdf_drift_total {}",
if me_allows_normal {
stats.get_me_kdf_drift_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_kdf_port_only_drift_total ME KDF client-port changes with stable non-port material"
);
let _ = writeln!(out, "# TYPE telemt_me_kdf_port_only_drift_total counter");
let _ = writeln!(
out,
"telemt_me_kdf_port_only_drift_total {}",
if me_allows_debug {
stats.get_me_kdf_port_only_drift_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_hardswap_pending_reuse_total Hardswap cycles that reused an existing pending generation"
);
let _ = writeln!(out, "# TYPE telemt_me_hardswap_pending_reuse_total counter");
let _ = writeln!(
out,
"telemt_me_hardswap_pending_reuse_total {}",
if me_allows_debug {
stats.get_me_hardswap_pending_reuse_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_hardswap_pending_ttl_expired_total Pending hardswap generations reset by TTL expiration"
);
let _ = writeln!(out, "# TYPE telemt_me_hardswap_pending_ttl_expired_total counter");
let _ = writeln!(
out,
"telemt_me_hardswap_pending_ttl_expired_total {}",
if me_allows_normal {
stats.get_me_hardswap_pending_ttl_expired_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_single_endpoint_outage_enter_total Single-endpoint DC outage transitions to active state"
);
let _ = writeln!(
out,
"# TYPE telemt_me_single_endpoint_outage_enter_total counter"
);
let _ = writeln!(
out,
"telemt_me_single_endpoint_outage_enter_total {}",
if me_allows_normal {
stats.get_me_single_endpoint_outage_enter_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_single_endpoint_outage_exit_total Single-endpoint DC outage recovery transitions"
);
let _ = writeln!(
out,
"# TYPE telemt_me_single_endpoint_outage_exit_total counter"
);
let _ = writeln!(
out,
"telemt_me_single_endpoint_outage_exit_total {}",
if me_allows_normal {
stats.get_me_single_endpoint_outage_exit_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_single_endpoint_outage_reconnect_attempt_total Reconnect attempts performed during single-endpoint outages"
);
let _ = writeln!(
out,
"# TYPE telemt_me_single_endpoint_outage_reconnect_attempt_total counter"
);
let _ = writeln!(
out,
"telemt_me_single_endpoint_outage_reconnect_attempt_total {}",
if me_allows_normal {
stats.get_me_single_endpoint_outage_reconnect_attempt_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_single_endpoint_outage_reconnect_success_total Successful reconnect attempts during single-endpoint outages"
);
let _ = writeln!(
out,
"# TYPE telemt_me_single_endpoint_outage_reconnect_success_total counter"
);
let _ = writeln!(
out,
"telemt_me_single_endpoint_outage_reconnect_success_total {}",
if me_allows_normal {
stats.get_me_single_endpoint_outage_reconnect_success_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_single_endpoint_quarantine_bypass_total Outage reconnect attempts that bypassed quarantine"
);
let _ = writeln!(
out,
"# TYPE telemt_me_single_endpoint_quarantine_bypass_total counter"
);
let _ = writeln!(
out,
"telemt_me_single_endpoint_quarantine_bypass_total {}",
if me_allows_normal {
stats.get_me_single_endpoint_quarantine_bypass_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_single_endpoint_shadow_rotate_total Successful periodic shadow rotations for single-endpoint DC groups"
);
let _ = writeln!(
out,
"# TYPE telemt_me_single_endpoint_shadow_rotate_total counter"
);
let _ = writeln!(
out,
"telemt_me_single_endpoint_shadow_rotate_total {}",
if me_allows_normal {
stats.get_me_single_endpoint_shadow_rotate_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_single_endpoint_shadow_rotate_skipped_quarantine_total Shadow rotations skipped because endpoint is quarantined"
);
let _ = writeln!(
out,
"# TYPE telemt_me_single_endpoint_shadow_rotate_skipped_quarantine_total counter"
);
let _ = writeln!(
out,
"telemt_me_single_endpoint_shadow_rotate_skipped_quarantine_total {}",
if me_allows_normal {
stats.get_me_single_endpoint_shadow_rotate_skipped_quarantine_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_floor_mode Runtime ME writer floor policy mode"
);
let _ = writeln!(out, "# TYPE telemt_me_floor_mode gauge");
let floor_mode = config.general.me_floor_mode;
let _ = writeln!(
out,
"telemt_me_floor_mode{{mode=\"static\"}} {}",
if matches!(floor_mode, crate::config::MeFloorMode::Static) {
1
} else {
0
}
);
let _ = writeln!(
out,
"telemt_me_floor_mode{{mode=\"adaptive\"}} {}",
if matches!(floor_mode, crate::config::MeFloorMode::Adaptive) {
1
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_floor_mode_switch_all_total Runtime ME floor mode switches"
);
let _ = writeln!(out, "# TYPE telemt_me_floor_mode_switch_all_total counter");
let _ = writeln!(
out,
"telemt_me_floor_mode_switch_all_total {}",
if me_allows_normal {
stats.get_me_floor_mode_switch_total()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_me_floor_mode_switch_total{{from=\"static\",to=\"adaptive\"}} {}",
if me_allows_normal {
stats.get_me_floor_mode_switch_static_to_adaptive_total()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_me_floor_mode_switch_total{{from=\"adaptive\",to=\"static\"}} {}",
if me_allows_normal {
stats.get_me_floor_mode_switch_adaptive_to_static_total()
} else {
0
}
);
let _ = writeln!(out, "# HELP telemt_secure_padding_invalid_total Invalid secure frame lengths");
let _ = writeln!(out, "# TYPE telemt_secure_padding_invalid_total counter");
let _ = writeln!(
@@ -477,7 +1061,7 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
let _ = writeln!(
out,
"telemt_pool_swap_total {}",
if me_allows_debug {
if me_allows_normal {
stats.get_pool_swap_total()
} else {
0
@@ -615,6 +1199,48 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_no_writer_failfast_total ME route failfast errors due to missing writer in bounded wait window"
);
let _ = writeln!(out, "# TYPE telemt_me_no_writer_failfast_total counter");
let _ = writeln!(
out,
"telemt_me_no_writer_failfast_total {}",
if me_allows_normal {
stats.get_me_no_writer_failfast_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_async_recovery_trigger_total Async ME recovery trigger attempts from route path"
);
let _ = writeln!(out, "# TYPE telemt_me_async_recovery_trigger_total counter");
let _ = writeln!(
out,
"telemt_me_async_recovery_trigger_total {}",
if me_allows_normal {
stats.get_me_async_recovery_trigger_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_inline_recovery_total Legacy inline ME recovery attempts from route path"
);
let _ = writeln!(out, "# TYPE telemt_me_inline_recovery_total counter");
let _ = writeln!(
out,
"telemt_me_inline_recovery_total {}",
if me_allows_normal {
stats.get_me_inline_recovery_total()
} else {
0
}
);
let unresolved_writer_losses = if me_allows_normal {
stats
@@ -653,6 +1279,29 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
let _ = writeln!(out, "# TYPE telemt_user_msgs_from_client counter");
let _ = writeln!(out, "# HELP telemt_user_msgs_to_client Per-user messages sent");
let _ = writeln!(out, "# TYPE telemt_user_msgs_to_client counter");
let _ = writeln!(
out,
"# HELP telemt_ip_reservation_rollback_total IP reservation rollbacks caused by later limit checks"
);
let _ = writeln!(out, "# TYPE telemt_ip_reservation_rollback_total counter");
let _ = writeln!(
out,
"telemt_ip_reservation_rollback_total{{reason=\"tcp_limit\"}} {}",
if core_enabled {
stats.get_ip_reservation_rollback_tcp_limit_total()
} else {
0
}
);
let _ = writeln!(
out,
"telemt_ip_reservation_rollback_total{{reason=\"quota_limit\"}} {}",
if core_enabled {
stats.get_ip_reservation_rollback_quota_limit_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_telemetry_user_series_suppressed User-labeled metric series suppression flag"
@@ -683,11 +1332,21 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
.collect();
let mut unique_users = BTreeSet::new();
unique_users.extend(config.access.users.keys().cloned());
unique_users.extend(config.access.user_max_unique_ips.keys().cloned());
unique_users.extend(ip_counts.keys().cloned());
let unique_users_vec: Vec<String> = unique_users.iter().cloned().collect();
let recent_counts = ip_tracker
.get_recent_counts_for_users(&unique_users_vec)
.await;
let _ = writeln!(out, "# HELP telemt_user_unique_ips_current Per-user current number of unique active IPs");
let _ = writeln!(out, "# TYPE telemt_user_unique_ips_current gauge");
let _ = writeln!(
out,
"# HELP telemt_user_unique_ips_recent_window Per-user unique IPs seen in configured observation window"
);
let _ = writeln!(out, "# TYPE telemt_user_unique_ips_recent_window gauge");
let _ = writeln!(out, "# HELP telemt_user_unique_ips_limit Per-user configured unique IP limit (0 means unlimited)");
let _ = writeln!(out, "# TYPE telemt_user_unique_ips_limit gauge");
let _ = writeln!(out, "# HELP telemt_user_unique_ips_utilization Per-user unique IP usage ratio (0 for unlimited)");
@@ -702,6 +1361,12 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
0.0
};
let _ = writeln!(out, "telemt_user_unique_ips_current{{user=\"{}\"}} {}", user, current);
let _ = writeln!(
out,
"telemt_user_unique_ips_recent_window{{user=\"{}\"}} {}",
user,
recent_counts.get(&user).copied().unwrap_or(0)
);
let _ = writeln!(out, "telemt_user_unique_ips_limit{{user=\"{}\"}} {}", user, limit);
let _ = writeln!(
out,
@@ -735,6 +1400,20 @@ mod tests {
stats.increment_connects_all();
stats.increment_connects_bad();
stats.increment_handshake_timeouts();
stats.increment_upstream_connect_attempt_total();
stats.increment_upstream_connect_attempt_total();
stats.increment_upstream_connect_success_total();
stats.increment_upstream_connect_fail_total();
stats.increment_upstream_connect_failfast_hard_error_total();
stats.observe_upstream_connect_attempts_per_request(2);
stats.observe_upstream_connect_duration_ms(220, true);
stats.observe_upstream_connect_duration_ms(1500, false);
stats.increment_me_rpc_proxy_req_signal_sent_total();
stats.increment_me_rpc_proxy_req_signal_failed_total();
stats.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
stats.increment_me_rpc_proxy_req_signal_response_total();
stats.increment_me_rpc_proxy_req_signal_close_sent_total();
stats.increment_me_idle_close_by_peer_total();
stats.increment_user_connects("alice");
stats.increment_user_curr_connects("alice");
stats.add_user_octets_from("alice", 1024);
@@ -752,6 +1431,27 @@ mod tests {
assert!(output.contains("telemt_connections_total 2"));
assert!(output.contains("telemt_connections_bad_total 1"));
assert!(output.contains("telemt_handshake_timeouts_total 1"));
assert!(output.contains("telemt_upstream_connect_attempt_total 2"));
assert!(output.contains("telemt_upstream_connect_success_total 1"));
assert!(output.contains("telemt_upstream_connect_fail_total 1"));
assert!(output.contains("telemt_upstream_connect_failfast_hard_error_total 1"));
assert!(
output.contains("telemt_upstream_connect_attempts_per_request{bucket=\"2\"} 1")
);
assert!(
output.contains(
"telemt_upstream_connect_duration_success_total{bucket=\"101_500ms\"} 1"
)
);
assert!(
output.contains("telemt_upstream_connect_duration_fail_total{bucket=\"gt_1000ms\"} 1")
);
assert!(output.contains("telemt_me_rpc_proxy_req_signal_sent_total 1"));
assert!(output.contains("telemt_me_rpc_proxy_req_signal_failed_total 1"));
assert!(output.contains("telemt_me_rpc_proxy_req_signal_skipped_no_meta_total 1"));
assert!(output.contains("telemt_me_rpc_proxy_req_signal_response_total 1"));
assert!(output.contains("telemt_me_rpc_proxy_req_signal_close_sent_total 1"));
assert!(output.contains("telemt_me_idle_close_by_peer_total 1"));
assert!(output.contains("telemt_user_connections_total{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_connections_current{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_octets_from_client{user=\"alice\"} 1024"));
@@ -759,6 +1459,7 @@ mod tests {
assert!(output.contains("telemt_user_msgs_from_client{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_msgs_to_client{user=\"alice\"} 2"));
assert!(output.contains("telemt_user_unique_ips_current{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_unique_ips_recent_window{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_unique_ips_limit{user=\"alice\"} 4"));
assert!(output.contains("telemt_user_unique_ips_utilization{user=\"alice\"} 0.250000"));
}
@@ -772,7 +1473,8 @@ mod tests {
assert!(output.contains("telemt_connections_total 0"));
assert!(output.contains("telemt_connections_bad_total 0"));
assert!(output.contains("telemt_handshake_timeouts_total 0"));
assert!(!output.contains("user="));
assert!(output.contains("telemt_user_unique_ips_current{user="));
assert!(output.contains("telemt_user_unique_ips_recent_window{user="));
}
#[tokio::test]
@@ -785,11 +1487,15 @@ mod tests {
assert!(output.contains("# TYPE telemt_connections_total counter"));
assert!(output.contains("# TYPE telemt_connections_bad_total counter"));
assert!(output.contains("# TYPE telemt_handshake_timeouts_total counter"));
assert!(output.contains("# TYPE telemt_upstream_connect_attempt_total counter"));
assert!(output.contains("# TYPE telemt_me_rpc_proxy_req_signal_sent_total counter"));
assert!(output.contains("# TYPE telemt_me_idle_close_by_peer_total counter"));
assert!(output.contains("# TYPE telemt_me_writer_removed_total counter"));
assert!(output.contains(
"# TYPE telemt_me_writer_removed_unexpected_minus_restored_total gauge"
));
assert!(output.contains("# TYPE telemt_user_unique_ips_current gauge"));
assert!(output.contains("# TYPE telemt_user_unique_ips_recent_window gauge"));
assert!(output.contains("# TYPE telemt_user_unique_ips_limit gauge"));
assert!(output.contains("# TYPE telemt_user_unique_ips_utilization gauge"));
}

View File

@@ -91,6 +91,11 @@ where
stats.increment_connects_all();
let mut real_peer = normalize_ip(peer);
// For non-TCP streams, use a synthetic local address; may be overridden by PROXY protocol dst
let mut local_addr: SocketAddr = format!("0.0.0.0:{}", config.server.port)
.parse()
.unwrap_or_else(|_| "0.0.0.0:443".parse().unwrap());
if proxy_protocol_enabled {
match parse_proxy_protocol(&mut stream, peer).await {
Ok(info) => {
@@ -101,6 +106,9 @@ where
"PROXY protocol header parsed"
);
real_peer = normalize_ip(info.src_addr);
if let Some(dst) = info.dst_addr {
local_addr = dst;
}
}
Err(e) => {
stats.increment_connects_bad();
@@ -119,11 +127,6 @@ where
let beobachten_for_timeout = beobachten.clone();
let peer_for_timeout = real_peer.ip();
// For non-TCP streams, use a synthetic local address
let local_addr: SocketAddr = format!("0.0.0.0:{}", config.server.port)
.parse()
.unwrap_or_else(|_| "0.0.0.0:443".parse().unwrap());
// Phase 1: handshake (with timeout)
let outcome = match timeout(handshake_timeout, async {
let mut first_bytes = [0u8; 5];
@@ -144,6 +147,7 @@ where
writer,
&first_bytes,
real_peer,
local_addr,
&config,
&beobachten,
)
@@ -169,6 +173,7 @@ where
writer,
&handshake,
real_peer,
local_addr,
&config,
&beobachten,
)
@@ -213,6 +218,7 @@ where
writer,
&first_bytes,
real_peer,
local_addr,
&config,
&beobachten,
)
@@ -238,6 +244,7 @@ where
writer,
&handshake,
real_peer,
local_addr,
&config,
&beobachten,
)
@@ -405,6 +412,8 @@ impl RunningClientHandler {
}
async fn do_handshake(mut self) -> Result<HandshakeOutcome> {
let mut local_addr = self.stream.local_addr().map_err(ProxyError::Io)?;
if self.proxy_protocol_enabled {
match parse_proxy_protocol(&mut self.stream, self.peer).await {
Ok(info) => {
@@ -415,6 +424,9 @@ impl RunningClientHandler {
"PROXY protocol header parsed"
);
self.peer = normalize_ip(info.src_addr);
if let Some(dst) = info.dst_addr {
local_addr = dst;
}
}
Err(e) => {
self.stats.increment_connects_bad();
@@ -440,13 +452,13 @@ impl RunningClientHandler {
debug!(peer = %peer, is_tls = is_tls, "Handshake type detected");
if is_tls {
self.handle_tls_client(first_bytes).await
self.handle_tls_client(first_bytes, local_addr).await
} else {
self.handle_direct_client(first_bytes).await
self.handle_direct_client(first_bytes, local_addr).await
}
}
async fn handle_tls_client(mut self, first_bytes: [u8; 5]) -> Result<HandshakeOutcome> {
async fn handle_tls_client(mut self, first_bytes: [u8; 5], local_addr: SocketAddr) -> Result<HandshakeOutcome> {
let peer = self.peer;
let _ip_tracker = self.ip_tracker.clone();
@@ -463,6 +475,7 @@ impl RunningClientHandler {
writer,
&first_bytes,
peer,
local_addr,
&self.config,
&self.beobachten,
)
@@ -479,7 +492,6 @@ impl RunningClientHandler {
let stats = self.stats.clone();
let buffer_pool = self.buffer_pool.clone();
let local_addr = self.stream.local_addr().map_err(ProxyError::Io)?;
let (read_half, write_half) = self.stream.into_split();
let (mut tls_reader, tls_writer, _tls_user) = match handle_tls_handshake(
@@ -502,6 +514,7 @@ impl RunningClientHandler {
writer,
&handshake,
peer,
local_addr,
&config,
&self.beobachten,
)
@@ -558,7 +571,7 @@ impl RunningClientHandler {
)))
}
async fn handle_direct_client(mut self, first_bytes: [u8; 5]) -> Result<HandshakeOutcome> {
async fn handle_direct_client(mut self, first_bytes: [u8; 5], local_addr: SocketAddr) -> Result<HandshakeOutcome> {
let peer = self.peer;
let _ip_tracker = self.ip_tracker.clone();
@@ -571,6 +584,7 @@ impl RunningClientHandler {
writer,
&first_bytes,
peer,
local_addr,
&self.config,
&self.beobachten,
)
@@ -587,7 +601,6 @@ impl RunningClientHandler {
let stats = self.stats.clone();
let buffer_pool = self.buffer_pool.clone();
let local_addr = self.stream.local_addr().map_err(ProxyError::Io)?;
let (read_half, write_half) = self.stream.into_split();
let (crypto_reader, crypto_writer, success) = match handle_mtproto_handshake(
@@ -609,6 +622,7 @@ impl RunningClientHandler {
writer,
&handshake,
peer,
local_addr,
&config,
&self.beobachten,
)
@@ -658,42 +672,16 @@ impl RunningClientHandler {
R: AsyncRead + Unpin + Send + 'static,
W: AsyncWrite + Unpin + Send + 'static,
{
let user = &success.user;
let user = success.user.clone();
if let Err(e) = Self::check_user_limits_static(user, &config, &stats, peer_addr, &ip_tracker).await {
if let Err(e) = Self::check_user_limits_static(&user, &config, &stats, peer_addr, &ip_tracker).await {
warn!(user = %user, error = %e, "User limit exceeded");
return Err(e);
}
// IP Cleanup Guard: автоматически удаляет IP при выходе из scope
struct IpCleanupGuard {
tracker: Arc<UserIpTracker>,
user: String,
ip: std::net::IpAddr,
}
impl Drop for IpCleanupGuard {
fn drop(&mut self) {
let tracker = self.tracker.clone();
let user = self.user.clone();
let ip = self.ip;
tokio::spawn(async move {
tracker.remove_ip(&user, ip).await;
debug!(user = %user, ip = %ip, "IP cleaned up on disconnect");
});
}
}
let _cleanup = IpCleanupGuard {
tracker: ip_tracker,
user: user.clone(),
ip: peer_addr.ip(),
};
// Decide: middle proxy or direct
if config.general.use_middle_proxy {
let relay_result = if config.general.use_middle_proxy {
if let Some(ref pool) = me_pool {
return handle_via_middle_proxy(
handle_via_middle_proxy(
client_reader,
client_writer,
success,
@@ -704,23 +692,38 @@ impl RunningClientHandler {
local_addr,
rng,
)
.await;
.await
} else {
warn!("use_middle_proxy=true but MePool not initialized, falling back to direct");
handle_via_direct(
client_reader,
client_writer,
success,
upstream_manager,
stats,
config,
buffer_pool,
rng,
)
.await
}
warn!("use_middle_proxy=true but MePool not initialized, falling back to direct");
}
} else {
// Direct mode (original behavior)
handle_via_direct(
client_reader,
client_writer,
success,
upstream_manager,
stats,
config,
buffer_pool,
rng,
)
.await
};
// Direct mode (original behavior)
handle_via_direct(
client_reader,
client_writer,
success,
upstream_manager,
stats,
config,
buffer_pool,
rng,
)
.await
ip_tracker.remove_ip(&user, peer_addr.ip()).await;
relay_result
}
async fn check_user_limits_static(
@@ -738,22 +741,32 @@ impl RunningClientHandler {
});
}
let mut ip_reserved = false;
// IP limit check
if let Err(reason) = ip_tracker.check_and_add(user, peer_addr.ip()).await {
warn!(
user = %user,
ip = %peer_addr.ip(),
reason = %reason,
"IP limit exceeded"
);
return Err(ProxyError::ConnectionLimitExceeded {
user: user.to_string(),
});
match ip_tracker.check_and_add(user, peer_addr.ip()).await {
Ok(()) => {
ip_reserved = true;
}
Err(reason) => {
warn!(
user = %user,
ip = %peer_addr.ip(),
reason = %reason,
"IP limit exceeded"
);
return Err(ProxyError::ConnectionLimitExceeded {
user: user.to_string(),
});
}
}
if let Some(limit) = config.access.user_max_tcp_conns.get(user)
&& stats.get_user_curr_connects(user) >= *limit as u64
{
if ip_reserved {
ip_tracker.remove_ip(user, peer_addr.ip()).await;
stats.increment_ip_reservation_rollback_tcp_limit_total();
}
return Err(ProxyError::ConnectionLimitExceeded {
user: user.to_string(),
});
@@ -762,6 +775,10 @@ impl RunningClientHandler {
if let Some(quota) = config.access.user_data_quota.get(user)
&& stats.get_user_total_octets(user) >= *quota
{
if ip_reserved {
ip_tracker.remove_ip(user, peer_addr.ip()).await;
stats.increment_ip_reservation_rollback_quota_limit_total();
}
return Err(ProxyError::DataQuotaExceeded {
user: user.to_string(),
});

View File

@@ -118,10 +118,16 @@ fn get_dc_addr_static(dc_idx: i16, config: &ProxyConfig) -> Result<SocketAddr> {
// Unknown DC requested by client without override: log and fall back.
if !config.dc_overrides.contains_key(&dc_key) {
warn!(dc_idx = dc_idx, "Requested non-standard DC with no override; falling back to default cluster");
if let Some(path) = &config.general.unknown_dc_log_path
&& let Ok(mut file) = OpenOptions::new().create(true).append(true).open(path)
if config.general.unknown_dc_file_log_enabled
&& let Some(path) = &config.general.unknown_dc_log_path
&& let Ok(handle) = tokio::runtime::Handle::try_current()
{
let _ = writeln!(file, "dc_idx={dc_idx}");
let path = path.clone();
handle.spawn_blocking(move || {
if let Ok(mut file) = OpenOptions::new().create(true).append(true).open(path) {
let _ = writeln!(file, "dc_idx={dc_idx}");
}
});
}
}

View File

@@ -55,6 +55,7 @@ pub async fn handle_bad_client<R, W>(
writer: W,
initial_data: &[u8],
peer: SocketAddr,
local_addr: SocketAddr,
config: &ProxyConfig,
beobachten: &BeobachtenStore,
)
@@ -87,7 +88,29 @@ where
let connect_result = timeout(MASK_TIMEOUT, UnixStream::connect(sock_path)).await;
match connect_result {
Ok(Ok(stream)) => {
let (mask_read, mask_write) = stream.into_split();
let (mask_read, mut mask_write) = stream.into_split();
let proxy_header: Option<Vec<u8>> = match config.censorship.mask_proxy_protocol {
0 => None,
version => {
let header = match version {
2 => ProxyProtocolV2Builder::new().with_addrs(peer, local_addr).build(),
_ => match (peer, local_addr) {
(SocketAddr::V4(src), SocketAddr::V4(dst)) =>
ProxyProtocolV1Builder::new().tcp4(src.into(), dst.into()).build(),
(SocketAddr::V6(src), SocketAddr::V6(dst)) =>
ProxyProtocolV1Builder::new().tcp6(src.into(), dst.into()).build(),
_ =>
ProxyProtocolV1Builder::new().build(),
},
};
Some(header)
}
};
if let Some(header) = proxy_header {
if mask_write.write_all(&header).await.is_err() {
return;
}
}
if timeout(MASK_RELAY_TIMEOUT, relay_to_mask(reader, writer, mask_read, mask_write, initial_data)).await.is_err() {
debug!("Mask relay timed out (unix socket)");
}
@@ -126,23 +149,16 @@ where
let proxy_header: Option<Vec<u8>> = match config.censorship.mask_proxy_protocol {
0 => None,
version => {
let header = if let Ok(local_addr) = stream.local_addr() {
match version {
2 => ProxyProtocolV2Builder::new().with_addrs(peer, local_addr).build(),
_ => match (peer, local_addr) {
(SocketAddr::V4(src), SocketAddr::V4(dst)) =>
ProxyProtocolV1Builder::new().tcp4(src.into(), dst.into()).build(),
(SocketAddr::V6(src), SocketAddr::V6(dst)) =>
ProxyProtocolV1Builder::new().tcp6(src.into(), dst.into()).build(),
_ =>
ProxyProtocolV1Builder::new().build(),
},
}
} else {
match version {
2 => ProxyProtocolV2Builder::new().build(),
_ => ProxyProtocolV1Builder::new().build(),
}
let header = match version {
2 => ProxyProtocolV2Builder::new().with_addrs(peer, local_addr).build(),
_ => match (peer, local_addr) {
(SocketAddr::V4(src), SocketAddr::V4(dst)) =>
ProxyProtocolV1Builder::new().tcp4(src.into(), dst.into()).build(),
(SocketAddr::V6(src), SocketAddr::V6(dst)) =>
ProxyProtocolV1Builder::new().tcp6(src.into(), dst.into()).build(),
_ =>
ProxyProtocolV1Builder::new().build(),
},
};
Some(header)
}

View File

@@ -26,6 +26,9 @@ enum C2MeCommand {
const DESYNC_DEDUP_WINDOW: Duration = Duration::from_secs(60);
const DESYNC_ERROR_CLASS: &str = "frame_too_large_crypto_desync";
const C2ME_CHANNEL_CAPACITY: usize = 1024;
const C2ME_SOFT_PRESSURE_MIN_FREE_SLOTS: usize = 64;
const C2ME_SENDER_FAIRNESS_BUDGET: usize = 32;
static DESYNC_DEDUP: OnceLock<Mutex<HashMap<u64, Instant>>> = OnceLock::new();
struct RelayForensicsState {
@@ -166,6 +169,27 @@ fn report_desync_frame_too_large(
))
}
fn should_yield_c2me_sender(sent_since_yield: usize, has_backlog: bool) -> bool {
has_backlog && sent_since_yield >= C2ME_SENDER_FAIRNESS_BUDGET
}
async fn enqueue_c2me_command(
tx: &mpsc::Sender<C2MeCommand>,
cmd: C2MeCommand,
) -> std::result::Result<(), mpsc::error::SendError<C2MeCommand>> {
match tx.try_send(cmd) {
Ok(()) => Ok(()),
Err(mpsc::error::TrySendError::Closed(cmd)) => Err(mpsc::error::SendError(cmd)),
Err(mpsc::error::TrySendError::Full(cmd)) => {
// Cooperative yield reduces burst catch-up when the per-conn queue is near saturation.
if tx.capacity() <= C2ME_SOFT_PRESSURE_MIN_FREE_SLOTS {
tokio::task::yield_now().await;
}
tx.send(cmd).await
}
}
}
pub(crate) async fn handle_via_middle_proxy<R, W>(
mut crypto_reader: CryptoReader<R>,
crypto_writer: CryptoWriter<W>,
@@ -214,7 +238,22 @@ where
stats.increment_user_connects(&user);
stats.increment_user_curr_connects(&user);
let proto_flags = proto_flags_for_tag(proto_tag, me_pool.has_proxy_tag());
// Per-user ad_tag from access.user_ad_tags; fallback to general.ad_tag (hot-reloadable)
let user_tag: Option<Vec<u8>> = config
.access
.user_ad_tags
.get(&user)
.and_then(|s| hex::decode(s).ok())
.filter(|v| v.len() == 16);
let global_tag: Option<Vec<u8>> = config
.general
.ad_tag
.as_ref()
.and_then(|s| hex::decode(s).ok())
.filter(|v| v.len() == 16);
let effective_tag = user_tag.or(global_tag);
let proto_flags = proto_flags_for_tag(proto_tag, effective_tag.is_some());
debug!(
trace_id = format_args!("0x{:016x}", trace_id),
user = %user,
@@ -230,9 +269,11 @@ where
let frame_limit = config.general.max_client_frame;
let (c2me_tx, mut c2me_rx) = mpsc::channel::<C2MeCommand>(1024);
let (c2me_tx, mut c2me_rx) = mpsc::channel::<C2MeCommand>(C2ME_CHANNEL_CAPACITY);
let me_pool_c2me = me_pool.clone();
let effective_tag = effective_tag;
let c2me_sender = tokio::spawn(async move {
let mut sent_since_yield = 0usize;
while let Some(cmd) = c2me_rx.recv().await {
match cmd {
C2MeCommand::Data { payload, flags } => {
@@ -243,7 +284,13 @@ where
translated_local_addr,
&payload,
flags,
effective_tag.as_deref(),
).await?;
sent_since_yield = sent_since_yield.saturating_add(1);
if should_yield_c2me_sender(sent_since_yield, !c2me_rx.is_empty()) {
sent_since_yield = 0;
tokio::task::yield_now().await;
}
}
C2MeCommand::Close => {
let _ = me_pool_c2me.send_close(conn_id).await;
@@ -360,8 +407,7 @@ where
flags |= RPC_FLAG_NOT_ENCRYPTED;
}
// Keep client read loop lightweight: route heavy ME send path via a dedicated task.
if c2me_tx
.send(C2MeCommand::Data { payload, flags })
if enqueue_c2me_command(&c2me_tx, C2MeCommand::Data { payload, flags })
.await
.is_err()
{
@@ -372,7 +418,7 @@ where
Ok(None) => {
debug!(conn_id, "Client EOF");
client_closed = true;
let _ = c2me_tx.send(C2MeCommand::Close).await;
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close).await;
break;
}
Err(e) => {
@@ -647,3 +693,84 @@ where
// ACK should remain low-latency.
client_writer.flush().await.map_err(ProxyError::Io)
}
#[cfg(test)]
mod tests {
use super::*;
use tokio::time::{Duration as TokioDuration, timeout};
#[test]
fn should_yield_sender_only_on_budget_with_backlog() {
assert!(!should_yield_c2me_sender(0, true));
assert!(!should_yield_c2me_sender(C2ME_SENDER_FAIRNESS_BUDGET - 1, true));
assert!(!should_yield_c2me_sender(C2ME_SENDER_FAIRNESS_BUDGET, false));
assert!(should_yield_c2me_sender(C2ME_SENDER_FAIRNESS_BUDGET, true));
}
#[tokio::test]
async fn enqueue_c2me_command_uses_try_send_fast_path() {
let (tx, mut rx) = mpsc::channel::<C2MeCommand>(2);
enqueue_c2me_command(
&tx,
C2MeCommand::Data {
payload: vec![1, 2, 3],
flags: 0,
},
)
.await
.unwrap();
let recv = timeout(TokioDuration::from_millis(50), rx.recv())
.await
.unwrap()
.unwrap();
match recv {
C2MeCommand::Data { payload, flags } => {
assert_eq!(payload, vec![1, 2, 3]);
assert_eq!(flags, 0);
}
C2MeCommand::Close => panic!("unexpected close command"),
}
}
#[tokio::test]
async fn enqueue_c2me_command_falls_back_to_send_when_queue_is_full() {
let (tx, mut rx) = mpsc::channel::<C2MeCommand>(1);
tx.send(C2MeCommand::Data {
payload: vec![9],
flags: 9,
})
.await
.unwrap();
let tx2 = tx.clone();
let producer = tokio::spawn(async move {
enqueue_c2me_command(
&tx2,
C2MeCommand::Data {
payload: vec![7, 7],
flags: 7,
},
)
.await
.unwrap();
});
let _ = timeout(TokioDuration::from_millis(100), rx.recv())
.await
.unwrap();
producer.await.unwrap();
let recv = timeout(TokioDuration::from_millis(100), rx.recv())
.await
.unwrap()
.unwrap();
match recv {
C2MeCommand::Data { payload, flags } => {
assert_eq!(payload, vec![7, 7]);
assert_eq!(flags, 7);
}
C2MeCommand::Close => panic!("unexpected close command"),
}
}
}

View File

@@ -26,14 +26,54 @@ pub struct Stats {
connects_all: AtomicU64,
connects_bad: AtomicU64,
handshake_timeouts: AtomicU64,
upstream_connect_attempt_total: AtomicU64,
upstream_connect_success_total: AtomicU64,
upstream_connect_fail_total: AtomicU64,
upstream_connect_failfast_hard_error_total: AtomicU64,
upstream_connect_attempts_bucket_1: AtomicU64,
upstream_connect_attempts_bucket_2: AtomicU64,
upstream_connect_attempts_bucket_3_4: AtomicU64,
upstream_connect_attempts_bucket_gt_4: AtomicU64,
upstream_connect_duration_success_bucket_le_100ms: AtomicU64,
upstream_connect_duration_success_bucket_101_500ms: AtomicU64,
upstream_connect_duration_success_bucket_501_1000ms: AtomicU64,
upstream_connect_duration_success_bucket_gt_1000ms: AtomicU64,
upstream_connect_duration_fail_bucket_le_100ms: AtomicU64,
upstream_connect_duration_fail_bucket_101_500ms: AtomicU64,
upstream_connect_duration_fail_bucket_501_1000ms: AtomicU64,
upstream_connect_duration_fail_bucket_gt_1000ms: AtomicU64,
me_keepalive_sent: AtomicU64,
me_keepalive_failed: AtomicU64,
me_keepalive_pong: AtomicU64,
me_keepalive_timeout: AtomicU64,
me_rpc_proxy_req_signal_sent_total: AtomicU64,
me_rpc_proxy_req_signal_failed_total: AtomicU64,
me_rpc_proxy_req_signal_skipped_no_meta_total: AtomicU64,
me_rpc_proxy_req_signal_response_total: AtomicU64,
me_rpc_proxy_req_signal_close_sent_total: AtomicU64,
me_reconnect_attempts: AtomicU64,
me_reconnect_success: AtomicU64,
me_handshake_reject_total: AtomicU64,
me_reader_eof_total: AtomicU64,
me_idle_close_by_peer_total: AtomicU64,
me_crc_mismatch: AtomicU64,
me_seq_mismatch: AtomicU64,
me_endpoint_quarantine_total: AtomicU64,
me_kdf_drift_total: AtomicU64,
me_kdf_port_only_drift_total: AtomicU64,
me_hardswap_pending_reuse_total: AtomicU64,
me_hardswap_pending_ttl_expired_total: AtomicU64,
me_single_endpoint_outage_enter_total: AtomicU64,
me_single_endpoint_outage_exit_total: AtomicU64,
me_single_endpoint_outage_reconnect_attempt_total: AtomicU64,
me_single_endpoint_outage_reconnect_success_total: AtomicU64,
me_single_endpoint_quarantine_bypass_total: AtomicU64,
me_single_endpoint_shadow_rotate_total: AtomicU64,
me_single_endpoint_shadow_rotate_skipped_quarantine_total: AtomicU64,
me_floor_mode_switch_total: AtomicU64,
me_floor_mode_switch_static_to_adaptive_total: AtomicU64,
me_floor_mode_switch_adaptive_to_static_total: AtomicU64,
me_handshake_error_codes: DashMap<i32, AtomicU64>,
me_route_drop_no_conn: AtomicU64,
me_route_drop_channel_closed: AtomicU64,
me_route_drop_queue_full: AtomicU64,
@@ -60,6 +100,11 @@ pub struct Stats {
me_refill_failed_total: AtomicU64,
me_writer_restored_same_endpoint_total: AtomicU64,
me_writer_restored_fallback_total: AtomicU64,
me_no_writer_failfast_total: AtomicU64,
me_async_recovery_trigger_total: AtomicU64,
me_inline_recovery_total: AtomicU64,
ip_reservation_rollback_tcp_limit_total: AtomicU64,
ip_reservation_rollback_quota_limit_total: AtomicU64,
telemetry_core_enabled: AtomicBool,
telemetry_user_enabled: AtomicBool,
telemetry_me_level: AtomicU8,
@@ -137,6 +182,99 @@ impl Stats {
self.handshake_timeouts.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_upstream_connect_attempt_total(&self) {
if self.telemetry_core_enabled() {
self.upstream_connect_attempt_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_upstream_connect_success_total(&self) {
if self.telemetry_core_enabled() {
self.upstream_connect_success_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_upstream_connect_fail_total(&self) {
if self.telemetry_core_enabled() {
self.upstream_connect_fail_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_upstream_connect_failfast_hard_error_total(&self) {
if self.telemetry_core_enabled() {
self.upstream_connect_failfast_hard_error_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn observe_upstream_connect_attempts_per_request(&self, attempts: u32) {
if !self.telemetry_core_enabled() {
return;
}
match attempts {
0 => {}
1 => {
self.upstream_connect_attempts_bucket_1
.fetch_add(1, Ordering::Relaxed);
}
2 => {
self.upstream_connect_attempts_bucket_2
.fetch_add(1, Ordering::Relaxed);
}
3..=4 => {
self.upstream_connect_attempts_bucket_3_4
.fetch_add(1, Ordering::Relaxed);
}
_ => {
self.upstream_connect_attempts_bucket_gt_4
.fetch_add(1, Ordering::Relaxed);
}
}
}
pub fn observe_upstream_connect_duration_ms(&self, duration_ms: u64, success: bool) {
if !self.telemetry_core_enabled() {
return;
}
let bucket = match duration_ms {
0..=100 => 0u8,
101..=500 => 1u8,
501..=1000 => 2u8,
_ => 3u8,
};
match (success, bucket) {
(true, 0) => {
self.upstream_connect_duration_success_bucket_le_100ms
.fetch_add(1, Ordering::Relaxed);
}
(true, 1) => {
self.upstream_connect_duration_success_bucket_101_500ms
.fetch_add(1, Ordering::Relaxed);
}
(true, 2) => {
self.upstream_connect_duration_success_bucket_501_1000ms
.fetch_add(1, Ordering::Relaxed);
}
(true, _) => {
self.upstream_connect_duration_success_bucket_gt_1000ms
.fetch_add(1, Ordering::Relaxed);
}
(false, 0) => {
self.upstream_connect_duration_fail_bucket_le_100ms
.fetch_add(1, Ordering::Relaxed);
}
(false, 1) => {
self.upstream_connect_duration_fail_bucket_101_500ms
.fetch_add(1, Ordering::Relaxed);
}
(false, 2) => {
self.upstream_connect_duration_fail_bucket_501_1000ms
.fetch_add(1, Ordering::Relaxed);
}
(false, _) => {
self.upstream_connect_duration_fail_bucket_gt_1000ms
.fetch_add(1, Ordering::Relaxed);
}
}
}
pub fn increment_me_keepalive_sent(&self) {
if self.telemetry_me_allows_debug() {
self.me_keepalive_sent.fetch_add(1, Ordering::Relaxed);
@@ -162,6 +300,36 @@ impl Stats {
self.me_keepalive_timeout.fetch_add(value, Ordering::Relaxed);
}
}
pub fn increment_me_rpc_proxy_req_signal_sent_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_rpc_proxy_req_signal_sent_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_rpc_proxy_req_signal_failed_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_rpc_proxy_req_signal_failed_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_rpc_proxy_req_signal_skipped_no_meta_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_rpc_proxy_req_signal_skipped_no_meta_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_rpc_proxy_req_signal_response_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_rpc_proxy_req_signal_response_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_rpc_proxy_req_signal_close_sent_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_rpc_proxy_req_signal_close_sent_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_reconnect_attempt(&self) {
if self.telemetry_me_allows_normal() {
self.me_reconnect_attempts.fetch_add(1, Ordering::Relaxed);
@@ -172,6 +340,32 @@ impl Stats {
self.me_reconnect_success.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_handshake_reject_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_handshake_reject_total.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_handshake_error_code(&self, code: i32) {
if !self.telemetry_me_allows_normal() {
return;
}
let entry = self
.me_handshake_error_codes
.entry(code)
.or_insert_with(|| AtomicU64::new(0));
entry.fetch_add(1, Ordering::Relaxed);
}
pub fn increment_me_reader_eof_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_reader_eof_total.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_idle_close_by_peer_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_idle_close_by_peer_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_crc_mismatch(&self) {
if self.telemetry_me_allows_normal() {
self.me_crc_mismatch.fetch_add(1, Ordering::Relaxed);
@@ -257,7 +451,7 @@ impl Stats {
}
}
pub fn increment_pool_swap_total(&self) {
if self.telemetry_me_allows_debug() {
if self.telemetry_me_allows_normal() {
self.pool_swap_total.fetch_add(1, Ordering::Relaxed);
}
}
@@ -333,16 +527,227 @@ impl Stats {
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_no_writer_failfast_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_no_writer_failfast_total.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_async_recovery_trigger_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_async_recovery_trigger_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_inline_recovery_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_inline_recovery_total.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_ip_reservation_rollback_tcp_limit_total(&self) {
if self.telemetry_core_enabled() {
self.ip_reservation_rollback_tcp_limit_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_ip_reservation_rollback_quota_limit_total(&self) {
if self.telemetry_core_enabled() {
self.ip_reservation_rollback_quota_limit_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_endpoint_quarantine_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_endpoint_quarantine_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_kdf_drift_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_kdf_drift_total.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_kdf_port_only_drift_total(&self) {
if self.telemetry_me_allows_debug() {
self.me_kdf_port_only_drift_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_hardswap_pending_reuse_total(&self) {
if self.telemetry_me_allows_debug() {
self.me_hardswap_pending_reuse_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_hardswap_pending_ttl_expired_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_hardswap_pending_ttl_expired_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_single_endpoint_outage_enter_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_single_endpoint_outage_enter_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_single_endpoint_outage_exit_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_single_endpoint_outage_exit_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_single_endpoint_outage_reconnect_attempt_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_single_endpoint_outage_reconnect_attempt_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_single_endpoint_outage_reconnect_success_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_single_endpoint_outage_reconnect_success_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_single_endpoint_quarantine_bypass_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_single_endpoint_quarantine_bypass_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_single_endpoint_shadow_rotate_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_single_endpoint_shadow_rotate_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_single_endpoint_shadow_rotate_skipped_quarantine_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_single_endpoint_shadow_rotate_skipped_quarantine_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_floor_mode_switch_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_floor_mode_switch_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_floor_mode_switch_static_to_adaptive_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_floor_mode_switch_static_to_adaptive_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_floor_mode_switch_adaptive_to_static_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_floor_mode_switch_adaptive_to_static_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn get_connects_all(&self) -> u64 { self.connects_all.load(Ordering::Relaxed) }
pub fn get_connects_bad(&self) -> u64 { self.connects_bad.load(Ordering::Relaxed) }
pub fn get_me_keepalive_sent(&self) -> u64 { self.me_keepalive_sent.load(Ordering::Relaxed) }
pub fn get_me_keepalive_failed(&self) -> u64 { self.me_keepalive_failed.load(Ordering::Relaxed) }
pub fn get_me_keepalive_pong(&self) -> u64 { self.me_keepalive_pong.load(Ordering::Relaxed) }
pub fn get_me_keepalive_timeout(&self) -> u64 { self.me_keepalive_timeout.load(Ordering::Relaxed) }
pub fn get_me_rpc_proxy_req_signal_sent_total(&self) -> u64 {
self.me_rpc_proxy_req_signal_sent_total
.load(Ordering::Relaxed)
}
pub fn get_me_rpc_proxy_req_signal_failed_total(&self) -> u64 {
self.me_rpc_proxy_req_signal_failed_total
.load(Ordering::Relaxed)
}
pub fn get_me_rpc_proxy_req_signal_skipped_no_meta_total(&self) -> u64 {
self.me_rpc_proxy_req_signal_skipped_no_meta_total
.load(Ordering::Relaxed)
}
pub fn get_me_rpc_proxy_req_signal_response_total(&self) -> u64 {
self.me_rpc_proxy_req_signal_response_total
.load(Ordering::Relaxed)
}
pub fn get_me_rpc_proxy_req_signal_close_sent_total(&self) -> u64 {
self.me_rpc_proxy_req_signal_close_sent_total
.load(Ordering::Relaxed)
}
pub fn get_me_reconnect_attempts(&self) -> u64 { self.me_reconnect_attempts.load(Ordering::Relaxed) }
pub fn get_me_reconnect_success(&self) -> u64 { self.me_reconnect_success.load(Ordering::Relaxed) }
pub fn get_me_handshake_reject_total(&self) -> u64 {
self.me_handshake_reject_total.load(Ordering::Relaxed)
}
pub fn get_me_reader_eof_total(&self) -> u64 {
self.me_reader_eof_total.load(Ordering::Relaxed)
}
pub fn get_me_idle_close_by_peer_total(&self) -> u64 {
self.me_idle_close_by_peer_total.load(Ordering::Relaxed)
}
pub fn get_me_crc_mismatch(&self) -> u64 { self.me_crc_mismatch.load(Ordering::Relaxed) }
pub fn get_me_seq_mismatch(&self) -> u64 { self.me_seq_mismatch.load(Ordering::Relaxed) }
pub fn get_me_endpoint_quarantine_total(&self) -> u64 {
self.me_endpoint_quarantine_total.load(Ordering::Relaxed)
}
pub fn get_me_kdf_drift_total(&self) -> u64 {
self.me_kdf_drift_total.load(Ordering::Relaxed)
}
pub fn get_me_kdf_port_only_drift_total(&self) -> u64 {
self.me_kdf_port_only_drift_total.load(Ordering::Relaxed)
}
pub fn get_me_hardswap_pending_reuse_total(&self) -> u64 {
self.me_hardswap_pending_reuse_total
.load(Ordering::Relaxed)
}
pub fn get_me_hardswap_pending_ttl_expired_total(&self) -> u64 {
self.me_hardswap_pending_ttl_expired_total
.load(Ordering::Relaxed)
}
pub fn get_me_single_endpoint_outage_enter_total(&self) -> u64 {
self.me_single_endpoint_outage_enter_total
.load(Ordering::Relaxed)
}
pub fn get_me_single_endpoint_outage_exit_total(&self) -> u64 {
self.me_single_endpoint_outage_exit_total
.load(Ordering::Relaxed)
}
pub fn get_me_single_endpoint_outage_reconnect_attempt_total(&self) -> u64 {
self.me_single_endpoint_outage_reconnect_attempt_total
.load(Ordering::Relaxed)
}
pub fn get_me_single_endpoint_outage_reconnect_success_total(&self) -> u64 {
self.me_single_endpoint_outage_reconnect_success_total
.load(Ordering::Relaxed)
}
pub fn get_me_single_endpoint_quarantine_bypass_total(&self) -> u64 {
self.me_single_endpoint_quarantine_bypass_total
.load(Ordering::Relaxed)
}
pub fn get_me_single_endpoint_shadow_rotate_total(&self) -> u64 {
self.me_single_endpoint_shadow_rotate_total
.load(Ordering::Relaxed)
}
pub fn get_me_single_endpoint_shadow_rotate_skipped_quarantine_total(&self) -> u64 {
self.me_single_endpoint_shadow_rotate_skipped_quarantine_total
.load(Ordering::Relaxed)
}
pub fn get_me_floor_mode_switch_total(&self) -> u64 {
self.me_floor_mode_switch_total.load(Ordering::Relaxed)
}
pub fn get_me_floor_mode_switch_static_to_adaptive_total(&self) -> u64 {
self.me_floor_mode_switch_static_to_adaptive_total
.load(Ordering::Relaxed)
}
pub fn get_me_floor_mode_switch_adaptive_to_static_total(&self) -> u64 {
self.me_floor_mode_switch_adaptive_to_static_total
.load(Ordering::Relaxed)
}
pub fn get_me_handshake_error_code_counts(&self) -> Vec<(i32, u64)> {
let mut out: Vec<(i32, u64)> = self
.me_handshake_error_codes
.iter()
.map(|entry| (*entry.key(), entry.value().load(Ordering::Relaxed)))
.collect();
out.sort_by_key(|(code, _)| *code);
out
}
pub fn get_me_route_drop_no_conn(&self) -> u64 { self.me_route_drop_no_conn.load(Ordering::Relaxed) }
pub fn get_me_route_drop_channel_closed(&self) -> u64 {
self.me_route_drop_channel_closed.load(Ordering::Relaxed)
@@ -419,6 +824,23 @@ impl Stats {
pub fn get_me_writer_restored_fallback_total(&self) -> u64 {
self.me_writer_restored_fallback_total.load(Ordering::Relaxed)
}
pub fn get_me_no_writer_failfast_total(&self) -> u64 {
self.me_no_writer_failfast_total.load(Ordering::Relaxed)
}
pub fn get_me_async_recovery_trigger_total(&self) -> u64 {
self.me_async_recovery_trigger_total.load(Ordering::Relaxed)
}
pub fn get_me_inline_recovery_total(&self) -> u64 {
self.me_inline_recovery_total.load(Ordering::Relaxed)
}
pub fn get_ip_reservation_rollback_tcp_limit_total(&self) -> u64 {
self.ip_reservation_rollback_tcp_limit_total
.load(Ordering::Relaxed)
}
pub fn get_ip_reservation_rollback_quota_limit_total(&self) -> u64 {
self.ip_reservation_rollback_quota_limit_total
.load(Ordering::Relaxed)
}
pub fn increment_user_connects(&self, user: &str) {
if !self.telemetry_user_enabled() {
@@ -505,6 +927,65 @@ impl Stats {
}
pub fn get_handshake_timeouts(&self) -> u64 { self.handshake_timeouts.load(Ordering::Relaxed) }
pub fn get_upstream_connect_attempt_total(&self) -> u64 {
self.upstream_connect_attempt_total.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_success_total(&self) -> u64 {
self.upstream_connect_success_total.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_fail_total(&self) -> u64 {
self.upstream_connect_fail_total.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_failfast_hard_error_total(&self) -> u64 {
self.upstream_connect_failfast_hard_error_total
.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_attempts_bucket_1(&self) -> u64 {
self.upstream_connect_attempts_bucket_1.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_attempts_bucket_2(&self) -> u64 {
self.upstream_connect_attempts_bucket_2.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_attempts_bucket_3_4(&self) -> u64 {
self.upstream_connect_attempts_bucket_3_4
.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_attempts_bucket_gt_4(&self) -> u64 {
self.upstream_connect_attempts_bucket_gt_4
.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_duration_success_bucket_le_100ms(&self) -> u64 {
self.upstream_connect_duration_success_bucket_le_100ms
.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_duration_success_bucket_101_500ms(&self) -> u64 {
self.upstream_connect_duration_success_bucket_101_500ms
.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_duration_success_bucket_501_1000ms(&self) -> u64 {
self.upstream_connect_duration_success_bucket_501_1000ms
.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_duration_success_bucket_gt_1000ms(&self) -> u64 {
self.upstream_connect_duration_success_bucket_gt_1000ms
.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_duration_fail_bucket_le_100ms(&self) -> u64 {
self.upstream_connect_duration_fail_bucket_le_100ms
.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_duration_fail_bucket_101_500ms(&self) -> u64 {
self.upstream_connect_duration_fail_bucket_101_500ms
.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_duration_fail_bucket_501_1000ms(&self) -> u64 {
self.upstream_connect_duration_fail_bucket_501_1000ms
.load(Ordering::Relaxed)
}
pub fn get_upstream_connect_duration_fail_bucket_gt_1000ms(&self) -> u64 {
self.upstream_connect_duration_fail_bucket_gt_1000ms
.load(Ordering::Relaxed)
}
pub fn iter_user_stats(&self) -> dashmap::iter::Iter<'_, String, UserStats> {
self.user_stats.iter()

View File

@@ -336,22 +336,35 @@ impl PendingCiphertext {
}
fn remaining_capacity(&self) -> usize {
self.max_len.saturating_sub(self.buf.len())
self.max_len.saturating_sub(self.pending_len())
}
fn compact_consumed_prefix(&mut self) {
if self.pos == 0 {
return;
}
if self.pos >= self.buf.len() {
self.buf.clear();
self.pos = 0;
return;
}
let _ = self.buf.split_to(self.pos);
self.pos = 0;
}
fn advance(&mut self, n: usize) {
self.pos = (self.pos + n).min(self.buf.len());
if self.pos == self.buf.len() {
self.buf.clear();
self.pos = 0;
self.compact_consumed_prefix();
return;
}
// Compact when a large prefix was consumed.
if self.pos >= 16 * 1024 {
let _ = self.buf.split_to(self.pos);
self.pos = 0;
self.compact_consumed_prefix();
}
}
@@ -379,6 +392,11 @@ impl PendingCiphertext {
));
}
// Reclaim consumed prefix when physical storage is the only limiter.
if self.pos > 0 && self.buf.len() + plaintext.len() > self.max_len {
self.compact_consumed_prefix();
}
let start = self.buf.len();
self.buf.reserve(plaintext.len());
self.buf.extend_from_slice(plaintext);
@@ -777,3 +795,70 @@ impl<S: AsyncWrite + Unpin> AsyncWrite for PassthroughStream<S> {
Pin::new(&mut self.inner).poll_shutdown(cx)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn test_ctr() -> AesCtr {
AesCtr::new(&[0x11; 32], 0x0102_0304_0506_0708_1112_1314_1516_1718)
}
#[test]
fn pending_capacity_reclaims_after_partial_advance_without_compaction_threshold() {
let mut pending = PendingCiphertext::new(1024);
let mut ctr = test_ctr();
let payload = vec![0x41; 900];
pending.push_encrypted(&mut ctr, &payload).unwrap();
// Keep position below compaction threshold to validate logical-capacity accounting.
pending.advance(800);
assert_eq!(pending.pending_len(), 100);
assert_eq!(pending.remaining_capacity(), 924);
}
#[test]
fn push_encrypted_respects_pending_limit() {
let mut pending = PendingCiphertext::new(64);
let mut ctr = test_ctr();
pending.push_encrypted(&mut ctr, &[0x10; 64]).unwrap();
let err = pending.push_encrypted(&mut ctr, &[0x20]).unwrap_err();
assert_eq!(err.kind(), ErrorKind::WouldBlock);
}
#[test]
fn push_encrypted_compacts_prefix_when_physical_buffer_would_overflow() {
let mut pending = PendingCiphertext::new(64);
let mut ctr = test_ctr();
pending.push_encrypted(&mut ctr, &[0x22; 60]).unwrap();
pending.advance(30);
pending.push_encrypted(&mut ctr, &[0x33; 30]).unwrap();
assert_eq!(pending.pending_len(), 60);
assert!(pending.buf.len() <= 64);
}
#[test]
fn pending_ciphertext_preserves_stream_order_across_drain_and_append() {
let mut pending = PendingCiphertext::new(128);
let mut ctr = test_ctr();
let first = vec![0xA1; 80];
let second = vec![0xB2; 40];
pending.push_encrypted(&mut ctr, &first).unwrap();
pending.advance(50);
pending.push_encrypted(&mut ctr, &second).unwrap();
let mut baseline_ctr = test_ctr();
let mut baseline_plain = Vec::with_capacity(first.len() + second.len());
baseline_plain.extend_from_slice(&first);
baseline_plain.extend_from_slice(&second);
baseline_ctr.apply(&mut baseline_plain);
let expected = &baseline_plain[50..];
assert_eq!(pending.pending_slice(), expected);
}
}

View File

@@ -499,7 +499,7 @@ async fn fetch_via_raw_tls(
sock = %sock_path,
"Raw TLS fetch using mask unix socket"
);
return fetch_via_raw_tls_stream(stream, sni, connect_timeout, 0).await;
return fetch_via_raw_tls_stream(stream, sni, connect_timeout, proxy_protocol).await;
}
Ok(Err(e)) => {
warn!(
@@ -631,7 +631,7 @@ async fn fetch_via_rustls(
sock = %sock_path,
"Rustls fetch using mask unix socket"
);
return fetch_via_rustls_stream(stream, host, sni, 0).await;
return fetch_via_rustls_stream(stream, host, sni, proxy_protocol).await;
}
Ok(Err(e)) => {
warn!(

View File

@@ -1,19 +1,20 @@
use std::collections::HashMap;
use std::hash::{DefaultHasher, Hash, Hasher};
use std::net::IpAddr;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use httpdate;
use tokio::sync::watch;
use tokio::sync::{mpsc, watch};
use tracing::{debug, info, warn};
use crate::config::ProxyConfig;
use crate::error::Result;
use super::MePool;
use super::rotation::{MeReinitTrigger, enqueue_reinit_trigger};
use super::secret::download_proxy_secret_with_max_len;
use crate::crypto::SecureRandom;
use std::time::SystemTime;
async fn retry_fetch(url: &str) -> Option<ProxyConfigData> {
@@ -38,6 +39,89 @@ async fn retry_fetch(url: &str) -> Option<ProxyConfigData> {
pub struct ProxyConfigData {
pub map: HashMap<i32, Vec<(IpAddr, u16)>>,
pub default_dc: Option<i32>,
pub http_status: u16,
pub proxy_for_lines: u32,
}
pub fn parse_proxy_config_text(text: &str, http_status: u16) -> ProxyConfigData {
let mut map: HashMap<i32, Vec<(IpAddr, u16)>> = HashMap::new();
let mut proxy_for_lines: u32 = 0;
for line in text.lines() {
if let Some((dc, ip, port)) = parse_proxy_line(line) {
map.entry(dc).or_default().push((ip, port));
proxy_for_lines = proxy_for_lines.saturating_add(1);
}
}
let default_dc = text.lines().find_map(|l| {
let t = l.trim();
if let Some(rest) = t.strip_prefix("default") {
return rest.trim().trim_end_matches(';').parse::<i32>().ok();
}
None
});
ProxyConfigData {
map,
default_dc,
http_status,
proxy_for_lines,
}
}
pub async fn load_proxy_config_cache(path: &str) -> Result<ProxyConfigData> {
let text = tokio::fs::read_to_string(path).await.map_err(|e| {
crate::error::ProxyError::Proxy(format!("read proxy-config cache '{path}' failed: {e}"))
})?;
Ok(parse_proxy_config_text(&text, 200))
}
pub async fn save_proxy_config_cache(path: &str, raw_text: &str) -> Result<()> {
if let Some(parent) = Path::new(path).parent()
&& !parent.as_os_str().is_empty()
{
tokio::fs::create_dir_all(parent).await.map_err(|e| {
crate::error::ProxyError::Proxy(format!(
"create proxy-config cache dir '{}' failed: {e}",
parent.display()
))
})?;
}
tokio::fs::write(path, raw_text).await.map_err(|e| {
crate::error::ProxyError::Proxy(format!("write proxy-config cache '{path}' failed: {e}"))
})?;
Ok(())
}
pub async fn fetch_proxy_config_with_raw(url: &str) -> Result<(ProxyConfigData, String)> {
let resp = reqwest::get(url)
.await
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config GET failed: {e}")))?
;
let http_status = resp.status().as_u16();
if let Some(date) = resp.headers().get(reqwest::header::DATE)
&& let Ok(date_str) = date.to_str()
&& let Ok(server_time) = httpdate::parse_http_date(date_str)
&& let Ok(skew) = SystemTime::now().duration_since(server_time).or_else(|e| {
server_time.duration_since(SystemTime::now()).map_err(|_| e)
})
{
let skew_secs = skew.as_secs();
if skew_secs > 60 {
warn!(skew_secs, "Time skew >60s detected from fetch_proxy_config Date header");
} else if skew_secs > 30 {
warn!(skew_secs, "Time skew >30s detected from fetch_proxy_config Date header");
}
}
let text = resp
.text()
.await
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config read failed: {e}")))?;
let parsed = parse_proxy_config_text(&text, http_status);
Ok((parsed, text))
}
#[derive(Debug, Default)]
@@ -168,60 +252,46 @@ fn parse_proxy_line(line: &str) -> Option<(i32, IpAddr, u16)> {
}
pub async fn fetch_proxy_config(url: &str) -> Result<ProxyConfigData> {
let resp = reqwest::get(url)
fetch_proxy_config_with_raw(url)
.await
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config GET failed: {e}")))?
;
.map(|(parsed, _raw)| parsed)
}
if let Some(date) = resp.headers().get(reqwest::header::DATE)
&& let Ok(date_str) = date.to_str()
&& let Ok(server_time) = httpdate::parse_http_date(date_str)
&& let Ok(skew) = SystemTime::now().duration_since(server_time).or_else(|e| {
server_time.duration_since(SystemTime::now()).map_err(|_| e)
})
fn snapshot_passes_guards(
cfg: &ProxyConfig,
snapshot: &ProxyConfigData,
snapshot_name: &'static str,
) -> bool {
if cfg.general.me_snapshot_require_http_2xx
&& !(200..=299).contains(&snapshot.http_status)
{
let skew_secs = skew.as_secs();
if skew_secs > 60 {
warn!(skew_secs, "Time skew >60s detected from fetch_proxy_config Date header");
} else if skew_secs > 30 {
warn!(skew_secs, "Time skew >30s detected from fetch_proxy_config Date header");
}
warn!(
snapshot = snapshot_name,
http_status = snapshot.http_status,
"ME snapshot rejected by non-2xx HTTP status"
);
return false;
}
let text = resp
.text()
.await
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config read failed: {e}")))?;
let mut map: HashMap<i32, Vec<(IpAddr, u16)>> = HashMap::new();
for line in text.lines() {
if let Some((dc, ip, port)) = parse_proxy_line(line) {
map.entry(dc).or_default().push((ip, port));
}
let min_proxy_for = cfg.general.me_snapshot_min_proxy_for_lines;
if snapshot.proxy_for_lines < min_proxy_for {
warn!(
snapshot = snapshot_name,
parsed_proxy_for_lines = snapshot.proxy_for_lines,
min_proxy_for_lines = min_proxy_for,
"ME snapshot rejected by proxy_for line floor"
);
return false;
}
let default_dc = text
.lines()
.find_map(|l| {
let t = l.trim();
if let Some(rest) = t.strip_prefix("default") {
return rest
.trim()
.trim_end_matches(';')
.parse::<i32>()
.ok();
}
None
});
Ok(ProxyConfigData { map, default_dc })
true
}
async fn run_update_cycle(
pool: &Arc<MePool>,
rng: &Arc<SecureRandom>,
cfg: &ProxyConfig,
state: &mut UpdaterState,
reinit_tx: &mpsc::Sender<MeReinitTrigger>,
) {
pool.update_runtime_reinit_policy(
cfg.general.hardswap,
@@ -232,6 +302,20 @@ async fn run_update_cycle(
cfg.general.me_hardswap_warmup_delay_max_ms,
cfg.general.me_hardswap_warmup_extra_passes,
cfg.general.me_hardswap_warmup_pass_backoff_base_ms,
cfg.general.me_bind_stale_mode,
cfg.general.me_bind_stale_ttl_secs,
cfg.general.me_secret_atomic_snapshot,
cfg.general.me_deterministic_writer_sort,
cfg.general.me_single_endpoint_shadow_writers,
cfg.general.me_single_endpoint_outage_mode_enabled,
cfg.general.me_single_endpoint_outage_disable_quarantine,
cfg.general.me_single_endpoint_outage_backoff_min_ms,
cfg.general.me_single_endpoint_outage_backoff_max_ms,
cfg.general.me_single_endpoint_shadow_rotate_every_secs,
cfg.general.me_floor_mode,
cfg.general.me_adaptive_floor_idle_secs,
cfg.general.me_adaptive_floor_min_writers_single_endpoint,
cfg.general.me_adaptive_floor_recover_grace_secs,
);
let required_cfg_snapshots = cfg.general.me_config_stable_snapshots.max(1);
@@ -242,44 +326,48 @@ async fn run_update_cycle(
let mut ready_v4: Option<(ProxyConfigData, u64)> = None;
let cfg_v4 = retry_fetch("https://core.telegram.org/getProxyConfig").await;
if let Some(cfg_v4) = cfg_v4 {
let cfg_v4_hash = hash_proxy_config(&cfg_v4);
let stable_hits = state.config_v4.observe(cfg_v4_hash);
if stable_hits < required_cfg_snapshots {
debug!(
stable_hits,
required_cfg_snapshots,
snapshot = format_args!("0x{cfg_v4_hash:016x}"),
"ME config v4 candidate observed"
);
} else if state.config_v4.is_applied(cfg_v4_hash) {
debug!(
snapshot = format_args!("0x{cfg_v4_hash:016x}"),
"ME config v4 stable snapshot already applied"
);
} else {
ready_v4 = Some((cfg_v4, cfg_v4_hash));
if snapshot_passes_guards(cfg, &cfg_v4, "getProxyConfig") {
let cfg_v4_hash = hash_proxy_config(&cfg_v4);
let stable_hits = state.config_v4.observe(cfg_v4_hash);
if stable_hits < required_cfg_snapshots {
debug!(
stable_hits,
required_cfg_snapshots,
snapshot = format_args!("0x{cfg_v4_hash:016x}"),
"ME config v4 candidate observed"
);
} else if state.config_v4.is_applied(cfg_v4_hash) {
debug!(
snapshot = format_args!("0x{cfg_v4_hash:016x}"),
"ME config v4 stable snapshot already applied"
);
} else {
ready_v4 = Some((cfg_v4, cfg_v4_hash));
}
}
}
let mut ready_v6: Option<(ProxyConfigData, u64)> = None;
let cfg_v6 = retry_fetch("https://core.telegram.org/getProxyConfigV6").await;
if let Some(cfg_v6) = cfg_v6 {
let cfg_v6_hash = hash_proxy_config(&cfg_v6);
let stable_hits = state.config_v6.observe(cfg_v6_hash);
if stable_hits < required_cfg_snapshots {
debug!(
stable_hits,
required_cfg_snapshots,
snapshot = format_args!("0x{cfg_v6_hash:016x}"),
"ME config v6 candidate observed"
);
} else if state.config_v6.is_applied(cfg_v6_hash) {
debug!(
snapshot = format_args!("0x{cfg_v6_hash:016x}"),
"ME config v6 stable snapshot already applied"
);
} else {
ready_v6 = Some((cfg_v6, cfg_v6_hash));
if snapshot_passes_guards(cfg, &cfg_v6, "getProxyConfigV6") {
let cfg_v6_hash = hash_proxy_config(&cfg_v6);
let stable_hits = state.config_v6.observe(cfg_v6_hash);
if stable_hits < required_cfg_snapshots {
debug!(
stable_hits,
required_cfg_snapshots,
snapshot = format_args!("0x{cfg_v6_hash:016x}"),
"ME config v6 candidate observed"
);
} else if state.config_v6.is_applied(cfg_v6_hash) {
debug!(
snapshot = format_args!("0x{cfg_v6_hash:016x}"),
"ME config v6 stable snapshot already applied"
);
} else {
ready_v6 = Some((cfg_v6, cfg_v6_hash));
}
}
}
@@ -292,28 +380,40 @@ async fn run_update_cycle(
let update_v6 = ready_v6
.as_ref()
.map(|(snapshot, _)| snapshot.map.clone());
let changed = pool.update_proxy_maps(update_v4, update_v6).await;
if let Some((snapshot, hash)) = ready_v4 {
if let Some(dc) = snapshot.default_dc {
pool.default_dc
.store(dc, std::sync::atomic::Ordering::Relaxed);
}
state.config_v4.mark_applied(hash);
}
if let Some((_snapshot, hash)) = ready_v6 {
state.config_v6.mark_applied(hash);
}
state.last_map_apply_at = Some(tokio::time::Instant::now());
if changed {
maps_changed = true;
info!("ME config update applied after stable-gate");
let update_is_empty =
update_v4.is_empty() && update_v6.as_ref().is_none_or(|v| v.is_empty());
let apply_outcome = if update_is_empty && !cfg.general.me_snapshot_reject_empty_map {
super::pool_config::SnapshotApplyOutcome::AppliedNoDelta
} else {
debug!("ME config stable-gate applied with no map delta");
pool.update_proxy_maps(update_v4, update_v6).await
};
if matches!(
apply_outcome,
super::pool_config::SnapshotApplyOutcome::RejectedEmpty
) {
warn!("ME config stable snapshot rejected (empty endpoint map)");
} else {
if let Some((snapshot, hash)) = ready_v4 {
if let Some(dc) = snapshot.default_dc {
pool.default_dc
.store(dc, std::sync::atomic::Ordering::Relaxed);
}
state.config_v4.mark_applied(hash);
}
if let Some((_snapshot, hash)) = ready_v6 {
state.config_v6.mark_applied(hash);
}
state.last_map_apply_at = Some(tokio::time::Instant::now());
if apply_outcome.changed() {
maps_changed = true;
info!("ME config update applied after stable-gate");
} else {
debug!("ME config stable-gate applied with no map delta");
}
}
} else if let Some(last) = state.last_map_apply_at {
let wait_secs = map_apply_cooldown_remaining_secs(last, apply_cooldown);
@@ -325,8 +425,7 @@ async fn run_update_cycle(
}
if maps_changed {
pool.zero_downtime_reinit_after_map_change(rng.as_ref())
.await;
enqueue_reinit_trigger(reinit_tx, MeReinitTrigger::MapChanged);
}
pool.reset_stun_state();
@@ -367,8 +466,8 @@ async fn run_update_cycle(
pub async fn me_config_updater(
pool: Arc<MePool>,
rng: Arc<SecureRandom>,
mut config_rx: watch::Receiver<Arc<ProxyConfig>>,
reinit_tx: mpsc::Sender<MeReinitTrigger>,
) {
let mut state = UpdaterState::default();
let mut update_every_secs = config_rx
@@ -387,7 +486,7 @@ pub async fn me_config_updater(
tokio::select! {
_ = &mut sleep => {
let cfg = config_rx.borrow().clone();
run_update_cycle(&pool, &rng, cfg.as_ref(), &mut state).await;
run_update_cycle(&pool, cfg.as_ref(), &mut state, &reinit_tx).await;
let refreshed_secs = cfg.general.effective_update_every_secs().max(1);
if refreshed_secs != update_every_secs {
info!(
@@ -415,6 +514,20 @@ pub async fn me_config_updater(
cfg.general.me_hardswap_warmup_delay_max_ms,
cfg.general.me_hardswap_warmup_extra_passes,
cfg.general.me_hardswap_warmup_pass_backoff_base_ms,
cfg.general.me_bind_stale_mode,
cfg.general.me_bind_stale_ttl_secs,
cfg.general.me_secret_atomic_snapshot,
cfg.general.me_deterministic_writer_sort,
cfg.general.me_single_endpoint_shadow_writers,
cfg.general.me_single_endpoint_outage_mode_enabled,
cfg.general.me_single_endpoint_outage_disable_quarantine,
cfg.general.me_single_endpoint_outage_backoff_min_ms,
cfg.general.me_single_endpoint_outage_backoff_max_ms,
cfg.general.me_single_endpoint_shadow_rotate_every_secs,
cfg.general.me_floor_mode,
cfg.general.me_adaptive_floor_idle_secs,
cfg.general.me_adaptive_floor_min_writers_single_endpoint,
cfg.general.me_adaptive_floor_recover_grace_secs,
);
let new_secs = cfg.general.effective_update_every_secs().max(1);
if new_secs == update_every_secs {
@@ -429,7 +542,7 @@ pub async fn me_config_updater(
);
update_every_secs = new_secs;
update_every = Duration::from_secs(update_every_secs);
run_update_cycle(&pool, &rng, cfg.as_ref(), &mut state).await;
run_update_cycle(&pool, cfg.as_ref(), &mut state, &reinit_tx).await;
next_tick = tokio::time::Instant::now() + update_every;
} else {
info!(

View File

@@ -1,5 +1,8 @@
use std::net::{IpAddr, SocketAddr};
use std::sync::atomic::Ordering;
use std::time::{Duration, Instant};
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use socket2::{SockRef, TcpKeepalive};
#[cfg(target_os = "linux")]
use libc;
@@ -33,6 +36,24 @@ use super::codec::{
use super::wire::{extract_ip_material, IpMaterial};
use super::MePool;
const ME_KDF_DRIFT_STRICT: bool = false;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum KdfClientPortSource {
LocalSocket = 0,
SocksBound = 1,
}
impl KdfClientPortSource {
fn from_socks_bound_port(socks_bound_port: Option<u16>) -> Self {
if socks_bound_port.is_some() {
Self::SocksBound
} else {
Self::LocalSocket
}
}
}
/// Result of a successful ME handshake with timings.
pub(crate) struct HandshakeOutput {
pub rd: ReadHalf<TcpStream>,
@@ -46,6 +67,22 @@ pub(crate) struct HandshakeOutput {
}
impl MePool {
fn kdf_material_fingerprint(
local_ip_nat: IpAddr,
peer_addr_nat: SocketAddr,
reflected_ip: Option<IpAddr>,
socks_bound_ip: Option<IpAddr>,
client_port_source: KdfClientPortSource,
) -> u64 {
let mut hasher = DefaultHasher::new();
local_ip_nat.hash(&mut hasher);
peer_addr_nat.hash(&mut hasher);
reflected_ip.hash(&mut hasher);
socks_bound_ip.hash(&mut hasher);
client_port_source.hash(&mut hasher);
hasher.finish()
}
async fn resolve_dc_idx_for_endpoint(&self, addr: SocketAddr) -> Option<i16> {
if addr.is_ipv4() {
let map = self.proxy_map_v4.read().await;
@@ -267,7 +304,16 @@ impl MePool {
.unwrap_or_default()
.as_secs() as u32;
let ks = self.key_selector().await;
let secret_atomic_snapshot = self.secret_atomic_snapshot.load(Ordering::Relaxed);
let (ks, secret) = if secret_atomic_snapshot {
let snapshot = self.secret_snapshot().await;
(snapshot.key_selector, snapshot.secret)
} else {
// Backward-compatible mode: key selector and secret may come from different updates.
let key_selector = self.key_selector().await;
let secret = self.secret_snapshot().await.secret;
(key_selector, secret)
};
let nonce_payload = build_nonce_payload(ks, crypto_ts, &my_nonce);
let nonce_frame = build_rpc_frame(-2, &nonce_payload, RpcChecksumMode::Crc32);
let dump = hex_dump(&nonce_frame[..nonce_frame.len().min(44)]);
@@ -329,10 +375,50 @@ impl MePool {
let ts_bytes = crypto_ts.to_le_bytes();
let server_port_bytes = peer_addr_nat.port().to_le_bytes();
let client_port_for_kdf = socks_bound_addr
let socks_bound_port = socks_bound_addr
.map(|bound| bound.port())
.filter(|port| *port != 0)
.unwrap_or(local_addr_nat.port());
.filter(|port| *port != 0);
let client_port_for_kdf = socks_bound_port.unwrap_or(local_addr_nat.port());
let client_port_source = KdfClientPortSource::from_socks_bound_port(socks_bound_port);
let kdf_fingerprint = Self::kdf_material_fingerprint(
local_addr_nat.ip(),
peer_addr_nat,
reflected.map(|value| value.ip()),
socks_bound_addr.map(|value| value.ip()),
client_port_source,
);
let mut kdf_fingerprint_guard = self.kdf_material_fingerprint.lock().await;
if let Some((prev_fingerprint, prev_client_port)) =
kdf_fingerprint_guard.get(&peer_addr_nat).copied()
{
if prev_fingerprint != kdf_fingerprint {
self.stats.increment_me_kdf_drift_total();
warn!(
%peer_addr_nat,
%local_addr_nat,
client_port_for_kdf,
client_port_source = ?client_port_source,
"ME KDF material drift detected for endpoint"
);
if ME_KDF_DRIFT_STRICT {
return Err(ProxyError::InvalidHandshake(
"ME KDF material drift detected (strict mode)".to_string(),
));
}
} else if prev_client_port != client_port_for_kdf {
self.stats.increment_me_kdf_port_only_drift_total();
debug!(
%peer_addr_nat,
previous_client_port_for_kdf = prev_client_port,
client_port_for_kdf,
client_port_source = ?client_port_source,
"ME KDF client port changed with stable material"
);
}
}
kdf_fingerprint_guard.insert(peer_addr_nat, (kdf_fingerprint, client_port_for_kdf));
drop(kdf_fingerprint_guard);
let client_port_bytes = client_port_for_kdf.to_le_bytes();
let server_ip = extract_ip_material(peer_addr_nat);
@@ -357,8 +443,6 @@ impl MePool {
let diag_level: u8 = std::env::var("ME_DIAG").ok().and_then(|v| v.parse().ok()).unwrap_or(0);
let secret: Vec<u8> = self.proxy_secret.read().await.clone();
let prekey_client = build_middleproxy_prekey(
&srv_nonce,
&my_nonce,
@@ -532,6 +616,8 @@ impl MePool {
} else {
-1
};
self.stats.increment_me_handshake_reject_total();
self.stats.increment_me_handshake_error_code(err_code);
return Err(ProxyError::InvalidHandshake(format!(
"ME rejected handshake (error={err_code})"
)));

View File

@@ -1,11 +1,13 @@
use std::collections::HashMap;
use std::collections::HashSet;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tracing::{debug, info, warn};
use rand::Rng;
use tracing::{debug, info, warn};
use crate::config::MeFloorMode;
use crate::crypto::SecureRandom;
use crate::network::IpFamily;
@@ -15,11 +17,23 @@ const HEALTH_INTERVAL_SECS: u64 = 1;
const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff
#[allow(dead_code)]
const MAX_CONCURRENT_PER_DC_DEFAULT: usize = 1;
const SHADOW_ROTATE_RETRY_SECS: u64 = 30;
const IDLE_REFRESH_TRIGGER_BASE_SECS: u64 = 45;
const IDLE_REFRESH_TRIGGER_JITTER_SECS: u64 = 5;
const IDLE_REFRESH_RETRY_SECS: u64 = 8;
const IDLE_REFRESH_SUCCESS_GUARD_SECS: u64 = 5;
pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_connections: usize) {
let mut backoff: HashMap<(i32, IpFamily), u64> = HashMap::new();
let mut next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new();
let mut inflight: HashMap<(i32, IpFamily), usize> = HashMap::new();
let mut outage_backoff: HashMap<(i32, IpFamily), u64> = HashMap::new();
let mut outage_next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new();
let mut single_endpoint_outage: HashSet<(i32, IpFamily)> = HashSet::new();
let mut shadow_rotate_deadline: HashMap<(i32, IpFamily), Instant> = HashMap::new();
let mut idle_refresh_next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new();
let mut adaptive_idle_since: HashMap<(i32, IpFamily), Instant> = HashMap::new();
let mut adaptive_recover_until: HashMap<(i32, IpFamily), Instant> = HashMap::new();
loop {
tokio::time::sleep(Duration::from_secs(HEALTH_INTERVAL_SECS)).await;
pool.prune_closed_writers().await;
@@ -30,6 +44,13 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
&mut backoff,
&mut next_attempt,
&mut inflight,
&mut outage_backoff,
&mut outage_next_attempt,
&mut single_endpoint_outage,
&mut shadow_rotate_deadline,
&mut idle_refresh_next_attempt,
&mut adaptive_idle_since,
&mut adaptive_recover_until,
)
.await;
check_family(
@@ -39,6 +60,13 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
&mut backoff,
&mut next_attempt,
&mut inflight,
&mut outage_backoff,
&mut outage_next_attempt,
&mut single_endpoint_outage,
&mut shadow_rotate_deadline,
&mut idle_refresh_next_attempt,
&mut adaptive_idle_since,
&mut adaptive_recover_until,
)
.await;
}
@@ -51,6 +79,13 @@ async fn check_family(
backoff: &mut HashMap<(i32, IpFamily), u64>,
next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
inflight: &mut HashMap<(i32, IpFamily), usize>,
outage_backoff: &mut HashMap<(i32, IpFamily), u64>,
outage_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
single_endpoint_outage: &mut HashSet<(i32, IpFamily)>,
shadow_rotate_deadline: &mut HashMap<(i32, IpFamily), Instant>,
idle_refresh_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
) {
let enabled = match family {
IpFamily::V4 => pool.decision.ipv4_me,
@@ -77,32 +112,119 @@ async fn check_family(
endpoints.dedup();
}
let mut live_addr_counts = HashMap::<SocketAddr, usize>::new();
for writer in pool
.writers
.read()
.await
.iter()
.filter(|w| !w.draining.load(std::sync::atomic::Ordering::Relaxed))
{
*live_addr_counts.entry(writer.addr).or_insert(0) += 1;
if pool.floor_mode() == MeFloorMode::Static {
adaptive_idle_since.clear();
adaptive_recover_until.clear();
}
let mut live_addr_counts = HashMap::<SocketAddr, usize>::new();
let mut live_writer_ids_by_addr = HashMap::<SocketAddr, Vec<u64>>::new();
for writer in pool.writers.read().await.iter().filter(|w| {
!w.draining.load(std::sync::atomic::Ordering::Relaxed)
}) {
*live_addr_counts.entry(writer.addr).or_insert(0) += 1;
live_writer_ids_by_addr
.entry(writer.addr)
.or_default()
.push(writer.id);
}
let writer_idle_since = pool.registry.writer_idle_since_snapshot().await;
for (dc, endpoints) in dc_endpoints {
if endpoints.is_empty() {
continue;
}
let required = MePool::required_writers_for_dc(endpoints.len());
let key = (dc, family);
let reduce_for_idle = should_reduce_floor_for_idle(
pool,
key,
&endpoints,
&live_writer_ids_by_addr,
adaptive_idle_since,
adaptive_recover_until,
)
.await;
let required = pool.required_writers_for_dc_with_floor_mode(endpoints.len(), reduce_for_idle);
let alive = endpoints
.iter()
.map(|addr| *live_addr_counts.get(addr).unwrap_or(&0))
.sum::<usize>();
if endpoints.len() == 1 && pool.single_endpoint_outage_mode_enabled() && alive == 0 {
if single_endpoint_outage.insert(key) {
pool.stats.increment_me_single_endpoint_outage_enter_total();
warn!(
dc = %dc,
?family,
required,
endpoint_count = endpoints.len(),
"Single-endpoint DC outage detected"
);
}
recover_single_endpoint_outage(
pool,
rng,
key,
endpoints[0],
required,
outage_backoff,
outage_next_attempt,
)
.await;
continue;
}
if single_endpoint_outage.remove(&key) {
pool.stats.increment_me_single_endpoint_outage_exit_total();
outage_backoff.remove(&key);
outage_next_attempt.remove(&key);
shadow_rotate_deadline.remove(&key);
idle_refresh_next_attempt.remove(&key);
adaptive_idle_since.remove(&key);
adaptive_recover_until.remove(&key);
info!(
dc = %dc,
?family,
alive,
required,
endpoint_count = endpoints.len(),
"Single-endpoint DC outage recovered"
);
}
if alive >= required {
maybe_refresh_idle_writer_for_dc(
pool,
rng,
key,
dc,
family,
&endpoints,
alive,
required,
&live_writer_ids_by_addr,
&writer_idle_since,
idle_refresh_next_attempt,
)
.await;
maybe_rotate_single_endpoint_shadow(
pool,
rng,
key,
dc,
family,
&endpoints,
alive,
required,
&live_writer_ids_by_addr,
shadow_rotate_deadline,
)
.await;
continue;
}
let missing = required - alive;
let key = (dc, family);
let now = Instant::now();
if let Some(ts) = next_attempt.get(&key)
&& now < *ts
@@ -112,7 +234,18 @@ async fn check_family(
let max_concurrent = pool.me_reconnect_max_concurrent_per_dc.max(1) as usize;
if *inflight.get(&key).unwrap_or(&0) >= max_concurrent {
return;
continue;
}
if pool.has_refill_inflight_for_endpoints(&endpoints).await {
debug!(
dc = %dc,
?family,
alive,
required,
endpoint_count = endpoints.len(),
"Skipping health reconnect: immediate refill is already in flight for this DC group"
);
continue;
}
*inflight.entry(key).or_insert(0) += 1;
@@ -162,18 +295,402 @@ async fn check_family(
let wait = Duration::from_millis(next_ms)
+ Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
next_attempt.insert(key, now + wait);
warn!(
dc = %dc,
?family,
alive = now_alive,
required,
endpoint_count = endpoints.len(),
backoff_ms = next_ms,
"DC writer floor is below required level, scheduled reconnect"
);
if pool.is_runtime_ready() {
warn!(
dc = %dc,
?family,
alive = now_alive,
required,
endpoint_count = endpoints.len(),
backoff_ms = next_ms,
"DC writer floor is below required level, scheduled reconnect"
);
} else {
info!(
dc = %dc,
?family,
alive = now_alive,
required,
endpoint_count = endpoints.len(),
backoff_ms = next_ms,
"DC writer floor is below required level during startup, scheduled reconnect"
);
}
}
if let Some(v) = inflight.get_mut(&key) {
*v = v.saturating_sub(1);
}
}
}
async fn maybe_refresh_idle_writer_for_dc(
pool: &Arc<MePool>,
rng: &Arc<SecureRandom>,
key: (i32, IpFamily),
dc: i32,
family: IpFamily,
endpoints: &[SocketAddr],
alive: usize,
required: usize,
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
writer_idle_since: &HashMap<u64, u64>,
idle_refresh_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
) {
if alive < required {
return;
}
let now = Instant::now();
if let Some(next) = idle_refresh_next_attempt.get(&key)
&& now < *next
{
return;
}
let now_epoch_secs = MePool::now_epoch_secs();
let mut candidate: Option<(u64, SocketAddr, u64, u64)> = None;
for endpoint in endpoints {
let Some(writer_ids) = live_writer_ids_by_addr.get(endpoint) else {
continue;
};
for writer_id in writer_ids {
let Some(idle_since_epoch_secs) = writer_idle_since.get(writer_id).copied() else {
continue;
};
let idle_age_secs = now_epoch_secs.saturating_sub(idle_since_epoch_secs);
let threshold_secs = IDLE_REFRESH_TRIGGER_BASE_SECS
+ (*writer_id % (IDLE_REFRESH_TRIGGER_JITTER_SECS + 1));
if idle_age_secs < threshold_secs {
continue;
}
if candidate
.as_ref()
.map(|(_, _, age, _)| idle_age_secs > *age)
.unwrap_or(true)
{
candidate = Some((*writer_id, *endpoint, idle_age_secs, threshold_secs));
}
}
}
let Some((old_writer_id, endpoint, idle_age_secs, threshold_secs)) = candidate else {
return;
};
let rotate_ok = match tokio::time::timeout(pool.me_one_timeout, pool.connect_one(endpoint, rng.as_ref())).await {
Ok(Ok(())) => true,
Ok(Err(error)) => {
debug!(
dc = %dc,
?family,
%endpoint,
old_writer_id,
idle_age_secs,
threshold_secs,
%error,
"Idle writer pre-refresh connect failed"
);
false
}
Err(_) => {
debug!(
dc = %dc,
?family,
%endpoint,
old_writer_id,
idle_age_secs,
threshold_secs,
"Idle writer pre-refresh connect timed out"
);
false
}
};
if !rotate_ok {
idle_refresh_next_attempt.insert(key, now + Duration::from_secs(IDLE_REFRESH_RETRY_SECS));
return;
}
pool.mark_writer_draining_with_timeout(old_writer_id, pool.force_close_timeout(), false)
.await;
idle_refresh_next_attempt.insert(
key,
now + Duration::from_secs(IDLE_REFRESH_SUCCESS_GUARD_SECS),
);
info!(
dc = %dc,
?family,
%endpoint,
old_writer_id,
idle_age_secs,
threshold_secs,
alive,
required,
"Idle writer refreshed before upstream idle timeout"
);
}
async fn should_reduce_floor_for_idle(
pool: &Arc<MePool>,
key: (i32, IpFamily),
endpoints: &[SocketAddr],
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
) -> bool {
if endpoints.len() != 1 || pool.floor_mode() != MeFloorMode::Adaptive {
adaptive_idle_since.remove(&key);
adaptive_recover_until.remove(&key);
return false;
}
let now = Instant::now();
let endpoint = endpoints[0];
let writer_ids = live_writer_ids_by_addr
.get(&endpoint)
.map(Vec::as_slice)
.unwrap_or(&[]);
let has_bound_clients = has_bound_clients_on_endpoint(pool, writer_ids).await;
if has_bound_clients {
adaptive_idle_since.remove(&key);
adaptive_recover_until.insert(key, now + pool.adaptive_floor_recover_grace_duration());
return false;
}
if let Some(recover_until) = adaptive_recover_until.get(&key)
&& now < *recover_until
{
adaptive_idle_since.remove(&key);
return false;
}
adaptive_recover_until.remove(&key);
let idle_since = adaptive_idle_since.entry(key).or_insert(now);
now.saturating_duration_since(*idle_since) >= pool.adaptive_floor_idle_duration()
}
async fn has_bound_clients_on_endpoint(pool: &Arc<MePool>, writer_ids: &[u64]) -> bool {
for writer_id in writer_ids {
if !pool.registry.is_writer_empty(*writer_id).await {
return true;
}
}
false
}
async fn recover_single_endpoint_outage(
pool: &Arc<MePool>,
rng: &Arc<SecureRandom>,
key: (i32, IpFamily),
endpoint: SocketAddr,
required: usize,
outage_backoff: &mut HashMap<(i32, IpFamily), u64>,
outage_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
) {
let now = Instant::now();
if let Some(ts) = outage_next_attempt.get(&key)
&& now < *ts
{
return;
}
let (min_backoff_ms, max_backoff_ms) = pool.single_endpoint_outage_backoff_bounds_ms();
pool.stats
.increment_me_single_endpoint_outage_reconnect_attempt_total();
let bypass_quarantine = pool.single_endpoint_outage_disable_quarantine();
let attempt_ok = if bypass_quarantine {
pool.stats
.increment_me_single_endpoint_quarantine_bypass_total();
match tokio::time::timeout(pool.me_one_timeout, pool.connect_one(endpoint, rng.as_ref())).await {
Ok(Ok(())) => true,
Ok(Err(e)) => {
debug!(
dc = %key.0,
family = ?key.1,
%endpoint,
error = %e,
"Single-endpoint outage reconnect failed (quarantine bypass path)"
);
false
}
Err(_) => {
debug!(
dc = %key.0,
family = ?key.1,
%endpoint,
"Single-endpoint outage reconnect timed out (quarantine bypass path)"
);
false
}
}
} else {
let one_endpoint = [endpoint];
match tokio::time::timeout(
pool.me_one_timeout,
pool.connect_endpoints_round_robin(&one_endpoint, rng.as_ref()),
)
.await
{
Ok(ok) => ok,
Err(_) => {
debug!(
dc = %key.0,
family = ?key.1,
%endpoint,
"Single-endpoint outage reconnect timed out"
);
false
}
}
};
if attempt_ok {
pool.stats
.increment_me_single_endpoint_outage_reconnect_success_total();
pool.stats.increment_me_reconnect_success();
outage_backoff.insert(key, min_backoff_ms);
let jitter = min_backoff_ms / JITTER_FRAC_NUM;
let wait = Duration::from_millis(min_backoff_ms)
+ Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
outage_next_attempt.insert(key, now + wait);
info!(
dc = %key.0,
family = ?key.1,
%endpoint,
required,
backoff_ms = min_backoff_ms,
"Single-endpoint outage reconnect succeeded"
);
return;
}
pool.stats.increment_me_reconnect_attempt();
let current_ms = *outage_backoff.get(&key).unwrap_or(&min_backoff_ms);
let next_ms = current_ms.saturating_mul(2).min(max_backoff_ms);
outage_backoff.insert(key, next_ms);
let jitter = next_ms / JITTER_FRAC_NUM;
let wait = Duration::from_millis(next_ms)
+ Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
outage_next_attempt.insert(key, now + wait);
warn!(
dc = %key.0,
family = ?key.1,
%endpoint,
required,
backoff_ms = next_ms,
"Single-endpoint outage reconnect scheduled"
);
}
async fn maybe_rotate_single_endpoint_shadow(
pool: &Arc<MePool>,
rng: &Arc<SecureRandom>,
key: (i32, IpFamily),
dc: i32,
family: IpFamily,
endpoints: &[SocketAddr],
alive: usize,
required: usize,
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
shadow_rotate_deadline: &mut HashMap<(i32, IpFamily), Instant>,
) {
if endpoints.len() != 1 || alive < required {
return;
}
let Some(interval) = pool.single_endpoint_shadow_rotate_interval() else {
return;
};
let now = Instant::now();
if let Some(deadline) = shadow_rotate_deadline.get(&key)
&& now < *deadline
{
return;
}
let endpoint = endpoints[0];
if pool.is_endpoint_quarantined(endpoint).await {
pool.stats
.increment_me_single_endpoint_shadow_rotate_skipped_quarantine_total();
shadow_rotate_deadline.insert(key, now + Duration::from_secs(SHADOW_ROTATE_RETRY_SECS));
debug!(
dc = %dc,
?family,
%endpoint,
"Single-endpoint shadow rotation skipped: endpoint is quarantined"
);
return;
}
let Some(writer_ids) = live_writer_ids_by_addr.get(&endpoint) else {
shadow_rotate_deadline.insert(key, now + Duration::from_secs(SHADOW_ROTATE_RETRY_SECS));
return;
};
let mut candidate_writer_id = None;
for writer_id in writer_ids {
if pool.registry.is_writer_empty(*writer_id).await {
candidate_writer_id = Some(*writer_id);
break;
}
}
let Some(old_writer_id) = candidate_writer_id else {
shadow_rotate_deadline.insert(key, now + Duration::from_secs(SHADOW_ROTATE_RETRY_SECS));
debug!(
dc = %dc,
?family,
%endpoint,
alive,
required,
"Single-endpoint shadow rotation skipped: no empty writer candidate"
);
return;
};
let rotate_ok = match tokio::time::timeout(pool.me_one_timeout, pool.connect_one(endpoint, rng.as_ref())).await {
Ok(Ok(())) => true,
Ok(Err(e)) => {
debug!(
dc = %dc,
?family,
%endpoint,
error = %e,
"Single-endpoint shadow rotation connect failed"
);
false
}
Err(_) => {
debug!(
dc = %dc,
?family,
%endpoint,
"Single-endpoint shadow rotation connect timed out"
);
false
}
};
if !rotate_ok {
shadow_rotate_deadline.insert(
key,
now + interval.min(Duration::from_secs(SHADOW_ROTATE_RETRY_SECS)),
);
return;
}
pool.mark_writer_draining_with_timeout(old_writer_id, pool.force_close_timeout(), false)
.await;
pool.stats.increment_me_single_endpoint_shadow_rotate_total();
shadow_rotate_deadline.insert(key, now + interval);
info!(
dc = %dc,
?family,
%endpoint,
old_writer_id,
rotate_every_secs = interval.as_secs(),
"Single-endpoint shadow writer rotated"
);
}

View File

@@ -18,6 +18,7 @@ mod rotation;
mod send;
mod secret;
mod wire;
mod pool_status;
use bytes::Bytes;
@@ -29,8 +30,12 @@ pub use pool::MePool;
pub use pool_nat::{stun_probe, detect_public_ip};
pub use registry::ConnRegistry;
pub use secret::fetch_proxy_secret;
pub use config_updater::{fetch_proxy_config, me_config_updater};
pub use rotation::me_rotation_task;
#[allow(unused_imports)]
pub use config_updater::{
ProxyConfigData, fetch_proxy_config, fetch_proxy_config_with_raw, load_proxy_config_cache,
me_config_updater, save_proxy_config_cache,
};
pub use rotation::{MeReinitTrigger, me_reinit_scheduler, me_rotation_task};
pub use wire::proto_flags_for_tag;
#[derive(Debug)]

View File

@@ -7,7 +7,7 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use tokio::sync::{Mutex, Notify, RwLock, mpsc};
use tokio_util::sync::CancellationToken;
use crate::config::MeSocksKdfPolicy;
use crate::config::{MeBindStaleMode, MeFloorMode, MeRouteNoWriterMode, MeSocksKdfPolicy};
use crate::crypto::SecureRandom;
use crate::network::IpFamily;
use crate::network::probe::NetworkDecision;
@@ -16,11 +16,19 @@ use crate::transport::UpstreamManager;
use super::ConnRegistry;
use super::codec::WriterCommand;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(super) struct RefillDcKey {
pub dc: i32,
pub family: IpFamily,
}
#[derive(Clone)]
pub struct MeWriter {
pub id: u64,
pub addr: SocketAddr,
pub generation: u64,
pub contour: Arc<AtomicU8>,
pub created_at: Instant,
pub tx: mpsc::Sender<WriterCommand>,
pub cancel: CancellationToken,
pub degraded: Arc<AtomicBool>,
@@ -29,6 +37,36 @@ pub struct MeWriter {
pub allow_drain_fallback: Arc<AtomicBool>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub(super) enum WriterContour {
Warm = 0,
Active = 1,
Draining = 2,
}
impl WriterContour {
pub(super) fn as_u8(self) -> u8 {
self as u8
}
pub(super) fn from_u8(value: u8) -> Self {
match value {
0 => Self::Warm,
1 => Self::Active,
2 => Self::Draining,
_ => Self::Draining,
}
}
}
#[derive(Debug, Clone)]
pub struct SecretSnapshot {
pub epoch: u64,
pub key_selector: u32,
pub secret: Vec<u8>,
}
#[allow(dead_code)]
pub struct MePool {
pub(super) registry: Arc<ConnRegistry>,
@@ -38,7 +76,7 @@ pub struct MePool {
pub(super) upstream: Option<Arc<UpstreamManager>>,
pub(super) rng: Arc<SecureRandom>,
pub(super) proxy_tag: Option<Vec<u8>>,
pub(super) proxy_secret: Arc<RwLock<Vec<u8>>>,
pub(super) proxy_secret: Arc<RwLock<SecretSnapshot>>,
pub(super) nat_ip_cfg: Option<IpAddr>,
pub(super) nat_ip_detected: Arc<RwLock<Option<IpAddr>>>,
pub(super) nat_probe: bool,
@@ -56,6 +94,7 @@ pub struct MePool {
pub(super) me_keepalive_interval: Duration,
pub(super) me_keepalive_jitter: Duration,
pub(super) me_keepalive_payload_random: bool,
pub(super) rpc_proxy_req_every_secs: AtomicU64,
pub(super) me_warmup_stagger_enabled: bool,
pub(super) me_warmup_step_delay: Duration,
pub(super) me_warmup_step_jitter: Duration,
@@ -63,6 +102,16 @@ pub struct MePool {
pub(super) me_reconnect_backoff_base: Duration,
pub(super) me_reconnect_backoff_cap: Duration,
pub(super) me_reconnect_fast_retry_count: u32,
pub(super) me_single_endpoint_shadow_writers: AtomicU8,
pub(super) me_single_endpoint_outage_mode_enabled: AtomicBool,
pub(super) me_single_endpoint_outage_disable_quarantine: AtomicBool,
pub(super) me_single_endpoint_outage_backoff_min_ms: AtomicU64,
pub(super) me_single_endpoint_outage_backoff_max_ms: AtomicU64,
pub(super) me_single_endpoint_shadow_rotate_every_secs: AtomicU64,
pub(super) me_floor_mode: AtomicU8,
pub(super) me_adaptive_floor_idle_secs: AtomicU64,
pub(super) me_adaptive_floor_min_writers_single_endpoint: AtomicU8,
pub(super) me_adaptive_floor_recover_grace_secs: AtomicU64,
pub(super) proxy_map_v4: Arc<RwLock<HashMap<i32, Vec<(IpAddr, u16)>>>>,
pub(super) proxy_map_v6: Arc<RwLock<HashMap<i32, Vec<(IpAddr, u16)>>>>,
pub(super) default_dc: AtomicI32,
@@ -72,10 +121,18 @@ pub struct MePool {
pub(super) nat_reflection_cache: Arc<Mutex<NatReflectionCache>>,
pub(super) writer_available: Arc<Notify>,
pub(super) refill_inflight: Arc<Mutex<HashSet<SocketAddr>>>,
pub(super) refill_inflight_dc: Arc<Mutex<HashSet<RefillDcKey>>>,
pub(super) conn_count: AtomicUsize,
pub(super) stats: Arc<crate::stats::Stats>,
pub(super) generation: AtomicU64,
pub(super) active_generation: AtomicU64,
pub(super) warm_generation: AtomicU64,
pub(super) pending_hardswap_generation: AtomicU64,
pub(super) pending_hardswap_started_at_epoch_secs: AtomicU64,
pub(super) pending_hardswap_map_hash: AtomicU64,
pub(super) hardswap: AtomicBool,
pub(super) endpoint_quarantine: Arc<Mutex<HashMap<SocketAddr, Instant>>>,
pub(super) kdf_material_fingerprint: Arc<Mutex<HashMap<SocketAddr, (u64, u16)>>>,
pub(super) me_pool_drain_ttl_secs: AtomicU64,
pub(super) me_pool_force_close_secs: AtomicU64,
pub(super) me_pool_min_fresh_ratio_permille: AtomicU32,
@@ -83,7 +140,16 @@ pub struct MePool {
pub(super) me_hardswap_warmup_delay_max_ms: AtomicU64,
pub(super) me_hardswap_warmup_extra_passes: AtomicU32,
pub(super) me_hardswap_warmup_pass_backoff_base_ms: AtomicU64,
pub(super) me_bind_stale_mode: AtomicU8,
pub(super) me_bind_stale_ttl_secs: AtomicU64,
pub(super) secret_atomic_snapshot: AtomicBool,
pub(super) me_deterministic_writer_sort: AtomicBool,
pub(super) me_socks_kdf_policy: AtomicU8,
pub(super) me_route_no_writer_mode: AtomicU8,
pub(super) me_route_no_writer_wait: Duration,
pub(super) me_route_inline_recovery_attempts: u32,
pub(super) me_route_inline_recovery_wait: Duration,
pub(super) runtime_ready: AtomicBool,
pool_size: usize,
}
@@ -132,6 +198,7 @@ impl MePool {
me_keepalive_interval_secs: u64,
me_keepalive_jitter_secs: u64,
me_keepalive_payload_random: bool,
rpc_proxy_req_every_secs: u64,
me_warmup_stagger_enabled: bool,
me_warmup_step_delay_ms: u64,
me_warmup_step_jitter_ms: u64,
@@ -139,6 +206,16 @@ impl MePool {
me_reconnect_backoff_base_ms: u64,
me_reconnect_backoff_cap_ms: u64,
me_reconnect_fast_retry_count: u32,
me_single_endpoint_shadow_writers: u8,
me_single_endpoint_outage_mode_enabled: bool,
me_single_endpoint_outage_disable_quarantine: bool,
me_single_endpoint_outage_backoff_min_ms: u64,
me_single_endpoint_outage_backoff_max_ms: u64,
me_single_endpoint_shadow_rotate_every_secs: u64,
me_floor_mode: MeFloorMode,
me_adaptive_floor_idle_secs: u64,
me_adaptive_floor_min_writers_single_endpoint: u8,
me_adaptive_floor_recover_grace_secs: u64,
hardswap: bool,
me_pool_drain_ttl_secs: u64,
me_pool_force_close_secs: u64,
@@ -147,10 +224,18 @@ impl MePool {
me_hardswap_warmup_delay_max_ms: u64,
me_hardswap_warmup_extra_passes: u8,
me_hardswap_warmup_pass_backoff_base_ms: u64,
me_bind_stale_mode: MeBindStaleMode,
me_bind_stale_ttl_secs: u64,
me_secret_atomic_snapshot: bool,
me_deterministic_writer_sort: bool,
me_socks_kdf_policy: MeSocksKdfPolicy,
me_route_backpressure_base_timeout_ms: u64,
me_route_backpressure_high_timeout_ms: u64,
me_route_backpressure_high_watermark_pct: u8,
me_route_no_writer_mode: MeRouteNoWriterMode,
me_route_no_writer_wait_ms: u64,
me_route_inline_recovery_attempts: u32,
me_route_inline_recovery_wait_ms: u64,
) -> Arc<Self> {
let registry = Arc::new(ConnRegistry::new());
registry.update_route_backpressure_policy(
@@ -166,7 +251,20 @@ impl MePool {
upstream,
rng,
proxy_tag,
proxy_secret: Arc::new(RwLock::new(proxy_secret)),
proxy_secret: Arc::new(RwLock::new(SecretSnapshot {
epoch: 1,
key_selector: if proxy_secret.len() >= 4 {
u32::from_le_bytes([
proxy_secret[0],
proxy_secret[1],
proxy_secret[2],
proxy_secret[3],
])
} else {
0
},
secret: proxy_secret,
})),
nat_ip_cfg: nat_ip,
nat_ip_detected: Arc::new(RwLock::new(None)),
nat_probe,
@@ -185,6 +283,7 @@ impl MePool {
me_keepalive_interval: Duration::from_secs(me_keepalive_interval_secs),
me_keepalive_jitter: Duration::from_secs(me_keepalive_jitter_secs),
me_keepalive_payload_random,
rpc_proxy_req_every_secs: AtomicU64::new(rpc_proxy_req_every_secs),
me_warmup_stagger_enabled,
me_warmup_step_delay: Duration::from_millis(me_warmup_step_delay_ms),
me_warmup_step_jitter: Duration::from_millis(me_warmup_step_jitter_ms),
@@ -192,6 +291,30 @@ impl MePool {
me_reconnect_backoff_base: Duration::from_millis(me_reconnect_backoff_base_ms),
me_reconnect_backoff_cap: Duration::from_millis(me_reconnect_backoff_cap_ms),
me_reconnect_fast_retry_count,
me_single_endpoint_shadow_writers: AtomicU8::new(me_single_endpoint_shadow_writers),
me_single_endpoint_outage_mode_enabled: AtomicBool::new(
me_single_endpoint_outage_mode_enabled,
),
me_single_endpoint_outage_disable_quarantine: AtomicBool::new(
me_single_endpoint_outage_disable_quarantine,
),
me_single_endpoint_outage_backoff_min_ms: AtomicU64::new(
me_single_endpoint_outage_backoff_min_ms,
),
me_single_endpoint_outage_backoff_max_ms: AtomicU64::new(
me_single_endpoint_outage_backoff_max_ms,
),
me_single_endpoint_shadow_rotate_every_secs: AtomicU64::new(
me_single_endpoint_shadow_rotate_every_secs,
),
me_floor_mode: AtomicU8::new(me_floor_mode.as_u8()),
me_adaptive_floor_idle_secs: AtomicU64::new(me_adaptive_floor_idle_secs),
me_adaptive_floor_min_writers_single_endpoint: AtomicU8::new(
me_adaptive_floor_min_writers_single_endpoint,
),
me_adaptive_floor_recover_grace_secs: AtomicU64::new(
me_adaptive_floor_recover_grace_secs,
),
pool_size: 2,
proxy_map_v4: Arc::new(RwLock::new(proxy_map_v4)),
proxy_map_v6: Arc::new(RwLock::new(proxy_map_v6)),
@@ -202,9 +325,17 @@ impl MePool {
nat_reflection_cache: Arc::new(Mutex::new(NatReflectionCache::default())),
writer_available: Arc::new(Notify::new()),
refill_inflight: Arc::new(Mutex::new(HashSet::new())),
refill_inflight_dc: Arc::new(Mutex::new(HashSet::new())),
conn_count: AtomicUsize::new(0),
generation: AtomicU64::new(1),
active_generation: AtomicU64::new(1),
warm_generation: AtomicU64::new(0),
pending_hardswap_generation: AtomicU64::new(0),
pending_hardswap_started_at_epoch_secs: AtomicU64::new(0),
pending_hardswap_map_hash: AtomicU64::new(0),
hardswap: AtomicBool::new(hardswap),
endpoint_quarantine: Arc::new(Mutex::new(HashMap::new())),
kdf_material_fingerprint: Arc::new(Mutex::new(HashMap::new())),
me_pool_drain_ttl_secs: AtomicU64::new(me_pool_drain_ttl_secs),
me_pool_force_close_secs: AtomicU64::new(me_pool_force_close_secs),
me_pool_min_fresh_ratio_permille: AtomicU32::new(Self::ratio_to_permille(
@@ -216,16 +347,29 @@ impl MePool {
me_hardswap_warmup_pass_backoff_base_ms: AtomicU64::new(
me_hardswap_warmup_pass_backoff_base_ms,
),
me_bind_stale_mode: AtomicU8::new(me_bind_stale_mode.as_u8()),
me_bind_stale_ttl_secs: AtomicU64::new(me_bind_stale_ttl_secs),
secret_atomic_snapshot: AtomicBool::new(me_secret_atomic_snapshot),
me_deterministic_writer_sort: AtomicBool::new(me_deterministic_writer_sort),
me_socks_kdf_policy: AtomicU8::new(me_socks_kdf_policy.as_u8()),
me_route_no_writer_mode: AtomicU8::new(me_route_no_writer_mode.as_u8()),
me_route_no_writer_wait: Duration::from_millis(me_route_no_writer_wait_ms),
me_route_inline_recovery_attempts,
me_route_inline_recovery_wait: Duration::from_millis(me_route_inline_recovery_wait_ms),
runtime_ready: AtomicBool::new(false),
})
}
pub fn has_proxy_tag(&self) -> bool {
self.proxy_tag.is_some()
pub fn current_generation(&self) -> u64 {
self.active_generation.load(Ordering::Relaxed)
}
pub fn current_generation(&self) -> u64 {
self.generation.load(Ordering::Relaxed)
pub fn set_runtime_ready(&self, ready: bool) {
self.runtime_ready.store(ready, Ordering::Relaxed);
}
pub fn is_runtime_ready(&self) -> bool {
self.runtime_ready.load(Ordering::Relaxed)
}
pub fn update_runtime_reinit_policy(
@@ -238,6 +382,20 @@ impl MePool {
hardswap_warmup_delay_max_ms: u64,
hardswap_warmup_extra_passes: u8,
hardswap_warmup_pass_backoff_base_ms: u64,
bind_stale_mode: MeBindStaleMode,
bind_stale_ttl_secs: u64,
secret_atomic_snapshot: bool,
deterministic_writer_sort: bool,
single_endpoint_shadow_writers: u8,
single_endpoint_outage_mode_enabled: bool,
single_endpoint_outage_disable_quarantine: bool,
single_endpoint_outage_backoff_min_ms: u64,
single_endpoint_outage_backoff_max_ms: u64,
single_endpoint_shadow_rotate_every_secs: u64,
floor_mode: MeFloorMode,
adaptive_floor_idle_secs: u64,
adaptive_floor_min_writers_single_endpoint: u8,
adaptive_floor_recover_grace_secs: u64,
) {
self.hardswap.store(hardswap, Ordering::Relaxed);
self.me_pool_drain_ttl_secs
@@ -254,6 +412,49 @@ impl MePool {
.store(hardswap_warmup_extra_passes as u32, Ordering::Relaxed);
self.me_hardswap_warmup_pass_backoff_base_ms
.store(hardswap_warmup_pass_backoff_base_ms, Ordering::Relaxed);
self.me_bind_stale_mode
.store(bind_stale_mode.as_u8(), Ordering::Relaxed);
self.me_bind_stale_ttl_secs
.store(bind_stale_ttl_secs, Ordering::Relaxed);
self.secret_atomic_snapshot
.store(secret_atomic_snapshot, Ordering::Relaxed);
self.me_deterministic_writer_sort
.store(deterministic_writer_sort, Ordering::Relaxed);
self.me_single_endpoint_shadow_writers
.store(single_endpoint_shadow_writers, Ordering::Relaxed);
self.me_single_endpoint_outage_mode_enabled
.store(single_endpoint_outage_mode_enabled, Ordering::Relaxed);
self.me_single_endpoint_outage_disable_quarantine
.store(single_endpoint_outage_disable_quarantine, Ordering::Relaxed);
self.me_single_endpoint_outage_backoff_min_ms
.store(single_endpoint_outage_backoff_min_ms, Ordering::Relaxed);
self.me_single_endpoint_outage_backoff_max_ms
.store(single_endpoint_outage_backoff_max_ms, Ordering::Relaxed);
self.me_single_endpoint_shadow_rotate_every_secs
.store(single_endpoint_shadow_rotate_every_secs, Ordering::Relaxed);
let previous_floor_mode = self.floor_mode();
self.me_floor_mode
.store(floor_mode.as_u8(), Ordering::Relaxed);
self.me_adaptive_floor_idle_secs
.store(adaptive_floor_idle_secs, Ordering::Relaxed);
self.me_adaptive_floor_min_writers_single_endpoint
.store(adaptive_floor_min_writers_single_endpoint, Ordering::Relaxed);
self.me_adaptive_floor_recover_grace_secs
.store(adaptive_floor_recover_grace_secs, Ordering::Relaxed);
if previous_floor_mode != floor_mode {
self.stats.increment_me_floor_mode_switch_total();
match (previous_floor_mode, floor_mode) {
(MeFloorMode::Static, MeFloorMode::Adaptive) => {
self.stats
.increment_me_floor_mode_switch_static_to_adaptive_total();
}
(MeFloorMode::Adaptive, MeFloorMode::Static) => {
self.stats
.increment_me_floor_mode_switch_adaptive_to_static_total();
}
_ => {}
}
}
}
pub fn reset_stun_state(&self) {
@@ -307,11 +508,96 @@ impl MePool {
}
pub(super) async fn key_selector(&self) -> u32 {
let secret = self.proxy_secret.read().await;
if secret.len() >= 4 {
u32::from_le_bytes([secret[0], secret[1], secret[2], secret[3]])
self.proxy_secret.read().await.key_selector
}
pub(super) async fn secret_snapshot(&self) -> SecretSnapshot {
self.proxy_secret.read().await.clone()
}
pub(super) fn bind_stale_mode(&self) -> MeBindStaleMode {
MeBindStaleMode::from_u8(self.me_bind_stale_mode.load(Ordering::Relaxed))
}
pub(super) fn required_writers_for_dc(&self, endpoint_count: usize) -> usize {
if endpoint_count == 0 {
return 0;
}
if endpoint_count == 1 {
let shadow = self
.me_single_endpoint_shadow_writers
.load(Ordering::Relaxed) as usize;
return (1 + shadow).max(3);
}
endpoint_count.max(3)
}
pub(super) fn floor_mode(&self) -> MeFloorMode {
MeFloorMode::from_u8(self.me_floor_mode.load(Ordering::Relaxed))
}
pub(super) fn adaptive_floor_idle_duration(&self) -> Duration {
Duration::from_secs(self.me_adaptive_floor_idle_secs.load(Ordering::Relaxed))
}
pub(super) fn adaptive_floor_recover_grace_duration(&self) -> Duration {
Duration::from_secs(
self.me_adaptive_floor_recover_grace_secs
.load(Ordering::Relaxed),
)
}
pub(super) fn required_writers_for_dc_with_floor_mode(
&self,
endpoint_count: usize,
reduce_for_idle: bool,
) -> usize {
let base_required = self.required_writers_for_dc(endpoint_count);
if !reduce_for_idle {
return base_required;
}
if endpoint_count != 1 || self.floor_mode() != MeFloorMode::Adaptive {
return base_required;
}
let min_writers = (self
.me_adaptive_floor_min_writers_single_endpoint
.load(Ordering::Relaxed) as usize)
.max(1);
base_required.min(min_writers)
}
pub(super) fn single_endpoint_outage_mode_enabled(&self) -> bool {
self.me_single_endpoint_outage_mode_enabled
.load(Ordering::Relaxed)
}
pub(super) fn single_endpoint_outage_disable_quarantine(&self) -> bool {
self.me_single_endpoint_outage_disable_quarantine
.load(Ordering::Relaxed)
}
pub(super) fn single_endpoint_outage_backoff_bounds_ms(&self) -> (u64, u64) {
let min_ms = self
.me_single_endpoint_outage_backoff_min_ms
.load(Ordering::Relaxed);
let max_ms = self
.me_single_endpoint_outage_backoff_max_ms
.load(Ordering::Relaxed);
if min_ms <= max_ms {
(min_ms, max_ms)
} else {
0
(max_ms, min_ms)
}
}
pub(super) fn single_endpoint_shadow_rotate_interval(&self) -> Option<Duration> {
let secs = self
.me_single_endpoint_shadow_rotate_every_secs
.load(Ordering::Relaxed);
if secs == 0 {
None
} else {
Some(Duration::from_secs(secs))
}
}

View File

@@ -7,12 +7,29 @@ use tracing::warn;
use super::pool::MePool;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SnapshotApplyOutcome {
AppliedChanged,
AppliedNoDelta,
RejectedEmpty,
}
impl SnapshotApplyOutcome {
pub fn changed(self) -> bool {
matches!(self, SnapshotApplyOutcome::AppliedChanged)
}
}
impl MePool {
pub async fn update_proxy_maps(
&self,
new_v4: HashMap<i32, Vec<(IpAddr, u16)>>,
new_v6: Option<HashMap<i32, Vec<(IpAddr, u16)>>>,
) -> bool {
) -> SnapshotApplyOutcome {
if new_v4.is_empty() && new_v6.as_ref().is_none_or(|v| v.is_empty()) {
return SnapshotApplyOutcome::RejectedEmpty;
}
let mut changed = false;
{
let mut guard = self.proxy_map_v4.write().await;
@@ -51,7 +68,11 @@ impl MePool {
}
}
}
changed
if changed {
SnapshotApplyOutcome::AppliedChanged
} else {
SnapshotApplyOutcome::AppliedNoDelta
}
}
pub async fn update_secret(self: &Arc<Self>, new_secret: Vec<u8>) -> bool {
@@ -60,8 +81,19 @@ impl MePool {
return false;
}
let mut guard = self.proxy_secret.write().await;
if *guard != new_secret {
*guard = new_secret;
if guard.secret != new_secret {
guard.secret = new_secret;
guard.key_selector = if guard.secret.len() >= 4 {
u32::from_le_bytes([
guard.secret[0],
guard.secret[1],
guard.secret[2],
guard.secret[3],
])
} else {
0
};
guard.epoch = guard.epoch.saturating_add(1);
drop(guard);
self.reconnect_all().await;
return true;

View File

@@ -19,7 +19,7 @@ impl MePool {
me_servers = self.proxy_map_v4.read().await.len(),
pool_size,
key_selector = format_args!("0x{ks:08x}"),
secret_len = self.proxy_secret.read().await.len(),
secret_len = self.proxy_secret.read().await.secret.len(),
"Initializing ME pool"
);

View File

@@ -2,27 +2,173 @@ use std::collections::HashSet;
use std::net::SocketAddr;
use std::sync::Arc;
use std::sync::atomic::Ordering;
use std::time::{Duration, Instant};
use tracing::{debug, info, warn};
use crate::crypto::SecureRandom;
use crate::network::IpFamily;
use super::pool::MePool;
use super::pool::{MePool, RefillDcKey, WriterContour};
const ME_FLAP_UPTIME_THRESHOLD_SECS: u64 = 20;
const ME_FLAP_QUARANTINE_SECS: u64 = 25;
impl MePool {
pub(super) async fn maybe_quarantine_flapping_endpoint(
&self,
addr: SocketAddr,
uptime: Duration,
) {
if uptime > Duration::from_secs(ME_FLAP_UPTIME_THRESHOLD_SECS) {
return;
}
let until = Instant::now() + Duration::from_secs(ME_FLAP_QUARANTINE_SECS);
let mut guard = self.endpoint_quarantine.lock().await;
guard.retain(|_, expiry| *expiry > Instant::now());
guard.insert(addr, until);
self.stats.increment_me_endpoint_quarantine_total();
warn!(
%addr,
uptime_ms = uptime.as_millis(),
quarantine_secs = ME_FLAP_QUARANTINE_SECS,
"ME endpoint temporarily quarantined due to rapid writer flap"
);
}
pub(super) async fn is_endpoint_quarantined(&self, addr: SocketAddr) -> bool {
let mut guard = self.endpoint_quarantine.lock().await;
let now = Instant::now();
guard.retain(|_, expiry| *expiry > now);
guard.contains_key(&addr)
}
async fn connectable_endpoints(&self, endpoints: &[SocketAddr]) -> Vec<SocketAddr> {
if endpoints.is_empty() {
return Vec::new();
}
let mut guard = self.endpoint_quarantine.lock().await;
let now = Instant::now();
guard.retain(|_, expiry| *expiry > now);
let mut ready = Vec::<SocketAddr>::with_capacity(endpoints.len());
let mut earliest_quarantine: Option<(SocketAddr, Instant)> = None;
for addr in endpoints {
if let Some(expiry) = guard.get(addr).copied() {
match earliest_quarantine {
Some((_, current_expiry)) if current_expiry <= expiry => {}
_ => earliest_quarantine = Some((*addr, expiry)),
}
} else {
ready.push(*addr);
}
}
if !ready.is_empty() {
return ready;
}
if let Some((addr, expiry)) = earliest_quarantine {
debug!(
%addr,
wait_ms = expiry.saturating_duration_since(now).as_millis(),
"All ME endpoints are quarantined for the DC group; retrying earliest one"
);
return vec![addr];
}
Vec::new()
}
pub(super) async fn has_refill_inflight_for_endpoints(&self, endpoints: &[SocketAddr]) -> bool {
if endpoints.is_empty() {
return false;
}
{
let guard = self.refill_inflight.lock().await;
if endpoints.iter().any(|addr| guard.contains(addr)) {
return true;
}
}
let dc_keys = self.resolve_refill_dc_keys_for_endpoints(endpoints).await;
if dc_keys.is_empty() {
return false;
}
let guard = self.refill_inflight_dc.lock().await;
dc_keys.iter().any(|key| guard.contains(key))
}
async fn resolve_refill_dc_key_for_addr(&self, addr: SocketAddr) -> Option<RefillDcKey> {
let family = if addr.is_ipv4() {
IpFamily::V4
} else {
IpFamily::V6
};
let map = self.proxy_map_for_family(family).await;
for (dc, endpoints) in map {
if endpoints
.into_iter()
.any(|(ip, port)| SocketAddr::new(ip, port) == addr)
{
return Some(RefillDcKey {
dc: dc.abs(),
family,
});
}
}
None
}
async fn resolve_refill_dc_keys_for_endpoints(
&self,
endpoints: &[SocketAddr],
) -> HashSet<RefillDcKey> {
let mut out = HashSet::<RefillDcKey>::new();
for addr in endpoints {
if let Some(key) = self.resolve_refill_dc_key_for_addr(*addr).await {
out.insert(key);
}
}
out
}
pub(super) async fn connect_endpoints_round_robin(
self: &Arc<Self>,
endpoints: &[SocketAddr],
rng: &SecureRandom,
) -> bool {
if endpoints.is_empty() {
self.connect_endpoints_round_robin_with_generation_contour(
endpoints,
rng,
self.current_generation(),
WriterContour::Active,
)
.await
}
pub(super) async fn connect_endpoints_round_robin_with_generation_contour(
self: &Arc<Self>,
endpoints: &[SocketAddr],
rng: &SecureRandom,
generation: u64,
contour: WriterContour,
) -> bool {
let candidates = self.connectable_endpoints(endpoints).await;
if candidates.is_empty() {
return false;
}
let start = (self.rr.fetch_add(1, Ordering::Relaxed) as usize) % endpoints.len();
for offset in 0..endpoints.len() {
let idx = (start + offset) % endpoints.len();
let addr = endpoints[idx];
match self.connect_one(addr, rng).await {
let start = (self.rr.fetch_add(1, Ordering::Relaxed) as usize) % candidates.len();
for offset in 0..candidates.len() {
let idx = (start + offset) % candidates.len();
let addr = candidates[idx];
match self
.connect_one_with_generation_contour(addr, rng, generation, contour)
.await
{
Ok(()) => return true,
Err(e) => debug!(%addr, error = %e, "ME connect failed during round-robin warmup"),
}
@@ -83,29 +229,37 @@ impl MePool {
async fn refill_writer_after_loss(self: &Arc<Self>, addr: SocketAddr) -> bool {
let fast_retries = self.me_reconnect_fast_retry_count.max(1);
let same_endpoint_quarantined = self.is_endpoint_quarantined(addr).await;
for attempt in 0..fast_retries {
self.stats.increment_me_reconnect_attempt();
match self.connect_one(addr, self.rng.as_ref()).await {
Ok(()) => {
self.stats.increment_me_reconnect_success();
self.stats.increment_me_writer_restored_same_endpoint_total();
info!(
%addr,
attempt = attempt + 1,
"ME writer restored on the same endpoint"
);
return true;
}
Err(e) => {
debug!(
%addr,
attempt = attempt + 1,
error = %e,
"ME immediate same-endpoint reconnect failed"
);
if !same_endpoint_quarantined {
for attempt in 0..fast_retries {
self.stats.increment_me_reconnect_attempt();
match self.connect_one(addr, self.rng.as_ref()).await {
Ok(()) => {
self.stats.increment_me_reconnect_success();
self.stats.increment_me_writer_restored_same_endpoint_total();
info!(
%addr,
attempt = attempt + 1,
"ME writer restored on the same endpoint"
);
return true;
}
Err(e) => {
debug!(
%addr,
attempt = attempt + 1,
error = %e,
"ME immediate same-endpoint reconnect failed"
);
}
}
}
} else {
debug!(
%addr,
"Skipping immediate same-endpoint reconnect because endpoint is quarantined"
);
}
let dc_endpoints = self.endpoints_for_same_dc(addr).await;
@@ -138,6 +292,9 @@ impl MePool {
pub(crate) fn trigger_immediate_refill(self: &Arc<Self>, addr: SocketAddr) {
let pool = Arc::clone(self);
tokio::spawn(async move {
let dc_endpoints = pool.endpoints_for_same_dc(addr).await;
let dc_keys = pool.resolve_refill_dc_keys_for_endpoints(&dc_endpoints).await;
{
let mut guard = pool.refill_inflight.lock().await;
if !guard.insert(addr) {
@@ -145,6 +302,19 @@ impl MePool {
return;
}
}
if !dc_keys.is_empty() {
let mut dc_guard = pool.refill_inflight_dc.lock().await;
if dc_keys.iter().any(|key| dc_guard.contains(key)) {
pool.stats.increment_me_refill_skipped_inflight_total();
drop(dc_guard);
let mut guard = pool.refill_inflight.lock().await;
guard.remove(&addr);
return;
}
dc_guard.extend(dc_keys.iter().copied());
}
pool.stats.increment_me_refill_triggered_total();
let restored = pool.refill_writer_after_loss(addr).await;
@@ -154,6 +324,13 @@ impl MePool {
let mut guard = pool.refill_inflight.lock().await;
guard.remove(&addr);
drop(guard);
if !dc_keys.is_empty() {
let mut dc_guard = pool.refill_inflight_dc.lock().await;
for key in &dc_keys {
dc_guard.remove(key);
}
}
});
}
}

View File

@@ -1,4 +1,5 @@
use std::collections::{HashMap, HashSet};
use std::hash::{Hash, Hasher};
use std::net::SocketAddr;
use std::sync::Arc;
use std::sync::atomic::Ordering;
@@ -7,12 +8,58 @@ use std::time::Duration;
use rand::Rng;
use rand::seq::SliceRandom;
use tracing::{debug, info, warn};
use std::collections::hash_map::DefaultHasher;
use crate::crypto::SecureRandom;
use super::pool::MePool;
use super::pool::{MePool, WriterContour};
const ME_HARDSWAP_PENDING_TTL_SECS: u64 = 1800;
impl MePool {
fn desired_map_hash(desired_by_dc: &HashMap<i32, HashSet<SocketAddr>>) -> u64 {
let mut hasher = DefaultHasher::new();
let mut dcs: Vec<i32> = desired_by_dc.keys().copied().collect();
dcs.sort_unstable();
for dc in dcs {
dc.hash(&mut hasher);
let mut endpoints: Vec<SocketAddr> = desired_by_dc
.get(&dc)
.map(|set| set.iter().copied().collect())
.unwrap_or_default();
endpoints.sort_unstable();
for endpoint in endpoints {
endpoint.hash(&mut hasher);
}
}
hasher.finish()
}
fn clear_pending_hardswap_state(&self) {
self.pending_hardswap_generation.store(0, Ordering::Relaxed);
self.pending_hardswap_started_at_epoch_secs
.store(0, Ordering::Relaxed);
self.pending_hardswap_map_hash.store(0, Ordering::Relaxed);
self.warm_generation.store(0, Ordering::Relaxed);
}
async fn promote_warm_generation_to_active(&self, generation: u64) {
self.active_generation.store(generation, Ordering::Relaxed);
self.warm_generation.store(0, Ordering::Relaxed);
let ws = self.writers.read().await;
for writer in ws.iter() {
if writer.draining.load(Ordering::Relaxed) {
continue;
}
if writer.generation == generation {
writer
.contour
.store(WriterContour::Active.as_u8(), Ordering::Relaxed);
}
}
}
fn coverage_ratio(
desired_by_dc: &HashMap<i32, HashSet<SocketAddr>>,
active_writer_addrs: &HashSet<SocketAddr>,
@@ -101,10 +148,6 @@ impl MePool {
out
}
pub(super) fn required_writers_for_dc(endpoint_count: usize) -> usize {
endpoint_count.max(3)
}
fn hardswap_warmup_connect_delay_ms(&self) -> u64 {
let min_ms = self.me_hardswap_warmup_delay_min_ms.load(Ordering::Relaxed);
let max_ms = self.me_hardswap_warmup_delay_max_ms.load(Ordering::Relaxed);
@@ -174,7 +217,7 @@ impl MePool {
let mut endpoint_list: Vec<SocketAddr> = endpoints.iter().copied().collect();
endpoint_list.sort_unstable();
let required = Self::required_writers_for_dc(endpoint_list.len());
let required = self.required_writers_for_dc(endpoint_list.len());
let mut completed = false;
let mut last_fresh_count = self
.fresh_writer_count_for_endpoints(generation, endpoints)
@@ -202,7 +245,14 @@ impl MePool {
let delay_ms = self.hardswap_warmup_connect_delay_ms();
tokio::time::sleep(Duration::from_millis(delay_ms)).await;
let connected = self.connect_endpoints_round_robin(&endpoint_list, rng).await;
let connected = self
.connect_endpoints_round_robin_with_generation_contour(
&endpoint_list,
rng,
generation,
WriterContour::Warm,
)
.await;
debug!(
dc = *dc,
pass = pass_idx + 1,
@@ -265,11 +315,61 @@ impl MePool {
return;
}
let desired_map_hash = Self::desired_map_hash(&desired_by_dc);
let now_epoch_secs = Self::now_epoch_secs();
let previous_generation = self.current_generation();
let generation = self.generation.fetch_add(1, Ordering::Relaxed) + 1;
let hardswap = self.hardswap.load(Ordering::Relaxed);
let generation = if hardswap {
let pending_generation = self.pending_hardswap_generation.load(Ordering::Relaxed);
let pending_started_at = self
.pending_hardswap_started_at_epoch_secs
.load(Ordering::Relaxed);
let pending_map_hash = self.pending_hardswap_map_hash.load(Ordering::Relaxed);
let pending_age_secs = now_epoch_secs.saturating_sub(pending_started_at);
let pending_ttl_expired = pending_started_at > 0 && pending_age_secs > ME_HARDSWAP_PENDING_TTL_SECS;
let pending_matches_map = pending_map_hash != 0 && pending_map_hash == desired_map_hash;
if pending_generation != 0
&& pending_generation >= previous_generation
&& pending_matches_map
&& !pending_ttl_expired
{
self.stats.increment_me_hardswap_pending_reuse_total();
debug!(
previous_generation,
generation = pending_generation,
pending_age_secs,
"ME hardswap continues with pending generation"
);
pending_generation
} else {
if pending_generation != 0 && pending_ttl_expired {
self.stats.increment_me_hardswap_pending_ttl_expired_total();
warn!(
previous_generation,
generation = pending_generation,
pending_age_secs,
pending_ttl_secs = ME_HARDSWAP_PENDING_TTL_SECS,
"ME hardswap pending generation expired by TTL; starting fresh generation"
);
}
let next_generation = self.generation.fetch_add(1, Ordering::Relaxed) + 1;
self.pending_hardswap_generation
.store(next_generation, Ordering::Relaxed);
self.pending_hardswap_started_at_epoch_secs
.store(now_epoch_secs, Ordering::Relaxed);
self.pending_hardswap_map_hash
.store(desired_map_hash, Ordering::Relaxed);
self.warm_generation.store(next_generation, Ordering::Relaxed);
next_generation
}
} else {
self.clear_pending_hardswap_state();
self.generation.fetch_add(1, Ordering::Relaxed) + 1
};
if hardswap {
self.warm_generation.store(generation, Ordering::Relaxed);
self.warmup_generation_for_all_dcs(rng, generation, &desired_by_dc)
.await;
} else {
@@ -305,7 +405,7 @@ impl MePool {
if endpoints.is_empty() {
continue;
}
let required = Self::required_writers_for_dc(endpoints.len());
let required = self.required_writers_for_dc(endpoints.len());
let fresh_count = writers
.iter()
.filter(|w| !w.draining.load(Ordering::Relaxed))
@@ -334,6 +434,10 @@ impl MePool {
return;
}
if hardswap {
self.promote_warm_generation_to_active(generation).await;
}
let desired_addrs: HashSet<SocketAddr> = desired_by_dc
.values()
.flat_map(|set| set.iter().copied())
@@ -354,6 +458,9 @@ impl MePool {
drop(writers);
if stale_writer_ids.is_empty() {
if hardswap {
self.clear_pending_hardswap_state();
}
debug!("ME reinit cycle completed with no stale writers");
return;
}
@@ -375,6 +482,9 @@ impl MePool {
self.mark_writer_draining_with_timeout(writer_id, drain_timeout, !hardswap)
.await;
}
if hardswap {
self.clear_pending_hardswap_state();
}
}
pub async fn zero_downtime_reinit_periodic(self: &Arc<Self>, rng: &SecureRandom) {

View File

@@ -0,0 +1,424 @@
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::net::SocketAddr;
use std::sync::atomic::Ordering;
use std::time::Instant;
use super::pool::{MePool, WriterContour};
use crate::config::{MeBindStaleMode, MeFloorMode, MeSocksKdfPolicy};
use crate::transport::upstream::IpPreference;
#[derive(Clone, Debug)]
pub(crate) struct MeApiWriterStatusSnapshot {
pub writer_id: u64,
pub dc: Option<i16>,
pub endpoint: SocketAddr,
pub generation: u64,
pub state: &'static str,
pub draining: bool,
pub degraded: bool,
pub bound_clients: usize,
pub idle_for_secs: Option<u64>,
pub rtt_ema_ms: Option<f64>,
}
#[derive(Clone, Debug)]
pub(crate) struct MeApiDcStatusSnapshot {
pub dc: i16,
pub endpoints: Vec<SocketAddr>,
pub available_endpoints: usize,
pub available_pct: f64,
pub required_writers: usize,
pub alive_writers: usize,
pub coverage_pct: f64,
pub rtt_ms: Option<f64>,
pub load: usize,
}
#[derive(Clone, Debug)]
pub(crate) struct MeApiStatusSnapshot {
pub generated_at_epoch_secs: u64,
pub configured_dc_groups: usize,
pub configured_endpoints: usize,
pub available_endpoints: usize,
pub available_pct: f64,
pub required_writers: usize,
pub alive_writers: usize,
pub coverage_pct: f64,
pub writers: Vec<MeApiWriterStatusSnapshot>,
pub dcs: Vec<MeApiDcStatusSnapshot>,
}
#[derive(Clone, Debug)]
pub(crate) struct MeApiQuarantinedEndpointSnapshot {
pub endpoint: SocketAddr,
pub remaining_ms: u64,
}
#[derive(Clone, Debug)]
pub(crate) struct MeApiDcPathSnapshot {
pub dc: i16,
pub ip_preference: Option<&'static str>,
pub selected_addr_v4: Option<SocketAddr>,
pub selected_addr_v6: Option<SocketAddr>,
}
#[derive(Clone, Debug)]
pub(crate) struct MeApiRuntimeSnapshot {
pub active_generation: u64,
pub warm_generation: u64,
pub pending_hardswap_generation: u64,
pub pending_hardswap_age_secs: Option<u64>,
pub hardswap_enabled: bool,
pub floor_mode: &'static str,
pub adaptive_floor_idle_secs: u64,
pub adaptive_floor_min_writers_single_endpoint: u8,
pub adaptive_floor_recover_grace_secs: u64,
pub me_keepalive_enabled: bool,
pub me_keepalive_interval_secs: u64,
pub me_keepalive_jitter_secs: u64,
pub me_keepalive_payload_random: bool,
pub rpc_proxy_req_every_secs: u64,
pub me_reconnect_max_concurrent_per_dc: u32,
pub me_reconnect_backoff_base_ms: u64,
pub me_reconnect_backoff_cap_ms: u64,
pub me_reconnect_fast_retry_count: u32,
pub me_pool_drain_ttl_secs: u64,
pub me_pool_force_close_secs: u64,
pub me_pool_min_fresh_ratio: f32,
pub me_bind_stale_mode: &'static str,
pub me_bind_stale_ttl_secs: u64,
pub me_single_endpoint_shadow_writers: u8,
pub me_single_endpoint_outage_mode_enabled: bool,
pub me_single_endpoint_outage_disable_quarantine: bool,
pub me_single_endpoint_outage_backoff_min_ms: u64,
pub me_single_endpoint_outage_backoff_max_ms: u64,
pub me_single_endpoint_shadow_rotate_every_secs: u64,
pub me_deterministic_writer_sort: bool,
pub me_socks_kdf_policy: &'static str,
pub quarantined_endpoints: Vec<MeApiQuarantinedEndpointSnapshot>,
pub network_path: Vec<MeApiDcPathSnapshot>,
}
impl MePool {
pub(crate) async fn api_status_snapshot(&self) -> MeApiStatusSnapshot {
let now_epoch_secs = Self::now_epoch_secs();
let mut endpoints_by_dc = BTreeMap::<i16, BTreeSet<SocketAddr>>::new();
if self.decision.ipv4_me {
let map = self.proxy_map_v4.read().await.clone();
for (dc, addrs) in map {
let abs_dc = dc.abs();
if abs_dc == 0 {
continue;
}
let Ok(dc_idx) = i16::try_from(abs_dc) else {
continue;
};
let entry = endpoints_by_dc.entry(dc_idx).or_default();
for (ip, port) in addrs {
entry.insert(SocketAddr::new(ip, port));
}
}
}
if self.decision.ipv6_me {
let map = self.proxy_map_v6.read().await.clone();
for (dc, addrs) in map {
let abs_dc = dc.abs();
if abs_dc == 0 {
continue;
}
let Ok(dc_idx) = i16::try_from(abs_dc) else {
continue;
};
let entry = endpoints_by_dc.entry(dc_idx).or_default();
for (ip, port) in addrs {
entry.insert(SocketAddr::new(ip, port));
}
}
}
let mut endpoint_to_dc = HashMap::<SocketAddr, i16>::new();
for (dc, endpoints) in &endpoints_by_dc {
for endpoint in endpoints {
endpoint_to_dc.entry(*endpoint).or_insert(*dc);
}
}
let configured_dc_groups = endpoints_by_dc.len();
let configured_endpoints = endpoints_by_dc.values().map(BTreeSet::len).sum();
let required_writers = endpoints_by_dc
.values()
.map(|endpoints| self.required_writers_for_dc_with_floor_mode(endpoints.len(), false))
.sum();
let idle_since = self.registry.writer_idle_since_snapshot().await;
let activity = self.registry.writer_activity_snapshot().await;
let rtt = self.rtt_stats.lock().await.clone();
let writers = self.writers.read().await.clone();
let mut live_writers_by_endpoint = HashMap::<SocketAddr, usize>::new();
let mut live_writers_by_dc = HashMap::<i16, usize>::new();
let mut dc_rtt_agg = HashMap::<i16, (f64, u64)>::new();
let mut writer_rows = Vec::<MeApiWriterStatusSnapshot>::with_capacity(writers.len());
for writer in writers {
let endpoint = writer.addr;
let dc = endpoint_to_dc.get(&endpoint).copied();
let draining = writer.draining.load(Ordering::Relaxed);
let degraded = writer.degraded.load(Ordering::Relaxed);
let bound_clients = activity
.bound_clients_by_writer
.get(&writer.id)
.copied()
.unwrap_or(0);
let idle_for_secs = idle_since
.get(&writer.id)
.map(|idle_ts| now_epoch_secs.saturating_sub(*idle_ts));
let rtt_ema_ms = rtt.get(&writer.id).map(|(_, ema)| *ema);
let state = match WriterContour::from_u8(writer.contour.load(Ordering::Relaxed)) {
WriterContour::Warm => "warm",
WriterContour::Active => "active",
WriterContour::Draining => "draining",
};
if !draining {
*live_writers_by_endpoint.entry(endpoint).or_insert(0) += 1;
if let Some(dc_idx) = dc {
*live_writers_by_dc.entry(dc_idx).or_insert(0) += 1;
if let Some(ema_ms) = rtt_ema_ms {
let entry = dc_rtt_agg.entry(dc_idx).or_insert((0.0, 0));
entry.0 += ema_ms;
entry.1 += 1;
}
}
}
writer_rows.push(MeApiWriterStatusSnapshot {
writer_id: writer.id,
dc,
endpoint,
generation: writer.generation,
state,
draining,
degraded,
bound_clients,
idle_for_secs,
rtt_ema_ms,
});
}
writer_rows.sort_by_key(|row| (row.dc.unwrap_or(i16::MAX), row.endpoint, row.writer_id));
let mut dcs = Vec::<MeApiDcStatusSnapshot>::with_capacity(endpoints_by_dc.len());
let mut available_endpoints = 0usize;
let mut alive_writers = 0usize;
for (dc, endpoints) in endpoints_by_dc {
let endpoint_count = endpoints.len();
let dc_available_endpoints = endpoints
.iter()
.filter(|endpoint| live_writers_by_endpoint.contains_key(endpoint))
.count();
let dc_required_writers =
self.required_writers_for_dc_with_floor_mode(endpoint_count, false);
let dc_alive_writers = live_writers_by_dc.get(&dc).copied().unwrap_or(0);
let dc_load = activity
.active_sessions_by_target_dc
.get(&dc)
.copied()
.unwrap_or(0);
let dc_rtt_ms = dc_rtt_agg
.get(&dc)
.and_then(|(sum, count)| (*count > 0).then_some(*sum / (*count as f64)));
available_endpoints += dc_available_endpoints;
alive_writers += dc_alive_writers;
dcs.push(MeApiDcStatusSnapshot {
dc,
endpoints: endpoints.into_iter().collect(),
available_endpoints: dc_available_endpoints,
available_pct: ratio_pct(dc_available_endpoints, endpoint_count),
required_writers: dc_required_writers,
alive_writers: dc_alive_writers,
coverage_pct: ratio_pct(dc_alive_writers, dc_required_writers),
rtt_ms: dc_rtt_ms,
load: dc_load,
});
}
MeApiStatusSnapshot {
generated_at_epoch_secs: now_epoch_secs,
configured_dc_groups,
configured_endpoints,
available_endpoints,
available_pct: ratio_pct(available_endpoints, configured_endpoints),
required_writers,
alive_writers,
coverage_pct: ratio_pct(alive_writers, required_writers),
writers: writer_rows,
dcs,
}
}
pub(crate) async fn api_runtime_snapshot(&self) -> MeApiRuntimeSnapshot {
let now = Instant::now();
let now_epoch_secs = Self::now_epoch_secs();
let pending_started_at = self
.pending_hardswap_started_at_epoch_secs
.load(Ordering::Relaxed);
let pending_hardswap_age_secs = (pending_started_at > 0)
.then_some(now_epoch_secs.saturating_sub(pending_started_at));
let mut quarantined_endpoints = Vec::<MeApiQuarantinedEndpointSnapshot>::new();
{
let guard = self.endpoint_quarantine.lock().await;
for (endpoint, expires_at) in guard.iter() {
if *expires_at <= now {
continue;
}
let remaining_ms = expires_at.duration_since(now).as_millis() as u64;
quarantined_endpoints.push(MeApiQuarantinedEndpointSnapshot {
endpoint: *endpoint,
remaining_ms,
});
}
}
quarantined_endpoints.sort_by_key(|entry| entry.endpoint);
let mut network_path = Vec::<MeApiDcPathSnapshot>::new();
if let Some(upstream) = &self.upstream {
for dc in 1..=5 {
let dc_idx = dc as i16;
let ip_preference = upstream
.get_dc_ip_preference(dc_idx)
.await
.map(ip_preference_label);
let selected_addr_v4 = upstream.get_dc_addr(dc_idx, false).await;
let selected_addr_v6 = upstream.get_dc_addr(dc_idx, true).await;
network_path.push(MeApiDcPathSnapshot {
dc: dc_idx,
ip_preference,
selected_addr_v4,
selected_addr_v6,
});
}
}
MeApiRuntimeSnapshot {
active_generation: self.active_generation.load(Ordering::Relaxed),
warm_generation: self.warm_generation.load(Ordering::Relaxed),
pending_hardswap_generation: self.pending_hardswap_generation.load(Ordering::Relaxed),
pending_hardswap_age_secs,
hardswap_enabled: self.hardswap.load(Ordering::Relaxed),
floor_mode: floor_mode_label(self.floor_mode()),
adaptive_floor_idle_secs: self.me_adaptive_floor_idle_secs.load(Ordering::Relaxed),
adaptive_floor_min_writers_single_endpoint: self
.me_adaptive_floor_min_writers_single_endpoint
.load(Ordering::Relaxed),
adaptive_floor_recover_grace_secs: self
.me_adaptive_floor_recover_grace_secs
.load(Ordering::Relaxed),
me_keepalive_enabled: self.me_keepalive_enabled,
me_keepalive_interval_secs: self.me_keepalive_interval.as_secs(),
me_keepalive_jitter_secs: self.me_keepalive_jitter.as_secs(),
me_keepalive_payload_random: self.me_keepalive_payload_random,
rpc_proxy_req_every_secs: self.rpc_proxy_req_every_secs.load(Ordering::Relaxed),
me_reconnect_max_concurrent_per_dc: self.me_reconnect_max_concurrent_per_dc,
me_reconnect_backoff_base_ms: self.me_reconnect_backoff_base.as_millis() as u64,
me_reconnect_backoff_cap_ms: self.me_reconnect_backoff_cap.as_millis() as u64,
me_reconnect_fast_retry_count: self.me_reconnect_fast_retry_count,
me_pool_drain_ttl_secs: self.me_pool_drain_ttl_secs.load(Ordering::Relaxed),
me_pool_force_close_secs: self.me_pool_force_close_secs.load(Ordering::Relaxed),
me_pool_min_fresh_ratio: Self::permille_to_ratio(
self.me_pool_min_fresh_ratio_permille.load(Ordering::Relaxed),
),
me_bind_stale_mode: bind_stale_mode_label(self.bind_stale_mode()),
me_bind_stale_ttl_secs: self.me_bind_stale_ttl_secs.load(Ordering::Relaxed),
me_single_endpoint_shadow_writers: self
.me_single_endpoint_shadow_writers
.load(Ordering::Relaxed),
me_single_endpoint_outage_mode_enabled: self
.me_single_endpoint_outage_mode_enabled
.load(Ordering::Relaxed),
me_single_endpoint_outage_disable_quarantine: self
.me_single_endpoint_outage_disable_quarantine
.load(Ordering::Relaxed),
me_single_endpoint_outage_backoff_min_ms: self
.me_single_endpoint_outage_backoff_min_ms
.load(Ordering::Relaxed),
me_single_endpoint_outage_backoff_max_ms: self
.me_single_endpoint_outage_backoff_max_ms
.load(Ordering::Relaxed),
me_single_endpoint_shadow_rotate_every_secs: self
.me_single_endpoint_shadow_rotate_every_secs
.load(Ordering::Relaxed),
me_deterministic_writer_sort: self
.me_deterministic_writer_sort
.load(Ordering::Relaxed),
me_socks_kdf_policy: socks_kdf_policy_label(self.socks_kdf_policy()),
quarantined_endpoints,
network_path,
}
}
}
fn ratio_pct(part: usize, total: usize) -> f64 {
if total == 0 {
return 0.0;
}
let pct = ((part as f64) / (total as f64)) * 100.0;
pct.clamp(0.0, 100.0)
}
fn floor_mode_label(mode: MeFloorMode) -> &'static str {
match mode {
MeFloorMode::Static => "static",
MeFloorMode::Adaptive => "adaptive",
}
}
fn bind_stale_mode_label(mode: MeBindStaleMode) -> &'static str {
match mode {
MeBindStaleMode::Never => "never",
MeBindStaleMode::Ttl => "ttl",
MeBindStaleMode::Always => "always",
}
}
fn socks_kdf_policy_label(policy: MeSocksKdfPolicy) -> &'static str {
match policy {
MeSocksKdfPolicy::Strict => "strict",
MeSocksKdfPolicy::Compat => "compat",
}
}
fn ip_preference_label(preference: IpPreference) -> &'static str {
match preference {
IpPreference::Unknown => "unknown",
IpPreference::PreferV6 => "prefer_v6",
IpPreference::PreferV4 => "prefer_v4",
IpPreference::BothWork => "both",
IpPreference::Unavailable => "unavailable",
}
}
#[cfg(test)]
mod tests {
use super::ratio_pct;
#[test]
fn ratio_pct_is_zero_when_denominator_is_zero() {
assert_eq!(ratio_pct(1, 0), 0.0);
}
#[test]
fn ratio_pct_is_capped_at_100() {
assert_eq!(ratio_pct(7, 3), 100.0);
}
#[test]
fn ratio_pct_reports_expected_value() {
assert_eq!(ratio_pct(1, 4), 25.0);
}
}

View File

@@ -1,7 +1,8 @@
use std::net::SocketAddr;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering};
use std::time::{Duration, Instant};
use std::io::ErrorKind;
use bytes::BytesMut;
use rand::Rng;
@@ -9,18 +10,25 @@ use tokio::sync::mpsc;
use tokio_util::sync::CancellationToken;
use tracing::{debug, info, warn};
use crate::config::MeBindStaleMode;
use crate::crypto::SecureRandom;
use crate::error::{ProxyError, Result};
use crate::protocol::constants::RPC_PING_U32;
use crate::protocol::constants::{RPC_CLOSE_EXT_U32, RPC_PING_U32};
use super::codec::{RpcWriter, WriterCommand};
use super::pool::{MePool, MeWriter};
use super::pool::{MePool, MeWriter, WriterContour};
use super::reader::reader_loop;
use super::registry::BoundConn;
use super::wire::build_proxy_req_payload;
const ME_ACTIVE_PING_SECS: u64 = 25;
const ME_ACTIVE_PING_JITTER_SECS: i64 = 5;
const ME_IDLE_KEEPALIVE_MAX_SECS: u64 = 5;
const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700;
fn is_me_peer_closed_error(error: &ProxyError) -> bool {
matches!(error, ProxyError::Io(ioe) if ioe.kind() == ErrorKind::UnexpectedEof)
}
impl MePool {
pub(crate) async fn prune_closed_writers(self: &Arc<Self>) {
@@ -42,7 +50,23 @@ impl MePool {
}
pub(crate) async fn connect_one(self: &Arc<Self>, addr: SocketAddr, rng: &SecureRandom) -> Result<()> {
let secret_len = self.proxy_secret.read().await.len();
self.connect_one_with_generation_contour(
addr,
rng,
self.current_generation(),
WriterContour::Active,
)
.await
}
pub(super) async fn connect_one_with_generation_contour(
self: &Arc<Self>,
addr: SocketAddr,
rng: &SecureRandom,
generation: u64,
contour: WriterContour,
) -> Result<()> {
let secret_len = self.proxy_secret.read().await.secret.len();
if secret_len < 32 {
return Err(ProxyError::Proxy("proxy-secret too short for ME auth".into()));
}
@@ -51,7 +75,7 @@ impl MePool {
let hs = self.handshake_only(stream, addr, upstream_egress, rng).await?;
let writer_id = self.next_writer_id.fetch_add(1, Ordering::Relaxed);
let generation = self.current_generation();
let contour = Arc::new(AtomicU8::new(contour.as_u8()));
let cancel = CancellationToken::new();
let degraded = Arc::new(AtomicBool::new(false));
let draining = Arc::new(AtomicBool::new(false));
@@ -88,6 +112,8 @@ impl MePool {
id: writer_id,
addr,
generation,
contour: contour.clone(),
created_at: Instant::now(),
tx: tx.clone(),
cancel: cancel.clone(),
degraded: degraded.clone(),
@@ -96,6 +122,7 @@ impl MePool {
allow_drain_fallback: allow_drain_fallback.clone(),
};
self.writers.write().await.push(writer.clone());
self.registry.mark_writer_idle(writer_id).await;
self.conn_count.fetch_add(1, Ordering::Relaxed);
self.writer_available.notify_one();
@@ -105,6 +132,7 @@ impl MePool {
let ping_tracker_reader = ping_tracker.clone();
let rtt_stats = self.rtt_stats.clone();
let stats_reader = self.stats.clone();
let stats_reader_close = self.stats.clone();
let stats_ping = self.stats.clone();
let pool = Arc::downgrade(self);
let cancel_ping = cancel.clone();
@@ -116,6 +144,13 @@ impl MePool {
let keepalive_enabled = self.me_keepalive_enabled;
let keepalive_interval = self.me_keepalive_interval;
let keepalive_jitter = self.me_keepalive_jitter;
let rpc_proxy_req_every_secs = self.rpc_proxy_req_every_secs.load(Ordering::Relaxed);
let tx_signal = tx.clone();
let stats_signal = self.stats.clone();
let cancel_signal = cancel.clone();
let cleanup_for_signal = cleanup_done.clone();
let pool_signal = Arc::downgrade(self);
let keepalive_jitter_signal = self.me_keepalive_jitter;
let cancel_reader_token = cancel.clone();
let cancel_ping_token = cancel_ping.clone();
@@ -137,6 +172,15 @@ impl MePool {
cancel_reader_token.clone(),
)
.await;
let idle_close_by_peer = if let Err(e) = res.as_ref() {
is_me_peer_closed_error(e) && reg.is_writer_empty(writer_id).await
} else {
false
};
if idle_close_by_peer {
stats_reader_close.increment_me_idle_close_by_peer_total();
info!(writer_id, "ME socket closed by peer on idle writer");
}
if let Some(pool) = pool.upgrade()
&& cleanup_for_reader
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
@@ -145,7 +189,9 @@ impl MePool {
pool.remove_writer_and_close_clients(writer_id).await;
}
if let Err(e) = res {
warn!(error = %e, "ME reader ended");
if !idle_close_by_peer {
warn!(error = %e, "ME reader ended");
}
}
let mut ws = writers_arc.write().await;
ws.retain(|w| w.id != writer_id);
@@ -234,6 +280,116 @@ impl MePool {
}
});
tokio::spawn(async move {
if rpc_proxy_req_every_secs == 0 {
return;
}
let interval = Duration::from_secs(rpc_proxy_req_every_secs);
let startup_jitter_ms = {
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms = keepalive_jitter_signal
.as_millis()
.min(jitter_cap_ms)
.max(1);
rand::rng().random_range(0..=effective_jitter_ms as u64)
};
tokio::select! {
_ = cancel_signal.cancelled() => return,
_ = tokio::time::sleep(Duration::from_millis(startup_jitter_ms)) => {}
}
loop {
let wait = {
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms = keepalive_jitter_signal
.as_millis()
.min(jitter_cap_ms)
.max(1);
interval + Duration::from_millis(rand::rng().random_range(0..=effective_jitter_ms as u64))
};
tokio::select! {
_ = cancel_signal.cancelled() => break,
_ = tokio::time::sleep(wait) => {}
}
let Some(pool) = pool_signal.upgrade() else {
break;
};
let Some(meta) = pool.registry.get_last_writer_meta(writer_id).await else {
stats_signal.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
continue;
};
let (conn_id, mut service_rx) = pool.registry.register().await;
pool.registry
.bind_writer(conn_id, writer_id, tx_signal.clone(), meta.clone())
.await;
let payload = build_proxy_req_payload(
conn_id,
meta.client_addr,
meta.our_addr,
&[],
pool.proxy_tag.as_deref(),
meta.proto_flags,
);
if tx_signal.send(WriterCommand::DataAndFlush(payload)).await.is_err() {
stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
let _ = pool.registry.unregister(conn_id).await;
cancel_signal.cancel();
if cleanup_for_signal
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
pool.remove_writer_and_close_clients(writer_id).await;
}
break;
}
stats_signal.increment_me_rpc_proxy_req_signal_sent_total();
if matches!(
tokio::time::timeout(
Duration::from_millis(ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS),
service_rx.recv(),
)
.await,
Ok(Some(_))
) {
stats_signal.increment_me_rpc_proxy_req_signal_response_total();
}
let mut close_payload = Vec::with_capacity(12);
close_payload.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
close_payload.extend_from_slice(&conn_id.to_le_bytes());
if tx_signal
.send(WriterCommand::DataAndFlush(close_payload))
.await
.is_err()
{
stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
let _ = pool.registry.unregister(conn_id).await;
cancel_signal.cancel();
if cleanup_for_signal
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
pool.remove_writer_and_close_clients(writer_id).await;
}
break;
}
stats_signal.increment_me_rpc_proxy_req_signal_close_sent_total();
let _ = pool.registry.unregister(conn_id).await;
}
});
Ok(())
}
@@ -248,6 +404,7 @@ impl MePool {
async fn remove_writer_only(self: &Arc<Self>, writer_id: u64) -> Vec<BoundConn> {
let mut close_tx: Option<mpsc::Sender<WriterCommand>> = None;
let mut removed_addr: Option<SocketAddr> = None;
let mut removed_uptime: Option<Duration> = None;
let mut trigger_refill = false;
{
let mut ws = self.writers.write().await;
@@ -260,6 +417,7 @@ impl MePool {
self.stats.increment_me_writer_removed_total();
w.cancel.cancel();
removed_addr = Some(w.addr);
removed_uptime = Some(w.created_at.elapsed());
trigger_refill = !was_draining;
if trigger_refill {
self.stats.increment_me_writer_removed_unexpected_total();
@@ -274,6 +432,9 @@ impl MePool {
if trigger_refill
&& let Some(addr) = removed_addr
{
if let Some(uptime) = removed_uptime {
self.maybe_quarantine_flapping_endpoint(addr, uptime).await;
}
self.trigger_immediate_refill(addr);
}
self.rtt_stats.lock().await.remove(&writer_id);
@@ -298,6 +459,8 @@ impl MePool {
if !already_draining {
self.stats.increment_pool_drain_active();
}
w.contour
.store(WriterContour::Draining.as_u8(), Ordering::Relaxed);
w.draining.store(true, Ordering::Relaxed);
true
} else {
@@ -351,16 +514,22 @@ impl MePool {
return false;
}
let ttl_secs = self.me_pool_drain_ttl_secs.load(Ordering::Relaxed);
if ttl_secs == 0 {
return true;
}
match self.bind_stale_mode() {
MeBindStaleMode::Never => false,
MeBindStaleMode::Always => true,
MeBindStaleMode::Ttl => {
let ttl_secs = self.me_bind_stale_ttl_secs.load(Ordering::Relaxed);
if ttl_secs == 0 {
return true;
}
let started = writer.draining_started_at_epoch_secs.load(Ordering::Relaxed);
if started == 0 {
return false;
}
let started = writer.draining_started_at_epoch_secs.load(Ordering::Relaxed);
if started == 0 {
return false;
}
Self::now_epoch_secs().saturating_sub(started) <= ttl_secs
Self::now_epoch_secs().saturating_sub(started) <= ttl_secs
}
}
}
}

View File

@@ -1,4 +1,5 @@
use std::collections::HashMap;
use std::io::ErrorKind;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::Instant;
@@ -45,7 +46,11 @@ pub(crate) async fn reader_loop(
_ = cancel.cancelled() => return Ok(()),
};
if n == 0 {
return Ok(());
stats.increment_me_reader_eof_total();
return Err(ProxyError::Io(std::io::Error::new(
ErrorKind::UnexpectedEof,
"ME socket closed by peer",
)));
}
raw.extend_from_slice(&tmp[..n]);

View File

@@ -1,7 +1,7 @@
use std::collections::{HashMap, HashSet};
use std::net::SocketAddr;
use std::sync::atomic::{AtomicU8, AtomicU64, Ordering};
use std::time::Duration;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use tokio::sync::{mpsc, RwLock};
use tokio::sync::mpsc::error::TrySendError;
@@ -45,12 +45,20 @@ pub struct ConnWriter {
pub tx: mpsc::Sender<WriterCommand>,
}
#[derive(Clone, Debug, Default)]
pub(super) struct WriterActivitySnapshot {
pub bound_clients_by_writer: HashMap<u64, usize>,
pub active_sessions_by_target_dc: HashMap<i16, usize>,
}
struct RegistryInner {
map: HashMap<u64, mpsc::Sender<MeResponse>>,
writers: HashMap<u64, mpsc::Sender<WriterCommand>>,
writer_for_conn: HashMap<u64, u64>,
conns_for_writer: HashMap<u64, HashSet<u64>>,
meta: HashMap<u64, ConnMeta>,
last_meta_for_writer: HashMap<u64, ConnMeta>,
writer_idle_since_epoch_secs: HashMap<u64, u64>,
}
impl RegistryInner {
@@ -61,6 +69,8 @@ impl RegistryInner {
writer_for_conn: HashMap::new(),
conns_for_writer: HashMap::new(),
meta: HashMap::new(),
last_meta_for_writer: HashMap::new(),
writer_idle_since_epoch_secs: HashMap::new(),
}
}
}
@@ -74,6 +84,13 @@ pub struct ConnRegistry {
}
impl ConnRegistry {
fn now_epoch_secs() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs()
}
pub fn new() -> Self {
let start = rand::random::<u64>() | 1;
Self {
@@ -121,8 +138,16 @@ impl ConnRegistry {
inner.map.remove(&id);
inner.meta.remove(&id);
if let Some(writer_id) = inner.writer_for_conn.remove(&id) {
if let Some(set) = inner.conns_for_writer.get_mut(&writer_id) {
let became_empty = if let Some(set) = inner.conns_for_writer.get_mut(&writer_id) {
set.remove(&id);
set.is_empty()
} else {
false
};
if became_empty {
inner
.writer_idle_since_epoch_secs
.insert(writer_id, Self::now_epoch_secs());
}
return Some(writer_id);
}
@@ -191,8 +216,10 @@ impl ConnRegistry {
meta: ConnMeta,
) {
let mut inner = self.inner.write().await;
inner.meta.entry(conn_id).or_insert(meta);
inner.meta.entry(conn_id).or_insert(meta.clone());
inner.writer_for_conn.insert(conn_id, writer_id);
inner.last_meta_for_writer.insert(writer_id, meta);
inner.writer_idle_since_epoch_secs.remove(&writer_id);
inner.writers.entry(writer_id).or_insert_with(|| tx.clone());
inner
.conns_for_writer
@@ -201,6 +228,49 @@ impl ConnRegistry {
.insert(conn_id);
}
pub async fn mark_writer_idle(&self, writer_id: u64) {
let mut inner = self.inner.write().await;
inner.conns_for_writer.entry(writer_id).or_insert_with(HashSet::new);
inner
.writer_idle_since_epoch_secs
.entry(writer_id)
.or_insert(Self::now_epoch_secs());
}
pub async fn get_last_writer_meta(&self, writer_id: u64) -> Option<ConnMeta> {
let inner = self.inner.read().await;
inner.last_meta_for_writer.get(&writer_id).cloned()
}
pub async fn writer_idle_since_snapshot(&self) -> HashMap<u64, u64> {
let inner = self.inner.read().await;
inner.writer_idle_since_epoch_secs.clone()
}
pub(super) async fn writer_activity_snapshot(&self) -> WriterActivitySnapshot {
let inner = self.inner.read().await;
let mut bound_clients_by_writer = HashMap::<u64, usize>::new();
let mut active_sessions_by_target_dc = HashMap::<i16, usize>::new();
for (writer_id, conn_ids) in &inner.conns_for_writer {
bound_clients_by_writer.insert(*writer_id, conn_ids.len());
}
for conn_meta in inner.meta.values() {
let dc_u16 = conn_meta.target_dc.unsigned_abs();
if dc_u16 == 0 {
continue;
}
if let Ok(dc) = i16::try_from(dc_u16) {
*active_sessions_by_target_dc.entry(dc).or_insert(0) += 1;
}
}
WriterActivitySnapshot {
bound_clients_by_writer,
active_sessions_by_target_dc,
}
}
pub async fn get_writer(&self, conn_id: u64) -> Option<ConnWriter> {
let inner = self.inner.read().await;
let writer_id = inner.writer_for_conn.get(&conn_id).cloned()?;
@@ -208,9 +278,16 @@ impl ConnRegistry {
Some(ConnWriter { writer_id, tx: writer })
}
pub async fn active_conn_ids(&self) -> Vec<u64> {
let inner = self.inner.read().await;
inner.writer_for_conn.keys().copied().collect()
}
pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> {
let mut inner = self.inner.write().await;
inner.writers.remove(&writer_id);
inner.last_meta_for_writer.remove(&writer_id);
inner.writer_idle_since_epoch_secs.remove(&writer_id);
let conns = inner
.conns_for_writer
.remove(&writer_id)
@@ -246,3 +323,69 @@ impl ConnRegistry {
.unwrap_or(true)
}
}
#[cfg(test)]
mod tests {
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use super::ConnMeta;
use super::ConnRegistry;
#[tokio::test]
async fn writer_activity_snapshot_tracks_writer_and_dc_load() {
let registry = ConnRegistry::new();
let (conn_a, _rx_a) = registry.register().await;
let (conn_b, _rx_b) = registry.register().await;
let (conn_c, _rx_c) = registry.register().await;
let (writer_tx_a, _writer_rx_a) = tokio::sync::mpsc::channel(8);
let (writer_tx_b, _writer_rx_b) = tokio::sync::mpsc::channel(8);
let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 443);
registry
.bind_writer(
conn_a,
10,
writer_tx_a.clone(),
ConnMeta {
target_dc: 2,
client_addr: addr,
our_addr: addr,
proto_flags: 0,
},
)
.await;
registry
.bind_writer(
conn_b,
10,
writer_tx_a,
ConnMeta {
target_dc: -2,
client_addr: addr,
our_addr: addr,
proto_flags: 0,
},
)
.await;
registry
.bind_writer(
conn_c,
20,
writer_tx_b,
ConnMeta {
target_dc: 4,
client_addr: addr,
our_addr: addr,
proto_flags: 0,
},
)
.await;
let snapshot = registry.writer_activity_snapshot().await;
assert_eq!(snapshot.bound_clients_by_writer.get(&10), Some(&2));
assert_eq!(snapshot.bound_clients_by_writer.get(&20), Some(&1));
assert_eq!(snapshot.active_sessions_by_target_dc.get(&2), Some(&2));
assert_eq!(snapshot.active_sessions_by_target_dc.get(&4), Some(&1));
}
}

View File

@@ -1,19 +1,111 @@
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::watch;
use tracing::{info, warn};
use tokio::sync::{mpsc, watch};
use tracing::{debug, info, warn};
use crate::config::ProxyConfig;
use crate::crypto::SecureRandom;
use super::MePool;
/// Periodically reinitialize ME generations and swap them after full warmup.
pub async fn me_rotation_task(
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MeReinitTrigger {
Periodic,
MapChanged,
}
impl MeReinitTrigger {
fn as_str(self) -> &'static str {
match self {
MeReinitTrigger::Periodic => "periodic",
MeReinitTrigger::MapChanged => "map-change",
}
}
}
pub fn enqueue_reinit_trigger(
tx: &mpsc::Sender<MeReinitTrigger>,
trigger: MeReinitTrigger,
) {
match tx.try_send(trigger) {
Ok(()) => {}
Err(tokio::sync::mpsc::error::TrySendError::Full(_)) => {
debug!(trigger = trigger.as_str(), "ME reinit trigger dropped (queue full)");
}
Err(tokio::sync::mpsc::error::TrySendError::Closed(_)) => {
warn!(trigger = trigger.as_str(), "ME reinit trigger dropped (scheduler closed)");
}
}
}
pub async fn me_reinit_scheduler(
pool: Arc<MePool>,
rng: Arc<SecureRandom>,
config_rx: watch::Receiver<Arc<ProxyConfig>>,
mut trigger_rx: mpsc::Receiver<MeReinitTrigger>,
) {
info!("ME reinit scheduler started");
loop {
let Some(first_trigger) = trigger_rx.recv().await else {
warn!("ME reinit scheduler stopped: trigger channel closed");
break;
};
let mut map_change_seen = matches!(first_trigger, MeReinitTrigger::MapChanged);
let mut periodic_seen = matches!(first_trigger, MeReinitTrigger::Periodic);
let cfg = config_rx.borrow().clone();
let coalesce_window = Duration::from_millis(cfg.general.me_reinit_coalesce_window_ms);
if !coalesce_window.is_zero() {
let deadline = tokio::time::Instant::now() + coalesce_window;
loop {
let now = tokio::time::Instant::now();
if now >= deadline {
break;
}
match tokio::time::timeout(deadline - now, trigger_rx.recv()).await {
Ok(Some(next)) => {
if next == MeReinitTrigger::MapChanged {
map_change_seen = true;
} else {
periodic_seen = true;
}
}
Ok(None) => break,
Err(_) => break,
}
}
}
let reason = if map_change_seen && periodic_seen {
"map-change+periodic"
} else if map_change_seen {
"map-change"
} else {
"periodic"
};
if cfg.general.me_reinit_singleflight {
debug!(reason, "ME reinit scheduled (single-flight)");
pool.zero_downtime_reinit_periodic(rng.as_ref()).await;
} else {
debug!(reason, "ME reinit scheduled (concurrent mode)");
let pool_clone = pool.clone();
let rng_clone = rng.clone();
tokio::spawn(async move {
pool_clone
.zero_downtime_reinit_periodic(rng_clone.as_ref())
.await;
});
}
}
}
/// Periodically enqueue reinitialization triggers for ME generations.
pub async fn me_rotation_task(
mut config_rx: watch::Receiver<Arc<ProxyConfig>>,
reinit_tx: mpsc::Sender<MeReinitTrigger>,
) {
let mut interval_secs = config_rx
.borrow()
@@ -31,7 +123,7 @@ pub async fn me_rotation_task(
tokio::select! {
_ = &mut sleep => {
pool.zero_downtime_reinit_periodic(rng.as_ref()).await;
enqueue_reinit_trigger(&reinit_tx, MeReinitTrigger::Periodic);
let refreshed_secs = config_rx
.borrow()
.general
@@ -70,7 +162,7 @@ pub async fn me_rotation_task(
);
interval_secs = new_secs;
interval = Duration::from_secs(interval_secs);
pool.zero_downtime_reinit_periodic(rng.as_ref()).await;
enqueue_reinit_trigger(&reinit_tx, MeReinitTrigger::Periodic);
next_tick = tokio::time::Instant::now() + interval;
} else {
info!(

View File

@@ -1,21 +1,30 @@
use std::cmp::Reverse;
use std::collections::{HashMap, HashSet};
use std::net::SocketAddr;
use std::sync::Arc;
use std::sync::atomic::Ordering;
use std::time::Duration;
use std::time::{Duration, Instant};
use tokio::sync::mpsc::error::TrySendError;
use tracing::{debug, warn};
use crate::config::MeRouteNoWriterMode;
use crate::error::{ProxyError, Result};
use crate::network::IpFamily;
use crate::protocol::constants::RPC_CLOSE_EXT_U32;
use crate::protocol::constants::{RPC_CLOSE_CONN_U32, RPC_CLOSE_EXT_U32};
use super::MePool;
use super::codec::WriterCommand;
use super::pool::WriterContour;
use super::wire::build_proxy_req_payload;
use rand::seq::SliceRandom;
use super::registry::ConnMeta;
const IDLE_WRITER_PENALTY_MID_SECS: u64 = 45;
const IDLE_WRITER_PENALTY_HIGH_SECS: u64 = 55;
impl MePool {
/// Send RPC_PROXY_REQ. `tag_override`: per-user ad_tag (from access.user_ad_tags); if None, uses pool default.
pub async fn send_proxy_req(
self: &Arc<Self>,
conn_id: u64,
@@ -24,13 +33,15 @@ impl MePool {
our_addr: SocketAddr,
data: &[u8],
proto_flags: u32,
tag_override: Option<&[u8]>,
) -> Result<()> {
let tag = tag_override.or(self.proxy_tag.as_deref());
let payload = build_proxy_req_payload(
conn_id,
client_addr,
our_addr,
data,
self.proxy_tag.as_deref(),
tag,
proto_flags,
);
let meta = ConnMeta {
@@ -39,19 +50,25 @@ impl MePool {
our_addr,
proto_flags,
};
let mut emergency_attempts = 0;
let no_writer_mode =
MeRouteNoWriterMode::from_u8(self.me_route_no_writer_mode.load(Ordering::Relaxed));
let mut no_writer_deadline: Option<Instant> = None;
let mut emergency_attempts = 0u32;
let mut async_recovery_triggered = false;
loop {
if let Some(current) = self.registry.get_writer(conn_id).await {
let send_res = {
current
.tx
.send(WriterCommand::Data(payload.clone()))
.await
};
match send_res {
match current.tx.try_send(WriterCommand::Data(payload.clone())) {
Ok(()) => return Ok(()),
Err(_) => {
Err(TrySendError::Full(cmd)) => {
if current.tx.send(cmd).await.is_ok() {
return Ok(());
}
warn!(writer_id = current.writer_id, "ME writer channel closed");
self.remove_writer_and_close_clients(current.writer_id).await;
continue;
}
Err(TrySendError::Closed(_)) => {
warn!(writer_id = current.writer_id, "ME writer channel closed");
self.remove_writer_and_close_clients(current.writer_id).await;
continue;
@@ -62,83 +79,200 @@ impl MePool {
let mut writers_snapshot = {
let ws = self.writers.read().await;
if ws.is_empty() {
// Create waiter before recovery attempts so notify_one permits are not missed.
let waiter = self.writer_available.notified();
drop(ws);
for family in self.family_order() {
let map = match family {
IpFamily::V4 => self.proxy_map_v4.read().await.clone(),
IpFamily::V6 => self.proxy_map_v6.read().await.clone(),
};
for (_dc, addrs) in map.iter() {
for (ip, port) in addrs {
let addr = SocketAddr::new(*ip, *port);
if self.connect_one(addr, self.rng.as_ref()).await.is_ok() {
self.writer_available.notify_one();
match no_writer_mode {
MeRouteNoWriterMode::AsyncRecoveryFailfast => {
let deadline = *no_writer_deadline.get_or_insert_with(|| {
Instant::now() + self.me_route_no_writer_wait
});
if !async_recovery_triggered {
let triggered =
self.trigger_async_recovery_for_target_dc(target_dc).await;
if !triggered {
self.trigger_async_recovery_global().await;
}
async_recovery_triggered = true;
}
if self.wait_for_writer_until(deadline).await {
continue;
}
self.stats.increment_me_no_writer_failfast_total();
return Err(ProxyError::Proxy(
"No ME writer available in failfast window".into(),
));
}
MeRouteNoWriterMode::InlineRecoveryLegacy => {
self.stats.increment_me_inline_recovery_total();
for _ in 0..self.me_route_inline_recovery_attempts.max(1) {
for family in self.family_order() {
let map = match family {
IpFamily::V4 => self.proxy_map_v4.read().await.clone(),
IpFamily::V6 => self.proxy_map_v6.read().await.clone(),
};
for (_dc, addrs) in &map {
for (ip, port) in addrs {
let addr = SocketAddr::new(*ip, *port);
let _ = self.connect_one(addr, self.rng.as_ref()).await;
}
}
}
if !self.writers.read().await.is_empty() {
break;
}
}
}
}
if !self.writers.read().await.is_empty() {
continue;
}
if tokio::time::timeout(Duration::from_secs(3), waiter).await.is_err() {
if !self.writers.read().await.is_empty() {
if !self.writers.read().await.is_empty() {
continue;
}
let waiter = self.writer_available.notified();
if tokio::time::timeout(self.me_route_inline_recovery_wait, waiter)
.await
.is_err()
{
if !self.writers.read().await.is_empty() {
continue;
}
self.stats.increment_me_no_writer_failfast_total();
return Err(ProxyError::Proxy(
"All ME connections dead (legacy wait timeout)".into(),
));
}
continue;
}
return Err(ProxyError::Proxy("All ME connections dead (waited 3s)".into()));
}
continue;
}
ws.clone()
};
let mut candidate_indices = self.candidate_indices_for_dc(&writers_snapshot, target_dc).await;
let mut candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, target_dc, false)
.await;
if candidate_indices.is_empty() {
// Emergency connect-on-demand
if emergency_attempts >= 3 {
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
}
emergency_attempts += 1;
for family in self.family_order() {
let map_guard = match family {
IpFamily::V4 => self.proxy_map_v4.read().await,
IpFamily::V6 => self.proxy_map_v6.read().await,
};
if let Some(addrs) = map_guard.get(&(target_dc as i32)) {
let mut shuffled = addrs.clone();
shuffled.shuffle(&mut rand::rng());
drop(map_guard);
for (ip, port) in shuffled {
let addr = SocketAddr::new(ip, port);
if self.connect_one(addr, self.rng.as_ref()).await.is_ok() {
break;
candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, target_dc, true)
.await;
}
if candidate_indices.is_empty() {
match no_writer_mode {
MeRouteNoWriterMode::AsyncRecoveryFailfast => {
let deadline = *no_writer_deadline.get_or_insert_with(|| {
Instant::now() + self.me_route_no_writer_wait
});
if !async_recovery_triggered {
let triggered = self.trigger_async_recovery_for_target_dc(target_dc).await;
if !triggered {
self.trigger_async_recovery_global().await;
}
async_recovery_triggered = true;
}
if self.wait_for_candidate_until(target_dc, deadline).await {
continue;
}
self.stats.increment_me_no_writer_failfast_total();
return Err(ProxyError::Proxy(
"No ME writers available for target DC in failfast window".into(),
));
}
MeRouteNoWriterMode::InlineRecoveryLegacy => {
self.stats.increment_me_inline_recovery_total();
if emergency_attempts >= self.me_route_inline_recovery_attempts.max(1) {
self.stats.increment_me_no_writer_failfast_total();
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
}
emergency_attempts += 1;
for family in self.family_order() {
let map_guard = match family {
IpFamily::V4 => self.proxy_map_v4.read().await,
IpFamily::V6 => self.proxy_map_v6.read().await,
};
if let Some(addrs) = map_guard.get(&(target_dc as i32)) {
let mut shuffled = addrs.clone();
shuffled.shuffle(&mut rand::rng());
drop(map_guard);
for (ip, port) in shuffled {
let addr = SocketAddr::new(ip, port);
if self.connect_one(addr, self.rng.as_ref()).await.is_ok() {
break;
}
}
tokio::time::sleep(Duration::from_millis(100 * emergency_attempts as u64)).await;
let ws2 = self.writers.read().await;
writers_snapshot = ws2.clone();
drop(ws2);
candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, target_dc, false)
.await;
if candidate_indices.is_empty() {
candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, target_dc, true)
.await;
}
if !candidate_indices.is_empty() {
break;
}
}
}
tokio::time::sleep(Duration::from_millis(100 * emergency_attempts)).await;
let ws2 = self.writers.read().await;
writers_snapshot = ws2.clone();
drop(ws2);
candidate_indices = self.candidate_indices_for_dc(&writers_snapshot, target_dc).await;
if !candidate_indices.is_empty() {
break;
if candidate_indices.is_empty() {
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
}
}
}
if candidate_indices.is_empty() {
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
}
}
let writer_idle_since = self.registry.writer_idle_since_snapshot().await;
let now_epoch_secs = Self::now_epoch_secs();
if self.me_deterministic_writer_sort.load(Ordering::Relaxed) {
candidate_indices.sort_by(|lhs, rhs| {
let left = &writers_snapshot[*lhs];
let right = &writers_snapshot[*rhs];
let left_key = (
self.writer_contour_rank_for_selection(left),
(left.generation < self.current_generation()) as usize,
left.degraded.load(Ordering::Relaxed) as usize,
self.writer_idle_rank_for_selection(
left,
&writer_idle_since,
now_epoch_secs,
),
Reverse(left.tx.capacity()),
left.addr,
left.id,
);
let right_key = (
self.writer_contour_rank_for_selection(right),
(right.generation < self.current_generation()) as usize,
right.degraded.load(Ordering::Relaxed) as usize,
self.writer_idle_rank_for_selection(
right,
&writer_idle_since,
now_epoch_secs,
),
Reverse(right.tx.capacity()),
right.addr,
right.id,
);
left_key.cmp(&right_key)
});
} else {
candidate_indices.sort_by_key(|idx| {
let w = &writers_snapshot[*idx];
let degraded = w.degraded.load(Ordering::Relaxed);
let stale = (w.generation < self.current_generation()) as usize;
(
self.writer_contour_rank_for_selection(w),
stale,
degraded as usize,
self.writer_idle_rank_for_selection(
w,
&writer_idle_since,
now_epoch_secs,
),
Reverse(w.tx.capacity()),
)
});
}
candidate_indices.sort_by_key(|idx| {
let w = &writers_snapshot[*idx];
let degraded = w.degraded.load(Ordering::Relaxed);
let stale = (w.generation < self.current_generation()) as usize;
(stale, degraded as usize)
});
let start = self.rr.fetch_add(1, Ordering::Relaxed) as usize % candidate_indices.len();
let mut fallback_blocking_idx: Option<usize> = None;
for offset in 0..candidate_indices.len() {
let idx = candidate_indices[(start + offset) % candidate_indices.len()];
@@ -146,29 +280,41 @@ impl MePool {
if !self.writer_accepts_new_binding(w) {
continue;
}
if w.tx.send(WriterCommand::Data(payload.clone())).await.is_ok() {
self.registry
.bind_writer(conn_id, w.id, w.tx.clone(), meta.clone())
.await;
if w.generation < self.current_generation() {
self.stats.increment_pool_stale_pick_total();
debug!(
conn_id,
writer_id = w.id,
writer_generation = w.generation,
current_generation = self.current_generation(),
"Selected stale ME writer for fallback bind"
);
match w.tx.try_send(WriterCommand::Data(payload.clone())) {
Ok(()) => {
self.registry
.bind_writer(conn_id, w.id, w.tx.clone(), meta.clone())
.await;
if w.generation < self.current_generation() {
self.stats.increment_pool_stale_pick_total();
debug!(
conn_id,
writer_id = w.id,
writer_generation = w.generation,
current_generation = self.current_generation(),
"Selected stale ME writer for fallback bind"
);
}
return Ok(());
}
Err(TrySendError::Full(_)) => {
if fallback_blocking_idx.is_none() {
fallback_blocking_idx = Some(idx);
}
}
Err(TrySendError::Closed(_)) => {
warn!(writer_id = w.id, "ME writer channel closed");
self.remove_writer_and_close_clients(w.id).await;
continue;
}
return Ok(());
} else {
warn!(writer_id = w.id, "ME writer channel closed");
self.remove_writer_and_close_clients(w.id).await;
continue;
}
}
let w = writers_snapshot[candidate_indices[start]].clone();
let Some(blocking_idx) = fallback_blocking_idx else {
continue;
};
let w = writers_snapshot[blocking_idx].clone();
if !self.writer_accepts_new_binding(&w) {
continue;
}
@@ -190,6 +336,129 @@ impl MePool {
}
}
async fn wait_for_writer_until(&self, deadline: Instant) -> bool {
let waiter = self.writer_available.notified();
if !self.writers.read().await.is_empty() {
return true;
}
let now = Instant::now();
if now >= deadline {
return !self.writers.read().await.is_empty();
}
let timeout = deadline.saturating_duration_since(now);
if tokio::time::timeout(timeout, waiter).await.is_ok() {
return true;
}
!self.writers.read().await.is_empty()
}
async fn wait_for_candidate_until(&self, target_dc: i16, deadline: Instant) -> bool {
loop {
if self.has_candidate_for_target_dc(target_dc).await {
return true;
}
let now = Instant::now();
if now >= deadline {
return self.has_candidate_for_target_dc(target_dc).await;
}
let remaining = deadline.saturating_duration_since(now);
let sleep_for = remaining.min(Duration::from_millis(25));
let waiter = self.writer_available.notified();
tokio::select! {
_ = waiter => {}
_ = tokio::time::sleep(sleep_for) => {}
}
}
}
async fn has_candidate_for_target_dc(&self, target_dc: i16) -> bool {
let writers_snapshot = {
let ws = self.writers.read().await;
if ws.is_empty() {
return false;
}
ws.clone()
};
let mut candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, target_dc, false)
.await;
if candidate_indices.is_empty() {
candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, target_dc, true)
.await;
}
!candidate_indices.is_empty()
}
async fn trigger_async_recovery_for_target_dc(self: &Arc<Self>, target_dc: i16) -> bool {
let endpoints = self.endpoint_candidates_for_target_dc(target_dc).await;
if endpoints.is_empty() {
return false;
}
self.stats.increment_me_async_recovery_trigger_total();
for addr in endpoints.into_iter().take(8) {
self.trigger_immediate_refill(addr);
}
true
}
async fn trigger_async_recovery_global(self: &Arc<Self>) {
self.stats.increment_me_async_recovery_trigger_total();
let mut seen = HashSet::<SocketAddr>::new();
for family in self.family_order() {
let map = match family {
IpFamily::V4 => self.proxy_map_v4.read().await.clone(),
IpFamily::V6 => self.proxy_map_v6.read().await.clone(),
};
for addrs in map.values() {
for (ip, port) in addrs {
let addr = SocketAddr::new(*ip, *port);
if seen.insert(addr) {
self.trigger_immediate_refill(addr);
}
if seen.len() >= 8 {
return;
}
}
}
}
}
async fn endpoint_candidates_for_target_dc(&self, target_dc: i16) -> Vec<SocketAddr> {
let key = target_dc as i32;
let mut preferred = Vec::<SocketAddr>::new();
let mut seen = HashSet::<SocketAddr>::new();
for family in self.family_order() {
let map = match family {
IpFamily::V4 => self.proxy_map_v4.read().await.clone(),
IpFamily::V6 => self.proxy_map_v6.read().await.clone(),
};
let mut lookup_keys = vec![key, key.abs(), -key.abs()];
let def = self.default_dc.load(Ordering::Relaxed);
if def != 0 {
lookup_keys.push(def);
}
for lookup in lookup_keys {
if let Some(addrs) = map.get(&lookup) {
for (ip, port) in addrs {
let addr = SocketAddr::new(*ip, *port);
if seen.insert(addr) {
preferred.push(addr);
}
}
}
}
if !preferred.is_empty() && !self.decision.effective_multipath {
break;
}
}
preferred
}
pub async fn send_close(self: &Arc<Self>, conn_id: u64) -> Result<()> {
if let Some(w) = self.registry.get_writer(conn_id).await {
let mut p = Vec::with_capacity(12);
@@ -207,6 +476,37 @@ impl MePool {
Ok(())
}
pub async fn send_close_conn(self: &Arc<Self>, conn_id: u64) -> Result<()> {
if let Some(w) = self.registry.get_writer(conn_id).await {
let mut p = Vec::with_capacity(12);
p.extend_from_slice(&RPC_CLOSE_CONN_U32.to_le_bytes());
p.extend_from_slice(&conn_id.to_le_bytes());
match w.tx.try_send(WriterCommand::DataAndFlush(p)) {
Ok(()) => {}
Err(TrySendError::Full(cmd)) => {
let _ = tokio::time::timeout(Duration::from_millis(50), w.tx.send(cmd)).await;
}
Err(TrySendError::Closed(_)) => {
debug!(conn_id, "ME close_conn skipped: writer channel closed");
}
}
} else {
debug!(conn_id, "ME close_conn skipped (writer missing)");
}
self.registry.unregister(conn_id).await;
Ok(())
}
pub async fn shutdown_send_close_conn_all(self: &Arc<Self>) -> usize {
let conn_ids = self.registry.active_conn_ids().await;
let total = conn_ids.len();
for conn_id in conn_ids {
let _ = self.send_close_conn(conn_id).await;
}
total
}
pub fn connection_count(&self) -> usize {
self.conn_count.load(Ordering::Relaxed)
}
@@ -215,6 +515,7 @@ impl MePool {
&self,
writers: &[super::pool::MeWriter],
target_dc: i16,
include_warm: bool,
) -> Vec<usize> {
let key = target_dc as i32;
let mut preferred = Vec::<SocketAddr>::new();
@@ -258,13 +559,13 @@ impl MePool {
if preferred.is_empty() {
return (0..writers.len())
.filter(|i| self.writer_accepts_new_binding(&writers[*i]))
.filter(|i| self.writer_eligible_for_selection(&writers[*i], include_warm))
.collect();
}
let mut out = Vec::new();
for (idx, w) in writers.iter().enumerate() {
if !self.writer_accepts_new_binding(w) {
if !self.writer_eligible_for_selection(w, include_warm) {
continue;
}
if preferred.contains(&w.addr) {
@@ -273,10 +574,52 @@ impl MePool {
}
if out.is_empty() {
return (0..writers.len())
.filter(|i| self.writer_accepts_new_binding(&writers[*i]))
.filter(|i| self.writer_eligible_for_selection(&writers[*i], include_warm))
.collect();
}
out
}
fn writer_eligible_for_selection(
&self,
writer: &super::pool::MeWriter,
include_warm: bool,
) -> bool {
if !self.writer_accepts_new_binding(writer) {
return false;
}
match WriterContour::from_u8(writer.contour.load(Ordering::Relaxed)) {
WriterContour::Active => true,
WriterContour::Warm => include_warm,
WriterContour::Draining => true,
}
}
fn writer_contour_rank_for_selection(&self, writer: &super::pool::MeWriter) -> usize {
match WriterContour::from_u8(writer.contour.load(Ordering::Relaxed)) {
WriterContour::Active => 0,
WriterContour::Warm => 1,
WriterContour::Draining => 2,
}
}
fn writer_idle_rank_for_selection(
&self,
writer: &super::pool::MeWriter,
idle_since_by_writer: &HashMap<u64, u64>,
now_epoch_secs: u64,
) -> usize {
let Some(idle_since) = idle_since_by_writer.get(&writer.id).copied() else {
return 0;
};
let idle_age_secs = now_epoch_secs.saturating_sub(idle_since);
if idle_age_secs >= IDLE_WRITER_PENALTY_HIGH_SECS {
2
} else if idle_age_secs >= IDLE_WRITER_PENALTY_MID_SECS {
1
} else {
0
}
}
}

View File

@@ -4,7 +4,7 @@
#![allow(deprecated)]
use std::collections::HashMap;
use std::collections::{BTreeSet, HashMap};
use std::net::{SocketAddr, IpAddr};
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
@@ -19,6 +19,7 @@ use crate::config::{UpstreamConfig, UpstreamType};
use crate::error::{Result, ProxyError};
use crate::network::dns_overrides::{resolve_socket_addr, split_host_port};
use crate::protocol::constants::{TG_DATACENTERS_V4, TG_DATACENTERS_V6, TG_DATACENTER_PORT};
use crate::stats::Stats;
use crate::transport::socket::{create_outgoing_socket_bound, resolve_interface_ip};
use crate::transport::socks::{connect_socks4, connect_socks5};
@@ -29,6 +30,12 @@ const NUM_DCS: usize = 5;
const DC_PING_TIMEOUT_SECS: u64 = 5;
/// Timeout for direct TG DC TCP connect readiness.
const DIRECT_CONNECT_TIMEOUT_SECS: u64 = 10;
/// Interval between upstream health-check cycles.
const HEALTH_CHECK_INTERVAL_SECS: u64 = 30;
/// Timeout for a single health-check connect attempt.
const HEALTH_CHECK_CONNECT_TIMEOUT_SECS: u64 = 10;
/// Upstream is considered healthy when at least this many DC groups are reachable.
const MIN_HEALTHY_DC_GROUPS: usize = 3;
// ============= RTT Tracking =============
@@ -158,6 +165,43 @@ pub enum UpstreamRouteKind {
Socks5,
}
#[derive(Debug, Clone)]
pub struct UpstreamApiDcSnapshot {
pub dc: i16,
pub latency_ema_ms: Option<f64>,
pub ip_preference: IpPreference,
}
#[derive(Debug, Clone)]
pub struct UpstreamApiItemSnapshot {
pub upstream_id: usize,
pub route_kind: UpstreamRouteKind,
pub address: String,
pub weight: u16,
pub scopes: String,
pub healthy: bool,
pub fails: u32,
pub last_check_age_secs: u64,
pub effective_latency_ms: Option<f64>,
pub dc: Vec<UpstreamApiDcSnapshot>,
}
#[derive(Debug, Clone, Default)]
pub struct UpstreamApiSummarySnapshot {
pub configured_total: usize,
pub healthy_total: usize,
pub unhealthy_total: usize,
pub direct_total: usize,
pub socks4_total: usize,
pub socks5_total: usize,
}
#[derive(Debug, Clone)]
pub struct UpstreamApiSnapshot {
pub summary: UpstreamApiSummarySnapshot,
pub upstreams: Vec<UpstreamApiItemSnapshot>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UpstreamEgressInfo {
pub route_kind: UpstreamRouteKind,
@@ -167,6 +211,13 @@ pub struct UpstreamEgressInfo {
pub socks_proxy_addr: Option<SocketAddr>,
}
#[derive(Debug, Clone)]
struct HealthCheckGroup {
dc_idx: i16,
primary: Vec<SocketAddr>,
fallback: Vec<SocketAddr>,
}
// ============= Upstream Manager =============
#[derive(Clone)]
@@ -175,6 +226,8 @@ pub struct UpstreamManager {
connect_retry_attempts: u32,
connect_retry_backoff: Duration,
unhealthy_fail_threshold: u32,
connect_failfast_hard_errors: bool,
stats: Arc<Stats>,
}
impl UpstreamManager {
@@ -183,6 +236,8 @@ impl UpstreamManager {
connect_retry_attempts: u32,
connect_retry_backoff_ms: u64,
unhealthy_fail_threshold: u32,
connect_failfast_hard_errors: bool,
stats: Arc<Stats>,
) -> Self {
let states = configs.into_iter()
.filter(|c| c.enabled)
@@ -194,9 +249,69 @@ impl UpstreamManager {
connect_retry_attempts: connect_retry_attempts.max(1),
connect_retry_backoff: Duration::from_millis(connect_retry_backoff_ms),
unhealthy_fail_threshold: unhealthy_fail_threshold.max(1),
connect_failfast_hard_errors,
stats,
}
}
pub fn try_api_snapshot(&self) -> Option<UpstreamApiSnapshot> {
let guard = self.upstreams.try_read().ok()?;
let now = std::time::Instant::now();
let mut summary = UpstreamApiSummarySnapshot {
configured_total: guard.len(),
..UpstreamApiSummarySnapshot::default()
};
let mut upstreams = Vec::with_capacity(guard.len());
for (idx, upstream) in guard.iter().enumerate() {
if upstream.healthy {
summary.healthy_total += 1;
} else {
summary.unhealthy_total += 1;
}
let (route_kind, address) = match &upstream.config.upstream_type {
UpstreamType::Direct { .. } => {
summary.direct_total += 1;
(UpstreamRouteKind::Direct, "direct".to_string())
}
UpstreamType::Socks4 { address, .. } => {
summary.socks4_total += 1;
(UpstreamRouteKind::Socks4, address.clone())
}
UpstreamType::Socks5 { address, .. } => {
summary.socks5_total += 1;
(UpstreamRouteKind::Socks5, address.clone())
}
};
let mut dc = Vec::with_capacity(NUM_DCS);
for dc_idx in 0..NUM_DCS {
dc.push(UpstreamApiDcSnapshot {
dc: (dc_idx + 1) as i16,
latency_ema_ms: upstream.dc_latency[dc_idx].get(),
ip_preference: upstream.dc_ip_pref[dc_idx],
});
}
upstreams.push(UpstreamApiItemSnapshot {
upstream_id: idx,
route_kind,
address,
weight: upstream.config.weight,
scopes: upstream.config.scopes.clone(),
healthy: upstream.healthy,
fails: upstream.fails,
last_check_age_secs: now.saturating_duration_since(upstream.last_check).as_secs(),
effective_latency_ms: upstream.effective_latency(None),
dc,
});
}
Some(UpstreamApiSnapshot { summary, upstreams })
}
#[cfg(unix)]
fn resolve_interface_addrs(name: &str, want_ipv6: bool) -> Vec<IpAddr> {
use nix::ifaddrs::getifaddrs;
@@ -336,6 +451,34 @@ impl UpstreamManager {
}
}
fn retry_backoff_with_jitter(&self) -> Duration {
if self.connect_retry_backoff.is_zero() {
return Duration::ZERO;
}
let base_ms = self.connect_retry_backoff.as_millis() as u64;
if base_ms == 0 {
return self.connect_retry_backoff;
}
let jitter_cap_ms = (base_ms / 2).max(1);
let jitter_ms = rand::rng().gen_range(0..=jitter_cap_ms);
Duration::from_millis(base_ms.saturating_add(jitter_ms))
}
fn is_hard_connect_error(error: &ProxyError) -> bool {
match error {
ProxyError::Config(_) | ProxyError::ConnectionRefused { .. } => true,
ProxyError::Io(ioe) => matches!(
ioe.kind(),
std::io::ErrorKind::ConnectionRefused
| std::io::ErrorKind::AddrInUse
| std::io::ErrorKind::AddrNotAvailable
| std::io::ErrorKind::InvalidInput
| std::io::ErrorKind::Unsupported
),
_ => false,
}
}
/// Select upstream using latency-weighted random selection.
async fn select_upstream(&self, dc_idx: Option<i16>, scope: Option<&str>) -> Option<usize> {
let upstreams = self.upstreams.read().await;
@@ -446,8 +589,12 @@ impl UpstreamManager {
guard.get(idx).map(|u| u.bind_rr.clone())
};
let connect_started_at = Instant::now();
let mut last_error: Option<ProxyError> = None;
let mut attempts_used = 0u32;
for attempt in 1..=self.connect_retry_attempts {
attempts_used = attempt;
self.stats.increment_upstream_connect_attempt_total();
let start = Instant::now();
match self
.connect_via_upstream(&upstream, target, bind_rr.clone())
@@ -455,6 +602,13 @@ impl UpstreamManager {
{
Ok((stream, egress)) => {
let rtt_ms = start.elapsed().as_secs_f64() * 1000.0;
self.stats.increment_upstream_connect_success_total();
self.stats
.observe_upstream_connect_attempts_per_request(attempts_used);
self.stats.observe_upstream_connect_duration_ms(
connect_started_at.elapsed().as_millis() as u64,
true,
);
let mut guard = self.upstreams.write().await;
if let Some(u) = guard.get_mut(idx) {
if !u.healthy {
@@ -478,7 +632,13 @@ impl UpstreamManager {
return Ok((stream, egress));
}
Err(e) => {
if attempt < self.connect_retry_attempts {
let hard_error =
self.connect_failfast_hard_errors && Self::is_hard_connect_error(&e);
if hard_error {
self.stats
.increment_upstream_connect_failfast_hard_error_total();
}
if attempt < self.connect_retry_attempts && !hard_error {
debug!(
attempt,
attempts = self.connect_retry_attempts,
@@ -486,21 +646,43 @@ impl UpstreamManager {
error = %e,
"Upstream connect attempt failed, retrying"
);
if !self.connect_retry_backoff.is_zero() {
tokio::time::sleep(self.connect_retry_backoff).await;
let backoff = self.retry_backoff_with_jitter();
if !backoff.is_zero() {
tokio::time::sleep(backoff).await;
}
} else if hard_error {
debug!(
attempt,
attempts = self.connect_retry_attempts,
target = %target,
error = %e,
"Upstream connect failed with hard error, failfast is active"
);
}
last_error = Some(e);
if hard_error {
break;
}
}
}
}
self.stats.increment_upstream_connect_fail_total();
self.stats
.observe_upstream_connect_attempts_per_request(attempts_used);
self.stats.observe_upstream_connect_duration_ms(
connect_started_at.elapsed().as_millis() as u64,
false,
);
let error = last_error.unwrap_or_else(|| {
ProxyError::Config("Upstream connect attempts exhausted".to_string())
});
let mut guard = self.upstreams.write().await;
if let Some(u) = guard.get_mut(idx) {
// Intermediate attempts are intentionally ignored here.
// Health state is degraded only when the entire connect cycle fails.
u.fails += 1;
warn!(
fails = u.fails,
@@ -987,41 +1169,144 @@ impl UpstreamManager {
Ok(start.elapsed().as_secs_f64() * 1000.0)
}
fn required_healthy_group_count(total_groups: usize) -> usize {
if total_groups == 0 {
0
} else {
total_groups.min(MIN_HEALTHY_DC_GROUPS)
}
}
fn build_health_check_groups(
prefer_ipv6: bool,
ipv4_enabled: bool,
ipv6_enabled: bool,
dc_overrides: &HashMap<String, Vec<String>>,
) -> Vec<HealthCheckGroup> {
let mut v4_by_dc: HashMap<i16, Vec<SocketAddr>> = HashMap::new();
let mut v6_by_dc: HashMap<i16, Vec<SocketAddr>> = HashMap::new();
if ipv4_enabled {
for (idx, dc_ip) in TG_DATACENTERS_V4.iter().enumerate() {
let dc_idx = (idx + 1) as i16;
v4_by_dc
.entry(dc_idx)
.or_default()
.push(SocketAddr::new(*dc_ip, TG_DATACENTER_PORT));
}
}
if ipv6_enabled {
for (idx, dc_ip) in TG_DATACENTERS_V6.iter().enumerate() {
let dc_idx = (idx + 1) as i16;
v6_by_dc
.entry(dc_idx)
.or_default()
.push(SocketAddr::new(*dc_ip, TG_DATACENTER_PORT));
}
}
for (dc_key, addrs) in dc_overrides {
let dc_idx = match dc_key.parse::<i16>() {
Ok(v) if v > 0 => v,
_ => {
warn!(dc = %dc_key, "Invalid dc_overrides key for health-check, skipping");
continue;
}
};
for addr_str in addrs {
match addr_str.parse::<SocketAddr>() {
Ok(addr) if addr.is_ipv6() => {
if ipv6_enabled {
v6_by_dc.entry(dc_idx).or_default().push(addr);
}
}
Ok(addr) => {
if ipv4_enabled {
v4_by_dc.entry(dc_idx).or_default().push(addr);
}
}
Err(_) => {
warn!(
dc = %dc_idx,
addr = %addr_str,
"Invalid dc_overrides address for health-check, skipping"
);
}
}
}
}
for addrs in v4_by_dc.values_mut() {
addrs.sort_unstable();
addrs.dedup();
}
for addrs in v6_by_dc.values_mut() {
addrs.sort_unstable();
addrs.dedup();
}
let mut all_dcs = BTreeSet::new();
all_dcs.extend(v4_by_dc.keys().copied());
all_dcs.extend(v6_by_dc.keys().copied());
let mut groups = Vec::with_capacity(all_dcs.len());
for dc_idx in all_dcs {
let v4_endpoints = v4_by_dc.remove(&dc_idx).unwrap_or_default();
let v6_endpoints = v6_by_dc.remove(&dc_idx).unwrap_or_default();
let (primary, fallback) = if prefer_ipv6 {
(v6_endpoints, v4_endpoints)
} else {
(v4_endpoints, v6_endpoints)
};
if primary.is_empty() && fallback.is_empty() {
continue;
}
groups.push(HealthCheckGroup {
dc_idx,
primary,
fallback,
});
}
groups
}
// ============= Health Checks =============
/// Background health check: rotates through DCs, 30s interval.
/// Uses preferred IP version based on config.
pub async fn run_health_checks(&self, prefer_ipv6: bool, ipv4_enabled: bool, ipv6_enabled: bool) {
let mut dc_rotation = 0usize;
/// Background health check based on reachable DC groups through each upstream.
/// Upstream stays healthy while at least `MIN_HEALTHY_DC_GROUPS` groups are reachable.
pub async fn run_health_checks(
&self,
prefer_ipv6: bool,
ipv4_enabled: bool,
ipv6_enabled: bool,
dc_overrides: HashMap<String, Vec<String>>,
) {
let groups = Self::build_health_check_groups(
prefer_ipv6,
ipv4_enabled,
ipv6_enabled,
&dc_overrides,
);
let required_healthy_groups = Self::required_healthy_group_count(groups.len());
let mut endpoint_rotation: HashMap<(usize, i16, bool), usize> = HashMap::new();
if groups.is_empty() {
warn!("No DC groups available for upstream health-checks");
}
loop {
tokio::time::sleep(Duration::from_secs(30)).await;
tokio::time::sleep(Duration::from_secs(HEALTH_CHECK_INTERVAL_SECS)).await;
let dc_zero_idx = dc_rotation % NUM_DCS;
dc_rotation += 1;
let primary_v6 = SocketAddr::new(TG_DATACENTERS_V6[dc_zero_idx], TG_DATACENTER_PORT);
let primary_v4 = SocketAddr::new(TG_DATACENTERS_V4[dc_zero_idx], TG_DATACENTER_PORT);
let dc_addr = if prefer_ipv6 && ipv6_enabled {
primary_v6
} else if ipv4_enabled {
primary_v4
} else if ipv6_enabled {
primary_v6
} else {
if groups.is_empty() || required_healthy_groups == 0 {
continue;
};
let fallback_addr = if dc_addr.is_ipv6() && ipv4_enabled {
Some(primary_v4)
} else if dc_addr.is_ipv4() && ipv6_enabled {
Some(primary_v6)
} else {
None
};
}
let count = self.upstreams.read().await.len();
for i in 0..count {
let (config, bind_rr) = {
let guard = self.upstreams.read().await;
@@ -1029,104 +1314,123 @@ impl UpstreamManager {
(u.config.clone(), u.bind_rr.clone())
};
let start = Instant::now();
let result = tokio::time::timeout(
Duration::from_secs(10),
self.connect_via_upstream(&config, dc_addr, Some(bind_rr.clone()))
).await;
let mut healthy_groups = 0usize;
let mut latency_updates: Vec<(usize, f64)> = Vec::new();
match result {
Ok(Ok(_stream)) => {
let rtt_ms = start.elapsed().as_secs_f64() * 1000.0;
let mut guard = self.upstreams.write().await;
let u = &mut guard[i];
u.dc_latency[dc_zero_idx].update(rtt_ms);
for group in &groups {
let mut group_ok = false;
let mut group_rtt_ms = None;
if !u.healthy {
info!(
rtt = format!("{:.0} ms", rtt_ms),
dc = dc_zero_idx + 1,
"Upstream recovered"
);
}
u.healthy = true;
u.fails = 0;
u.last_check = std::time::Instant::now();
}
Ok(Err(_)) | Err(_) => {
// Try fallback
debug!(dc = dc_zero_idx + 1, "Health check failed, trying fallback");
if let Some(fallback_addr) = fallback_addr {
let start2 = Instant::now();
let result2 = tokio::time::timeout(
Duration::from_secs(10),
self.connect_via_upstream(&config, fallback_addr, Some(bind_rr.clone()))
).await;
let mut guard = self.upstreams.write().await;
let u = &mut guard[i];
match result2 {
Ok(Ok(_stream)) => {
let rtt_ms = start2.elapsed().as_secs_f64() * 1000.0;
u.dc_latency[dc_zero_idx].update(rtt_ms);
if !u.healthy {
info!(
rtt = format!("{:.0} ms", rtt_ms),
dc = dc_zero_idx + 1,
"Upstream recovered (fallback)"
);
}
u.healthy = true;
u.fails = 0;
}
Ok(Err(e)) => {
u.fails += 1;
debug!(dc = dc_zero_idx + 1, fails = u.fails,
"Health check failed (both): {}", e);
if u.fails >= self.unhealthy_fail_threshold {
u.healthy = false;
warn!(
fails = u.fails,
threshold = self.unhealthy_fail_threshold,
"Upstream unhealthy (fails)"
);
}
}
Err(_) => {
u.fails += 1;
debug!(dc = dc_zero_idx + 1, fails = u.fails,
"Health check timeout (both)");
if u.fails >= self.unhealthy_fail_threshold {
u.healthy = false;
warn!(
fails = u.fails,
threshold = self.unhealthy_fail_threshold,
"Upstream unhealthy (timeout)"
);
}
}
}
u.last_check = std::time::Instant::now();
for (is_primary, endpoints) in [(true, &group.primary), (false, &group.fallback)] {
if endpoints.is_empty() {
continue;
}
let mut guard = self.upstreams.write().await;
let u = &mut guard[i];
u.fails += 1;
if u.fails >= self.unhealthy_fail_threshold {
u.healthy = false;
warn!(
fails = u.fails,
threshold = self.unhealthy_fail_threshold,
"Upstream unhealthy (no fallback family)"
);
let rotation_key = (i, group.dc_idx, is_primary);
let start_idx = *endpoint_rotation.entry(rotation_key).or_insert(0) % endpoints.len();
let mut next_idx = (start_idx + 1) % endpoints.len();
for step in 0..endpoints.len() {
let endpoint_idx = (start_idx + step) % endpoints.len();
let endpoint = endpoints[endpoint_idx];
let start = Instant::now();
let result = tokio::time::timeout(
Duration::from_secs(HEALTH_CHECK_CONNECT_TIMEOUT_SECS),
self.connect_via_upstream(&config, endpoint, Some(bind_rr.clone())),
)
.await;
match result {
Ok(Ok(_stream)) => {
group_ok = true;
group_rtt_ms = Some(start.elapsed().as_secs_f64() * 1000.0);
next_idx = (endpoint_idx + 1) % endpoints.len();
break;
}
Ok(Err(e)) => {
debug!(
upstream = i,
dc = group.dc_idx,
endpoint = %endpoint,
primary = is_primary,
error = %e,
"Health-check endpoint failed"
);
}
Err(_) => {
debug!(
upstream = i,
dc = group.dc_idx,
endpoint = %endpoint,
primary = is_primary,
"Health-check endpoint timed out"
);
}
}
}
endpoint_rotation.insert(rotation_key, next_idx);
if group_ok {
break;
}
}
if group_ok {
healthy_groups += 1;
if let (Some(dc_array_idx), Some(rtt_ms)) =
(UpstreamState::dc_array_idx(group.dc_idx), group_rtt_ms)
{
latency_updates.push((dc_array_idx, rtt_ms));
}
u.last_check = std::time::Instant::now();
}
}
let mut guard = self.upstreams.write().await;
let u = &mut guard[i];
for (dc_array_idx, rtt_ms) in latency_updates {
u.dc_latency[dc_array_idx].update(rtt_ms);
}
if healthy_groups >= required_healthy_groups {
if !u.healthy {
info!(
upstream = i,
healthy_groups,
total_groups = groups.len(),
required_groups = required_healthy_groups,
"Upstream recovered by DC-group health threshold"
);
}
u.healthy = true;
u.fails = 0;
} else {
u.fails += 1;
debug!(
upstream = i,
healthy_groups,
total_groups = groups.len(),
required_groups = required_healthy_groups,
fails = u.fails,
"Upstream health-check below DC-group threshold"
);
if u.fails >= self.unhealthy_fail_threshold {
u.healthy = false;
warn!(
upstream = i,
healthy_groups,
total_groups = groups.len(),
required_groups = required_healthy_groups,
fails = u.fails,
threshold = self.unhealthy_fail_threshold,
"Upstream unhealthy (insufficient reachable DC groups)"
);
}
}
u.last_check = std::time::Instant::now();
}
}
}
@@ -1157,3 +1461,92 @@ impl UpstreamManager {
Some(SocketAddr::new(ip, TG_DATACENTER_PORT))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn required_healthy_group_count_applies_three_group_threshold() {
assert_eq!(UpstreamManager::required_healthy_group_count(0), 0);
assert_eq!(UpstreamManager::required_healthy_group_count(1), 1);
assert_eq!(UpstreamManager::required_healthy_group_count(2), 2);
assert_eq!(UpstreamManager::required_healthy_group_count(3), 3);
assert_eq!(UpstreamManager::required_healthy_group_count(5), 3);
}
#[test]
fn build_health_check_groups_merges_family_endpoints_with_preference() {
let mut overrides = HashMap::new();
overrides.insert(
"2".to_string(),
vec![
"203.0.113.10:443".to_string(),
"203.0.113.11:443".to_string(),
"[2001:db8::10]:443".to_string(),
],
);
let groups = UpstreamManager::build_health_check_groups(true, true, true, &overrides);
let dc2 = groups
.iter()
.find(|g| g.dc_idx == 2)
.expect("dc2 must be present");
assert!(dc2.primary.iter().all(|addr| addr.is_ipv6()));
assert!(dc2.fallback.iter().all(|addr| addr.is_ipv4()));
assert!(dc2
.primary
.contains(&"[2001:db8::10]:443".parse::<SocketAddr>().unwrap()));
assert!(dc2
.fallback
.contains(&"203.0.113.10:443".parse::<SocketAddr>().unwrap()));
assert!(dc2
.fallback
.contains(&"203.0.113.11:443".parse::<SocketAddr>().unwrap()));
}
#[test]
fn build_health_check_groups_keeps_multiple_endpoints_per_group() {
let mut overrides = HashMap::new();
overrides.insert(
"9".to_string(),
vec![
"198.51.100.1:443".to_string(),
"198.51.100.2:443".to_string(),
"198.51.100.1:443".to_string(),
],
);
let groups = UpstreamManager::build_health_check_groups(false, true, false, &overrides);
let dc9 = groups
.iter()
.find(|g| g.dc_idx == 9)
.expect("override-only dc group must be present");
assert_eq!(dc9.primary.len(), 2);
assert!(dc9
.primary
.contains(&"198.51.100.1:443".parse::<SocketAddr>().unwrap()));
assert!(dc9
.primary
.contains(&"198.51.100.2:443".parse::<SocketAddr>().unwrap()));
assert!(dc9.fallback.is_empty());
}
#[test]
fn hard_connect_error_classification_detects_connection_refused() {
let error = ProxyError::ConnectionRefused {
addr: "127.0.0.1:443".to_string(),
};
assert!(UpstreamManager::is_hard_connect_error(&error));
}
#[test]
fn hard_connect_error_classification_skips_timeouts() {
let error = ProxyError::ConnectionTimeout {
addr: "127.0.0.1:443".to_string(),
};
assert!(!UpstreamManager::is_hard_connect_error(&error));
}
}

View File

@@ -47,6 +47,54 @@ zabbix_export:
tags:
- tag: Application
value: 'Server connections'
- uuid: 2af8ff0f27e4408db3f9798dc3141457
name: 'Full forensic desync logs emitted'
type: DEPENDENT
key: telemt.desync_full_logged_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_desync_full_logged_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: f4439948a49f4b1d85c3eeee963259bc
name: 'Suppressed desync forensic events'
type: DEPENDENT
key: telemt.desync_suppressed_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_desync_suppressed_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 721627b8c10a414a82be1e08873604c1
name: 'Total crypto-desync detections'
type: DEPENDENT
key: telemt.desync_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_desync_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 1618272cf68e44509425f5fab029db7b
name: 'Handshake timeouts total'
type: DEPENDENT
@@ -64,6 +112,152 @@ zabbix_export:
tags:
- tag: Application
value: 'Server connections'
- uuid: 4e5c0d10a4494c959445b4cd7a2e696e
name: 'ME CRC mismatches'
type: DEPENDENT
key: telemt.me_crc_mismatch_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_crc_mismatch_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Middle-End connections'
- uuid: 21a4a48b6e98457d87c56c3ae7b56c55
name: 'ME endpoint quarantines due to rapid flaps'
type: DEPENDENT
key: telemt.me_endpoint_quarantine_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_endpoint_quarantine_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: c8ffc30dc3d94a6d9085ac79413fbdd6
name: 'Runtime ME writer floor policy mode'
type: DEPENDENT
key: telemt.me_floor_mode
delay: '0'
value_type: TEXT
trends: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'telemt_me_floor_mode == 1'
- label
- mode
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 4814b52d5d184f63b64654e7635bdf6a
name: 'ME handshake rejects from upstream'
type: DEPENDENT
key: telemt.me_handshake_reject_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_handshake_reject_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 72d11caecefb4472b6c3e07f1ee90053
name: 'Hardswap cycles that reused an existing pending generation'
type: DEPENDENT
key: telemt.me_hardswap_pending_reuse_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_hardswap_pending_reuse_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 447030854e8840a393874f54e25861d5
name: 'Pending hardswap generations reset by TTL expiration'
type: DEPENDENT
key: telemt.me_hardswap_pending_ttl_expired_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_hardswap_pending_ttl_expired_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 47f55dd7d9394405b1c0eba6e6eb3e5c
name: 'ME idle writers closed by peer'
type: DEPENDENT
key: telemt.me_idle_close_by_peer_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_idle_close_by_peer_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 9e4598efbfe246fab9360270002b0cfa
name: 'ME KDF input drift detections'
type: DEPENDENT
key: telemt.me_kdf_drift_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_kdf_drift_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 565cc9780c5541bfb7acbb1f4973b5fc
name: 'ME KDF client-port changes with stable non-port material'
type: DEPENDENT
key: telemt.me_kdf_port_only_drift_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_kdf_port_only_drift_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: fb95391c7f894e3eb6984b92885813d2
name: 'ME keepalive send failures'
type: DEPENDENT
@@ -81,6 +275,22 @@ zabbix_export:
tags:
- tag: Application
value: 'Middle-End connections'
- uuid: 7b5995401195430e9f9e02e5dd8c3313
name: 'ME keepalive pong replies'
type: DEPENDENT
key: telemt.me_keepalive_pong_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_keepalive_pong_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Middle-End connections'
- uuid: fb95391c7f894e3eb6984b92885813c2
name: 'ME keepalive frames sent'
type: DEPENDENT
@@ -98,6 +308,38 @@ zabbix_export:
tags:
- tag: Application
value: 'Middle-End connections'
- uuid: da5af5fd691d4f40bc6cad78b4758eac
name: 'ME keepalive ping timeouts'
type: DEPENDENT
key: telemt.me_keepalive_timeout_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_keepalive_timeout_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Middle-End connections'
- uuid: 50b45e494d584a7b86fca8b80c727411
name: 'ME reader EOF terminations'
type: DEPENDENT
key: telemt.me_reader_eof_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_reader_eof_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: fb95391c7f894e3eb6984b92885811a2
name: 'ME reconnect attempts'
type: DEPENDENT
@@ -132,6 +374,470 @@ zabbix_export:
tags:
- tag: Application
value: 'Middle-End connections'
- uuid: 6288b537b7964aadb8a483abd716855a
name: 'Immediate ME refill failures'
type: DEPENDENT
key: telemt.me_refill_failed_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_refill_failed_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 8450bdb48f9b4505beb8fdfc665b37c5
name: 'Immediate ME refill skips due to inflight dedup'
type: DEPENDENT
key: telemt.me_refill_skipped_inflight_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_refill_skipped_inflight_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: cb192264c03a40578140863970333515
name: 'Immediate ME refill runs started'
type: DEPENDENT
key: telemt.me_refill_triggered_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_refill_triggered_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 8f46b374332848fba0daba72e17eaad0
name: 'ME route drops: channel closed'
type: DEPENDENT
key: telemt.me_route_drop_channel_closed_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_route_drop_channel_closed_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Middle-End connections'
- uuid: de5fa7a316554d099bcf5e000b33bfed
name: 'ME route drops: no conn'
type: DEPENDENT
key: telemt.me_route_drop_no_conn_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_route_drop_no_conn_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Middle-End connections'
- uuid: d9e1630ce38946f7a8d179187793f12c
name: 'ME route drops: queue full by adaptive profile'
type: DEPENDENT
key: telemt.me_route_drop_queue_full_profile_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'telemt_me_route_drop_queue_full_profile_total == 1'
- label
- profile
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: d5caefb8978e4f3eac4dcdecd4655c46
name: 'ME route drops: queue full'
type: DEPENDENT
key: telemt.me_route_drop_queue_full_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_route_drop_queue_full_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: f682298c2dfc46dda45771a58faa9ffa
name: 'Service RPC_CLOSE_EXT sent after activity signals'
type: DEPENDENT
key: telemt.me_rpc_proxy_req_signal_close_sent_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_rpc_proxy_req_signal_close_sent_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 5db4bdc93959473eade9281c221e34b6
name: 'Service RPC_PROXY_REQ activity signal failures'
type: DEPENDENT
key: telemt.me_rpc_proxy_req_signal_failed_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_rpc_proxy_req_signal_failed_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 4e75611bc3854415b63a1863e9bf176f
name: 'Service RPC_PROXY_REQ responses observed'
type: DEPENDENT
key: telemt.me_rpc_proxy_req_signal_response_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_rpc_proxy_req_signal_response_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: ecbffb29f2784839bea0ce2a38393438
name: 'Service RPC_PROXY_REQ activity signals sent'
type: DEPENDENT
key: telemt.me_rpc_proxy_req_signal_sent_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_rpc_proxy_req_signal_sent_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 078eff3deeec435597f0c531457bb906
name: 'Service RPC_PROXY_REQ skipped due to missing writer metadata'
type: DEPENDENT
key: telemt.me_rpc_proxy_req_signal_skipped_no_meta_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_rpc_proxy_req_signal_skipped_no_meta_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 7429ffbd94a340d7a600bc1690eb57e7
name: 'ME sequence mismatches'
type: DEPENDENT
key: telemt.me_seq_mismatch_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_seq_mismatch_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 0f1f77ae34df4a48b36ad263359b5ad3
name: 'Single-endpoint DC outage transitions to active state'
type: DEPENDENT
key: telemt.me_single_endpoint_outage_enter_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_single_endpoint_outage_enter_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 63d44ef672ff4df288914eb98f6fa72c
name: 'Single-endpoint DC outage recovery transitions'
type: DEPENDENT
key: telemt.me_single_endpoint_outage_exit_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_single_endpoint_outage_exit_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 1b72ff95f1ba4fb2924aa3a129b22f4d
name: 'Reconnect attempts performed during single-endpoint outages'
type: DEPENDENT
key: telemt.me_single_endpoint_outage_reconnect_attempt_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_single_endpoint_outage_reconnect_attempt_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 466bb352d55946a0bb78efc63e1ed71e
name: 'Successful reconnect attempts during single-endpoint outages'
type: DEPENDENT
key: telemt.me_single_endpoint_outage_reconnect_success_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_single_endpoint_outage_reconnect_success_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 295b4a519a4d46f7b1ddbdf5b5268751
name: 'Outage reconnect attempts that bypassed quarantine'
type: DEPENDENT
key: telemt.me_single_endpoint_quarantine_bypass_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_single_endpoint_quarantine_bypass_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: bffa4861f83f4445bb0b2259e100e04c
name: 'Shadow rotations skipped because endpoint is quarantined'
type: DEPENDENT
key: telemt.me_single_endpoint_shadow_rotate_skipped_quarantine_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_single_endpoint_shadow_rotate_skipped_quarantine_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: f80ce02b50824f8ea0ddabac9ff97757
name: 'Successful periodic shadow rotations for single-endpoint DC groups'
type: DEPENDENT
key: telemt.me_single_endpoint_shadow_rotate_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_single_endpoint_shadow_rotate_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: bf2a0ff89c314f78904aa43351601111
name: 'Total ME writer removals'
type: DEPENDENT
key: telemt.me_writer_removed_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_writer_removed_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 0d12ea02187745eba55498dfb16daa5c
name: 'Unexpected writer removals not yet compensated by restore'
type: DEPENDENT
key: telemt.me_writer_removed_unexpected_minus_restored_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_writer_removed_unexpected_minus_restored_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 644278e7f87947e1a49483ba4487e32b
name: 'Unexpected ME writer removals that triggered refill'
type: DEPENDENT
key: telemt.me_writer_removed_unexpected_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_writer_removed_unexpected_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: a6c24dfc85d643dab1c81fc1e63fe3cc
name: 'Refilled ME writer restored via fallback endpoint'
type: DEPENDENT
key: telemt.me_writer_restored_fallback_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_writer_restored_fallback_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: d7d0a78ca6da4bb9b4a0991fd83149cf
name: 'Refilled ME writer restored on the same endpoint'
type: DEPENDENT
key: telemt.me_writer_restored_same_endpoint_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_me_writer_restored_same_endpoint_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: beb906ab89564cf9adfbb7b1d4553c44
name: 'Active draining ME writers'
type: DEPENDENT
key: telemt.pool_drain_active
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_pool_drain_active
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 2f0926e00d7a4e5aa1783cb33b1192ea
name: 'Forced close events for draining writers'
type: DEPENDENT
key: telemt.pool_force_close_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_pool_force_close_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 70d0b4da6079435ebe978e99bda8f1d3
name: 'Stale writer fallback picks for new binds'
type: DEPENDENT
key: telemt.pool_stale_pick_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_pool_stale_pick_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 8a1d240b9b554905a8add9bf730bf1f4
name: 'Successful ME pool swaps'
type: DEPENDENT
key: telemt.pool_swap_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_pool_swap_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 991b1858e3f94b3098ff0f84859efc41
name: 'Prometheus metrics'
type: HTTP_AGENT
@@ -139,11 +845,158 @@ zabbix_export:
value_type: TEXT
trends: '0'
url: '{$TELEMT_URL}'
- uuid: cef2547bb9464d10b11b6c19beac089d
name: 'Invalid secure frame lengths'
type: DEPENDENT
key: telemt.secure_padding_invalid_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_secure_padding_invalid_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: c164d7b59bdc4429a23b908558de8cf4
name: 'Runtime core telemetry switch'
type: DEPENDENT
key: telemt.telemetry_core_enabled
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_telemetry_core_enabled
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: ff16438417d842178d26033d13520833
name: 'Runtime ME telemetry level flag'
type: DEPENDENT
key: telemt.telemetry_me_level
delay: '0'
value_type: TEXT
trends: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'telemt_telemetry_me_level == 1'
- label
- level
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 9fec0bb7c3c84ada96668b74d5849556
name: 'Runtime per-user telemetry switch'
type: DEPENDENT
key: telemt.telemetry_user_enabled
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_telemetry_user_enabled
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 378b765aa7bc4a4ea87d3bc876c50d12
name: 'User-labeled metric series suppression flag'
type: DEPENDENT
key: telemt.telemetry_user_series_suppressed
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_telemetry_user_series_suppressed
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 17972d992fa84fc1b53fdefed123ccd8
name: 'Upstream connect attempts across all requests'
type: DEPENDENT
key: telemt.upstream_connect_attempt_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_upstream_connect_attempt_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 38627dd1cb7145e180d111bdee1d2c23
name: 'Hard errors that triggered upstream connect failfast'
type: DEPENDENT
key: telemt.upstream_connect_failfast_hard_error_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_upstream_connect_failfast_hard_error_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 0ffd4c35b6734c83bd77c59f30bf3246
name: 'Failed upstream connect request cycles'
type: DEPENDENT
key: telemt.upstream_connect_fail_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_upstream_connect_fail_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: 7da255f4f38c4095921bc876d16d3586
name: 'Successful upstream connect request cycles'
type: DEPENDENT
key: telemt.upstream_connect_success_total
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- telemt_upstream_connect_success_total
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Telemt other'
- uuid: fb95391c7f894e3eb6984b92885813b2
name: 'Telemt Uptime'
type: DEPENDENT
key: telemt.uptime
delay: '0'
value_type: FLOAT
trends: '0'
units: s
preprocessing:
@@ -180,6 +1033,56 @@ zabbix_export:
tags:
- tag: Application
value: 'Users connections'
- uuid: f7ad02d1635542b584bba5941375ae41
name: 'Current number of unique active IPs by {#TELEMT_USER}'
type: DEPENDENT
key: 'telemt.ips_current_[{#TELEMT_USER}]'
delay: '0'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'telemt_user_unique_ips_current{user="{#TELEMT_USER}"}'
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Users IPs'
- uuid: 100b09bf1cff420495c5c105bdb0af6c
name: 'Configured unique IP limit to {#TELEMT_USER}'
type: DEPENDENT
key: 'telemt.ips_limit_[{#TELEMT_USER}]'
delay: '0'
description: '0 means unlimited'
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'telemt_user_unique_ips_limit{user="{#TELEMT_USER}"}'
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Users IPs'
- uuid: ef3ac8f5c5d746bbaa4b0b698ba0d9f6
name: 'Unique IP usage ratio by {#TELEMT_USER}'
type: DEPENDENT
key: 'telemt.ips_utilization_[{#TELEMT_USER}]'
delay: '0'
value_type: FLOAT
preprocessing:
- type: PROMETHEUS_PATTERN
parameters:
- 'telemt_user_unique_ips_utilization{user="{#TELEMT_USER}"}'
- value
- ''
master_item:
key: telemt.prom_metrics
tags:
- tag: Application
value: 'Users IPs'
- uuid: 3ccce91ab5d54b4d972280c7b7bda910
name: 'Messages received from {#TELEMT_USER}'
type: DEPENDENT