Compare commits


43 Commits

Author SHA1 Message Date
Alexey
342b0119dd Merge pull request #509 from telemt/bump
Update Cargo.toml
2026-03-20 16:27:39 +03:00
Alexey
2605929b93 Update Cargo.toml 2026-03-20 16:26:57 +03:00
Alexey
36814b6355 ME Draining on Dual-Stack + TLS Fetcher Upstream Selection: merge pull request #508 from telemt/flow
ME Draining on Dual-Stack + TLS Fetcher Upstream Selection
2026-03-20 16:24:17 +03:00
Alexey
269ba537ad ME Draining on Dual-Stack
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-20 16:07:12 +03:00
Alexey
5c0eb6dbe8 TLS Fetcher Upstream Selection
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-20 16:05:24 +03:00
Alexey
dd07fa9453 Merge pull request #505 from telemt/flow-me
Teardown Monitoring in API and Metrics
2026-03-20 12:59:39 +03:00
Alexey
bb1a372ac4 Merge branch 'main' into flow-me 2026-03-20 12:59:32 +03:00
Alexey
6661401a34 Merge pull request #506 from telemt/about-releases
Update README.md
2026-03-20 12:59:09 +03:00
Alexey
cd65fb432b Update README.md 2026-03-20 12:58:55 +03:00
Alexey
caf0717789 Merge branch 'main' into flow-me 2026-03-20 12:57:27 +03:00
Alexey
4a610d83a3 Update Cargo.toml
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-20 12:56:13 +03:00
Alexey
aba4205dcc Teardown Monitoring in Metrics
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-20 12:46:35 +03:00
Alexey
ef9b7b1492 Teardown Monitoring in API
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-20 12:45:53 +03:00
Alexey
d112f15b90 ME Writers Anti-stuck + Quarantine fixes + ME Writers Advanced Cleanup + Authoritative Teardown + Orphan Watchdog + Force-Close Safety Policy: merge pull request #504 from telemt/flow-me
ME Writers Anti-stuck + Quarantine fixes + ME Writers Advanced Cleanup + Authoritative Teardown + Orphan Watchdog + Force-Close Safety Policy
2026-03-20 12:41:45 +03:00
Alexey
b55b264345 Merge branch 'main' into flow-me 2026-03-20 12:20:51 +03:00
Alexey
f61d25ebe0 Authoritative Teardown + Orphan Watchdog + Force-Close Safety Policy
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-20 12:11:47 +03:00
Alexey
ed4d1167dd ME Writers Advanced Cleanup
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-20 12:09:23 +03:00
Alexey
dc6948cf39 Merge pull request #502 from telemt/about-releases
Update README.md
2026-03-20 11:25:19 +03:00
Alexey
4f11aa0772 Update README.md 2026-03-20 11:25:07 +03:00
Alexey
e40361b171 Cargo.toml + Cargo.lock
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-20 00:45:04 +03:00
Alexey
1c6c73beda ME Writers Anti-stuck and Quarantine fixes
Co-Authored-By: Nook Scheel <nook@live.ru>
2026-03-20 00:41:40 +03:00
Alexey
67dc1e8d18 Merge pull request #498 from telemt/bump
Update Cargo.toml
2026-03-19 18:25:14 +03:00
Alexey
ad8ada33c9 Update Cargo.toml 2026-03-19 18:24:01 +03:00
Alexey
bbb201b433 Instadrain + Hard-remove for long draining-state: merge pull request #497 from telemt/flow-stuck-writer
Instadrain + Hard-remove for long draining-state
2026-03-19 18:23:38 +03:00
Alexey
8d1faece60 Instadrain + Hard-remove for long draining-state
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-19 17:45:17 +03:00
Alexey
a603505f90 Merge pull request #492 from temandroid/main
fix(docker): expose port 9091 and allow external API access
2026-03-19 17:32:49 +03:00
Alexey
f8c42c324f Merge pull request #494 from Dimasssss/patch-1
Update install.sh
2026-03-19 17:32:05 +03:00
Dimasssss
dc3363aa0d Update install.sh 2026-03-19 16:23:32 +03:00
Alexey
f655924323 Update health.rs
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-19 16:15:00 +03:00
TEMAndroid
05c066c676 fix(docker): expose port 9091 and allow external API access
Add 9091 port mapping to compose.yml to make the REST API reachable
from outside the container. Previously only port 9090 (metrics) was
published, making the documented curl commands non-functional.

fixes #434
2026-03-19 15:54:01 +03:00
Alexey
1e000c2e7e ME Writer stuck-up in draining-state fixes: merge pull request #491 from telemt/flow-stuck-writer
ME Writer stuck-up in draining-state fixes
2026-03-19 14:44:43 +03:00
Alexey
fa17e719f6 Merge pull request #490 from telemt/bump
Update Cargo.toml
2026-03-19 14:43:15 +03:00
Alexey
ae3ced8e7c Update Cargo.toml 2026-03-19 14:42:59 +03:00
Alexey
3279f6d46a Cleanup-path as non-blocking
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-19 14:07:20 +03:00
Alexey
6f9aef7bb4 ME Writer stuck-up in draining-state fixes
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-19 13:08:35 +03:00
Alexey
049db1196f Merge pull request #487 from telemt/code-of-conduct
Update CODE_OF_CONDUCT.md
2026-03-19 01:18:14 +03:00
Alexey
c8ffc23cf7 Update CODE_OF_CONDUCT.md 2026-03-19 01:18:02 +03:00
Alexey
f230f2ce0e Merge pull request #486 from telemt/code-of-conduct
Create CODE_OF_CONDUCT.md
2026-03-19 00:59:50 +03:00
Alexey
bdac6e3480 Create CODE_OF_CONDUCT.md 2026-03-19 00:59:37 +03:00
Alexey
a4e9746dc7 Merge pull request #485 from Dimasssss/patch-3
Update install.sh
2026-03-19 00:43:14 +03:00
Dimasssss
c47495d671 Update install.sh
Restored the old functionality and added new features:
- Restored automatic creation of the config with a secret
- Restored automatic creation of the service
- Added removal of the service and telemt via `install.sh uninstall`
- Added full removal, including the config, via `install.sh --purge`
- Added installation of a specific version via `install.sh 3.3.15`
2026-03-19 00:36:02 +03:00
Alexey
5ae3a90d5e Merge pull request #483 from Dimasssss/patch-1
Update CONFIG_PARAMS.en.md
2026-03-18 23:02:33 +03:00
Dimasssss
1935455256 Update CONFIG_PARAMS.en.md 2026-03-18 18:20:23 +03:00
38 changed files with 3733 additions and 660 deletions

CODE_OF_CONDUCT.md (new file, 208 lines)

@@ -0,0 +1,208 @@
# Code of Conduct
## 1. Purpose
Telemt exists to solve technical problems.
Telemt is open to contributors who want to learn, improve and build meaningful systems together.
It is a place for building, testing, reasoning, documenting, and improving systems.
Discussions that advance this work are in scope. Discussions that divert it are not.
Technology has consequences. Responsibility is inherent.
> **Zweck bestimmt die Form.**
> Purpose defines form.
---
## 2. Principles
* **Technical over emotional**
Arguments are grounded in data, logs, reproducible cases, or clear reasoning.
* **Clarity over noise**
Communication is structured, concise, and relevant.
* **Openness with standards**
Participation is open. The work remains disciplined.
* **Independence of judgment**
Claims are evaluated on technical merit, not affiliation or posture.
* **Responsibility over capability**
Capability does not justify careless use.
* **Cooperation over friction**
Progress depends on coordination, mutual support, and honest review.
* **Good intent, rigorous method**
Assume good intent, but require rigor.
> **Aussagen gelten nach ihrer Begründung.**
> Claims are weighed by evidence.
---
## 3. Expected Behavior
Participants are expected to:
* Communicate directly and respectfully
* Support claims with evidence
* Stay within technical scope
* Accept critique and provide it constructively
* Reduce noise, duplication, and ambiguity
* Help others reach correct and reproducible outcomes
* Act in a way that improves the system as a whole
Precision is learned.
New contributors are welcome. They are expected to grow into these standards. Existing contributors are expected to make that growth possible.
> **Wer behauptet, belegt.**
> Whoever claims, proves.
---
## 4. Unacceptable Behavior
The following is not allowed:
* Personal attacks, insults, harassment, or intimidation
* Repeatedly derailing discussion away from Telemt's purpose
* Spam, flooding, or repeated low-quality input
* Misinformation presented as fact
* Attempts to degrade, destabilize, or exhaust Telemt or its participants
* Use of Telemt or its spaces to enable harm
Telemt is not a venue for disputes that displace technical work.
Such discussions may be closed, removed, or redirected.
> **Störung ist kein Beitrag.**
> Disruption is not contribution.
---
## 5. Security and Misuse
Telemt is intended for responsible use.
* Do not use it to plan, coordinate, or execute harm
* Do not publish vulnerabilities without responsible disclosure
* Report security issues privately where possible
Security is both technical and behavioral.
> **Verantwortung endet nicht am Code.**
> Responsibility does not end at the code.
---
## 6. Openness
Telemt is open to contributors of different backgrounds, experience levels, and working styles.
Standards are public, legible, and applied to the work itself.
Questions are welcome. Careful disagreement is welcome. Honest correction is welcome.
Gatekeeping by obscurity, status signaling, or hostility is not.
---
## 7. Scope
This Code of Conduct applies to all official spaces:
* Source repositories (issues, pull requests, discussions)
* Documentation
* Communication channels associated with Telemt
---
## 8. Maintainer Stewardship
Maintainers are responsible for final decisions in matters of conduct, scope, and direction.
This responsibility is stewardship: preserving continuity, protecting signal, maintaining standards, and keeping Telemt workable for others.
Judgment should be exercised with restraint, consistency, and institutional responsibility.
Not every decision requires extended debate.
Not every intervention requires public explanation.
All decisions are expected to serve the durability, clarity, and integrity of Telemt.
> **Ordnung ist Voraussetzung der Funktion.**
> Order is the precondition of function.
---
## 9. Enforcement
Maintainers may act to preserve the integrity of Telemt, including by:
* Removing content
* Locking discussions
* Rejecting contributions
* Restricting or banning participants
Actions are taken to maintain function, continuity, and signal quality.
Where possible, correction is preferred to exclusion.
Where necessary, exclusion is preferred to decay.
---
## 10. Final
Telemt is built on discipline, structure, and shared intent.
Signal over noise.
Facts over opinion.
Systems over rhetoric.
Work is collective.
Outcomes are shared.
Responsibility is distributed.
Precision is learned.
Rigor is expected.
Help is part of the work.
> **Ordnung ist Voraussetzung der Freiheit.**
> Order is the precondition of freedom.
If you contribute — contribute with care.
If you speak — speak with substance.
If you engage — engage constructively.
---
## 11. After All
Systems outlive intentions.
What is built will be used.
What is released will propagate.
What is maintained will define the future state.
There is no neutral infrastructure, only infrastructure shaped well or poorly.
> **Jedes System trägt Verantwortung.**
> Every system carries responsibility.
Stability requires discipline.
Freedom requires structure.
Trust requires honesty.
In the end, the system reflects its contributors.

Cargo.lock (generated, 302 changed lines)

@@ -45,15 +45,24 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstyle"
-version = "1.0.13"
+version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
+checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
[[package]]
name = "anyhow"
-version = "1.0.101"
+version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
+[[package]]
+name = "arc-swap"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5"
+dependencies = [
+"rustversion",
+]
[[package]]
name = "asn1-rs"
@@ -135,9 +144,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
-version = "2.10.0"
+version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
+checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
[[package]]
name = "block-buffer"
@@ -159,9 +168,9 @@ dependencies = [
[[package]]
name = "bumpalo"
-version = "3.19.1"
+version = "3.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
+checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
[[package]]
name = "bytes"
@@ -186,9 +195,9 @@ dependencies = [
[[package]]
name = "cc"
-version = "1.2.55"
+version = "1.2.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29"
+checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423"
dependencies = [
"find-msvc-tools",
"shlex",
@@ -214,9 +223,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "chrono"
-version = "0.4.43"
+version = "0.4.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
+checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
dependencies = [
"iana-time-zone",
"js-sys",
@@ -265,18 +274,18 @@ dependencies = [
[[package]]
name = "clap"
-version = "4.5.58"
+version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806"
+checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351"
dependencies = [
"clap_builder",
]
[[package]]
name = "clap_builder"
-version = "4.5.58"
+version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2"
+checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
dependencies = [
"anstyle",
"clap_lex",
@@ -284,9 +293,9 @@ dependencies = [
[[package]]
name = "clap_lex"
-version = "1.0.0"
+version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831"
+checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
[[package]]
name = "core-foundation-sys"
@@ -486,7 +495,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
@@ -572,9 +581,9 @@ dependencies = [
[[package]]
name = "futures"
-version = "0.3.31"
+version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
+checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d"
dependencies = [
"futures-channel",
"futures-core",
@@ -587,9 +596,9 @@ dependencies = [
[[package]]
name = "futures-channel"
-version = "0.3.31"
+version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
+checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d"
dependencies = [
"futures-core",
"futures-sink",
@@ -597,15 +606,15 @@ dependencies = [
[[package]]
name = "futures-core"
-version = "0.3.31"
+version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
+checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
[[package]]
name = "futures-executor"
-version = "0.3.31"
+version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
+checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d"
dependencies = [
"futures-core",
"futures-task",
@@ -614,38 +623,38 @@ dependencies = [
[[package]]
name = "futures-io"
-version = "0.3.31"
+version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
+checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"
[[package]]
name = "futures-macro"
-version = "0.3.31"
+version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
+checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
name = "futures-sink"
-version = "0.3.31"
+version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
+checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893"
[[package]]
name = "futures-task"
-version = "0.3.31"
+version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
+checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
[[package]]
name = "futures-util"
-version = "0.3.31"
+version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
+checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
dependencies = [
"futures-channel",
"futures-core",
@@ -655,7 +664,6 @@ dependencies = [
"futures-task",
"memchr",
"pin-project-lite",
-"pin-utils",
"slab",
]
@@ -691,20 +699,20 @@ dependencies = [
"cfg-if",
"js-sys",
"libc",
-"r-efi",
+"r-efi 5.3.0",
"wasip2",
"wasm-bindgen",
]
[[package]]
name = "getrandom"
-version = "0.4.1"
+version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec"
+checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
dependencies = [
"cfg-if",
"libc",
-"r-efi",
+"r-efi 6.0.0",
"wasip2",
"wasip3",
]
@@ -894,7 +902,7 @@ dependencies = [
"libc",
"percent-encoding",
"pin-project-lite",
-"socket2 0.6.2",
+"socket2 0.6.3",
"tokio",
"tower-service",
"tracing",
@@ -1076,9 +1084,9 @@ dependencies = [
[[package]]
name = "ipnet"
-version = "2.11.0"
+version = "2.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
+checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"
[[package]]
name = "ipnetwork"
@@ -1127,9 +1135,9 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "js-sys"
-version = "0.3.85"
+version = "0.3.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3"
+checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
dependencies = [
"once_cell",
"wasm-bindgen",
@@ -1169,26 +1177,27 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
[[package]]
name = "libc"
-version = "0.2.181"
+version = "0.2.183"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "459427e2af2b9c839b132acb702a1c654d95e10f8c326bfc2ad11310e458b1c5"
+checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
[[package]]
name = "libredox"
-version = "0.1.12"
+version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616"
+checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a"
dependencies = [
-"bitflags 2.10.0",
+"bitflags 2.11.0",
"libc",
-"redox_syscall 0.7.1",
+"plain",
+"redox_syscall 0.7.3",
]
[[package]]
name = "linux-raw-sys"
-version = "0.11.0"
+version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
+checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
[[package]]
name = "litemap"
@@ -1295,7 +1304,7 @@ version = "0.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
dependencies = [
-"bitflags 2.10.0",
+"bitflags 2.11.0",
"cfg-if",
"cfg_aliases 0.1.1",
"libc",
@@ -1318,7 +1327,7 @@ version = "6.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d"
dependencies = [
-"bitflags 2.10.0",
+"bitflags 2.11.0",
"crossbeam-channel",
"filetime",
"fsevent-sys",
@@ -1385,9 +1394,9 @@ dependencies = [
[[package]]
name = "once_cell"
-version = "1.21.3"
+version = "1.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
[[package]]
name = "oorandom"
@@ -1426,9 +1435,9 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "pin-project-lite"
-version = "0.2.16"
+version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
+checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
[[package]]
name = "pin-utils"
@@ -1436,6 +1445,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+[[package]]
+name = "plain"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
[[package]]
name = "plotters"
version = "0.3.7"
@@ -1495,7 +1510,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
dependencies = [
"proc-macro2",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
@@ -1515,7 +1530,7 @@ checksum = "37566cb3fdacef14c0737f9546df7cfeadbfbc9fef10991038bf5015d0c80532"
dependencies = [
"bit-set",
"bit-vec",
-"bitflags 2.10.0",
+"bitflags 2.11.0",
"num-traits",
"rand",
"rand_chacha",
@@ -1545,7 +1560,7 @@ dependencies = [
"quinn-udp",
"rustc-hash",
"rustls",
-"socket2 0.6.2",
+"socket2 0.6.3",
"thiserror 2.0.18",
"tokio",
"tracing",
@@ -1554,9 +1569,9 @@ dependencies = [
[[package]]
name = "quinn-proto"
-version = "0.11.13"
+version = "0.11.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31"
+checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
dependencies = [
"bytes",
"getrandom 0.3.4",
@@ -1582,16 +1597,16 @@ dependencies = [
"cfg_aliases 0.2.1",
"libc",
"once_cell",
-"socket2 0.6.2",
+"socket2 0.6.3",
"tracing",
"windows-sys 0.60.2",
]
[[package]]
name = "quote"
-version = "1.0.44"
+version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
+checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
@@ -1602,6 +1617,12 @@ version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+[[package]]
+name = "r-efi"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
[[package]]
name = "rand"
version = "0.9.2"
@@ -1666,16 +1687,16 @@ version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
-"bitflags 2.10.0",
+"bitflags 2.11.0",
]
[[package]]
name = "redox_syscall"
-version = "0.7.1"
+version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35985aa610addc02e24fc232012c86fd11f14111180f902b67e2d5331f8ebf2b"
+checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16"
dependencies = [
-"bitflags 2.10.0",
+"bitflags 2.11.0",
]
[[package]]
@@ -1703,9 +1724,9 @@ dependencies = [
[[package]]
name = "regex-syntax"
-version = "0.8.9"
+version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "reqwest"
@@ -1785,11 +1806,11 @@ dependencies = [
[[package]]
name = "rustix"
-version = "1.1.3"
+version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34"
+checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
dependencies = [
-"bitflags 2.10.0",
+"bitflags 2.11.0",
"errno",
"libc",
"linux-raw-sys",
@@ -1798,9 +1819,9 @@ dependencies = [
[[package]]
name = "rustls"
-version = "0.23.36"
+version = "0.23.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b"
+checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
dependencies = [
"once_cell",
"ring",
@@ -1903,7 +1924,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
@@ -2011,12 +2032,12 @@ dependencies = [
[[package]]
name = "socket2"
-version = "0.6.2"
+version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0"
+checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
dependencies = [
"libc",
-"windows-sys 0.60.2",
+"windows-sys 0.61.2",
]
[[package]]
@@ -2044,9 +2065,9 @@ dependencies = [
[[package]]
name = "syn"
-version = "2.0.114"
+version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
@@ -2082,15 +2103,16 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
name = "telemt"
-version = "3.3.19"
+version = "3.3.25"
dependencies = [
"aes",
"anyhow",
+"arc-swap",
"base64",
"bytes",
"cbc",
@@ -2143,12 +2165,12 @@ dependencies = [
[[package]]
name = "tempfile"
-version = "3.25.0"
+version = "3.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1"
+checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
dependencies = [
"fastrand",
-"getrandom 0.4.1",
+"getrandom 0.4.2",
"once_cell",
"rustix",
"windows-sys 0.61.2",
@@ -2180,7 +2202,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
@@ -2191,7 +2213,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
@@ -2256,9 +2278,9 @@ dependencies = [
[[package]]
name = "tinyvec"
-version = "1.10.0"
+version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
+checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3"
dependencies = [
"tinyvec_macros",
]
@@ -2271,9 +2293,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
-version = "1.49.0"
+version = "1.50.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86"
+checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d"
dependencies = [
"bytes",
"libc",
@@ -2281,7 +2303,7 @@ dependencies = [
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
-"socket2 0.6.2",
+"socket2 0.6.3",
"tokio-macros",
"tracing",
"windows-sys 0.61.2",
@@ -2289,13 +2311,13 @@ dependencies = [
[[package]]
name = "tokio-macros"
-version = "2.6.0"
+version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
+checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
@@ -2409,7 +2431,7 @@ version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
dependencies = [
-"bitflags 2.10.0",
+"bitflags 2.11.0",
"bytes",
"futures-util",
"http",
@@ -2452,7 +2474,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
@@ -2478,9 +2500,9 @@ dependencies = [
[[package]]
name = "tracing-subscriber"
-version = "0.3.22"
+version = "0.3.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e"
+checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319"
dependencies = [
"matchers",
"nu-ansi-term",
@@ -2514,9 +2536,9 @@ checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
[[package]]
name = "unicode-ident"
-version = "1.0.23"
+version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "unicode-xid"
@@ -2614,9 +2636,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen"
-version = "0.2.108"
+version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566"
+checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
dependencies = [
"cfg-if",
"once_cell",
@@ -2627,9 +2649,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
-version = "0.4.58"
+version = "0.4.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f"
+checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8"
dependencies = [
"cfg-if",
"futures-util",
@@ -2641,9 +2663,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
-version = "0.2.108"
+version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608"
+checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@@ -2651,22 +2673,22 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
-version = "0.2.108"
+version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55"
+checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
dependencies = [
"bumpalo",
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
-version = "0.2.108"
+version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12"
+checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
dependencies = [
"unicode-ident",
]
@@ -2699,7 +2721,7 @@ version = "0.244.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
dependencies = [
-"bitflags 2.10.0",
+"bitflags 2.11.0",
"hashbrown 0.15.5",
"indexmap",
"semver",
@@ -2707,9 +2729,9 @@ dependencies = [
[[package]]
name = "web-sys"
-version = "0.3.85"
+version = "0.3.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598"
+checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9"
dependencies = [
"js-sys",
"wasm-bindgen",
@@ -2773,7 +2795,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
@@ -2784,7 +2806,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
@@ -3035,9 +3057,9 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
[[package]]
name = "winnow"
-version = "0.7.14"
+version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
+checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945"
dependencies = [
"memchr",
]
@@ -3072,7 +3094,7 @@ dependencies = [
"heck",
"indexmap",
"prettyplease",
-"syn 2.0.114",
+"syn 2.0.117",
"wasm-metadata",
"wit-bindgen-core",
"wit-component",
@@ -3088,7 +3110,7 @@ dependencies = [
"prettyplease",
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
"wit-bindgen-core",
"wit-bindgen-rust",
]
@@ -3100,7 +3122,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
dependencies = [
"anyhow",
-"bitflags 2.10.0",
+"bitflags 2.11.0",
"indexmap",
"log",
"serde",
@@ -3172,28 +3194,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
"synstructure 0.13.2",
]
[[package]]
name = "zerocopy"
-version = "0.8.39"
+version = "0.8.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a"
+checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
-version = "0.8.39"
+version = "0.8.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517"
+checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
@@ -3213,7 +3235,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
"synstructure 0.13.2",
]
@@ -3234,7 +3256,7 @@ checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
@@ -3267,11 +3289,11 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
dependencies = [
"proc-macro2",
"quote",
-"syn 2.0.114",
+"syn 2.0.117",
]
[[package]]
name = "zmij"
-version = "1.0.20"
+version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4de98dfa5d5b7fef4ee834d0073d560c9ca7b6c46a71d058c48db7960f8cfaf7"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"

Cargo.toml

@@ -1,6 +1,6 @@
[package]
name = "telemt"
-version = "3.3.23"
+version = "3.3.28"
edition = "2024"
[dependencies]
@@ -40,6 +40,7 @@ tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
parking_lot = "0.12"
dashmap = "5.5"
+arc-swap = "1.7"
lru = "0.16"
rand = "0.9"
chrono = { version = "0.4", features = ["serde"] }

Dockerfile

@@ -38,6 +38,7 @@ USER telemt
EXPOSE 443
EXPOSE 9090
+EXPOSE 9091
ENTRYPOINT ["/app/telemt"]
CMD ["config.toml"]

README.md

@@ -19,9 +19,9 @@
### 🇷🇺 RU
-#### Релиз 3.3.15 Semistable
+#### О релизах
-[3.3.15](https://github.com/telemt/telemt/releases/tag/3.3.15) по итогам работы в продакшн признан одним из самых стабильных и рекомендуется к использованию, когда cutting-edge фичи некритичны!
+[3.3.27](https://github.com/telemt/telemt/releases/tag/3.3.27) даёт баланс стабильности и передового функционала, а так же последние исправления по безопасности и багам
Будем рады вашему фидбеку и предложениям по улучшению — особенно в части **API**, **статистики**, **UX**
@@ -40,9 +40,9 @@
### 🇬🇧 EN
-#### Release 3.3.15 Semistable
+#### About releases
-[3.3.15](https://github.com/telemt/telemt/releases/tag/3.3.15) is, based on the results of his work in production, recognized as one of the most stable and recommended for use when cutting-edge features are not so necessary!
+[3.3.27](https://github.com/telemt/telemt/releases/tag/3.3.27) provides a balance of stability and advanced functionality, as well as the latest security and bug fixes
We are looking forward to your feedback and improvement proposals — especially regarding **API**, **statistics**, **UX**

compose.yml

@@ -7,6 +7,7 @@ services:
ports:
- "443:443"
- "127.0.0.1:9090:9090"
+- "127.0.0.1:9091:9091"
# Allow caching 'proxy-secret' in read-only container
working_dir: /run/telemt
volumes:

CONFIG_PARAMS.en.md

@@ -8,282 +8,287 @@ This document lists all configuration keys accepted by `config.toml`.
## Top-level keys
-| Parameter | Type | Description |
+| Parameter | Type | Default | Constraints / validation | Description |
-|---|---|---|
+|---|---|---|---|---|
-| include | `String` (special directive) | Includes another TOML file with `include = "relative/or/absolute/path.toml"`; includes are processed recursively before parsing. |
+| include | `String` (special directive) | `null` | — | Includes another TOML file with `include = "relative/or/absolute/path.toml"`; includes are processed recursively before parsing. |
-| show_link | `"*" \| String[]` | Legacy top-level link visibility selector (`"*"` for all users or explicit usernames list). |
+| show_link | `"*" \| String[]` | `[]` (`ShowLink::None`) | — | Legacy top-level link visibility selector (`"*"` for all users or explicit usernames list). |
-| dc_overrides | `Map<String, String[]>` | Overrides DC endpoints for non-standard DCs; key is DC id string, value is `ip:port` list. |
+| dc_overrides | `Map<String, String[]>` | `{}` | — | Overrides DC endpoints for non-standard DCs; key is DC id string, value is `ip:port` list. |
-| default_dc | `u8` | Default DC index used for unmapped non-standard DCs. |
+| default_dc | `u8 \| null` | `null` (effective fallback: `2` in ME routing) | — | Default DC index used for unmapped non-standard DCs. |
## [general]
-| Parameter | Type | Description |
+| Parameter | Type | Default | Constraints / validation | Description |
-|---|---|---|
+|---|---|---|---|---|
-| data_path | `String` | Optional runtime data directory path. |
+| data_path | `String \| null` | `null` | — | Optional runtime data directory path. |
-| prefer_ipv6 | `bool` | Prefer IPv6 where applicable in runtime logic. |
+| prefer_ipv6 | `bool` | `false` | — | Prefer IPv6 where applicable in runtime logic. |
-| fast_mode | `bool` | Enables fast-path optimizations for traffic processing. |
+| fast_mode | `bool` | `true` | — | Enables fast-path optimizations for traffic processing. |
-| use_middle_proxy | `bool` | Enables Middle Proxy mode. |
+| use_middle_proxy | `bool` | `true` | none | Enables ME transport mode; if `false`, runtime falls back to direct DC routing. |
-| proxy_secret_path | `String` | Path to proxy secret binary; can be auto-downloaded if absent. |
+| proxy_secret_path | `String \| null` | `"proxy-secret"` | Path may be `null`. | Path to Telegram infrastructure proxy-secret file used by ME handshake logic. |
-| proxy_config_v4_cache_path | `String` | Optional cache path for raw `getProxyConfig` (IPv4) snapshot. |
+| proxy_config_v4_cache_path | `String \| null` | `"cache/proxy-config-v4.txt"` | — | Optional cache path for raw `getProxyConfig` (IPv4) snapshot. |
-| proxy_config_v6_cache_path | `String` | Optional cache path for raw `getProxyConfigV6` (IPv6) snapshot. |
+| proxy_config_v6_cache_path | `String \| null` | `"cache/proxy-config-v6.txt"` | — | Optional cache path for raw `getProxyConfigV6` (IPv6) snapshot. |
-| ad_tag | `String` | Global fallback ad tag (32 hex characters). |
+| ad_tag | `String \| null` | `null` | — | Global fallback ad tag (32 hex characters). |
-| middle_proxy_nat_ip | `IpAddr` | Explicit public IP override for NAT environments. |
+| middle_proxy_nat_ip | `IpAddr \| null` | `null` | Must be a valid IP when set. | Manual public NAT IP override used as ME address material when set. |
-| middle_proxy_nat_probe | `bool` | Enables NAT probing for Middle Proxy KDF/public address discovery. |
+| middle_proxy_nat_probe | `bool` | `true` | Auto-forced to `true` when `use_middle_proxy = true`. | Enables ME NAT probing; runtime may force it on when ME mode is active. |
-| middle_proxy_nat_stun | `String` | Deprecated legacy single STUN server for NAT probing. |
+| middle_proxy_nat_stun | `String \| null` | `null` | Deprecated. Use `network.stun_servers`. | Deprecated legacy single STUN server for NAT probing. |
-| middle_proxy_nat_stun_servers | `String[]` | Deprecated legacy STUN list for NAT probing fallback. |
+| middle_proxy_nat_stun_servers | `String[]` | `[]` | Deprecated. Use `network.stun_servers`. | Deprecated legacy STUN list for NAT probing fallback. |
-| stun_nat_probe_concurrency | `usize` | Maximum concurrent STUN probes during NAT detection. |
+| stun_nat_probe_concurrency | `usize` | `8` | Must be `> 0`. | Maximum number of parallel STUN probes during NAT/public endpoint discovery. |
-| middle_proxy_pool_size | `usize` | Target size of active Middle Proxy writer pool. |
+| middle_proxy_pool_size | `usize` | `8` | none | Target size of active ME writer pool. |
-| middle_proxy_warm_standby | `usize` | Number of warm standby Middle-End connections. |
+| middle_proxy_warm_standby | `usize` | `16` | none | Reserved compatibility field in current runtime revision. |
-| me_init_retry_attempts | `u32` | Startup retries for ME pool initialization (`0` means unlimited). |
+| me_init_retry_attempts | `u32` | `0` | `0..=1_000_000`. | Startup retries for ME pool initialization (`0` means unlimited). |
-| me2dc_fallback | `bool` | Allows fallback from ME mode to direct DC when ME startup fails. |
+| me2dc_fallback | `bool` | `true` | — | Allows fallback from ME mode to direct DC when ME startup fails. |
-| me_keepalive_enabled | `bool` | Enables ME keepalive padding frames. |
+| me_keepalive_enabled | `bool` | `true` | none | Enables periodic ME keepalive/ping traffic. |
-| me_keepalive_interval_secs | `u64` | Keepalive interval in seconds. |
+| me_keepalive_interval_secs | `u64` | `8` | none | Base ME keepalive interval in seconds. |
-| me_keepalive_jitter_secs | `u64` | Keepalive jitter in seconds. |
+| me_keepalive_jitter_secs | `u64` | `2` | none | Keepalive jitter in seconds to reduce synchronized bursts. |
-| me_keepalive_payload_random | `bool` | Randomizes keepalive payload bytes instead of zero payload. |
+| me_keepalive_payload_random | `bool` | `true` | none | Randomizes keepalive payload bytes instead of fixed zero payload. |
-| rpc_proxy_req_every | `u64` | Interval for service `RPC_PROXY_REQ` activity signals (`0` disables). |
+| rpc_proxy_req_every | `u64` | `0` | `0` or `10..=300`. | Interval for service `RPC_PROXY_REQ` activity signals (`0` disables). |
-| me_writer_cmd_channel_capacity | `usize` | Capacity of per-writer command channel. |
+| me_writer_cmd_channel_capacity | `usize` | `4096` | Must be `> 0`. | Capacity of per-writer command channel. |
-| me_route_channel_capacity | `usize` | Capacity of per-connection ME response route channel. |
+| me_route_channel_capacity | `usize` | `768` | Must be `> 0`. | Capacity of per-connection ME response route channel. |
-| me_c2me_channel_capacity | `usize` | Capacity of per-client command queue (client reader -> ME sender). |
+| me_c2me_channel_capacity | `usize` | `1024` | Must be `> 0`. | Capacity of per-client command queue (client reader -> ME sender). |
-| me_reader_route_data_wait_ms | `u64` | Bounded wait for routing ME DATA to per-connection queue (`0` = no wait). |
+| me_reader_route_data_wait_ms | `u64` | `2` | `0..=20`. | Bounded wait for routing ME DATA to per-connection queue (`0` = no wait). |
-| me_d2c_flush_batch_max_frames | `usize` | Max ME->client frames coalesced before flush. |
+| me_d2c_flush_batch_max_frames | `usize` | `32` | `1..=512`. | Max ME->client frames coalesced before flush. |
-| me_d2c_flush_batch_max_bytes | `usize` | Max ME->client payload bytes coalesced before flush. |
+| me_d2c_flush_batch_max_bytes | `usize` | `131072` | `4096..=2_097_152`. | Max ME->client payload bytes coalesced before flush. |
-| me_d2c_flush_batch_max_delay_us | `u64` | Max microsecond wait for coalescing more ME->client frames (`0` disables timed coalescing). |
+| me_d2c_flush_batch_max_delay_us | `u64` | `500` | `0..=5000`. | Max microsecond wait for coalescing more ME->client frames (`0` disables timed coalescing). |
-| me_d2c_ack_flush_immediate | `bool` | Flushes client writer immediately after quick-ack write. |
+| me_d2c_ack_flush_immediate | `bool` | `true` | — | Flushes client writer immediately after quick-ack write. |
-| direct_relay_copy_buf_c2s_bytes | `usize` | Copy buffer size for client->DC direction in direct relay. |
+| direct_relay_copy_buf_c2s_bytes | `usize` | `65536` | `4096..=1_048_576`. | Copy buffer size for client->DC direction in direct relay. |
-| direct_relay_copy_buf_s2c_bytes | `usize` | Copy buffer size for DC->client direction in direct relay. |
+| direct_relay_copy_buf_s2c_bytes | `usize` | `262144` | `8192..=2_097_152`. | Copy buffer size for DC->client direction in direct relay. |
-| crypto_pending_buffer | `usize` | Max pending ciphertext buffer per client writer (bytes). |
+| crypto_pending_buffer | `usize` | `262144` | — | Max pending ciphertext buffer per client writer (bytes). |
-| max_client_frame | `usize` | Maximum allowed client MTProto frame size (bytes). |
+| max_client_frame | `usize` | `16777216` | — | Maximum allowed client MTProto frame size (bytes). |
-| desync_all_full | `bool` | Emits full crypto-desync forensic logs for every event. |
+| desync_all_full | `bool` | `false` | — | Emits full crypto-desync forensic logs for every event. |
-| beobachten | `bool` | Enables per-IP forensic observation buckets. |
+| beobachten | `bool` | `true` | — | Enables per-IP forensic observation buckets. |
-| beobachten_minutes | `u64` | Retention window (minutes) for per-IP observation buckets. |
+| beobachten_minutes | `u64` | `10` | Must be `> 0`. | Retention window (minutes) for per-IP observation buckets. |
-| beobachten_flush_secs | `u64` | Snapshot flush interval (seconds) for observation output file. |
+| beobachten_flush_secs | `u64` | `15` | Must be `> 0`. | Snapshot flush interval (seconds) for observation output file. |
-| beobachten_file | `String` | Observation snapshot output file path. |
+| beobachten_file | `String` | `"cache/beobachten.txt"` | — | Observation snapshot output file path. |
-| hardswap | `bool` | Enables hard-swap generation switching for ME pool updates. |
+| hardswap | `bool` | `true` | none | Enables generation-based ME hardswap strategy. |
-| me_warmup_stagger_enabled | `bool` | Enables staggered warmup for extra ME writers. |
+| me_warmup_stagger_enabled | `bool` | `true` | none | Staggers extra ME warmup dials to avoid connection spikes. |
-| me_warmup_step_delay_ms | `u64` | Base delay between warmup connections (ms). |
+| me_warmup_step_delay_ms | `u64` | `500` | none | Base delay in milliseconds between warmup dial steps. |
-| me_warmup_step_jitter_ms | `u64` | Jitter for warmup delay (ms). |
+| me_warmup_step_jitter_ms | `u64` | `300` | none | Additional random delay in milliseconds for warmup steps. |
-| me_reconnect_max_concurrent_per_dc | `u32` | Max concurrent reconnect attempts per DC. |
+| me_reconnect_max_concurrent_per_dc | `u32` | `8` | none | Limits concurrent reconnect workers per DC during health recovery. |
-| me_reconnect_backoff_base_ms | `u64` | Base reconnect backoff in ms. |
+| me_reconnect_backoff_base_ms | `u64` | `500` | none | Initial reconnect backoff in milliseconds. |
-| me_reconnect_backoff_cap_ms | `u64` | Cap reconnect backoff in ms. |
+| me_reconnect_backoff_cap_ms | `u64` | `30000` | none | Maximum reconnect backoff cap in milliseconds. |
-| me_reconnect_fast_retry_count | `u32` | Number of fast retry attempts before backoff. |
+| me_reconnect_fast_retry_count | `u32` | `16` | none | Immediate retry budget before long backoff behavior applies. |
-| me_single_endpoint_shadow_writers | `u8` | Additional reserve writers for one-endpoint DC groups. |
+| me_single_endpoint_shadow_writers | `u8` | `2` | `0..=32`. | Additional reserve writers for one-endpoint DC groups. |
-| me_single_endpoint_outage_mode_enabled | `bool` | Enables aggressive outage recovery for one-endpoint DC groups. |
+| me_single_endpoint_outage_mode_enabled | `bool` | `true` | — | Enables aggressive outage recovery for one-endpoint DC groups. |
| me_single_endpoint_outage_disable_quarantine | `bool` | Ignores endpoint quarantine in one-endpoint outage mode. | | me_single_endpoint_outage_disable_quarantine | `bool` | `true` | — | Ignores endpoint quarantine in one-endpoint outage mode. |
| me_single_endpoint_outage_backoff_min_ms | `u64` | Minimum reconnect backoff in outage mode (ms). | | me_single_endpoint_outage_backoff_min_ms | `u64` | `250` | Must be `> 0`; also `<= me_single_endpoint_outage_backoff_max_ms`. | Minimum reconnect backoff in outage mode (ms). |
| me_single_endpoint_outage_backoff_max_ms | `u64` | Maximum reconnect backoff in outage mode (ms). | | me_single_endpoint_outage_backoff_max_ms | `u64` | `3000` | Must be `> 0`; also `>= me_single_endpoint_outage_backoff_min_ms`. | Maximum reconnect backoff in outage mode (ms). |
| me_single_endpoint_shadow_rotate_every_secs | `u64` | Periodic shadow writer rotation interval (`0` disables). | | me_single_endpoint_shadow_rotate_every_secs | `u64` | `900` | — | Periodic shadow writer rotation interval (`0` disables). |
| me_floor_mode | `"static" \| "adaptive"` | Writer floor policy mode. | | me_floor_mode | `"static" \| "adaptive"` | `"adaptive"` | — | Writer floor policy mode. |
| me_adaptive_floor_idle_secs | `u64` | Idle time before adaptive floor may reduce one-endpoint target. | | me_adaptive_floor_idle_secs | `u64` | `90` | — | Idle time before adaptive floor may reduce one-endpoint target. |
| me_adaptive_floor_min_writers_single_endpoint | `u8` | Minimum adaptive writer target for one-endpoint DC groups. | | me_adaptive_floor_min_writers_single_endpoint | `u8` | `1` | `1..=32`. | Minimum adaptive writer target for one-endpoint DC groups. |
| me_adaptive_floor_min_writers_multi_endpoint | `u8` | Minimum adaptive writer target for multi-endpoint DC groups. | | me_adaptive_floor_min_writers_multi_endpoint | `u8` | `1` | `1..=32`. | Minimum adaptive writer target for multi-endpoint DC groups. |
| me_adaptive_floor_recover_grace_secs | `u64` | Grace period to hold static floor after activity. | | me_adaptive_floor_recover_grace_secs | `u64` | `180` | — | Grace period to hold static floor after activity. |
| me_adaptive_floor_writers_per_core_total | `u16` | Global writer budget per logical CPU core in adaptive mode. | | me_adaptive_floor_writers_per_core_total | `u16` | `48` | Must be `> 0`. | Global writer budget per logical CPU core in adaptive mode. |
| me_adaptive_floor_cpu_cores_override | `u16` | Manual CPU core count override (`0` uses auto-detection). | | me_adaptive_floor_cpu_cores_override | `u16` | `0` | — | Manual CPU core count override (`0` uses auto-detection). |
| me_adaptive_floor_max_extra_writers_single_per_core | `u16` | Per-core max extra writers above base floor for one-endpoint DCs. | | me_adaptive_floor_max_extra_writers_single_per_core | `u16` | `1` | — | Per-core max extra writers above base floor for one-endpoint DCs. |
| me_adaptive_floor_max_extra_writers_multi_per_core | `u16` | Per-core max extra writers above base floor for multi-endpoint DCs. | | me_adaptive_floor_max_extra_writers_multi_per_core | `u16` | `2` | — | Per-core max extra writers above base floor for multi-endpoint DCs. |
| me_adaptive_floor_max_active_writers_per_core | `u16` | Hard cap for active ME writers per logical CPU core. | | me_adaptive_floor_max_active_writers_per_core | `u16` | `64` | Must be `> 0`. | Hard cap for active ME writers per logical CPU core. |
| me_adaptive_floor_max_warm_writers_per_core | `u16` | Hard cap for warm ME writers per logical CPU core. | | me_adaptive_floor_max_warm_writers_per_core | `u16` | `64` | Must be `> 0`. | Hard cap for warm ME writers per logical CPU core. |
| me_adaptive_floor_max_active_writers_global | `u32` | Hard global cap for active ME writers. | | me_adaptive_floor_max_active_writers_global | `u32` | `256` | Must be `> 0`. | Hard global cap for active ME writers. |
| me_adaptive_floor_max_warm_writers_global | `u32` | Hard global cap for warm ME writers. | | me_adaptive_floor_max_warm_writers_global | `u32` | `256` | Must be `> 0`. | Hard global cap for warm ME writers. |
| upstream_connect_retry_attempts | `u32` | Connect attempts for selected upstream before error/fallback. | | upstream_connect_retry_attempts | `u32` | `2` | Must be `> 0`. | Connect attempts for selected upstream before error/fallback. |
| upstream_connect_retry_backoff_ms | `u64` | Delay between upstream connect attempts (ms). | | upstream_connect_retry_backoff_ms | `u64` | `100` | — | Delay between upstream connect attempts (ms). |
| upstream_connect_budget_ms | `u64` | Total wall-clock budget for one upstream connect request (ms). | | upstream_connect_budget_ms | `u64` | `3000` | Must be `> 0`. | Total wall-clock budget for one upstream connect request (ms). |
| upstream_unhealthy_fail_threshold | `u32` | Consecutive failed requests before upstream is marked unhealthy. | | upstream_unhealthy_fail_threshold | `u32` | `5` | Must be `> 0`. | Consecutive failed requests before upstream is marked unhealthy. |
| upstream_connect_failfast_hard_errors | `bool` | Skips additional retries for hard non-transient connect errors. | | upstream_connect_failfast_hard_errors | `bool` | `false` | — | Skips additional retries for hard non-transient connect errors. |
| stun_iface_mismatch_ignore | `bool` | Ignores STUN/interface mismatch and keeps Middle Proxy mode. | | stun_iface_mismatch_ignore | `bool` | `false` | none | Reserved compatibility flag in current runtime revision. |
| unknown_dc_log_path | `String` | File path for unknown-DC request logging (`null` disables file path). | | unknown_dc_log_path | `String \| null` | `"unknown-dc.txt"` | — | File path for unknown-DC request logging (`null` disables file path). |
| unknown_dc_file_log_enabled | `bool` | Enables unknown-DC file logging. | | unknown_dc_file_log_enabled | `bool` | `false` | — | Enables unknown-DC file logging. |
| log_level | `"debug" \| "verbose" \| "normal" \| "silent"` | Runtime logging verbosity. | | log_level | `"debug" \| "verbose" \| "normal" \| "silent"` | `"normal"` | — | Runtime logging verbosity. |
| disable_colors | `bool` | Disables ANSI colors in logs. | | disable_colors | `bool` | `false` | — | Disables ANSI colors in logs. |
| me_socks_kdf_policy | `"strict" \| "compat"` | SOCKS-bound KDF fallback policy for ME handshake. | | me_socks_kdf_policy | `"strict" \| "compat"` | `"strict"` | — | SOCKS-bound KDF fallback policy for ME handshake. |
| me_route_backpressure_base_timeout_ms | `u64` | Base backpressure timeout for route-channel send (ms). | | me_route_backpressure_base_timeout_ms | `u64` | `25` | Must be `> 0`. | Base backpressure timeout for route-channel send (ms). |
| me_route_backpressure_high_timeout_ms | `u64` | High backpressure timeout when queue occupancy exceeds watermark (ms). | | me_route_backpressure_high_timeout_ms | `u64` | `120` | Must be `>= me_route_backpressure_base_timeout_ms`. | High backpressure timeout when queue occupancy exceeds watermark (ms). |
| me_route_backpressure_high_watermark_pct | `u8` | Queue occupancy threshold (%) for high timeout mode. | | me_route_backpressure_high_watermark_pct | `u8` | `80` | `1..=100`. | Queue occupancy threshold (%) for high timeout mode. |
| me_health_interval_ms_unhealthy | `u64` | Health monitor interval while writer coverage is degraded (ms). | | me_health_interval_ms_unhealthy | `u64` | `1000` | Must be `> 0`. | Health monitor interval while writer coverage is degraded (ms). |
| me_health_interval_ms_healthy | `u64` | Health monitor interval while writer coverage is healthy (ms). | | me_health_interval_ms_healthy | `u64` | `3000` | Must be `> 0`. | Health monitor interval while writer coverage is healthy (ms). |
| me_admission_poll_ms | `u64` | Poll interval for conditional-admission checks (ms). | | me_admission_poll_ms | `u64` | `1000` | Must be `> 0`. | Poll interval for conditional-admission checks (ms). |
| me_warn_rate_limit_ms | `u64` | Cooldown for repetitive ME warning logs (ms). | | me_warn_rate_limit_ms | `u64` | `5000` | Must be `> 0`. | Cooldown for repetitive ME warning logs (ms). |
| me_route_no_writer_mode | `"async_recovery_failfast" \| "inline_recovery_legacy" \| "hybrid_async_persistent"` | Route behavior when no writer is immediately available. | | me_route_no_writer_mode | `"async_recovery_failfast" \| "inline_recovery_legacy" \| "hybrid_async_persistent"` | `"hybrid_async_persistent"` | — | Route behavior when no writer is immediately available. |
| me_route_no_writer_wait_ms | `u64` | Max wait in async-recovery failfast mode (ms). | | me_route_no_writer_wait_ms | `u64` | `250` | `10..=5000`. | Max wait in async-recovery failfast mode (ms). |
| me_route_inline_recovery_attempts | `u32` | Inline recovery attempts in legacy mode. | | me_route_inline_recovery_attempts | `u32` | `3` | Must be `> 0`. | Inline recovery attempts in legacy mode. |
| me_route_inline_recovery_wait_ms | `u64` | Max inline recovery wait in legacy mode (ms). | | me_route_inline_recovery_wait_ms | `u64` | `3000` | `10..=30000`. | Max inline recovery wait in legacy mode (ms). |
| fast_mode_min_tls_record | `usize` | Minimum TLS record size when fast-mode coalescing is enabled (`0` disables). | | fast_mode_min_tls_record | `usize` | `0` | — | Minimum TLS record size when fast-mode coalescing is enabled (`0` disables). |
| update_every | `u64` | Unified interval for config/secret updater tasks. | | update_every | `u64 \| null` | `300` | If set: must be `> 0`; if `null`: legacy fallback path is used. | Unified refresh interval for ME config and proxy-secret updater tasks. |
| me_reinit_every_secs | `u64` | Periodic ME pool reinitialization interval (seconds). | | me_reinit_every_secs | `u64` | `900` | Must be `> 0`. | Periodic interval for zero-downtime ME reinit cycle. |
| me_hardswap_warmup_delay_min_ms | `u64` | Minimum delay between hardswap warmup connects (ms). | | me_hardswap_warmup_delay_min_ms | `u64` | `1000` | Must be `<= me_hardswap_warmup_delay_max_ms`. | Lower bound for hardswap warmup dial spacing. |
| me_hardswap_warmup_delay_max_ms | `u64` | Maximum delay between hardswap warmup connects (ms). | | me_hardswap_warmup_delay_max_ms | `u64` | `2000` | Must be `> 0`. | Upper bound for hardswap warmup dial spacing. |
| me_hardswap_warmup_extra_passes | `u8` | Additional warmup passes per hardswap cycle. | | me_hardswap_warmup_extra_passes | `u8` | `3` | Must be within `[0, 10]`. | Additional warmup passes after the base pass in one hardswap cycle. |
| me_hardswap_warmup_pass_backoff_base_ms | `u64` | Base backoff between hardswap warmup passes (ms). | | me_hardswap_warmup_pass_backoff_base_ms | `u64` | `500` | Must be `> 0`. | Base backoff between extra hardswap warmup passes. |
| me_config_stable_snapshots | `u8` | Number of identical config snapshots required before apply. | | me_config_stable_snapshots | `u8` | `2` | Must be `> 0`. | Number of identical ME config snapshots required before apply. |
| me_config_apply_cooldown_secs | `u64` | Cooldown between applied ME map updates (seconds). | | me_config_apply_cooldown_secs | `u64` | `300` | none | Cooldown between applied ME endpoint-map updates. |
| me_snapshot_require_http_2xx | `bool` | Requires 2xx HTTP responses for applying config snapshots. | | me_snapshot_require_http_2xx | `bool` | `true` | — | Requires 2xx HTTP responses for applying config snapshots. |
| me_snapshot_reject_empty_map | `bool` | Rejects empty config snapshots. | | me_snapshot_reject_empty_map | `bool` | `true` | — | Rejects empty config snapshots. |
| me_snapshot_min_proxy_for_lines | `u32` | Minimum parsed `proxy_for` rows required to accept snapshot. | | me_snapshot_min_proxy_for_lines | `u32` | `1` | Must be `> 0`. | Minimum parsed `proxy_for` rows required to accept snapshot. |
| proxy_secret_stable_snapshots | `u8` | Number of identical secret snapshots required before runtime rotation. | | proxy_secret_stable_snapshots | `u8` | `2` | Must be `> 0`. | Number of identical proxy-secret snapshots required before rotation. |
| proxy_secret_rotate_runtime | `bool` | Enables runtime proxy-secret rotation from remote source. | | proxy_secret_rotate_runtime | `bool` | `true` | none | Enables runtime proxy-secret rotation from updater snapshots. |
| me_secret_atomic_snapshot | `bool` | Keeps selector and secret bytes from the same snapshot atomically. | | me_secret_atomic_snapshot | `bool` | `true` | — | Keeps selector and secret bytes from the same snapshot atomically. |
| proxy_secret_len_max | `usize` | Maximum allowed proxy-secret length (bytes). | | proxy_secret_len_max | `usize` | `256` | Must be within `[32, 4096]`. | Upper length limit for accepted proxy-secret bytes. |
| me_pool_drain_ttl_secs | `u64` | Drain TTL for stale ME writers after endpoint-map changes (seconds). | | me_pool_drain_ttl_secs | `u64` | `90` | none | Time window where stale writers remain fallback-eligible after map change. |
| me_pool_drain_threshold | `u64` | Max draining stale writers before batch force-close (`0` disables threshold cleanup). | | me_pool_drain_threshold | `u64` | `128` | — | Max draining stale writers before batch force-close (`0` disables threshold cleanup). |
| me_bind_stale_mode | `"never" \| "ttl" \| "always"` | Policy for new binds on stale draining writers. | | me_pool_drain_soft_evict_enabled | `bool` | `true` | — | Enables gradual soft-eviction of stale writers during drain/reinit instead of immediate hard close. |
| me_bind_stale_ttl_secs | `u64` | TTL for stale bind allowance when stale mode is `ttl`. | | me_pool_drain_soft_evict_grace_secs | `u64` | `30` | `0..=3600`. | Grace period before stale writers become soft-evict candidates. |
| me_pool_min_fresh_ratio | `f32` | Minimum desired-DC fresh coverage ratio before draining stale writers. | | me_pool_drain_soft_evict_per_writer | `u8` | `1` | `1..=16`. | Maximum stale routes soft-evicted per writer in one eviction pass. |
| me_reinit_drain_timeout_secs | `u64` | Force-close timeout for stale writers after endpoint-map changes (`0` disables force-close). | | me_pool_drain_soft_evict_budget_per_core | `u16` | `8` | `1..=64`. | Per-core budget limiting aggregate soft-eviction work per pass. |
| proxy_secret_auto_reload_secs | `u64` | Deprecated legacy secret reload interval (fallback when `update_every` is not set). | | me_pool_drain_soft_evict_cooldown_ms | `u64` | `5000` | Must be `> 0`. | Cooldown between consecutive soft-eviction passes (ms). |
| proxy_config_auto_reload_secs | `u64` | Deprecated legacy config reload interval (fallback when `update_every` is not set). | | me_bind_stale_mode | `"never" \| "ttl" \| "always"` | `"ttl"` | — | Policy for new binds on stale draining writers. |
| me_reinit_singleflight | `bool` | Serializes ME reinit cycles across trigger sources. | | me_bind_stale_ttl_secs | `u64` | `90` | — | TTL for stale bind allowance when stale mode is `ttl`. |
| me_reinit_trigger_channel | `usize` | Trigger queue capacity for reinit scheduler. | | me_pool_min_fresh_ratio | `f32` | `0.8` | Must be within `[0.0, 1.0]`. | Minimum fresh desired-DC coverage ratio before stale writers are drained. |
| me_reinit_coalesce_window_ms | `u64` | Trigger coalescing window before starting reinit (ms). | | me_reinit_drain_timeout_secs | `u64` | `120` | `0` disables force-close; if `> 0` and `< me_pool_drain_ttl_secs`, runtime bumps it to TTL. | Force-close timeout for draining stale writers (`0` keeps indefinite draining). |
| me_deterministic_writer_sort | `bool` | Enables deterministic candidate sort for writer binding path. | | proxy_secret_auto_reload_secs | `u64` | `3600` | Deprecated. Use `general.update_every`. | Deprecated legacy secret reload interval (fallback when `update_every` is not set). |
| me_writer_pick_mode | `"sorted_rr" \| "p2c"` | Writer selection mode for route bind path. | | proxy_config_auto_reload_secs | `u64` | `3600` | Deprecated. Use `general.update_every`. | Deprecated legacy config reload interval (fallback when `update_every` is not set). |
| me_writer_pick_sample_size | `u8` | Number of candidates sampled by picker in `p2c` mode. | | me_reinit_singleflight | `bool` | `true` | — | Serializes ME reinit cycles across trigger sources. |
| ntp_check | `bool` | Enables NTP drift check at startup. | | me_reinit_trigger_channel | `usize` | `64` | Must be `> 0`. | Trigger queue capacity for reinit scheduler. |
| ntp_servers | `String[]` | NTP servers used for drift check. | | me_reinit_coalesce_window_ms | `u64` | `200` | — | Trigger coalescing window before starting reinit (ms). |
| auto_degradation_enabled | `bool` | Enables automatic degradation from ME to direct DC. | | me_deterministic_writer_sort | `bool` | `true` | — | Enables deterministic candidate sort for writer binding path. |
| degradation_min_unavailable_dc_groups | `u8` | Minimum unavailable ME DC groups required before degrading. | | me_writer_pick_mode | `"sorted_rr" \| "p2c"` | `"p2c"` | — | Writer selection mode for route bind path. |
| me_writer_pick_sample_size | `u8` | `3` | `2..=4`. | Number of candidates sampled by picker in `p2c` mode. |
| ntp_check | `bool` | `true` | — | Enables NTP drift check at startup. |
| ntp_servers | `String[]` | `["pool.ntp.org"]` | — | NTP servers used for drift check. |
| auto_degradation_enabled | `bool` | `true` | none | Reserved compatibility flag in current runtime revision. |
| degradation_min_unavailable_dc_groups | `u8` | `2` | none | Reserved compatibility threshold in current runtime revision. |
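For orientation, the ME-related knobs above combine into a config fragment like the following sketch. Values are the documented defaults from this table; the selection of keys is illustrative, not exhaustive:

```toml
[general]
me_keepalive_enabled = true
me_keepalive_interval_secs = 8      # base interval; jitter below spreads the pings
me_keepalive_jitter_secs = 2
me_floor_mode = "adaptive"          # writer floor policy: "static" | "adaptive"
me_writer_pick_mode = "p2c"         # power-of-two-choices writer selection
me_writer_pick_sample_size = 3      # candidates sampled in p2c mode (2..=4)
update_every = 300                  # unified config/secret refresh interval
me_reinit_every_secs = 900          # zero-downtime ME reinit cycle
```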
## [general.modes]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| classic | `bool` | `false` | — | Enables classic MTProxy mode. |
| secure | `bool` | `false` | — | Enables secure mode. |
| tls | `bool` | `true` | — | Enables TLS mode. |
## [general.links]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| show | `"*" \| String[]` | `"*"` | — | Selects users whose tg:// links are shown at startup. |
| public_host | `String \| null` | `null` | — | Public hostname/IP override for generated tg:// links. |
| public_port | `u16 \| null` | `null` | — | Public port override for generated tg:// links. |
## [general.telemetry]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| core_enabled | `bool` | `true` | — | Enables core hot-path telemetry counters. |
| user_enabled | `bool` | `true` | — | Enables per-user telemetry counters. |
| me_level | `"silent" \| "normal" \| "debug"` | `"normal"` | — | Middle-End telemetry verbosity level. |
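The three small sections above combine into a fragment like this sketch (documented defaults; the commented overrides are placeholders):

```toml
[general.modes]
classic = false
secure = false
tls = true

[general.links]
show = "*"
# public_host = "proxy.example.org"   # placeholder override for generated tg:// links
# public_port = 443

[general.telemetry]
core_enabled = true
user_enabled = true
me_level = "normal"
```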
## [network]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| ipv4 | `bool` | `true` | — | Enables IPv4 networking. |
| ipv6 | `bool` | `false` | — | Enables/disables IPv6 networking. |
| prefer | `u8` | `4` | Must be `4` or `6`. | Preferred IP family for selection (`4` or `6`). |
| multipath | `bool` | `false` | — | Enables multipath behavior where supported. |
| stun_use | `bool` | `true` | — | Global STUN switch; when `false`, STUN probing path is disabled. |
| stun_servers | `String[]` | Built-in STUN list (13 hosts) | Deduplicated; empty values are removed. | Primary STUN server list for NAT/public endpoint discovery. |
| stun_tcp_fallback | `bool` | `true` | — | Enables TCP fallback for STUN when UDP path is blocked. |
| http_ip_detect_urls | `String[]` | `["https://ifconfig.me/ip", "https://api.ipify.org"]` | — | HTTP fallback endpoints for public IP detection when STUN is unavailable. |
| cache_public_ip_path | `String` | `"cache/public_ip.txt"` | — | File path for caching detected public IP. |
| dns_overrides | `String[]` | `[]` | Must match `host:port:ip`; IPv6 must be bracketed. | Runtime DNS overrides in `host:port:ip` format. |
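As a sketch, with the `dns_overrides` entries shown purely as format examples (documentation-range addresses, not real mappings):

```toml
[network]
ipv4 = true
ipv6 = false
prefer = 4
stun_use = true
stun_tcp_fallback = true
# host:port:ip; an IPv6 address must be bracketed
dns_overrides = [
  "example.org:443:203.0.113.10",
  "example.org:443:[2001:db8::10]",
]
```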
## [server]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| port | `u16` | `443` | — | Main proxy listen port. |
| listen_addr_ipv4 | `String \| null` | `"0.0.0.0"` | — | IPv4 bind address for TCP listener. |
| listen_addr_ipv6 | `String \| null` | `"::"` | — | IPv6 bind address for TCP listener. |
| listen_unix_sock | `String \| null` | `null` | — | Unix socket path for listener. |
| listen_unix_sock_perm | `String \| null` | `null` | — | Unix socket permissions in octal string (e.g., `"0666"`). |
| listen_tcp | `bool \| null` | `null` (auto) | — | Explicit TCP listener enable/disable override. |
| proxy_protocol | `bool` | `false` | — | Enables HAProxy PROXY protocol parsing on incoming client connections. |
| proxy_protocol_header_timeout_ms | `u64` | `500` | Must be `> 0`. | Timeout for PROXY protocol header read/parse (ms). |
| metrics_port | `u16 \| null` | `null` | — | Metrics endpoint port (enables metrics listener). |
| metrics_listen | `String \| null` | `null` | — | Full metrics bind address (`IP:PORT`), overrides `metrics_port`. |
| metrics_whitelist | `IpNetwork[]` | `["127.0.0.1/32", "::1/128"]` | — | CIDR whitelist for metrics endpoint access. |
| max_connections | `u32` | `10000` | — | Max concurrent client connections (`0` = unlimited). |
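A minimal sketch with the metrics listener explicitly enabled. Port `9090` here only mirrors the port published in the Docker example later in this changeset; any free port works:

```toml
[server]
port = 443
listen_addr_ipv4 = "0.0.0.0"
proxy_protocol = false
max_connections = 10000
metrics_port = 9090                               # or metrics_listen = "IP:PORT" to override
metrics_whitelist = ["127.0.0.1/32", "::1/128"]
```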
## [server.api]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| enabled | `bool` | `true` | — | Enables control-plane REST API. |
| listen | `String` | `"0.0.0.0:9091"` | Must be valid `IP:PORT`. | API bind address in `IP:PORT` format. |
| whitelist | `IpNetwork[]` | `["127.0.0.0/8"]` | — | CIDR whitelist allowed to access API. |
| auth_header | `String` | `""` | — | Exact expected `Authorization` header value (empty = disabled). |
| request_body_limit_bytes | `usize` | `65536` | Must be `> 0`. | Maximum accepted HTTP request body size. |
| minimal_runtime_enabled | `bool` | `true` | — | Enables minimal runtime snapshots endpoint logic. |
| minimal_runtime_cache_ttl_ms | `u64` | `1000` | `0..=60000`. | Cache TTL for minimal runtime snapshots (ms; `0` disables cache). |
| runtime_edge_enabled | `bool` | `false` | — | Enables runtime edge endpoints. |
| runtime_edge_cache_ttl_ms | `u64` | `1000` | `0..=60000`. | Cache TTL for runtime edge aggregation payloads (ms). |
| runtime_edge_top_n | `usize` | `10` | `1..=1000`. | Top-N size for edge connection leaderboard. |
| runtime_edge_events_capacity | `usize` | `256` | `16..=4096`. | Ring-buffer capacity for runtime edge events. |
| read_only | `bool` | `false` | — | Rejects mutating API endpoints when enabled. |
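A sketch of a locked-down API block, binding to loopback as the install script below does; the `auth_header` value is a placeholder (an empty string disables the check):

```toml
[server.api]
enabled = true
listen = "127.0.0.1:9091"
whitelist = ["127.0.0.1/32"]
auth_header = "Bearer change-me"   # exact-match Authorization header value
read_only = true                   # reject mutating endpoints
```

With the API up, the install script in this changeset queries `http://127.0.0.1:9091/v1/users` to print connection links.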
## [[server.listeners]]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| ip | `IpAddr` | — | — | Listener bind IP. |
| announce | `String \| null` | — | — | Public IP/domain announced in proxy links (priority over `announce_ip`). |
| announce_ip | `IpAddr \| null` | — | — | Deprecated legacy announce IP (migrated to `announce` if needed). |
| proxy_protocol | `bool \| null` | `null` | — | Per-listener override for PROXY protocol enable flag. |
| reuse_allow | `bool` | `false` | — | Enables `SO_REUSEPORT` for multi-instance bind sharing. |
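An illustrative two-listener layout (addresses and hostname are documentation placeholders):

```toml
[[server.listeners]]
ip = "203.0.113.10"
announce = "proxy.example.org"   # preferred over the deprecated announce_ip
reuse_allow = true               # SO_REUSEPORT for multi-instance bind sharing

[[server.listeners]]
ip = "2001:db8::10"
```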
## [timeouts]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| client_handshake | `u64` | `30` | — | Client handshake timeout. |
| tg_connect | `u64` | `10` | — | Upstream Telegram connect timeout. |
| client_keepalive | `u64` | `15` | — | Client keepalive timeout. |
| client_ack | `u64` | `90` | — | Client ACK timeout. |
| me_one_retry | `u8` | `12` | — | Fast reconnect attempts budget for single-endpoint DC scenarios. |
| me_one_timeout_ms | `u64` | `1200` | — | Timeout in milliseconds for each quick single-endpoint reconnect attempt. |
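Spelled out with the documented defaults. The `_ms` key is explicitly milliseconds; the bare `u64` timeouts are presumably seconds, by contrast:

```toml
[timeouts]
client_handshake = 30
tg_connect = 10
client_keepalive = 15
client_ack = 90
me_one_retry = 12         # quick reconnect budget for single-endpoint DCs
me_one_timeout_ms = 1200  # per-attempt timeout for those quick reconnects
```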
## [censorship]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| tls_domain | `String` | `"petrovich.ru"` | — | Primary TLS domain used in fake TLS handshake profile. |
| tls_domains | `String[]` | `[]` | — | Additional TLS domains for generating multiple links. |
| mask | `bool` | `true` | — | Enables masking/fronting relay mode. |
| mask_host | `String \| null` | `null` | — | Upstream mask host for TLS fronting relay. |
| mask_port | `u16` | `443` | — | Upstream mask port for TLS fronting relay. |
| mask_unix_sock | `String \| null` | `null` | — | Unix socket path for mask backend instead of TCP host/port. |
| fake_cert_len | `usize` | `2048` | — | Length of synthetic certificate payload when emulation data is unavailable. |
| tls_emulation | `bool` | `true` | — | Enables certificate/TLS behavior emulation from cached real fronts. |
| tls_front_dir | `String` | `"tlsfront"` | — | Directory path for TLS front cache storage. |
| server_hello_delay_min_ms | `u64` | `0` | — | Minimum server_hello delay for anti-fingerprint behavior (ms). |
| server_hello_delay_max_ms | `u64` | `0` | — | Maximum server_hello delay for anti-fingerprint behavior (ms). |
| tls_new_session_tickets | `u8` | `0` | — | Number of `NewSessionTicket` messages to emit after handshake. |
| tls_full_cert_ttl_secs | `u64` | `90` | — | TTL for sending full cert payload per (domain, client IP) tuple. |
| alpn_enforce | `bool` | `true` | — | Enforces ALPN echo behavior based on client preference. |
| mask_proxy_protocol | `u8` | `0` | — | PROXY protocol mode for mask backend (`0` disabled, `1` v1, `2` v2). |
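A typical fragment with the documented defaults; the commented explicit fronting backend is a placeholder:

```toml
[censorship]
tls_domain = "petrovich.ru"
mask = true
# mask_host = "front.example.org"   # placeholder explicit fronting backend
tls_emulation = true
tls_front_dir = "tlsfront"
```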
## [access]
| Parameter | Type | Default | Constraints / validation | TOML shape example | Description |
|---|---|---|---|---|---|
| users | `Map<String, String>` | `{"default": "000…000"}` | Secret must be 32 hex characters. | `[access.users]`<br>`user = "32-hex secret"`<br>`user2 = "32-hex secret"` | User credentials map used for client authentication. |
| user_ad_tags | `Map<String, String>` | `{}` | Every value must be exactly 32 hex characters. | `[access.user_ad_tags]`<br>`user = "32-hex ad_tag"` | Per-user ad tags used as override over `general.ad_tag`. |
| user_max_tcp_conns | `Map<String, usize>` | `{}` | — | `[access.user_max_tcp_conns]`<br>`user = 500` | Per-user maximum concurrent TCP connections. |
| user_expirations | `Map<String, DateTime<Utc>>` | `{}` | Timestamp must be valid RFC3339/ISO-8601 datetime. | `[access.user_expirations]`<br>`user = "2026-12-31T23:59:59Z"` | Per-user account expiration timestamps. |
| user_data_quota | `Map<String, u64>` | `{}` | — | `[access.user_data_quota]`<br>`user = 1073741824` | Per-user traffic quota in bytes. |
| user_max_unique_ips | `Map<String, usize>` | `{}` | — | `[access.user_max_unique_ips]`<br>`user = 16` | Per-user unique source IP limits. |
| user_max_unique_ips_global_each | `usize` | `0` | — | `user_max_unique_ips_global_each = 0` | Global fallback used when `[access.user_max_unique_ips]` has no per-user override. |
| user_max_unique_ips_mode | `"active_window" \| "time_window" \| "combined"` | `"active_window"` | — | `user_max_unique_ips_mode = "active_window"` | Unique source IP limit accounting mode. |
| user_max_unique_ips_window_secs | `u64` | `30` | Must be `> 0`. | `user_max_unique_ips_window_secs = 30` | Window size (seconds) used by unique-IP accounting modes that use time windows. |
| replay_check_len | `usize` | `65536` | — | `replay_check_len = 65536` | Replay-protection storage length. |
| replay_window_secs | `u64` | `1800` | — | `replay_window_secs = 1800` | Replay-protection window in seconds. |
| ignore_time_skew | `bool` | `false` | — | `ignore_time_skew = false` | Disables client/server timestamp skew checks in replay validation when enabled. |
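Assembling the shape examples from the table into one fragment (the user name and secret are placeholders; the secret must be exactly 32 hex characters):

```toml
[access]
user_max_unique_ips_mode = "active_window"
user_max_unique_ips_window_secs = 30
replay_window_secs = 1800

[access.users]
alice = "00112233445566778899aabbccddeeff"

[access.user_max_tcp_conns]
alice = 500

[access.user_expirations]
alice = "2026-12-31T23:59:59Z"
```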
## [[upstreams]]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| type | `"direct" \| "socks4" \| "socks5"` | — | Required field. | Upstream transport type selector. |
| weight | `u16` | `1` | — | Base weight used by weighted-random upstream selection. |
| enabled | `bool` | `true` | — | Disabled entries are excluded from upstream selection at runtime. |
| scopes | `String` | `""` | — | Comma-separated scope tags used for request-level upstream filtering. |
| interface | `String \| null` | `null` | Optional; type-specific runtime rules apply. | Optional outbound interface/local bind hint (supported with type-specific rules). |
| bind_addresses | `String[] \| null` | `null` | Applies to `type = "direct"`. | Optional explicit local source bind addresses for `type = "direct"`. |
| address | `String` | — | Required for `type = "socks4"` and `type = "socks5"`. | SOCKS server endpoint (`host:port` or `ip:port`) for SOCKS upstream types. |
| user_id | `String \| null` | `null` | Only for `type = "socks4"`. | SOCKS4 CONNECT user ID (`type = "socks4"` only). |
| username | `String \| null` | `null` | Only for `type = "socks5"`. | SOCKS5 username (`type = "socks5"` only). |
| password | `String \| null` | `null` | Only for `type = "socks5"`. | SOCKS5 password (`type = "socks5"` only). |
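An illustrative mixed pool (the SOCKS endpoint and credentials are placeholders):

```toml
[[upstreams]]
type = "direct"
weight = 3                   # picked roughly 3x as often as the SOCKS entry

[[upstreams]]
type = "socks5"
address = "127.0.0.1:1080"
username = "proxyuser"
password = "change-me"
weight = 1
```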

View File

@@ -181,6 +181,8 @@ docker compose down
docker build -t telemt:local .
docker run --name telemt --restart unless-stopped \
  -p 443:443 \
  -p 9090:9090 \
  -p 9091:9091 \
  -e RUST_LOG=info \
  -v "$PWD/config.toml:/app/config.toml:ro" \
  --read-only \

View File

@@ -183,6 +183,8 @@ docker compose down
docker build -t telemt:local .
docker run --name telemt --restart unless-stopped \
  -p 443:443 \
  -p 9090:9090 \
  -p 9091:9091 \
  -e RUST_LOG=info \
  -v "$PWD/config.toml:/app/config.toml:ro" \
  --read-only \

View File

@@ -3,113 +3,554 @@ set -eu
REPO="${REPO:-telemt/telemt}"
BIN_NAME="${BIN_NAME:-telemt}"
INSTALL_DIR="${INSTALL_DIR:-/bin}"
CONFIG_DIR="${CONFIG_DIR:-/etc/telemt}"
CONFIG_FILE="${CONFIG_FILE:-${CONFIG_DIR}/telemt.toml}"
WORK_DIR="${WORK_DIR:-/opt/telemt}"
TLS_DOMAIN="${TLS_DOMAIN:-petrovich.ru}"
SERVICE_NAME="telemt"
TEMP_DIR=""
SUDO=""
CONFIG_PARENT_DIR=""
SERVICE_START_FAILED=0
ACTION="install"
TARGET_VERSION="${VERSION:-latest}"
while [ $# -gt 0 ]; do
case "$1" in
-h|--help) ACTION="help"; shift ;;
uninstall|--uninstall)
if [ "$ACTION" != "purge" ]; then ACTION="uninstall"; fi
shift ;;
purge|--purge) ACTION="purge"; shift ;;
install|--install) ACTION="install"; shift ;;
-*) printf '[ERROR] Unknown option: %s\n' "$1" >&2; exit 1 ;;
*)
if [ "$ACTION" = "install" ]; then TARGET_VERSION="$1"
else printf '[WARNING] Ignoring extra argument: %s\n' "$1" >&2; fi
shift ;;
esac
done
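# Illustrative invocations accepted by the argument loop above
# (matching the show_help text below):
#   sh install.sh              install the latest release
#   sh install.sh 3.3.15       install a specific version
#   sh install.sh uninstall    remove the binary and service, keep config and user
#   sh install.sh purge        remove everything, including config, data, and user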
say() {
if [ "$#" -eq 0 ] || [ -z "${1:-}" ]; then
printf '\n'
else
printf '[INFO] %s\n' "$*"
fi
}
die() { printf '[ERROR] %s\n' "$*" >&2; exit 1; }
write_root() { $SUDO sh -c 'cat > "$1"' _ "$1"; }
cleanup() {
if [ -n "${TEMP_DIR:-}" ] && [ -d "$TEMP_DIR" ]; then
rm -rf -- "$TEMP_DIR"
fi
}
trap cleanup EXIT INT TERM
show_help() {
say "Usage: $0 [ <version> | install | uninstall | purge | --help ]"
say " <version> Install specific version (e.g. 3.3.15, default: latest)"
say " install Install the latest version"
say " uninstall Remove the binary and service (keeps config and user)"
say " purge Remove everything including configuration, data, and user"
exit 0
}
check_os_entity() {
if command -v getent >/dev/null 2>&1; then getent "$1" "$2" >/dev/null 2>&1
else grep -q "^${2}:" "/etc/$1" 2>/dev/null; fi
}
normalize_path() {
printf '%s\n' "$1" | tr -s '/' | sed 's|/$||; s|^$|/|'
}
get_realpath() {
path_in="$1"
case "$path_in" in /*) ;; *) path_in="$(pwd)/$path_in" ;; esac

if command -v realpath >/dev/null 2>&1; then
if realpath_out="$(realpath -m "$path_in" 2>/dev/null)"; then
printf '%s\n' "$realpath_out"
return
fi
fi
if command -v readlink >/dev/null 2>&1; then
resolved_path="$(readlink -f "$path_in" 2>/dev/null || true)"
if [ -n "$resolved_path" ]; then
printf '%s\n' "$resolved_path"
return
fi
fi
d="${path_in%/*}"; b="${path_in##*/}"
if [ -z "$d" ]; then d="/"; fi
if [ "$d" = "$path_in" ]; then d="/"; b="$path_in"; fi
if [ -d "$d" ]; then
abs_d="$(cd "$d" >/dev/null 2>&1 && pwd || true)"
if [ -n "$abs_d" ]; then
if [ "$b" = "." ] || [ -z "$b" ]; then printf '%s\n' "$abs_d"
elif [ "$abs_d" = "/" ]; then printf '/%s\n' "$b"
else printf '%s/%s\n' "$abs_d" "$b"; fi
else
normalize_path "$path_in"
fi
else
normalize_path "$path_in"
fi
}
get_svc_mgr() {
if command -v systemctl >/dev/null 2>&1 && [ -d /run/systemd/system ]; then echo "systemd"
elif command -v rc-service >/dev/null 2>&1; then echo "openrc"
else echo "none"; fi
}
verify_common() {
[ -n "$BIN_NAME" ] || die "BIN_NAME cannot be empty."
[ -n "$INSTALL_DIR" ] || die "INSTALL_DIR cannot be empty."
[ -n "$CONFIG_DIR" ] || die "CONFIG_DIR cannot be empty."
[ -n "$CONFIG_FILE" ] || die "CONFIG_FILE cannot be empty."
case "${INSTALL_DIR}${CONFIG_DIR}${WORK_DIR}${CONFIG_FILE}" in
*[!a-zA-Z0-9_./-]*) die "Invalid characters in paths. Only alphanumeric, _, ., -, and / allowed." ;;
esac
case "$TARGET_VERSION" in *[!a-zA-Z0-9_.-]*) die "Invalid characters in version." ;; esac
case "$BIN_NAME" in *[!a-zA-Z0-9_-]*) die "Invalid characters in BIN_NAME." ;; esac
INSTALL_DIR="$(get_realpath "$INSTALL_DIR")"
CONFIG_DIR="$(get_realpath "$CONFIG_DIR")"
WORK_DIR="$(get_realpath "$WORK_DIR")"
CONFIG_FILE="$(get_realpath "$CONFIG_FILE")"
CONFIG_PARENT_DIR="${CONFIG_FILE%/*}"
if [ -z "$CONFIG_PARENT_DIR" ]; then CONFIG_PARENT_DIR="/"; fi
if [ "$CONFIG_PARENT_DIR" = "$CONFIG_FILE" ]; then CONFIG_PARENT_DIR="."; fi
if [ "$(id -u)" -eq 0 ]; then
SUDO=""
else
command -v sudo >/dev/null 2>&1 || die "This script requires root or sudo. Neither found."
SUDO="sudo"
if ! sudo -n true 2>/dev/null; then
if ! [ -t 0 ]; then
die "sudo requires a password, but no TTY detected. Aborting to prevent hang."
fi
fi
fi
if [ -n "$SUDO" ]; then
if $SUDO sh -c '[ -d "$1" ]' _ "$CONFIG_FILE"; then
die "Safety check failed: CONFIG_FILE '$CONFIG_FILE' is a directory."
fi
elif [ -d "$CONFIG_FILE" ]; then
die "Safety check failed: CONFIG_FILE '$CONFIG_FILE' is a directory."
fi
for path in "$CONFIG_DIR" "$CONFIG_PARENT_DIR" "$WORK_DIR"; do
check_path="$(get_realpath "$path")"
case "$check_path" in
/|/bin|/sbin|/usr|/usr/bin|/usr/sbin|/usr/local|/usr/local/bin|/usr/local/sbin|/usr/local/etc|/usr/local/share|/etc|/var|/var/lib|/var/log|/var/run|/home|/root|/tmp|/lib|/lib64|/opt|/run|/boot|/dev|/sys|/proc)
die "Safety check failed: '$path' (resolved to '$check_path') is a critical system directory." ;;
esac
done
check_install_dir="$(get_realpath "$INSTALL_DIR")"
case "$check_install_dir" in
/|/etc|/var|/home|/root|/tmp|/usr|/usr/local|/opt|/boot|/dev|/sys|/proc|/run)
die "Safety check failed: INSTALL_DIR '$INSTALL_DIR' is a critical system directory." ;;
esac
for cmd in id uname grep find rm chown chmod mv mktemp mkdir tr dd sed ps head sleep cat tar gzip rmdir; do
command -v "$cmd" >/dev/null 2>&1 || die "Required command not found: $cmd"
done
}
verify_install_deps() {
command -v curl >/dev/null 2>&1 || command -v wget >/dev/null 2>&1 || die "Neither curl nor wget is installed."
command -v cp >/dev/null 2>&1 || command -v install >/dev/null 2>&1 || die "Need cp or install"
if ! command -v setcap >/dev/null 2>&1; then
if command -v apk >/dev/null 2>&1; then
$SUDO apk add --no-cache libcap-utils >/dev/null 2>&1 || $SUDO apk add --no-cache libcap >/dev/null 2>&1 || true
elif command -v apt-get >/dev/null 2>&1; then
$SUDO apt-get update -q >/dev/null 2>&1 || true
$SUDO apt-get install -y -q libcap2-bin >/dev/null 2>&1 || true
elif command -v dnf >/dev/null 2>&1; then $SUDO dnf install -y -q libcap >/dev/null 2>&1 || true
elif command -v yum >/dev/null 2>&1; then $SUDO yum install -y -q libcap >/dev/null 2>&1 || true
fi
fi
}
detect_arch() {
sys_arch="$(uname -m)"
case "$sys_arch" in
x86_64|amd64) echo "x86_64" ;;
aarch64|arm64) echo "aarch64" ;;
*) die "Unsupported architecture: $sys_arch" ;;
esac
}
detect_libc() {
for f in /lib/ld-musl-*.so.* /lib64/ld-musl-*.so.*; do
if [ -e "$f" ]; then echo "musl"; return 0; fi
done
if grep -qE '^ID="?alpine"?' /etc/os-release 2>/dev/null; then echo "musl"; return 0; fi
if command -v ldd >/dev/null 2>&1 && (ldd --version 2>&1 || true) | grep -qi musl; then echo "musl"; return 0; fi
echo "gnu"
}
fetch_file() {
if command -v curl >/dev/null 2>&1; then curl -fsSL "$1" -o "$2"
else wget -q -O "$2" "$1"; fi
}

ensure_user_group() {
nologin_bin="$(command -v nologin 2>/dev/null || command -v false 2>/dev/null || echo /bin/false)"
if ! check_os_entity group telemt; then
if command -v groupadd >/dev/null 2>&1; then $SUDO groupadd -r telemt
elif command -v addgroup >/dev/null 2>&1; then $SUDO addgroup -S telemt
else die "Cannot create group"; fi
fi
if ! check_os_entity passwd telemt; then
if command -v useradd >/dev/null 2>&1; then
$SUDO useradd -r -g telemt -d "$WORK_DIR" -s "$nologin_bin" -c "Telemt Proxy" telemt
elif command -v adduser >/dev/null 2>&1; then
if adduser --help 2>&1 | grep -q -- '-S'; then
$SUDO adduser -S -D -H -h "$WORK_DIR" -s "$nologin_bin" -G telemt telemt
else
$SUDO adduser --system --home "$WORK_DIR" --shell "$nologin_bin" --no-create-home --ingroup telemt --disabled-password telemt
fi
else die "Cannot create user"; fi
fi
}
setup_dirs() {
$SUDO mkdir -p "$WORK_DIR" "$CONFIG_DIR" "$CONFIG_PARENT_DIR" || die "Failed to create directories"
$SUDO chown telemt:telemt "$WORK_DIR" && $SUDO chmod 750 "$WORK_DIR"
$SUDO chown root:telemt "$CONFIG_DIR" && $SUDO chmod 750 "$CONFIG_DIR"
if [ "$CONFIG_PARENT_DIR" != "$CONFIG_DIR" ] && [ "$CONFIG_PARENT_DIR" != "." ] && [ "$CONFIG_PARENT_DIR" != "/" ]; then
$SUDO chown root:telemt "$CONFIG_PARENT_DIR" && $SUDO chmod 750 "$CONFIG_PARENT_DIR"
fi
}
stop_service() {
svc="$(get_svc_mgr)"
if [ "$svc" = "systemd" ] && systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then
$SUDO systemctl stop "$SERVICE_NAME" 2>/dev/null || true
elif [ "$svc" = "openrc" ] && rc-service "$SERVICE_NAME" status >/dev/null 2>&1; then
$SUDO rc-service "$SERVICE_NAME" stop 2>/dev/null || true
fi
}
install_binary() {
bin_src="$1"; bin_dst="$2"
if [ -e "$INSTALL_DIR" ] && [ ! -d "$INSTALL_DIR" ]; then
die "'$INSTALL_DIR' is not a directory."
fi
if [ -w "$INSTALL_DIR" ] || { [ ! -e "$INSTALL_DIR" ] && [ -w "$(dirname "$INSTALL_DIR")" ]; }; then $SUDO mkdir -p "$INSTALL_DIR" || die "Failed to create install directory"
mkdir -p "$INSTALL_DIR" if command -v install >/dev/null 2>&1; then
install -m 0755 "$src" "$dst" $SUDO install -m 0755 "$bin_src" "$bin_dst" || die "Failed to install binary"
elif command -v sudo >/dev/null 2>&1; then
sudo mkdir -p "$INSTALL_DIR"
sudo install -m 0755 "$src" "$dst"
else else
die "cannot write to $INSTALL_DIR and sudo is not available" $SUDO rm -f "$bin_dst" 2>/dev/null || true
$SUDO cp "$bin_src" "$bin_dst" && $SUDO chmod 0755 "$bin_dst" || die "Failed to copy binary"
fi
$SUDO sh -c '[ -x "$1" ]' _ "$bin_dst" || die "Binary not executable: $bin_dst"
if command -v setcap >/dev/null 2>&1; then
$SUDO setcap cap_net_bind_service=+ep "$bin_dst" 2>/dev/null || true
fi
}
generate_secret() {
secret="$(command -v openssl >/dev/null 2>&1 && openssl rand -hex 16 2>/dev/null || true)"
if [ -z "$secret" ] || [ "${#secret}" -ne 32 ]; then
if command -v od >/dev/null 2>&1; then secret="$(dd if=/dev/urandom bs=16 count=1 2>/dev/null | od -An -tx1 | tr -d ' \n')"
elif command -v hexdump >/dev/null 2>&1; then secret="$(dd if=/dev/urandom bs=16 count=1 2>/dev/null | hexdump -e '1/1 "%02x"')"
elif command -v xxd >/dev/null 2>&1; then secret="$(dd if=/dev/urandom bs=16 count=1 2>/dev/null | xxd -p | tr -d '\n')"
fi
fi
if [ "${#secret}" -eq 32 ]; then echo "$secret"; else return 1; fi
}
ARCH="$(detect_arch)" generate_config_content() {
OS="$(detect_os)" escaped_tls_domain="$(printf '%s\n' "$TLS_DOMAIN" | tr -d '[:cntrl:]' | sed 's/\\/\\\\/g; s/"/\\"/g')"
if [ "$OS" != "linux" ]; then cat <<EOF
case "$OS" in [general]
openbsd) use_middle_proxy = false
die "install.sh installs only Linux release artifacts. On OpenBSD, build from source (see docs/OPENBSD.en.md)."
;; [general.modes]
*) classic = false
die "unsupported operating system for install.sh: $OS" secure = false
;; tls = true
esac
[server]
port = 443
[server.api]
enabled = true
listen = "127.0.0.1:9091"
whitelist = ["127.0.0.1/32"]
[censorship]
tls_domain = "${escaped_tls_domain}"
[access.users]
hello = "$1"
EOF
}
install_config() {
if [ -n "$SUDO" ]; then
if $SUDO sh -c '[ -f "$1" ]' _ "$CONFIG_FILE"; then
say " -> Config already exists at $CONFIG_FILE. Skipping creation."
return 0
fi
elif [ -f "$CONFIG_FILE" ]; then
say " -> Config already exists at $CONFIG_FILE. Skipping creation."
return 0
fi
toml_secret="$(generate_secret)" || die "Failed to generate secret."
generate_config_content "$toml_secret" | write_root "$CONFIG_FILE" || die "Failed to install config"
$SUDO chown root:telemt "$CONFIG_FILE" && $SUDO chmod 640 "$CONFIG_FILE"

say " -> Config created successfully."
say " -> Generated secret for default user 'hello': $toml_secret"
}
generate_systemd_content() {
cat <<EOF
[Unit]
Description=Telemt
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=telemt
Group=telemt
WorkingDirectory=$WORK_DIR
ExecStart="${INSTALL_DIR}/${BIN_NAME}" "${CONFIG_FILE}"
Restart=on-failure
LimitNOFILE=65536
AmbientCapabilities=CAP_NET_BIND_SERVICE
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
[Install]
WantedBy=multi-user.target
EOF
}
generate_openrc_content() {
cat <<EOF
#!/sbin/openrc-run
name="$SERVICE_NAME"
description="Telemt Proxy Service"
command="${INSTALL_DIR}/${BIN_NAME}"
command_args="${CONFIG_FILE}"
command_background=true
command_user="telemt:telemt"
pidfile="/run/\${RC_SVCNAME}.pid"
directory="${WORK_DIR}"
rc_ulimit="-n 65536"
depend() { need net; use logger; }
EOF
}
install_service() {
svc="$(get_svc_mgr)"
if [ "$svc" = "systemd" ]; then
generate_systemd_content | write_root "/etc/systemd/system/${SERVICE_NAME}.service"
$SUDO chown root:root "/etc/systemd/system/${SERVICE_NAME}.service" && $SUDO chmod 644 "/etc/systemd/system/${SERVICE_NAME}.service"
$SUDO systemctl daemon-reload || true
$SUDO systemctl enable "$SERVICE_NAME" || true
if ! $SUDO systemctl start "$SERVICE_NAME"; then
say "[WARNING] Failed to start service"
SERVICE_START_FAILED=1
fi
elif [ "$svc" = "openrc" ]; then
generate_openrc_content | write_root "/etc/init.d/${SERVICE_NAME}"
$SUDO chown root:root "/etc/init.d/${SERVICE_NAME}" && $SUDO chmod 0755 "/etc/init.d/${SERVICE_NAME}"
$SUDO rc-update add "$SERVICE_NAME" default 2>/dev/null || true
if ! $SUDO rc-service "$SERVICE_NAME" start 2>/dev/null; then
say "[WARNING] Failed to start service"
SERVICE_START_FAILED=1
fi
else
cmd="\"${INSTALL_DIR}/${BIN_NAME}\" \"${CONFIG_FILE}\""
if [ -n "$SUDO" ]; then
say " -> Service manager not found. Start manually: sudo -u telemt $cmd"
else
say " -> Service manager not found. Start manually: su -s /bin/sh telemt -c '$cmd'"
fi
fi
}
kill_user_procs() {
if command -v pkill >/dev/null 2>&1; then
$SUDO pkill -u telemt "$BIN_NAME" 2>/dev/null || true
sleep 1
$SUDO pkill -9 -u telemt "$BIN_NAME" 2>/dev/null || true
else
if command -v pgrep >/dev/null 2>&1; then
pids="$(pgrep -u telemt 2>/dev/null || true)"
else
pids="$(ps -u telemt -o pid= 2>/dev/null || true)"
fi
if [ -n "$pids" ]; then
for pid in $pids; do
case "$pid" in ''|*[!0-9]*) continue ;; *) $SUDO kill "$pid" 2>/dev/null || true ;; esac
done
sleep 1
for pid in $pids; do
case "$pid" in ''|*[!0-9]*) continue ;; *) $SUDO kill -9 "$pid" 2>/dev/null || true ;; esac
done
fi
fi
}
uninstall() {
say "Starting uninstallation of $BIN_NAME..."
say ">>> Stage 1: Stopping services"
stop_service
say ">>> Stage 2: Removing service configuration"
svc="$(get_svc_mgr)"
if [ "$svc" = "systemd" ]; then
$SUDO systemctl disable "$SERVICE_NAME" 2>/dev/null || true
$SUDO rm -f "/etc/systemd/system/${SERVICE_NAME}.service"
$SUDO systemctl daemon-reload 2>/dev/null || true
elif [ "$svc" = "openrc" ]; then
$SUDO rc-update del "$SERVICE_NAME" 2>/dev/null || true
$SUDO rm -f "/etc/init.d/${SERVICE_NAME}"
fi
say ">>> Stage 3: Terminating user processes"
kill_user_procs
say ">>> Stage 4: Removing binary"
$SUDO rm -f "${INSTALL_DIR}/${BIN_NAME}"
if [ "$ACTION" = "purge" ]; then
say ">>> Stage 5: Purging configuration, data, and user"
$SUDO rm -rf "$CONFIG_DIR" "$WORK_DIR"
$SUDO rm -f "$CONFIG_FILE"
if [ "$CONFIG_PARENT_DIR" != "$CONFIG_DIR" ] && [ "$CONFIG_PARENT_DIR" != "." ] && [ "$CONFIG_PARENT_DIR" != "/" ]; then
$SUDO rmdir "$CONFIG_PARENT_DIR" 2>/dev/null || true
fi
$SUDO userdel telemt 2>/dev/null || $SUDO deluser telemt 2>/dev/null || true
$SUDO groupdel telemt 2>/dev/null || $SUDO delgroup telemt 2>/dev/null || true
else
say "Note: Configuration and user kept. Run with 'purge' to remove completely."
fi
printf '\n====================================================================\n'
printf ' UNINSTALLATION COMPLETE\n'
printf '====================================================================\n\n'
exit 0
}
case "$ACTION" in
help) show_help ;;
uninstall|purge) verify_common; uninstall ;;
install)
say "Starting installation of $BIN_NAME (Version: $TARGET_VERSION)"
say ">>> Stage 1: Verifying environment and dependencies"
verify_common; verify_install_deps
if [ "$TARGET_VERSION" != "latest" ]; then
TARGET_VERSION="${TARGET_VERSION#v}"
fi
ARCH="$(detect_arch)"; LIBC="$(detect_libc)"
FILE_NAME="${BIN_NAME}-${ARCH}-linux-${LIBC}.tar.gz"
if [ "$TARGET_VERSION" = "latest" ]; then
DL_URL="https://github.com/${REPO}/releases/latest/download/${FILE_NAME}"
else
DL_URL="https://github.com/${REPO}/releases/download/${TARGET_VERSION}/${FILE_NAME}"
fi
say ">>> Stage 2: Downloading archive"
TEMP_DIR="$(mktemp -d)" || die "Temp directory creation failed"
if [ -z "$TEMP_DIR" ] || [ ! -d "$TEMP_DIR" ]; then
die "Temp directory is invalid or was not created"
fi
fetch_file "$DL_URL" "${TEMP_DIR}/${FILE_NAME}" || die "Download failed"
say ">>> Stage 3: Extracting archive"
if ! gzip -dc "${TEMP_DIR}/${FILE_NAME}" | tar -xf - -C "$TEMP_DIR" 2>/dev/null; then
die "Extraction failed (downloaded archive might be invalid or 404)."
fi
EXTRACTED_BIN="$(find "$TEMP_DIR" -type f -name "$BIN_NAME" -print 2>/dev/null | head -n 1 || true)"
[ -n "$EXTRACTED_BIN" ] || die "Binary '$BIN_NAME' not found in archive"
say ">>> Stage 4: Setting up environment (User, Group, Directories)"
ensure_user_group; setup_dirs; stop_service
say ">>> Stage 5: Installing binary"
install_binary "$EXTRACTED_BIN" "${INSTALL_DIR}/${BIN_NAME}"
say ">>> Stage 6: Generating configuration"
install_config
say ">>> Stage 7: Installing and starting service"
install_service
if [ "${SERVICE_START_FAILED:-0}" -eq 1 ]; then
printf '\n====================================================================\n'
printf ' INSTALLATION COMPLETED WITH WARNINGS\n'
printf '====================================================================\n\n'
printf 'The service was installed but failed to start automatically.\n'
printf 'Please check the logs to determine the issue.\n\n'
else
printf '\n====================================================================\n'
printf ' INSTALLATION SUCCESS\n'
printf '====================================================================\n\n'
fi
svc="$(get_svc_mgr)"
if [ "$svc" = "systemd" ]; then
printf 'To check the status of your proxy service, run:\n'
printf ' systemctl status %s\n\n' "$SERVICE_NAME"
elif [ "$svc" = "openrc" ]; then
printf 'To check the status of your proxy service, run:\n'
printf ' rc-service %s status\n\n' "$SERVICE_NAME"
fi
printf 'To get your user connection links (for Telegram), run:\n'
if command -v jq >/dev/null 2>&1; then
printf ' curl -s http://127.0.0.1:9091/v1/users | jq -r '\''.data[] | "User: \\(.username)\\n\\(.links.tls[0] // empty)\\n"'\''\n'
else
printf ' curl -s http://127.0.0.1:9091/v1/users\n'
printf ' (Tip: Install '\''jq'\'' for a much cleaner output)\n'
fi
printf '\n====================================================================\n'
;;
esac

View File

@@ -205,6 +205,16 @@ pub(super) struct ZeroPoolData {
pub(super) refill_failed_total: u64,
pub(super) writer_restored_same_endpoint_total: u64,
pub(super) writer_restored_fallback_total: u64,
pub(super) teardown_attempt_total_normal: u64,
pub(super) teardown_attempt_total_hard_detach: u64,
pub(super) teardown_success_total_normal: u64,
pub(super) teardown_success_total_hard_detach: u64,
pub(super) teardown_timeout_total: u64,
pub(super) teardown_escalation_total: u64,
pub(super) teardown_noop_total: u64,
pub(super) teardown_cleanup_side_effect_failures_total: u64,
pub(super) teardown_duration_count_total: u64,
pub(super) teardown_duration_sum_seconds_total: f64,
}
#[derive(Serialize, Clone)]
@@ -364,6 +374,7 @@ pub(super) struct MinimalMeRuntimeData {
pub(super) me_reconnect_backoff_cap_ms: u64,
pub(super) me_reconnect_fast_retry_count: u32,
pub(super) me_pool_drain_ttl_secs: u64,
pub(super) me_instadrain: bool,
pub(super) me_pool_drain_soft_evict_enabled: bool,
pub(super) me_pool_drain_soft_evict_grace_secs: u64,
pub(super) me_pool_drain_soft_evict_per_writer: u8,
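The two aggregate fields `teardown_duration_count_total` and `teardown_duration_sum_seconds_total` above are summed across both teardown modes, which is enough for a consumer to derive a mean teardown latency without the full histogram. A minimal consumer-side sketch (the helper is hypothetical, not part of this PR):

fn mean_teardown_seconds(sum_seconds: f64, count: u64) -> Option<f64> {
    // Guard against the empty histogram: no observations yet → no mean.
    (count > 0).then(|| sum_seconds / count as f64)
}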

View File

@@ -4,6 +4,9 @@ use std::time::{SystemTime, UNIX_EPOCH};
use serde::Serialize;
use crate::config::ProxyConfig;
use crate::stats::{
MeWriterCleanupSideEffectStep, MeWriterTeardownMode, MeWriterTeardownReason, Stats,
};
use super::ApiShared;
@@ -98,6 +101,50 @@ pub(super) struct RuntimeMeQualityCountersData {
pub(super) reconnect_success_total: u64,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityTeardownAttemptData {
pub(super) reason: &'static str,
pub(super) mode: &'static str,
pub(super) total: u64,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityTeardownSuccessData {
pub(super) mode: &'static str,
pub(super) total: u64,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityTeardownSideEffectData {
pub(super) step: &'static str,
pub(super) total: u64,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityTeardownDurationBucketData {
pub(super) le_seconds: &'static str,
pub(super) total: u64,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityTeardownDurationData {
pub(super) mode: &'static str,
pub(super) count: u64,
pub(super) sum_seconds: f64,
pub(super) buckets: Vec<RuntimeMeQualityTeardownDurationBucketData>,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityTeardownData {
pub(super) attempts: Vec<RuntimeMeQualityTeardownAttemptData>,
pub(super) success: Vec<RuntimeMeQualityTeardownSuccessData>,
pub(super) timeout_total: u64,
pub(super) escalation_total: u64,
pub(super) noop_total: u64,
pub(super) cleanup_side_effect_failures: Vec<RuntimeMeQualityTeardownSideEffectData>,
pub(super) duration: Vec<RuntimeMeQualityTeardownDurationData>,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityRouteDropData {
pub(super) no_conn_total: u64,
@@ -107,6 +154,25 @@ pub(super) struct RuntimeMeQualityRouteDropData {
pub(super) queue_full_high_total: u64,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityFamilyStateData {
pub(super) family: &'static str,
pub(super) state: &'static str,
pub(super) state_since_epoch_secs: u64,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) suppressed_until_epoch_secs: Option<u64>,
pub(super) fail_streak: u32,
pub(super) recover_success_streak: u32,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityDrainGateData {
pub(super) route_quorum_ok: bool,
pub(super) redundancy_ok: bool,
pub(super) block_reason: &'static str,
pub(super) updated_at_epoch_secs: u64,
}
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityDcRttData {
pub(super) dc: i16,
@@ -120,7 +186,10 @@ pub(super) struct RuntimeMeQualityDcRttData {
#[derive(Serialize)]
pub(super) struct RuntimeMeQualityPayload {
pub(super) counters: RuntimeMeQualityCountersData,
pub(super) teardown: RuntimeMeQualityTeardownData,
pub(super) route_drops: RuntimeMeQualityRouteDropData,
pub(super) family_states: Vec<RuntimeMeQualityFamilyStateData>,
pub(super) drain_gate: RuntimeMeQualityDrainGateData,
pub(super) dc_rtt: Vec<RuntimeMeQualityDcRttData>,
}
@@ -361,6 +430,19 @@ pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> Runtime
};
let status = pool.api_status_snapshot().await;
let family_states = pool
.api_family_state_snapshot()
.into_iter()
.map(|entry| RuntimeMeQualityFamilyStateData {
family: entry.family,
state: entry.state,
state_since_epoch_secs: entry.state_since_epoch_secs,
suppressed_until_epoch_secs: entry.suppressed_until_epoch_secs,
fail_streak: entry.fail_streak,
recover_success_streak: entry.recover_success_streak,
})
.collect();
let drain_gate_snapshot = pool.api_drain_gate_snapshot();
RuntimeMeQualityData {
enabled: true,
reason: None,
@@ -374,6 +456,7 @@ pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> Runtime
reconnect_attempt_total: shared.stats.get_me_reconnect_attempts(),
reconnect_success_total: shared.stats.get_me_reconnect_success(),
},
teardown: build_runtime_me_teardown_data(shared),
route_drops: RuntimeMeQualityRouteDropData {
no_conn_total: shared.stats.get_me_route_drop_no_conn(),
channel_closed_total: shared.stats.get_me_route_drop_channel_closed(),
@@ -381,6 +464,13 @@ pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> Runtime
queue_full_base_total: shared.stats.get_me_route_drop_queue_full_base(),
queue_full_high_total: shared.stats.get_me_route_drop_queue_full_high(),
},
family_states,
drain_gate: RuntimeMeQualityDrainGateData {
route_quorum_ok: drain_gate_snapshot.route_quorum_ok,
redundancy_ok: drain_gate_snapshot.redundancy_ok,
block_reason: drain_gate_snapshot.block_reason,
updated_at_epoch_secs: drain_gate_snapshot.updated_at_epoch_secs,
},
dc_rtt: status
.dcs
.into_iter()
@@ -397,6 +487,81 @@ pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> Runtime
}
}
fn build_runtime_me_teardown_data(shared: &ApiShared) -> RuntimeMeQualityTeardownData {
let attempts = MeWriterTeardownReason::ALL
.iter()
.copied()
.flat_map(|reason| {
MeWriterTeardownMode::ALL
.iter()
.copied()
.map(move |mode| RuntimeMeQualityTeardownAttemptData {
reason: reason.as_str(),
mode: mode.as_str(),
total: shared.stats.get_me_writer_teardown_attempt_total(reason, mode),
})
})
.collect();
let success = MeWriterTeardownMode::ALL
.iter()
.copied()
.map(|mode| RuntimeMeQualityTeardownSuccessData {
mode: mode.as_str(),
total: shared.stats.get_me_writer_teardown_success_total(mode),
})
.collect();
let cleanup_side_effect_failures = MeWriterCleanupSideEffectStep::ALL
.iter()
.copied()
.map(|step| RuntimeMeQualityTeardownSideEffectData {
step: step.as_str(),
total: shared
.stats
.get_me_writer_cleanup_side_effect_failures_total(step),
})
.collect();
let duration = MeWriterTeardownMode::ALL
.iter()
.copied()
.map(|mode| {
let count = shared.stats.get_me_writer_teardown_duration_count(mode);
let mut buckets: Vec<RuntimeMeQualityTeardownDurationBucketData> = Stats::me_writer_teardown_duration_bucket_labels()
.iter()
.enumerate()
.map(|(bucket_idx, label)| RuntimeMeQualityTeardownDurationBucketData {
le_seconds: label,
total: shared
.stats
.get_me_writer_teardown_duration_bucket_total(mode, bucket_idx),
})
.collect();
buckets.push(RuntimeMeQualityTeardownDurationBucketData {
le_seconds: "+Inf",
total: count,
});
RuntimeMeQualityTeardownDurationData {
mode: mode.as_str(),
count,
sum_seconds: shared.stats.get_me_writer_teardown_duration_sum_seconds(mode),
buckets,
}
})
.collect();
RuntimeMeQualityTeardownData {
attempts,
success,
timeout_total: shared.stats.get_me_writer_teardown_timeout_total(),
escalation_total: shared.stats.get_me_writer_teardown_escalation_total(),
noop_total: shared.stats.get_me_writer_teardown_noop_total(),
cleanup_side_effect_failures,
duration,
}
}
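Two details of the builder above are worth noting: attempt counters are expanded as a full reason × mode cross product via `flat_map`, so every label combination is always present (zeros included), and each duration series gets a synthetic `+Inf` bucket whose value equals the observation count, matching Prometheus histogram conventions. The cross-product shape in a standalone sketch (labels abbreviated, names hypothetical):

fn label_pairs() -> Vec<(&'static str, &'static str)> {
    const REASONS: [&str; 3] = ["reader_exit", "writer_task_exit", "ping_send_fail"];
    const MODES: [&str; 2] = ["normal", "hard_detach"];
    // One entry per (reason, mode) combination, in stable declaration order.
    REASONS
        .iter()
        .flat_map(|reason| MODES.iter().map(move |mode| (*reason, *mode)))
        .collect()
}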
pub(super) async fn build_runtime_upstream_quality_data(
shared: &ApiShared,
) -> RuntimeUpstreamQualityData {

View File

@@ -1,7 +1,7 @@
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use crate::config::ApiConfig;
use crate::stats::{MeWriterTeardownMode, Stats};
use crate::transport::upstream::IpPreference;
use crate::transport::UpstreamRouteKind;
@@ -106,6 +106,29 @@ pub(super) fn build_zero_all_data(stats: &Stats, configured_users: usize) -> Zer
refill_failed_total: stats.get_me_refill_failed_total(),
writer_restored_same_endpoint_total: stats.get_me_writer_restored_same_endpoint_total(),
writer_restored_fallback_total: stats.get_me_writer_restored_fallback_total(),
teardown_attempt_total_normal: stats
.get_me_writer_teardown_attempt_total_by_mode(MeWriterTeardownMode::Normal),
teardown_attempt_total_hard_detach: stats
.get_me_writer_teardown_attempt_total_by_mode(MeWriterTeardownMode::HardDetach),
teardown_success_total_normal: stats
.get_me_writer_teardown_success_total(MeWriterTeardownMode::Normal),
teardown_success_total_hard_detach: stats
.get_me_writer_teardown_success_total(MeWriterTeardownMode::HardDetach),
teardown_timeout_total: stats.get_me_writer_teardown_timeout_total(),
teardown_escalation_total: stats.get_me_writer_teardown_escalation_total(),
teardown_noop_total: stats.get_me_writer_teardown_noop_total(),
teardown_cleanup_side_effect_failures_total: stats
.get_me_writer_cleanup_side_effect_failures_total_all(),
teardown_duration_count_total: stats
.get_me_writer_teardown_duration_count(MeWriterTeardownMode::Normal)
.saturating_add(
stats.get_me_writer_teardown_duration_count(MeWriterTeardownMode::HardDetach),
),
teardown_duration_sum_seconds_total: stats
.get_me_writer_teardown_duration_sum_seconds(MeWriterTeardownMode::Normal)
+ stats.get_me_writer_teardown_duration_sum_seconds(
MeWriterTeardownMode::HardDetach,
),
},
desync: ZeroDesyncData {
secure_padding_invalid_total: stats.get_secure_padding_invalid(),
@@ -431,6 +454,7 @@ async fn get_minimal_payload_cached(
me_reconnect_backoff_cap_ms: runtime.me_reconnect_backoff_cap_ms,
me_reconnect_fast_retry_count: runtime.me_reconnect_fast_retry_count,
me_pool_drain_ttl_secs: runtime.me_pool_drain_ttl_secs,
me_instadrain: runtime.me_instadrain,
me_pool_drain_soft_evict_enabled: runtime.me_pool_drain_soft_evict_enabled,
me_pool_drain_soft_evict_grace_secs: runtime.me_pool_drain_soft_evict_grace_secs,
me_pool_drain_soft_evict_per_writer: runtime.me_pool_drain_soft_evict_per_writer,

View File

@@ -198,8 +198,15 @@ desync_all_full = false
update_every = 43200
hardswap = false
me_pool_drain_ttl_secs = 90
me_instadrain = false
me_pool_drain_threshold = 32
me_pool_drain_soft_evict_grace_secs = 10
me_pool_drain_soft_evict_per_writer = 2
me_pool_drain_soft_evict_budget_per_core = 16
me_pool_drain_soft_evict_cooldown_ms = 1000
me_bind_stale_mode = "never"
me_pool_min_fresh_ratio = 0.8
me_reinit_drain_timeout_secs = 90
[network]
ipv4 = true

View File

@@ -40,10 +40,10 @@ const DEFAULT_ME_ROUTE_HYBRID_MAX_WAIT_MS: u64 = 3000;
const DEFAULT_ME_ROUTE_BLOCKING_SEND_TIMEOUT_MS: u64 = 250;
const DEFAULT_ME_C2ME_SEND_TIMEOUT_MS: u64 = 4000;
const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_ENABLED: bool = true;
const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_GRACE_SECS: u64 = 10;
const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_PER_WRITER: u8 = 2;
const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_BUDGET_PER_CORE: u16 = 16;
const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_COOLDOWN_MS: u64 = 1000;
const DEFAULT_USER_MAX_UNIQUE_IPS_WINDOW_SECS: u64 = 30;
const DEFAULT_ACCEPT_PERMIT_TIMEOUT_MS: u64 = 250;
const DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS: u32 = 2;
@@ -65,6 +65,10 @@ pub(crate) fn default_tls_domain() -> String {
"petrovich.ru".to_string() "petrovich.ru".to_string()
} }
pub(crate) fn default_tls_fetch_scope() -> String {
String::new()
}
pub(crate) fn default_mask_port() -> u16 {
443
}
@@ -606,15 +610,19 @@ pub(crate) fn default_proxy_secret_len_max() -> usize {
}
pub(crate) fn default_me_reinit_drain_timeout_secs() -> u64 {
90
}
pub(crate) fn default_me_pool_drain_ttl_secs() -> u64 {
90
}
pub(crate) fn default_me_instadrain() -> bool {
false
}
pub(crate) fn default_me_pool_drain_threshold() -> u64 {
32
}
pub(crate) fn default_me_pool_drain_soft_evict_enabled() -> bool {
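The new `me_instadrain` default follows the same wiring as its neighbors: a free function referenced from `#[serde(default = "...")]`, so a key absent from the TOML falls back to the documented value. Reduced to a self-contained sketch (struct name simplified):

use serde::Deserialize;

fn default_me_instadrain() -> bool {
    false
}

#[derive(Deserialize)]
struct GeneralSection {
    // Missing from the input → filled by the function above.
    #[serde(default = "default_me_instadrain")]
    me_instadrain: bool,
}

With this wiring, deserializing an empty `[general]` table yields `me_instadrain == false`, so existing config files keep their pre-upgrade behavior.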

View File

@@ -56,6 +56,7 @@ pub struct HotFields {
pub me_reinit_coalesce_window_ms: u64,
pub hardswap: bool,
pub me_pool_drain_ttl_secs: u64,
pub me_instadrain: bool,
pub me_pool_drain_threshold: u64,
pub me_pool_drain_soft_evict_enabled: bool,
pub me_pool_drain_soft_evict_grace_secs: u64,
@@ -143,6 +144,7 @@ impl HotFields {
me_reinit_coalesce_window_ms: cfg.general.me_reinit_coalesce_window_ms,
hardswap: cfg.general.hardswap,
me_pool_drain_ttl_secs: cfg.general.me_pool_drain_ttl_secs,
me_instadrain: cfg.general.me_instadrain,
me_pool_drain_threshold: cfg.general.me_pool_drain_threshold,
me_pool_drain_soft_evict_enabled: cfg.general.me_pool_drain_soft_evict_enabled,
me_pool_drain_soft_evict_grace_secs: cfg.general.me_pool_drain_soft_evict_grace_secs,
@@ -477,6 +479,7 @@ fn overlay_hot_fields(old: &ProxyConfig, new: &ProxyConfig) -> ProxyConfig {
cfg.general.me_reinit_coalesce_window_ms = new.general.me_reinit_coalesce_window_ms;
cfg.general.hardswap = new.general.hardswap;
cfg.general.me_pool_drain_ttl_secs = new.general.me_pool_drain_ttl_secs;
cfg.general.me_instadrain = new.general.me_instadrain;
cfg.general.me_pool_drain_threshold = new.general.me_pool_drain_threshold;
cfg.general.me_pool_drain_soft_evict_enabled = new.general.me_pool_drain_soft_evict_enabled;
cfg.general.me_pool_drain_soft_evict_grace_secs =
@@ -620,6 +623,7 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
}
if old.censorship.tls_domain != new.censorship.tls_domain
|| old.censorship.tls_domains != new.censorship.tls_domains
|| old.censorship.tls_fetch_scope != new.censorship.tls_fetch_scope
|| old.censorship.mask != new.censorship.mask
|| old.censorship.mask_host != new.censorship.mask_host
|| old.censorship.mask_port != new.censorship.mask_port
@@ -869,6 +873,12 @@ fn log_changes(
old_hot.me_pool_drain_ttl_secs, new_hot.me_pool_drain_ttl_secs,
);
}
if old_hot.me_instadrain != new_hot.me_instadrain {
info!(
"config reload: me_instadrain: {} → {}",
old_hot.me_instadrain, new_hot.me_instadrain,
);
}
if old_hot.me_pool_drain_threshold != new_hot.me_pool_drain_threshold {
info!(
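As the hunks above show, making `me_instadrain` hot-reloadable takes three touches: declare the field in `HotFields`, copy it in the overlay that merges a reloaded file onto the running config, and log the transition. The overlay idea in miniature (field set reduced; not the crate's actual types):

#[derive(Clone)]
struct HotConfig {
    me_instadrain: bool,
    me_pool_drain_ttl_secs: u64,
}

/// Copy only hot-reloadable fields onto the running config;
/// everything else keeps its old value until restart.
fn overlay_hot(old: &HotConfig, new: &HotConfig) -> HotConfig {
    let mut merged = old.clone();
    merged.me_instadrain = new.me_instadrain;
    merged.me_pool_drain_ttl_secs = new.me_pool_drain_ttl_secs;
    merged
}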

View File

@@ -612,6 +612,11 @@ impl ProxyConfig {
"general.me_route_backpressure_base_timeout_ms must be > 0".to_string(), "general.me_route_backpressure_base_timeout_ms must be > 0".to_string(),
)); ));
} }
if config.general.me_route_backpressure_base_timeout_ms > 5000 {
return Err(ProxyError::Config(
"general.me_route_backpressure_base_timeout_ms must be within [1, 5000]".to_string(),
));
}
if config.general.me_route_backpressure_high_timeout_ms
< config.general.me_route_backpressure_base_timeout_ms
@@ -620,6 +625,11 @@ impl ProxyConfig {
"general.me_route_backpressure_high_timeout_ms must be >= general.me_route_backpressure_base_timeout_ms".to_string(), "general.me_route_backpressure_high_timeout_ms must be >= general.me_route_backpressure_base_timeout_ms".to_string(),
)); ));
} }
if config.general.me_route_backpressure_high_timeout_ms > 5000 {
return Err(ProxyError::Config(
"general.me_route_backpressure_high_timeout_ms must be within [1, 5000]".to_string(),
));
}
if !(1..=100).contains(&config.general.me_route_backpressure_high_watermark_pct) {
return Err(ProxyError::Config(
@@ -769,6 +779,9 @@ impl ProxyConfig {
config.censorship.mask_host = Some(config.censorship.tls_domain.clone());
}
// Normalize optional TLS fetch scope: whitespace-only values disable scoped routing.
config.censorship.tls_fetch_scope = config.censorship.tls_fetch_scope.trim().to_string();
// Merge primary + extra TLS domains, deduplicate (primary always first).
if !config.censorship.tls_domains.is_empty() {
let mut all = Vec::with_capacity(1 + config.censorship.tls_domains.len());
@@ -1624,6 +1637,47 @@ mod tests {
let _ = std::fs::remove_file(path_valid);
} }
#[test]
fn me_route_backpressure_base_timeout_ms_out_of_range_is_rejected() {
let toml = r#"
[general]
me_route_backpressure_base_timeout_ms = 5001
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_me_route_backpressure_base_timeout_ms_out_of_range_test.toml");
std::fs::write(&path, toml).unwrap();
let err = ProxyConfig::load(&path).unwrap_err().to_string();
assert!(err.contains("general.me_route_backpressure_base_timeout_ms must be within [1, 5000]"));
let _ = std::fs::remove_file(path);
}
#[test]
fn me_route_backpressure_high_timeout_ms_out_of_range_is_rejected() {
let toml = r#"
[general]
me_route_backpressure_base_timeout_ms = 100
me_route_backpressure_high_timeout_ms = 5001
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_me_route_backpressure_high_timeout_ms_out_of_range_test.toml");
std::fs::write(&path, toml).unwrap();
let err = ProxyConfig::load(&path).unwrap_err().to_string();
assert!(err.contains("general.me_route_backpressure_high_timeout_ms must be within [1, 5000]"));
let _ = std::fs::remove_file(path);
}
#[test]
fn me_route_no_writer_wait_ms_out_of_range_is_rejected() {
let toml = r#"
@@ -1986,6 +2040,45 @@ mod tests {
let _ = std::fs::remove_file(path);
} }
#[test]
fn force_close_default_matches_drain_ttl() {
let toml = r#"
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_force_close_default_test.toml");
std::fs::write(&path, toml).unwrap();
let cfg = ProxyConfig::load(&path).unwrap();
assert_eq!(cfg.general.me_reinit_drain_timeout_secs, 90);
assert_eq!(cfg.general.effective_me_pool_force_close_secs(), 90);
let _ = std::fs::remove_file(path);
}
#[test]
fn force_close_zero_uses_runtime_safety_fallback() {
let toml = r#"
[general]
me_reinit_drain_timeout_secs = 0
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_force_close_zero_fallback_test.toml");
std::fs::write(&path, toml).unwrap();
let cfg = ProxyConfig::load(&path).unwrap();
assert_eq!(cfg.general.me_reinit_drain_timeout_secs, 0);
assert_eq!(cfg.general.effective_me_pool_force_close_secs(), 300);
let _ = std::fs::remove_file(path);
}
#[test]
fn force_close_bumped_when_below_drain_ttl() {
let toml = r#"
@@ -2007,6 +2100,59 @@ mod tests {
let _ = std::fs::remove_file(path);
} }
#[test]
fn tls_fetch_scope_default_is_empty() {
let toml = r#"
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_tls_fetch_scope_default_test.toml");
std::fs::write(&path, toml).unwrap();
let cfg = ProxyConfig::load(&path).unwrap();
assert!(cfg.censorship.tls_fetch_scope.is_empty());
let _ = std::fs::remove_file(path);
}
#[test]
fn tls_fetch_scope_is_trimmed_during_load() {
let toml = r#"
[censorship]
tls_domain = "example.com"
tls_fetch_scope = " me "
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_tls_fetch_scope_trim_test.toml");
std::fs::write(&path, toml).unwrap();
let cfg = ProxyConfig::load(&path).unwrap();
assert_eq!(cfg.censorship.tls_fetch_scope, "me");
let _ = std::fs::remove_file(path);
}
#[test]
fn tls_fetch_scope_whitespace_becomes_empty() {
let toml = r#"
[censorship]
tls_domain = "example.com"
tls_fetch_scope = " "
[access.users]
user = "00000000000000000000000000000000"
"#;
let dir = std::env::temp_dir();
let path = dir.join("telemt_tls_fetch_scope_blank_test.toml");
std::fs::write(&path, toml).unwrap();
let cfg = ProxyConfig::load(&path).unwrap();
assert!(cfg.censorship.tls_fetch_scope.is_empty());
let _ = std::fs::remove_file(path);
}
#[test]
fn invalid_ad_tag_is_disabled_during_load() {
let toml = r#"

View File

@@ -135,8 +135,8 @@ impl MeSocksKdfPolicy {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum MeBindStaleMode {
#[default]
Never,
Ttl,
Always,
}
@@ -812,6 +812,10 @@ pub struct GeneralConfig {
#[serde(default = "default_me_pool_drain_ttl_secs")] #[serde(default = "default_me_pool_drain_ttl_secs")]
pub me_pool_drain_ttl_secs: u64, pub me_pool_drain_ttl_secs: u64,
/// Force-remove any draining writer on the next cleanup tick, regardless of age/deadline.
#[serde(default = "default_me_instadrain")]
pub me_instadrain: bool,
/// Maximum allowed number of draining ME writers before oldest ones are force-closed in batches.
/// Set to 0 to disable threshold-based draining cleanup and keep timeout-only behavior.
#[serde(default = "default_me_pool_drain_threshold")]
@@ -851,7 +855,7 @@ pub struct GeneralConfig {
pub me_pool_min_fresh_ratio: f32,
/// Drain timeout in seconds for stale ME writers after endpoint map changes.
/// Set to 0 to use the runtime safety fallback timeout.
#[serde(default = "default_me_reinit_drain_timeout_secs")]
pub me_reinit_drain_timeout_secs: u64,
@@ -1020,6 +1024,7 @@ impl Default for GeneralConfig {
me_secret_atomic_snapshot: default_me_secret_atomic_snapshot(),
proxy_secret_len_max: default_proxy_secret_len_max(),
me_pool_drain_ttl_secs: default_me_pool_drain_ttl_secs(),
me_instadrain: default_me_instadrain(),
me_pool_drain_threshold: default_me_pool_drain_threshold(),
me_pool_drain_soft_evict_enabled: default_me_pool_drain_soft_evict_enabled(),
me_pool_drain_soft_evict_grace_secs: default_me_pool_drain_soft_evict_grace_secs(),
@@ -1063,10 +1068,15 @@ impl GeneralConfig {
/// Resolve force-close timeout for stale writers.
/// `me_reinit_drain_timeout_secs` remains backward-compatible alias.
/// A configured `0` uses the runtime safety fallback (300s).
pub fn effective_me_pool_force_close_secs(&self) -> u64 {
if self.me_reinit_drain_timeout_secs == 0 {
300
} else {
self.me_reinit_drain_timeout_secs
}
} }
}
/// `[general.links]` — proxy link generation settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -1298,6 +1308,11 @@ pub struct AntiCensorshipConfig {
#[serde(default)]
pub tls_domains: Vec<String>,
/// Upstream scope used for TLS front metadata fetches.
/// Empty value keeps default upstream routing behavior.
#[serde(default = "default_tls_fetch_scope")]
pub tls_fetch_scope: String,
#[serde(default = "default_true")] #[serde(default = "default_true")]
pub mask: bool, pub mask: bool,
@@ -1355,6 +1370,7 @@ impl Default for AntiCensorshipConfig {
Self {
tls_domain: default_tls_domain(),
tls_domains: Vec::new(),
tls_fetch_scope: default_tls_fetch_scope(),
mask: default_true(),
mask_host: None,
mask_port: default_mask_port(),
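The doc-comment change above is behavioral, not cosmetic: `me_reinit_drain_timeout_secs = 0` used to mean "drain stale writers forever" and now selects the 300-second runtime safety fallback. The resolver's logic restated as a standalone function (constant inlined; see the `force_close_zero_uses_runtime_safety_fallback` test earlier in this diff):

/// 0 → 300 s safety fallback; any other value is taken verbatim.
fn effective_force_close_secs(configured_secs: u64) -> u64 {
    if configured_secs == 0 { 300 } else { configured_secs }
}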

View File

@@ -237,6 +237,7 @@ pub(crate) async fn initialize_me_pool(
config.general.me_adaptive_floor_max_warm_writers_global,
config.general.hardswap,
config.general.me_pool_drain_ttl_secs,
config.general.me_instadrain,
config.general.me_pool_drain_threshold,
config.general.me_pool_drain_soft_evict_enabled,
config.general.me_pool_drain_soft_evict_grace_secs,
@@ -331,18 +332,76 @@ pub(crate) async fn initialize_me_pool(
"Middle-End pool initialized successfully" "Middle-End pool initialized successfully"
); );
// ── Supervised background tasks ──────────────────
// Each task runs inside a nested tokio::spawn so
// that a panic is caught via JoinHandle and the
// outer loop restarts the task automatically.
let pool_health = pool_bg.clone();
let rng_health = rng_bg.clone();
let min_conns = pool_size;
tokio::spawn(async move {
loop {
let p = pool_health.clone();
let r = rng_health.clone();
let res = tokio::spawn(async move {
crate::transport::middle_proxy::me_health_monitor(
p, r, min_conns,
)
.await;
})
.await;
match res {
Ok(()) => warn!("me_health_monitor exited unexpectedly, restarting"),
Err(e) => {
error!(error = %e, "me_health_monitor panicked, restarting in 1s");
tokio::time::sleep(Duration::from_secs(1)).await;
}
}
}
});
let pool_drain_enforcer = pool_bg.clone();
tokio::spawn(async move {
loop {
let p = pool_drain_enforcer.clone();
let res = tokio::spawn(async move {
crate::transport::middle_proxy::me_drain_timeout_enforcer(p).await;
})
.await;
match res {
Ok(()) => warn!("me_drain_timeout_enforcer exited unexpectedly, restarting"),
Err(e) => {
error!(error = %e, "me_drain_timeout_enforcer panicked, restarting in 1s");
tokio::time::sleep(Duration::from_secs(1)).await;
}
}
}
});
let pool_watchdog = pool_bg.clone();
tokio::spawn(async move {
loop {
let p = pool_watchdog.clone();
let res = tokio::spawn(async move {
crate::transport::middle_proxy::me_zombie_writer_watchdog(p).await;
})
.await;
match res {
Ok(()) => warn!("me_zombie_writer_watchdog exited unexpectedly, restarting"),
Err(e) => {
error!(error = %e, "me_zombie_writer_watchdog panicked, restarting in 1s");
tokio::time::sleep(Duration::from_secs(1)).await;
}
}
}
});
// CRITICAL: keep the current-thread runtime
// alive. Without this, block_on() returns,
// the Runtime is dropped, and ALL spawned
// background tasks (health monitor, drain
// enforcer, zombie watchdog) are silently
// cancelled — causing the draining-writer
// leak that brought us here.
std::future::pending::<()>().await;
unreachable!();
}
Err(e) => {
startup_tracker_bg.set_me_last_error(Some(e.to_string())).await;
@@ -400,14 +459,63 @@ pub(crate) async fn initialize_me_pool(
"Middle-End pool initialized successfully" "Middle-End pool initialized successfully"
); );
// ── Supervised background tasks ──────────────────
let pool_clone = pool.clone();
let rng_clone = rng.clone();
let min_conns = pool_size;
tokio::spawn(async move {
loop {
let p = pool_clone.clone();
let r = rng_clone.clone();
let res = tokio::spawn(async move {
crate::transport::middle_proxy::me_health_monitor(
p, r, min_conns,
)
.await;
})
.await;
match res {
Ok(()) => warn!("me_health_monitor exited unexpectedly, restarting"),
Err(e) => {
error!(error = %e, "me_health_monitor panicked, restarting in 1s");
tokio::time::sleep(Duration::from_secs(1)).await;
}
}
}
});
let pool_drain_enforcer = pool.clone();
tokio::spawn(async move {
loop {
let p = pool_drain_enforcer.clone();
let res = tokio::spawn(async move {
crate::transport::middle_proxy::me_drain_timeout_enforcer(p).await;
})
.await;
match res {
Ok(()) => warn!("me_drain_timeout_enforcer exited unexpectedly, restarting"),
Err(e) => {
error!(error = %e, "me_drain_timeout_enforcer panicked, restarting in 1s");
tokio::time::sleep(Duration::from_secs(1)).await;
}
}
}
});
let pool_watchdog = pool.clone();
tokio::spawn(async move {
loop {
let p = pool_watchdog.clone();
let res = tokio::spawn(async move {
crate::transport::middle_proxy::me_zombie_writer_watchdog(p).await;
})
.await;
match res {
Ok(()) => warn!("me_zombie_writer_watchdog exited unexpectedly, restarting"),
Err(e) => {
error!(error = %e, "me_zombie_writer_watchdog panicked, restarting in 1s");
tokio::time::sleep(Duration::from_secs(1)).await;
}
}
}
});
break Some(pool);
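All six spawn sites above repeat one supervision shape: the worker runs inside a nested `tokio::spawn` so a panic surfaces as a `JoinError` instead of unwinding the outer task, and both a clean exit and a panic trigger a restart (the latter after a one-second pause). The shape factored into a generic helper, as a sketch of the pattern rather than code from this PR:

use std::time::Duration;

/// Restart `make_task()` forever; the nested spawn isolates panics.
async fn supervise<F, Fut>(name: &'static str, mut make_task: F)
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = ()> + Send + 'static,
{
    loop {
        match tokio::spawn(make_task()).await {
            Ok(()) => tracing::warn!("{name} exited unexpectedly, restarting"),
            Err(e) => {
                tracing::error!(error = %e, "{name} panicked, restarting in 1s");
                tokio::time::sleep(Duration::from_secs(1)).await;
            }
        }
    }
}

Treating a clean `Ok(())` exit as abnormal is a policy choice; it fits these monitors because their contract is to run for the lifetime of the pool.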

View File

@@ -38,12 +38,15 @@ pub(crate) async fn bootstrap_tls_front(
.clone()
.unwrap_or_else(|| config.censorship.tls_domain.clone());
let mask_unix_sock = config.censorship.mask_unix_sock.clone();
let tls_fetch_scope = (!config.censorship.tls_fetch_scope.is_empty())
.then(|| config.censorship.tls_fetch_scope.clone());
let fetch_timeout = Duration::from_secs(5);
let cache_initial = cache.clone();
let domains_initial = tls_domains.to_vec();
let host_initial = mask_host.clone();
let unix_sock_initial = mask_unix_sock.clone();
let scope_initial = tls_fetch_scope.clone();
let upstream_initial = upstream_manager.clone();
tokio::spawn(async move {
let mut join = tokio::task::JoinSet::new();
@@ -51,6 +54,7 @@ pub(crate) async fn bootstrap_tls_front(
let cache_domain = cache_initial.clone();
let host_domain = host_initial.clone();
let unix_sock_domain = unix_sock_initial.clone();
let scope_domain = scope_initial.clone();
let upstream_domain = upstream_initial.clone();
join.spawn(async move {
match crate::tls_front::fetcher::fetch_real_tls(
@@ -59,6 +63,7 @@ pub(crate) async fn bootstrap_tls_front(
&domain,
fetch_timeout,
Some(upstream_domain),
scope_domain.as_deref(),
proxy_protocol,
unix_sock_domain.as_deref(),
)
@@ -100,6 +105,7 @@ pub(crate) async fn bootstrap_tls_front(
let domains_refresh = tls_domains.to_vec();
let host_refresh = mask_host.clone();
let unix_sock_refresh = mask_unix_sock.clone();
let scope_refresh = tls_fetch_scope.clone();
let upstream_refresh = upstream_manager.clone();
tokio::spawn(async move {
loop {
@@ -112,6 +118,7 @@ pub(crate) async fn bootstrap_tls_front(
let cache_domain = cache_refresh.clone();
let host_domain = host_refresh.clone();
let unix_sock_domain = unix_sock_refresh.clone();
let scope_domain = scope_refresh.clone();
let upstream_domain = upstream_refresh.clone();
join.spawn(async move {
match crate::tls_front::fetcher::fetch_real_tls(
@@ -120,6 +127,7 @@ pub(crate) async fn bootstrap_tls_front(
&domain,
fetch_timeout,
Some(upstream_domain),
scope_domain.as_deref(),
proxy_protocol,
unix_sock_domain.as_deref(),
)
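Two places cooperate on the new scope value: config load trims it (so whitespace-only input disables scoped routing), and the bootstrap above maps the empty string to `None` before threading it into every fetch call. Collapsed into one helper for illustration (hypothetical; the real code keeps the steps separate):

/// "" or whitespace → None (default upstream routing); else Some(scope).
fn tls_fetch_scope_of(raw: &str) -> Option<String> {
    let trimmed = raw.trim();
    (!trimmed.is_empty()).then(|| trimmed.to_string())
}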

View File

@@ -16,7 +16,9 @@ use tracing::{info, warn, debug};
use crate::config::ProxyConfig;
use crate::ip_tracker::UserIpTracker;
use crate::stats::beobachten::BeobachtenStore;
use crate::stats::{
MeWriterCleanupSideEffectStep, MeWriterTeardownMode, MeWriterTeardownReason, Stats,
};
use crate::transport::{ListenOptions, create_listener};
pub async fn serve(
@@ -1692,6 +1694,57 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
}
);
let _ = writeln!(
out,
"# HELP telemt_me_writer_close_signal_drop_total Close-signal drops for already-removed ME writers"
);
let _ = writeln!(out, "# TYPE telemt_me_writer_close_signal_drop_total counter");
let _ = writeln!(
out,
"telemt_me_writer_close_signal_drop_total {}",
if me_allows_normal {
stats.get_me_writer_close_signal_drop_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_writer_close_signal_channel_full_total Close-signal drops caused by full writer command channels"
);
let _ = writeln!(
out,
"# TYPE telemt_me_writer_close_signal_channel_full_total counter"
);
let _ = writeln!(
out,
"telemt_me_writer_close_signal_channel_full_total {}",
if me_allows_normal {
stats.get_me_writer_close_signal_channel_full_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_draining_writers_reap_progress_total Draining-writer removals processed by reap cleanup"
);
let _ = writeln!(
out,
"# TYPE telemt_me_draining_writers_reap_progress_total counter"
);
let _ = writeln!(
out,
"telemt_me_draining_writers_reap_progress_total {}",
if me_allows_normal {
stats.get_me_draining_writers_reap_progress_total()
} else {
0
}
);
let _ = writeln!(out, "# HELP telemt_me_writer_removed_total Total ME writer removals"); let _ = writeln!(out, "# HELP telemt_me_writer_removed_total Total ME writer removals");
let _ = writeln!(out, "# TYPE telemt_me_writer_removed_total counter"); let _ = writeln!(out, "# TYPE telemt_me_writer_removed_total counter");
let _ = writeln!( let _ = writeln!(
@@ -1719,6 +1772,169 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
}
);
let _ = writeln!(
out,
"# HELP telemt_me_writer_teardown_attempt_total ME writer teardown attempts by reason and mode"
);
let _ = writeln!(out, "# TYPE telemt_me_writer_teardown_attempt_total counter");
for reason in MeWriterTeardownReason::ALL {
for mode in MeWriterTeardownMode::ALL {
let _ = writeln!(
out,
"telemt_me_writer_teardown_attempt_total{{reason=\"{}\",mode=\"{}\"}} {}",
reason.as_str(),
mode.as_str(),
if me_allows_normal {
stats.get_me_writer_teardown_attempt_total(reason, mode)
} else {
0
}
);
}
}
let _ = writeln!(
out,
"# HELP telemt_me_writer_teardown_success_total ME writer teardown successes by mode"
);
let _ = writeln!(out, "# TYPE telemt_me_writer_teardown_success_total counter");
for mode in MeWriterTeardownMode::ALL {
let _ = writeln!(
out,
"telemt_me_writer_teardown_success_total{{mode=\"{}\"}} {}",
mode.as_str(),
if me_allows_normal {
stats.get_me_writer_teardown_success_total(mode)
} else {
0
}
);
}
let _ = writeln!(
out,
"# HELP telemt_me_writer_teardown_timeout_total Teardown operations that timed out"
);
let _ = writeln!(out, "# TYPE telemt_me_writer_teardown_timeout_total counter");
let _ = writeln!(
out,
"telemt_me_writer_teardown_timeout_total {}",
if me_allows_normal {
stats.get_me_writer_teardown_timeout_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_writer_teardown_escalation_total Watchdog teardown escalations to hard detach"
);
let _ = writeln!(
out,
"# TYPE telemt_me_writer_teardown_escalation_total counter"
);
let _ = writeln!(
out,
"telemt_me_writer_teardown_escalation_total {}",
if me_allows_normal {
stats.get_me_writer_teardown_escalation_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_writer_teardown_noop_total Teardown operations that became no-op"
);
let _ = writeln!(out, "# TYPE telemt_me_writer_teardown_noop_total counter");
let _ = writeln!(
out,
"telemt_me_writer_teardown_noop_total {}",
if me_allows_normal {
stats.get_me_writer_teardown_noop_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_writer_teardown_duration_seconds ME writer teardown latency histogram by mode"
);
let _ = writeln!(
out,
"# TYPE telemt_me_writer_teardown_duration_seconds histogram"
);
let bucket_labels = Stats::me_writer_teardown_duration_bucket_labels();
for mode in MeWriterTeardownMode::ALL {
for (bucket_idx, label) in bucket_labels.iter().enumerate() {
let _ = writeln!(
out,
"telemt_me_writer_teardown_duration_seconds_bucket{{mode=\"{}\",le=\"{}\"}} {}",
mode.as_str(),
label,
if me_allows_normal {
stats.get_me_writer_teardown_duration_bucket_total(mode, bucket_idx)
} else {
0
}
);
}
let _ = writeln!(
out,
"telemt_me_writer_teardown_duration_seconds_bucket{{mode=\"{}\",le=\"+Inf\"}} {}",
mode.as_str(),
if me_allows_normal {
stats.get_me_writer_teardown_duration_count(mode)
} else {
0
}
);
let _ = writeln!(
out,
"telemt_me_writer_teardown_duration_seconds_sum{{mode=\"{}\"}} {:.6}",
mode.as_str(),
if me_allows_normal {
stats.get_me_writer_teardown_duration_sum_seconds(mode)
} else {
0.0
}
);
let _ = writeln!(
out,
"telemt_me_writer_teardown_duration_seconds_count{{mode=\"{}\"}} {}",
mode.as_str(),
if me_allows_normal {
stats.get_me_writer_teardown_duration_count(mode)
} else {
0
}
);
}
let _ = writeln!(
out,
"# HELP telemt_me_writer_cleanup_side_effect_failures_total Failed cleanup side effects by step"
);
let _ = writeln!(
out,
"# TYPE telemt_me_writer_cleanup_side_effect_failures_total counter"
);
for step in MeWriterCleanupSideEffectStep::ALL {
let _ = writeln!(
out,
"telemt_me_writer_cleanup_side_effect_failures_total{{step=\"{}\"}} {}",
step.as_str(),
if me_allows_normal {
stats.get_me_writer_cleanup_side_effect_failures_total(step)
} else {
0
}
);
}
let _ = writeln!(out, "# HELP telemt_me_refill_triggered_total Immediate ME refill runs started"); let _ = writeln!(out, "# HELP telemt_me_refill_triggered_total Immediate ME refill runs started");
let _ = writeln!(out, "# TYPE telemt_me_refill_triggered_total counter"); let _ = writeln!(out, "# TYPE telemt_me_refill_triggered_total counter");
let _ = writeln!( let _ = writeln!(
@@ -2124,6 +2340,24 @@ mod tests {
assert!(output.contains("# TYPE telemt_me_rpc_proxy_req_signal_sent_total counter")); assert!(output.contains("# TYPE telemt_me_rpc_proxy_req_signal_sent_total counter"));
assert!(output.contains("# TYPE telemt_me_idle_close_by_peer_total counter")); assert!(output.contains("# TYPE telemt_me_idle_close_by_peer_total counter"));
assert!(output.contains("# TYPE telemt_me_writer_removed_total counter")); assert!(output.contains("# TYPE telemt_me_writer_removed_total counter"));
assert!(output.contains("# TYPE telemt_me_writer_teardown_attempt_total counter"));
assert!(output.contains("# TYPE telemt_me_writer_teardown_success_total counter"));
assert!(output.contains("# TYPE telemt_me_writer_teardown_timeout_total counter"));
assert!(output.contains("# TYPE telemt_me_writer_teardown_escalation_total counter"));
assert!(output.contains("# TYPE telemt_me_writer_teardown_noop_total counter"));
assert!(output.contains(
"# TYPE telemt_me_writer_teardown_duration_seconds histogram"
));
assert!(output.contains(
"# TYPE telemt_me_writer_cleanup_side_effect_failures_total counter"
));
assert!(output.contains("# TYPE telemt_me_writer_close_signal_drop_total counter"));
assert!(output.contains(
"# TYPE telemt_me_writer_close_signal_channel_full_total counter"
));
assert!(output.contains(
"# TYPE telemt_me_draining_writers_reap_progress_total counter"
));
assert!(output.contains("# TYPE telemt_pool_drain_soft_evict_total counter")); assert!(output.contains("# TYPE telemt_pool_drain_soft_evict_total counter"));
assert!(output.contains("# TYPE telemt_pool_drain_soft_evict_writer_total counter")); assert!(output.contains("# TYPE telemt_pool_drain_soft_evict_writer_total counter"));
assert!(output.contains( assert!(output.contains(
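Every new series above sits behind the same `me_allows_normal` guard: when the ME telemetry level excludes normal-tier counters, the exporter still prints the series but pins it to zero, keeping the scrape schema stable across telemetry levels. The guard reduced to its essence (illustrative only):

/// Keep the series present but zeroed when the telemetry tier disallows it.
fn gated_counter(me_allows_normal: bool, value: u64) -> u64 {
    if me_allows_normal { value } else { 0 }
}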

View File

@@ -19,6 +19,137 @@ use tracing::debug;
use crate::config::{MeTelemetryLevel, MeWriterPickMode};
use self::telemetry::TelemetryPolicy;
const ME_WRITER_TEARDOWN_MODE_COUNT: usize = 2;
const ME_WRITER_TEARDOWN_REASON_COUNT: usize = 11;
const ME_WRITER_CLEANUP_SIDE_EFFECT_STEP_COUNT: usize = 2;
const ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT: usize = 12;
const ME_WRITER_TEARDOWN_DURATION_BUCKET_BOUNDS_MICROS: [u64; ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT] = [
1_000,
5_000,
10_000,
25_000,
50_000,
100_000,
250_000,
500_000,
1_000_000,
2_500_000,
5_000_000,
10_000_000,
];
const ME_WRITER_TEARDOWN_DURATION_BUCKET_LABELS: [&str; ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT] = [
"0.001",
"0.005",
"0.01",
"0.025",
"0.05",
"0.1",
"0.25",
"0.5",
"1",
"2.5",
"5",
"10",
];
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum MeWriterTeardownMode {
Normal = 0,
HardDetach = 1,
}
impl MeWriterTeardownMode {
pub const ALL: [Self; ME_WRITER_TEARDOWN_MODE_COUNT] =
[Self::Normal, Self::HardDetach];
pub const fn as_str(self) -> &'static str {
match self {
Self::Normal => "normal",
Self::HardDetach => "hard_detach",
}
}
const fn idx(self) -> usize {
self as usize
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum MeWriterTeardownReason {
ReaderExit = 0,
WriterTaskExit = 1,
PingSendFail = 2,
SignalSendFail = 3,
RouteChannelClosed = 4,
CloseRpcChannelClosed = 5,
PruneClosedWriter = 6,
ReapTimeoutExpired = 7,
ReapThresholdForce = 8,
ReapEmpty = 9,
WatchdogStuckDraining = 10,
}
impl MeWriterTeardownReason {
pub const ALL: [Self; ME_WRITER_TEARDOWN_REASON_COUNT] = [
Self::ReaderExit,
Self::WriterTaskExit,
Self::PingSendFail,
Self::SignalSendFail,
Self::RouteChannelClosed,
Self::CloseRpcChannelClosed,
Self::PruneClosedWriter,
Self::ReapTimeoutExpired,
Self::ReapThresholdForce,
Self::ReapEmpty,
Self::WatchdogStuckDraining,
];
pub const fn as_str(self) -> &'static str {
match self {
Self::ReaderExit => "reader_exit",
Self::WriterTaskExit => "writer_task_exit",
Self::PingSendFail => "ping_send_fail",
Self::SignalSendFail => "signal_send_fail",
Self::RouteChannelClosed => "route_channel_closed",
Self::CloseRpcChannelClosed => "close_rpc_channel_closed",
Self::PruneClosedWriter => "prune_closed_writer",
Self::ReapTimeoutExpired => "reap_timeout_expired",
Self::ReapThresholdForce => "reap_threshold_force",
Self::ReapEmpty => "reap_empty",
Self::WatchdogStuckDraining => "watchdog_stuck_draining",
}
}
const fn idx(self) -> usize {
self as usize
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum MeWriterCleanupSideEffectStep {
CloseSignalChannelFull = 0,
CloseSignalChannelClosed = 1,
}
impl MeWriterCleanupSideEffectStep {
pub const ALL: [Self; ME_WRITER_CLEANUP_SIDE_EFFECT_STEP_COUNT] =
[Self::CloseSignalChannelFull, Self::CloseSignalChannelClosed];
pub const fn as_str(self) -> &'static str {
match self {
Self::CloseSignalChannelFull => "close_signal_channel_full",
Self::CloseSignalChannelClosed => "close_signal_channel_closed",
}
}
const fn idx(self) -> usize {
self as usize
}
}
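The `idx()` helpers and `ALL` arrays exist so these enums can address fixed-size atomic arrays without hashing: `#[repr(u8)]` pins each discriminant, and `ALL` gives exporters a stable iteration order. The storage scheme in miniature, using the same dimensions (standalone sketch, not the crate's `Stats`):

use std::array;
use std::sync::atomic::{AtomicU64, Ordering};

const MODES: usize = 2; // normal, hard_detach
const REASONS: usize = 11;

struct TeardownAttempts {
    // counts[reason][mode], addressed by enum discriminant.
    counts: [[AtomicU64; MODES]; REASONS],
}

impl TeardownAttempts {
    fn new() -> Self {
        Self {
            counts: array::from_fn(|_| array::from_fn(|_| AtomicU64::new(0))),
        }
    }

    fn record(&self, reason_idx: usize, mode_idx: usize) {
        self.counts[reason_idx][mode_idx].fetch_add(1, Ordering::Relaxed);
    }
}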
// ============= Stats =============
#[derive(Default)]
@@ -123,8 +254,23 @@ pub struct Stats {
pool_drain_soft_evict_total: AtomicU64,
pool_drain_soft_evict_writer_total: AtomicU64,
pool_stale_pick_total: AtomicU64,
me_writer_close_signal_drop_total: AtomicU64,
me_writer_close_signal_channel_full_total: AtomicU64,
me_draining_writers_reap_progress_total: AtomicU64,
me_writer_removed_total: AtomicU64,
me_writer_removed_unexpected_total: AtomicU64,
me_writer_teardown_attempt_total:
[[AtomicU64; ME_WRITER_TEARDOWN_MODE_COUNT]; ME_WRITER_TEARDOWN_REASON_COUNT],
me_writer_teardown_success_total: [AtomicU64; ME_WRITER_TEARDOWN_MODE_COUNT],
me_writer_teardown_timeout_total: AtomicU64,
me_writer_teardown_escalation_total: AtomicU64,
me_writer_teardown_noop_total: AtomicU64,
me_writer_cleanup_side_effect_failures_total:
[AtomicU64; ME_WRITER_CLEANUP_SIDE_EFFECT_STEP_COUNT],
me_writer_teardown_duration_bucket_hits:
[[AtomicU64; ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT + 1]; ME_WRITER_TEARDOWN_MODE_COUNT],
me_writer_teardown_duration_sum_micros: [AtomicU64; ME_WRITER_TEARDOWN_MODE_COUNT],
me_writer_teardown_duration_count: [AtomicU64; ME_WRITER_TEARDOWN_MODE_COUNT],
me_refill_triggered_total: AtomicU64,
me_refill_skipped_inflight_total: AtomicU64,
me_refill_failed_total: AtomicU64,
@@ -734,6 +880,24 @@ impl Stats {
self.pool_stale_pick_total.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_writer_close_signal_drop_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_writer_close_signal_drop_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_writer_close_signal_channel_full_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_writer_close_signal_channel_full_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_draining_writers_reap_progress_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_draining_writers_reap_progress_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_writer_removed_total(&self) {
if self.telemetry_me_allows_debug() {
self.me_writer_removed_total.fetch_add(1, Ordering::Relaxed);
@@ -744,6 +908,74 @@ impl Stats {
self.me_writer_removed_unexpected_total.fetch_add(1, Ordering::Relaxed); self.me_writer_removed_unexpected_total.fetch_add(1, Ordering::Relaxed);
} }
} }
pub fn increment_me_writer_teardown_attempt_total(
&self,
reason: MeWriterTeardownReason,
mode: MeWriterTeardownMode,
) {
if self.telemetry_me_allows_normal() {
self.me_writer_teardown_attempt_total[reason.idx()][mode.idx()]
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_writer_teardown_success_total(&self, mode: MeWriterTeardownMode) {
if self.telemetry_me_allows_normal() {
self.me_writer_teardown_success_total[mode.idx()].fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_writer_teardown_timeout_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_writer_teardown_timeout_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_writer_teardown_escalation_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_writer_teardown_escalation_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_writer_teardown_noop_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_writer_teardown_noop_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_writer_cleanup_side_effect_failures_total(
&self,
step: MeWriterCleanupSideEffectStep,
) {
if self.telemetry_me_allows_normal() {
self.me_writer_cleanup_side_effect_failures_total[step.idx()]
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn observe_me_writer_teardown_duration(
&self,
mode: MeWriterTeardownMode,
duration: Duration,
) {
if !self.telemetry_me_allows_normal() {
return;
}
let duration_micros = duration.as_micros().min(u64::MAX as u128) as u64;
let mut bucket_idx = ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT;
for (idx, upper_bound_micros) in ME_WRITER_TEARDOWN_DURATION_BUCKET_BOUNDS_MICROS
.iter()
.copied()
.enumerate()
{
if duration_micros <= upper_bound_micros {
bucket_idx = idx;
break;
}
}
self.me_writer_teardown_duration_bucket_hits[mode.idx()][bucket_idx]
.fetch_add(1, Ordering::Relaxed);
self.me_writer_teardown_duration_sum_micros[mode.idx()]
.fetch_add(duration_micros, Ordering::Relaxed);
self.me_writer_teardown_duration_count[mode.idx()].fetch_add(1, Ordering::Relaxed);
}
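The bucket scan above picks the first bound the observation does not exceed; anything larger lands in the implicit overflow bucket at index `ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT`. A standalone sketch of the same logic with illustrative bounds (the real `ME_WRITER_TEARDOWN_DURATION_BUCKET_BOUNDS_MICROS` values are defined elsewhere in this module):

const BOUNDS_MICROS: [u64; 3] = [1_000, 10_000, 100_000]; // illustrative only

fn bucket_for(duration_micros: u64) -> usize {
    // First bucket whose upper bound the observation does not exceed;
    // everything larger falls into the implicit +Inf bucket at the end.
    BOUNDS_MICROS
        .iter()
        .position(|&upper| duration_micros <= upper)
        .unwrap_or(BOUNDS_MICROS.len())
}

#[test]
fn bucket_scan_matches_observe_logic() {
    assert_eq!(bucket_for(3_000), 1); // a 3ms teardown -> second bucket
    assert_eq!(bucket_for(250_000), 3); // past the last bound -> +Inf bucket
}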
pub fn increment_me_refill_triggered_total(&self) { pub fn increment_me_refill_triggered_total(&self) {
if self.telemetry_me_allows_debug() { if self.telemetry_me_allows_debug() {
self.me_refill_triggered_total.fetch_add(1, Ordering::Relaxed); self.me_refill_triggered_total.fetch_add(1, Ordering::Relaxed);
@@ -1259,12 +1491,96 @@ impl Stats {
pub fn get_pool_stale_pick_total(&self) -> u64 { pub fn get_pool_stale_pick_total(&self) -> u64 {
self.pool_stale_pick_total.load(Ordering::Relaxed) self.pool_stale_pick_total.load(Ordering::Relaxed)
} }
pub fn get_me_writer_close_signal_drop_total(&self) -> u64 {
self.me_writer_close_signal_drop_total.load(Ordering::Relaxed)
}
pub fn get_me_writer_close_signal_channel_full_total(&self) -> u64 {
self.me_writer_close_signal_channel_full_total
.load(Ordering::Relaxed)
}
pub fn get_me_draining_writers_reap_progress_total(&self) -> u64 {
self.me_draining_writers_reap_progress_total
.load(Ordering::Relaxed)
}
pub fn get_me_writer_removed_total(&self) -> u64 { pub fn get_me_writer_removed_total(&self) -> u64 {
self.me_writer_removed_total.load(Ordering::Relaxed) self.me_writer_removed_total.load(Ordering::Relaxed)
} }
pub fn get_me_writer_removed_unexpected_total(&self) -> u64 { pub fn get_me_writer_removed_unexpected_total(&self) -> u64 {
self.me_writer_removed_unexpected_total.load(Ordering::Relaxed) self.me_writer_removed_unexpected_total.load(Ordering::Relaxed)
} }
pub fn get_me_writer_teardown_attempt_total(
&self,
reason: MeWriterTeardownReason,
mode: MeWriterTeardownMode,
) -> u64 {
self.me_writer_teardown_attempt_total[reason.idx()][mode.idx()]
.load(Ordering::Relaxed)
}
pub fn get_me_writer_teardown_attempt_total_by_mode(&self, mode: MeWriterTeardownMode) -> u64 {
MeWriterTeardownReason::ALL
.iter()
.copied()
.map(|reason| self.get_me_writer_teardown_attempt_total(reason, mode))
.sum()
}
pub fn get_me_writer_teardown_success_total(&self, mode: MeWriterTeardownMode) -> u64 {
self.me_writer_teardown_success_total[mode.idx()].load(Ordering::Relaxed)
}
pub fn get_me_writer_teardown_timeout_total(&self) -> u64 {
self.me_writer_teardown_timeout_total.load(Ordering::Relaxed)
}
pub fn get_me_writer_teardown_escalation_total(&self) -> u64 {
self.me_writer_teardown_escalation_total
.load(Ordering::Relaxed)
}
pub fn get_me_writer_teardown_noop_total(&self) -> u64 {
self.me_writer_teardown_noop_total.load(Ordering::Relaxed)
}
pub fn get_me_writer_cleanup_side_effect_failures_total(
&self,
step: MeWriterCleanupSideEffectStep,
) -> u64 {
self.me_writer_cleanup_side_effect_failures_total[step.idx()]
.load(Ordering::Relaxed)
}
pub fn get_me_writer_cleanup_side_effect_failures_total_all(&self) -> u64 {
MeWriterCleanupSideEffectStep::ALL
.iter()
.copied()
.map(|step| self.get_me_writer_cleanup_side_effect_failures_total(step))
.sum()
}
pub fn me_writer_teardown_duration_bucket_labels(
) -> &'static [&'static str; ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT] {
&ME_WRITER_TEARDOWN_DURATION_BUCKET_LABELS
}
pub fn get_me_writer_teardown_duration_bucket_hits(
&self,
mode: MeWriterTeardownMode,
bucket_idx: usize,
) -> u64 {
self.me_writer_teardown_duration_bucket_hits[mode.idx()][bucket_idx]
.load(Ordering::Relaxed)
}
pub fn get_me_writer_teardown_duration_bucket_total(
&self,
mode: MeWriterTeardownMode,
bucket_idx: usize,
) -> u64 {
let capped_idx = bucket_idx.min(ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT);
let mut total = 0u64;
for idx in 0..=capped_idx {
total = total.saturating_add(self.get_me_writer_teardown_duration_bucket_hits(mode, idx));
}
total
}
pub fn get_me_writer_teardown_duration_count(&self, mode: MeWriterTeardownMode) -> u64 {
self.me_writer_teardown_duration_count[mode.idx()].load(Ordering::Relaxed)
}
pub fn get_me_writer_teardown_duration_sum_seconds(&self, mode: MeWriterTeardownMode) -> f64 {
self.me_writer_teardown_duration_sum_micros[mode.idx()].load(Ordering::Relaxed) as f64
/ 1_000_000.0
}
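Taken together, the cumulative bucket totals, the sum in seconds, and the count are exactly the three series a Prometheus-style histogram needs. A hedged rendering sketch (the metric name is assumed, and `MeWriterTeardownMode::as_str` is assumed to exist by symmetry with the other enums in this file):

// Hypothetical /metrics rendering: Prometheus buckets are cumulative,
// which is what get_me_writer_teardown_duration_bucket_total computes.
fn render_teardown_histogram(stats: &Stats, mode: MeWriterTeardownMode) -> String {
    let mut out = String::new();
    let labels = Stats::me_writer_teardown_duration_bucket_labels();
    for (idx, le) in labels.iter().enumerate() {
        out.push_str(&format!(
            "me_writer_teardown_duration_seconds_bucket{{mode=\"{}\",le=\"{}\"}} {}\n",
            mode.as_str(), le,
            stats.get_me_writer_teardown_duration_bucket_total(mode, idx),
        ));
    }
    // Passing labels.len() caps at the overflow bucket, so this equals count.
    out.push_str(&format!(
        "me_writer_teardown_duration_seconds_bucket{{mode=\"{}\",le=\"+Inf\"}} {}\n",
        mode.as_str(),
        stats.get_me_writer_teardown_duration_bucket_total(mode, labels.len()),
    ));
    out.push_str(&format!(
        "me_writer_teardown_duration_seconds_sum{{mode=\"{}\"}} {}\n",
        mode.as_str(),
        stats.get_me_writer_teardown_duration_sum_seconds(mode),
    ));
    out.push_str(&format!(
        "me_writer_teardown_duration_seconds_count{{mode=\"{}\"}} {}\n",
        mode.as_str(),
        stats.get_me_writer_teardown_duration_count(mode),
    ));
    out
}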
pub fn get_me_refill_triggered_total(&self) -> u64 { pub fn get_me_refill_triggered_total(&self) -> u64 {
self.me_refill_triggered_total.load(Ordering::Relaxed) self.me_refill_triggered_total.load(Ordering::Relaxed)
} }
@@ -1769,6 +2085,79 @@ mod tests {
assert_eq!(stats.get_me_route_drop_queue_full(), 0); assert_eq!(stats.get_me_route_drop_queue_full(), 0);
} }
#[test]
fn test_teardown_counters_and_duration() {
let stats = Stats::new();
stats.increment_me_writer_teardown_attempt_total(
MeWriterTeardownReason::ReaderExit,
MeWriterTeardownMode::Normal,
);
stats.increment_me_writer_teardown_success_total(MeWriterTeardownMode::Normal);
stats.observe_me_writer_teardown_duration(
MeWriterTeardownMode::Normal,
Duration::from_millis(3),
);
stats.increment_me_writer_cleanup_side_effect_failures_total(
MeWriterCleanupSideEffectStep::CloseSignalChannelFull,
);
assert_eq!(
stats.get_me_writer_teardown_attempt_total(
MeWriterTeardownReason::ReaderExit,
MeWriterTeardownMode::Normal
),
1
);
assert_eq!(
stats.get_me_writer_teardown_success_total(MeWriterTeardownMode::Normal),
1
);
assert_eq!(
stats.get_me_writer_teardown_duration_count(MeWriterTeardownMode::Normal),
1
);
assert!(
stats.get_me_writer_teardown_duration_sum_seconds(MeWriterTeardownMode::Normal) > 0.0
);
assert_eq!(
stats.get_me_writer_cleanup_side_effect_failures_total(
MeWriterCleanupSideEffectStep::CloseSignalChannelFull
),
1
);
}
#[test]
fn test_teardown_counters_respect_me_silent() {
let stats = Stats::new();
stats.apply_telemetry_policy(TelemetryPolicy {
core_enabled: true,
user_enabled: true,
me_level: MeTelemetryLevel::Silent,
});
stats.increment_me_writer_teardown_attempt_total(
MeWriterTeardownReason::ReaderExit,
MeWriterTeardownMode::Normal,
);
stats.increment_me_writer_teardown_timeout_total();
stats.observe_me_writer_teardown_duration(
MeWriterTeardownMode::Normal,
Duration::from_millis(1),
);
assert_eq!(
stats.get_me_writer_teardown_attempt_total(
MeWriterTeardownReason::ReaderExit,
MeWriterTeardownMode::Normal
),
0
);
assert_eq!(stats.get_me_writer_teardown_timeout_total(), 0);
assert_eq!(
stats.get_me_writer_teardown_duration_count(MeWriterTeardownMode::Normal),
0
);
}
#[test] #[test]
fn test_replay_checker_basic() { fn test_replay_checker_basic() {
let checker = ReplayChecker::new(100, Duration::from_secs(60)); let checker = ReplayChecker::new(100, Duration::from_secs(60));

View File

@@ -394,15 +394,17 @@ async fn connect_tcp_with_upstream(
port: u16, port: u16,
connect_timeout: Duration, connect_timeout: Duration,
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>, upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
scope: Option<&str>,
) -> Result<TcpStream> { ) -> Result<TcpStream> {
if let Some(manager) = upstream { if let Some(manager) = upstream {
if let Some(addr) = resolve_socket_addr(host, port) { if let Some(addr) = resolve_socket_addr(host, port) {
match manager.connect(addr, None, None).await { match manager.connect(addr, None, scope).await {
Ok(stream) => return Ok(stream), Ok(stream) => return Ok(stream),
Err(e) => { Err(e) => {
warn!( warn!(
host = %host, host = %host,
port = port, port = port,
scope = ?scope,
error = %e, error = %e,
"Upstream connect failed, using direct connect" "Upstream connect failed, using direct connect"
); );
@@ -410,12 +412,13 @@ async fn connect_tcp_with_upstream(
} }
} else if let Ok(mut addrs) = tokio::net::lookup_host((host, port)).await { } else if let Ok(mut addrs) = tokio::net::lookup_host((host, port)).await {
if let Some(addr) = addrs.find(|a| a.is_ipv4()) { if let Some(addr) = addrs.find(|a| a.is_ipv4()) {
match manager.connect(addr, None, None).await { match manager.connect(addr, None, scope).await {
Ok(stream) => return Ok(stream), Ok(stream) => return Ok(stream),
Err(e) => { Err(e) => {
warn!( warn!(
host = %host, host = %host,
port = port, port = port,
scope = ?scope,
error = %e, error = %e,
"Upstream connect failed, using direct connect" "Upstream connect failed, using direct connect"
); );
@@ -537,6 +540,7 @@ async fn fetch_via_raw_tls(
sni: &str, sni: &str,
connect_timeout: Duration, connect_timeout: Duration,
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>, upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
scope: Option<&str>,
proxy_protocol: u8, proxy_protocol: u8,
unix_sock: Option<&str>, unix_sock: Option<&str>,
) -> Result<TlsFetchResult> { ) -> Result<TlsFetchResult> {
@@ -572,7 +576,7 @@ async fn fetch_via_raw_tls(
#[cfg(not(unix))] #[cfg(not(unix))]
let _ = unix_sock; let _ = unix_sock;
let stream = connect_tcp_with_upstream(host, port, connect_timeout, upstream).await?; let stream = connect_tcp_with_upstream(host, port, connect_timeout, upstream, scope).await?;
fetch_via_raw_tls_stream(stream, sni, connect_timeout, proxy_protocol).await fetch_via_raw_tls_stream(stream, sni, connect_timeout, proxy_protocol).await
} }
@@ -675,6 +679,7 @@ async fn fetch_via_rustls(
sni: &str, sni: &str,
connect_timeout: Duration, connect_timeout: Duration,
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>, upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
scope: Option<&str>,
proxy_protocol: u8, proxy_protocol: u8,
unix_sock: Option<&str>, unix_sock: Option<&str>,
) -> Result<TlsFetchResult> { ) -> Result<TlsFetchResult> {
@@ -710,7 +715,7 @@ async fn fetch_via_rustls(
#[cfg(not(unix))] #[cfg(not(unix))]
let _ = unix_sock; let _ = unix_sock;
let stream = connect_tcp_with_upstream(host, port, connect_timeout, upstream).await?; let stream = connect_tcp_with_upstream(host, port, connect_timeout, upstream, scope).await?;
fetch_via_rustls_stream(stream, host, sni, proxy_protocol).await fetch_via_rustls_stream(stream, host, sni, proxy_protocol).await
} }
@@ -726,6 +731,7 @@ pub async fn fetch_real_tls(
sni: &str, sni: &str,
connect_timeout: Duration, connect_timeout: Duration,
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>, upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
scope: Option<&str>,
proxy_protocol: u8, proxy_protocol: u8,
unix_sock: Option<&str>, unix_sock: Option<&str>,
) -> Result<TlsFetchResult> { ) -> Result<TlsFetchResult> {
@@ -735,6 +741,7 @@ pub async fn fetch_real_tls(
sni, sni,
connect_timeout, connect_timeout,
upstream.clone(), upstream.clone(),
scope,
proxy_protocol, proxy_protocol,
unix_sock, unix_sock,
) )
@@ -753,6 +760,7 @@ pub async fn fetch_real_tls(
sni, sni,
connect_timeout, connect_timeout,
upstream, upstream,
scope,
proxy_protocol, proxy_protocol,
unix_sock, unix_sock,
) )
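The net effect of threading `scope` through is that callers can tag fetcher traffic for upstream selection, while `None` preserves the old behaviour. An illustrative call site, assumed to live inside an async fn returning `Result` (argument order inferred from the hunks above; the "tls_fetcher" label and surrounding values are assumptions, not part of this diff):

// Illustrative only: the scope label lets UpstreamManager pick a
// scope-specific upstream; None keeps the previous selection behaviour.
let tls = fetch_real_tls(
    "example.org",                  // host
    443,                            // port
    "example.org",                  // sni
    Duration::from_secs(5),         // connect_timeout
    Some(upstream_manager.clone()), // upstream
    Some("tls_fetcher"),            // scope — assumed label
    0,                              // proxy_protocol: disabled
    None,                           // unix_sock
)
.await?;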

View File

@@ -298,6 +298,7 @@ async fn run_update_cycle(
pool.update_runtime_reinit_policy( pool.update_runtime_reinit_policy(
cfg.general.hardswap, cfg.general.hardswap,
cfg.general.me_pool_drain_ttl_secs, cfg.general.me_pool_drain_ttl_secs,
cfg.general.me_instadrain,
cfg.general.me_pool_drain_threshold, cfg.general.me_pool_drain_threshold,
cfg.general.me_pool_drain_soft_evict_enabled, cfg.general.me_pool_drain_soft_evict_enabled,
cfg.general.me_pool_drain_soft_evict_grace_secs, cfg.general.me_pool_drain_soft_evict_grace_secs,
@@ -530,6 +531,7 @@ pub async fn me_config_updater(
pool.update_runtime_reinit_policy( pool.update_runtime_reinit_policy(
cfg.general.hardswap, cfg.general.hardswap,
cfg.general.me_pool_drain_ttl_secs, cfg.general.me_pool_drain_ttl_secs,
cfg.general.me_instadrain,
cfg.general.me_pool_drain_threshold, cfg.general.me_pool_drain_threshold,
cfg.general.me_pool_drain_soft_evict_enabled, cfg.general.me_pool_drain_soft_evict_enabled,
cfg.general.me_pool_drain_soft_evict_grace_secs, cfg.general.me_pool_drain_soft_evict_grace_secs,

View File

@@ -10,8 +10,10 @@ use tracing::{debug, info, warn};
use crate::config::MeFloorMode; use crate::config::MeFloorMode;
use crate::crypto::SecureRandom; use crate::crypto::SecureRandom;
use crate::network::IpFamily; use crate::network::IpFamily;
use crate::stats::MeWriterTeardownReason;
use super::MePool; use super::MePool;
use super::pool::{MeFamilyRuntimeState, MeWriter};
const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff
#[allow(dead_code)] #[allow(dead_code)]
@@ -30,6 +32,35 @@ const HEALTH_DRAIN_CLOSE_BUDGET_MIN: usize = 16;
const HEALTH_DRAIN_CLOSE_BUDGET_MAX: usize = 256; const HEALTH_DRAIN_CLOSE_BUDGET_MAX: usize = 256;
const HEALTH_DRAIN_SOFT_EVICT_BUDGET_MIN: usize = 8; const HEALTH_DRAIN_SOFT_EVICT_BUDGET_MIN: usize = 8;
const HEALTH_DRAIN_SOFT_EVICT_BUDGET_MAX: usize = 256; const HEALTH_DRAIN_SOFT_EVICT_BUDGET_MAX: usize = 256;
const HEALTH_DRAIN_REAP_OPPORTUNISTIC_INTERVAL_SECS: u64 = 1;
const HEALTH_DRAIN_TIMEOUT_ENFORCER_INTERVAL_SECS: u64 = 1;
const FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD: u32 = 6;
const FAMILY_SUPPRESS_WINDOW_SECS: u64 = 120;
const FAMILY_RECOVER_PROBE_INTERVAL_SECS: u64 = 5;
const FAMILY_RECOVER_SUCCESS_STREAK_REQUIRED: u32 = 3;
#[derive(Debug, Clone)]
struct FamilyCircuitState {
state: MeFamilyRuntimeState,
state_since_at: Instant,
suppressed_until: Option<Instant>,
next_probe_at: Instant,
fail_streak: u32,
recover_success_streak: u32,
}
impl FamilyCircuitState {
fn new(now: Instant) -> Self {
Self {
state: MeFamilyRuntimeState::Healthy,
state_since_at: now,
suppressed_until: None,
next_probe_at: now,
fail_streak: 0,
recover_success_streak: 0,
}
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
struct DcFloorPlanEntry { struct DcFloorPlanEntry {
@@ -69,6 +100,25 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
let mut floor_warn_next_allowed: HashMap<(i32, IpFamily), Instant> = HashMap::new(); let mut floor_warn_next_allowed: HashMap<(i32, IpFamily), Instant> = HashMap::new();
let mut drain_warn_next_allowed: HashMap<u64, Instant> = HashMap::new(); let mut drain_warn_next_allowed: HashMap<u64, Instant> = HashMap::new();
let mut drain_soft_evict_next_allowed: HashMap<u64, Instant> = HashMap::new(); let mut drain_soft_evict_next_allowed: HashMap<u64, Instant> = HashMap::new();
let mut family_v4_circuit = FamilyCircuitState::new(Instant::now());
let mut family_v6_circuit = FamilyCircuitState::new(Instant::now());
let init_epoch_secs = MePool::now_epoch_secs();
pool.set_family_runtime_state(
IpFamily::V4,
family_v4_circuit.state,
init_epoch_secs,
0,
family_v4_circuit.fail_streak,
family_v4_circuit.recover_success_streak,
);
pool.set_family_runtime_state(
IpFamily::V6,
family_v6_circuit.state,
init_epoch_secs,
0,
family_v6_circuit.fail_streak,
family_v6_circuit.recover_success_streak,
);
let mut degraded_interval = true; let mut degraded_interval = true;
loop { loop {
let interval = if degraded_interval { let interval = if degraded_interval {
@@ -84,7 +134,9 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
&mut drain_soft_evict_next_allowed, &mut drain_soft_evict_next_allowed,
) )
.await; .await;
let v4_degraded = check_family( let now = Instant::now();
let now_epoch_secs = MePool::now_epoch_secs();
let v4_degraded_raw = check_family(
IpFamily::V4, IpFamily::V4,
&pool, &pool,
&rng, &rng,
@@ -99,9 +151,23 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
&mut adaptive_idle_since, &mut adaptive_idle_since,
&mut adaptive_recover_until, &mut adaptive_recover_until,
&mut floor_warn_next_allowed, &mut floor_warn_next_allowed,
&mut drain_warn_next_allowed,
&mut drain_soft_evict_next_allowed,
) )
.await; .await;
let v6_degraded = check_family( let v4_degraded = apply_family_circuit_result(
&pool,
IpFamily::V4,
&mut family_v4_circuit,
Some(v4_degraded_raw),
false,
now,
now_epoch_secs,
);
let v6_check_ran = should_run_family_check(&mut family_v6_circuit, now);
let v6_degraded_raw = if v6_check_ran {
check_family(
IpFamily::V6, IpFamily::V6,
&pool, &pool,
&rng, &rng,
@@ -116,12 +182,221 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
&mut adaptive_idle_since, &mut adaptive_idle_since,
&mut adaptive_recover_until, &mut adaptive_recover_until,
&mut floor_warn_next_allowed, &mut floor_warn_next_allowed,
&mut drain_warn_next_allowed,
&mut drain_soft_evict_next_allowed,
) )
.await; .await
} else {
false
};
let v6_degraded = apply_family_circuit_result(
&pool,
IpFamily::V6,
&mut family_v6_circuit,
if v6_check_ran {
Some(v6_degraded_raw)
} else {
None
},
true,
now,
now_epoch_secs,
);
degraded_interval = v4_degraded || v6_degraded; degraded_interval = v4_degraded || v6_degraded;
} }
} }
pub async fn me_drain_timeout_enforcer(pool: Arc<MePool>) {
let mut drain_warn_next_allowed: HashMap<u64, Instant> = HashMap::new();
let mut drain_soft_evict_next_allowed: HashMap<u64, Instant> = HashMap::new();
loop {
tokio::time::sleep(Duration::from_secs(
HEALTH_DRAIN_TIMEOUT_ENFORCER_INTERVAL_SECS,
))
.await;
reap_draining_writers(
&pool,
&mut drain_warn_next_allowed,
&mut drain_soft_evict_next_allowed,
)
.await;
}
}
fn should_run_family_check(circuit: &mut FamilyCircuitState, now: Instant) -> bool {
match circuit.state {
MeFamilyRuntimeState::Suppressed => {
if now < circuit.next_probe_at {
return false;
}
circuit.next_probe_at =
now + Duration::from_secs(FAMILY_RECOVER_PROBE_INTERVAL_SECS);
true
}
_ => true,
}
}
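While a family is suppressed, this gate turns the regular health tick into a rate-limited probe: at most one check per `FAMILY_RECOVER_PROBE_INTERVAL_SECS`, with the gate re-arming itself before the check runs. A small sketch of that cadence, written as a test so it is runnable from this module:

#[test]
fn suppressed_probe_cadence_sketch() {
    let now = Instant::now();
    let mut circuit = FamilyCircuitState::new(now);
    circuit.state = MeFamilyRuntimeState::Suppressed;
    circuit.next_probe_at = now + Duration::from_secs(5);
    // Too early: the gate blocks the check and leaves next_probe_at alone.
    assert!(!should_run_family_check(&mut circuit, now));
    // At the probe instant: the gate opens and re-arms itself 5s ahead.
    let probe_at = now + Duration::from_secs(5);
    assert!(should_run_family_check(&mut circuit, probe_at));
    assert_eq!(
        circuit.next_probe_at,
        probe_at + Duration::from_secs(FAMILY_RECOVER_PROBE_INTERVAL_SECS)
    );
}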
fn apply_family_circuit_result(
pool: &Arc<MePool>,
family: IpFamily,
circuit: &mut FamilyCircuitState,
degraded: Option<bool>,
allow_suppress: bool,
now: Instant,
now_epoch_secs: u64,
) -> bool {
let Some(degraded) = degraded else {
// Preserve suppression state when probe tick is intentionally skipped.
return false;
};
let previous_state = circuit.state;
match circuit.state {
MeFamilyRuntimeState::Suppressed => {
if degraded {
circuit.fail_streak = circuit.fail_streak.saturating_add(1);
circuit.recover_success_streak = 0;
let until = now + Duration::from_secs(FAMILY_SUPPRESS_WINDOW_SECS);
circuit.suppressed_until = Some(until);
circuit.state_since_at = now;
warn!(
?family,
fail_streak = circuit.fail_streak,
suppress_secs = FAMILY_SUPPRESS_WINDOW_SECS,
"ME family remains suppressed due to ongoing failures"
);
} else {
circuit.fail_streak = 0;
circuit.recover_success_streak = 1;
circuit.state = MeFamilyRuntimeState::Recovering;
}
}
MeFamilyRuntimeState::Recovering => {
if degraded {
circuit.fail_streak = circuit.fail_streak.saturating_add(1);
if allow_suppress {
circuit.state = MeFamilyRuntimeState::Suppressed;
let until = now + Duration::from_secs(FAMILY_SUPPRESS_WINDOW_SECS);
circuit.suppressed_until = Some(until);
circuit.next_probe_at =
now + Duration::from_secs(FAMILY_RECOVER_PROBE_INTERVAL_SECS);
warn!(
?family,
fail_streak = circuit.fail_streak,
suppress_secs = FAMILY_SUPPRESS_WINDOW_SECS,
"ME family temporarily suppressed after repeated degradation"
);
} else {
circuit.state = MeFamilyRuntimeState::Degraded;
}
} else {
circuit.recover_success_streak = circuit.recover_success_streak.saturating_add(1);
if circuit.recover_success_streak >= FAMILY_RECOVER_SUCCESS_STREAK_REQUIRED {
circuit.fail_streak = 0;
circuit.recover_success_streak = 0;
circuit.suppressed_until = None;
circuit.state = MeFamilyRuntimeState::Healthy;
info!(
?family,
"ME family suppression lifted after stable recovery probes"
);
}
}
}
_ => {
if degraded {
circuit.fail_streak = circuit.fail_streak.saturating_add(1);
circuit.recover_success_streak = 0;
circuit.state = MeFamilyRuntimeState::Degraded;
if allow_suppress && circuit.fail_streak >= FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD {
circuit.state = MeFamilyRuntimeState::Suppressed;
let until = now + Duration::from_secs(FAMILY_SUPPRESS_WINDOW_SECS);
circuit.suppressed_until = Some(until);
circuit.next_probe_at =
now + Duration::from_secs(FAMILY_RECOVER_PROBE_INTERVAL_SECS);
warn!(
?family,
fail_streak = circuit.fail_streak,
suppress_secs = FAMILY_SUPPRESS_WINDOW_SECS,
"ME family temporarily suppressed after repeated degradation"
);
}
} else {
circuit.fail_streak = 0;
circuit.recover_success_streak = 0;
circuit.suppressed_until = None;
circuit.state = MeFamilyRuntimeState::Healthy;
}
}
}
if previous_state != circuit.state {
circuit.state_since_at = now;
}
let suppressed_until_epoch_secs = circuit
.suppressed_until
.and_then(|until| {
if until > now {
Some(
now_epoch_secs
.saturating_add(until.saturating_duration_since(now).as_secs()),
)
} else {
None
}
})
.unwrap_or(0);
let state_since_epoch_secs = if previous_state == circuit.state {
pool.family_runtime_state_since_epoch_secs(family)
} else {
now_epoch_secs
};
pool.set_family_runtime_state(
family,
circuit.state,
state_since_epoch_secs,
suppressed_until_epoch_secs,
circuit.fail_streak,
circuit.recover_success_streak,
);
!matches!(circuit.state, MeFamilyRuntimeState::Suppressed) && degraded
}
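Read end to end, this function is a per-family circuit breaker: repeated degradation escalates Healthy through Degraded into Suppressed (when `allow_suppress` is set), and only a streak of clean probes walks it back through Recovering to Healthy. A sketch of the escalation path, mirroring the test-module helpers (`make_pool` is defined in this file's test section):

#[tokio::test]
async fn repeated_degradation_suppresses_family_sketch() {
    let pool = make_pool(0).await;
    let now = Instant::now();
    let now_epoch_secs = MePool::now_epoch_secs();
    let mut circuit = FamilyCircuitState::new(now);
    // Six consecutive degraded results on a suppress-eligible family:
    // Healthy -> Degraded on the first, Suppressed once the fail streak
    // reaches FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD.
    for _ in 0..FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD {
        apply_family_circuit_result(
            &pool, IpFamily::V6, &mut circuit, Some(true), true, now, now_epoch_secs,
        );
    }
    assert_eq!(circuit.state, MeFamilyRuntimeState::Suppressed);
    assert_eq!(circuit.fail_streak, FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD);
}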
fn draining_writer_timeout_expired(
pool: &MePool,
writer: &MeWriter,
now_epoch_secs: u64,
drain_ttl_secs: u64,
) -> bool {
if pool
.me_instadrain
.load(std::sync::atomic::Ordering::Relaxed)
{
return true;
}
let deadline_epoch_secs = writer
.drain_deadline_epoch_secs
.load(std::sync::atomic::Ordering::Relaxed);
if deadline_epoch_secs != 0 {
return now_epoch_secs >= deadline_epoch_secs;
}
if drain_ttl_secs == 0 {
return false;
}
let drain_started_at_epoch_secs = writer
.draining_started_at_epoch_secs
.load(std::sync::atomic::Ordering::Relaxed);
if drain_started_at_epoch_secs == 0 {
return false;
}
now_epoch_secs.saturating_sub(drain_started_at_epoch_secs) > drain_ttl_secs
}
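The expiry check has a strict precedence, summarised below: instadrain wins unconditionally, an explicit deadline beats the TTL, and the TTL only applies once a drain start timestamp has been recorded.

// Precedence inside draining_writer_timeout_expired:
//   1. me_instadrain == true             -> expired (unconditionally)
//   2. drain_deadline_epoch_secs != 0    -> expired iff now >= deadline
//   3. drain_ttl_secs == 0               -> never expired via TTL
//   4. draining_started_at == 0          -> not expired (drain not started)
//   5. otherwise                         -> expired iff (now - started) > ttl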
pub(super) async fn reap_draining_writers( pub(super) async fn reap_draining_writers(
pool: &Arc<MePool>, pool: &Arc<MePool>,
warn_next_allowed: &mut HashMap<u64, Instant>, warn_next_allowed: &mut HashMap<u64, Instant>,
@@ -137,11 +412,16 @@ pub(super) async fn reap_draining_writers(
let activity = pool.registry.writer_activity_snapshot().await; let activity = pool.registry.writer_activity_snapshot().await;
let mut draining_writers = Vec::new(); let mut draining_writers = Vec::new();
let mut empty_writer_ids = Vec::<u64>::new(); let mut empty_writer_ids = Vec::<u64>::new();
let mut timeout_expired_writer_ids = Vec::<u64>::new();
let mut force_close_writer_ids = Vec::<u64>::new(); let mut force_close_writer_ids = Vec::<u64>::new();
for writer in writers { for writer in writers {
if !writer.draining.load(std::sync::atomic::Ordering::Relaxed) { if !writer.draining.load(std::sync::atomic::Ordering::Relaxed) {
continue; continue;
} }
if draining_writer_timeout_expired(pool, &writer, now_epoch_secs, drain_ttl_secs) {
timeout_expired_writer_ids.push(writer.id);
continue;
}
if activity if activity
.bound_clients_by_writer .bound_clients_by_writer
.get(&writer.id) .get(&writer.id)
@@ -207,14 +487,6 @@ pub(super) async fn reap_draining_writers(
"ME draining writer remains non-empty past drain TTL" "ME draining writer remains non-empty past drain TTL"
); );
} }
let deadline_epoch_secs = writer
.drain_deadline_epoch_secs
.load(std::sync::atomic::Ordering::Relaxed);
if deadline_epoch_secs != 0 && now_epoch_secs >= deadline_epoch_secs {
warn!(writer_id = writer.id, "Drain timeout, force-closing");
force_close_writer_ids.push(writer.id);
active_draining_writer_ids.remove(&writer.id);
}
} }
warn_next_allowed.retain(|writer_id, _| active_draining_writer_ids.contains(writer_id)); warn_next_allowed.retain(|writer_id, _| active_draining_writer_ids.contains(writer_id));
@@ -299,11 +571,22 @@ pub(super) async fn reap_draining_writers(
} }
} }
let close_budget = health_drain_close_budget(); let mut closed_writer_ids = HashSet::<u64>::new();
for writer_id in timeout_expired_writer_ids {
if !closed_writer_ids.insert(writer_id) {
continue;
}
pool.stats.increment_pool_force_close_total();
pool.remove_writer_and_close_clients(writer_id, MeWriterTeardownReason::ReapTimeoutExpired)
.await;
pool.stats
.increment_me_draining_writers_reap_progress_total();
}
let requested_force_close = force_close_writer_ids.len(); let requested_force_close = force_close_writer_ids.len();
let requested_empty_close = empty_writer_ids.len(); let requested_empty_close = empty_writer_ids.len();
let requested_close_total = requested_force_close.saturating_add(requested_empty_close); let requested_close_total = requested_force_close.saturating_add(requested_empty_close);
let mut closed_writer_ids = HashSet::<u64>::new(); let close_budget = health_drain_close_budget();
let mut closed_total = 0usize; let mut closed_total = 0usize;
for writer_id in force_close_writer_ids { for writer_id in force_close_writer_ids {
if closed_total >= close_budget { if closed_total >= close_budget {
@@ -313,7 +596,10 @@ pub(super) async fn reap_draining_writers(
continue; continue;
} }
pool.stats.increment_pool_force_close_total(); pool.stats.increment_pool_force_close_total();
pool.remove_writer_and_close_clients(writer_id).await; pool.remove_writer_and_close_clients(writer_id, MeWriterTeardownReason::ReapThresholdForce)
.await;
pool.stats
.increment_me_draining_writers_reap_progress_total();
closed_total = closed_total.saturating_add(1); closed_total = closed_total.saturating_add(1);
} }
for writer_id in empty_writer_ids { for writer_id in empty_writer_ids {
@@ -323,7 +609,10 @@ pub(super) async fn reap_draining_writers(
if !closed_writer_ids.insert(writer_id) { if !closed_writer_ids.insert(writer_id) {
continue; continue;
} }
pool.remove_writer_and_close_clients(writer_id).await; pool.remove_writer_and_close_clients(writer_id, MeWriterTeardownReason::ReapEmpty)
.await;
pool.stats
.increment_me_draining_writers_reap_progress_total();
closed_total = closed_total.saturating_add(1); closed_total = closed_total.saturating_add(1);
} }
@@ -392,6 +681,8 @@ async fn check_family(
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>, adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>, adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
floor_warn_next_allowed: &mut HashMap<(i32, IpFamily), Instant>, floor_warn_next_allowed: &mut HashMap<(i32, IpFamily), Instant>,
drain_warn_next_allowed: &mut HashMap<u64, Instant>,
drain_soft_evict_next_allowed: &mut HashMap<u64, Instant>,
) -> bool { ) -> bool {
let enabled = match family { let enabled = match family {
IpFamily::V4 => pool.decision.ipv4_me, IpFamily::V4 => pool.decision.ipv4_me,
@@ -472,8 +763,15 @@ async fn check_family(
floor_plan.active_writers_current, floor_plan.active_writers_current,
floor_plan.warm_writers_current, floor_plan.warm_writers_current,
); );
let mut next_drain_reap_at = Instant::now();
for (dc, endpoints) in dc_endpoints { for (dc, endpoints) in dc_endpoints {
if Instant::now() >= next_drain_reap_at {
reap_draining_writers(pool, drain_warn_next_allowed, drain_soft_evict_next_allowed)
.await;
next_drain_reap_at = Instant::now()
+ Duration::from_secs(HEALTH_DRAIN_REAP_OPPORTUNISTIC_INTERVAL_SECS);
}
if endpoints.is_empty() { if endpoints.is_empty() {
continue; continue;
} }
@@ -617,6 +915,12 @@ async fn check_family(
let mut restored = 0usize; let mut restored = 0usize;
for _ in 0..missing { for _ in 0..missing {
if Instant::now() >= next_drain_reap_at {
reap_draining_writers(pool, drain_warn_next_allowed, drain_soft_evict_next_allowed)
.await;
next_drain_reap_at = Instant::now()
+ Duration::from_secs(HEALTH_DRAIN_REAP_OPPORTUNISTIC_INTERVAL_SECS);
}
if reconnect_budget == 0 { if reconnect_budget == 0 {
break; break;
} }
@@ -1468,6 +1772,187 @@ async fn maybe_rotate_single_endpoint_shadow(
); );
} }
/// Last-resort safety net for draining writers stuck past their deadline.
///
/// Runs every `TICK_SECS` seconds and force-closes any draining writer whose
/// `drain_deadline_epoch_secs` has been exceeded by more than a threshold.
///
/// Two thresholds:
/// - `SOFT_THRESHOLD_SECS` (60s): the deadline is overdue by more than 60s
/// - `HARD_THRESHOLD_SECS` (300s): the writer has been draining for more
///   than 300s in total, regardless of how far past the deadline it is
///
/// Intentionally kept trivial and independent of pool config to minimise the
/// probability of panicking itself. It uses `SystemTime` directly as a
/// fallback clock source and puts a timeout on every lock acquisition and
/// writer removal, so one stuck writer cannot block the rest.
pub async fn me_zombie_writer_watchdog(pool: Arc<MePool>) {
use std::time::{SystemTime, UNIX_EPOCH};
const TICK_SECS: u64 = 30;
const SOFT_THRESHOLD_SECS: u64 = 60;
const HARD_THRESHOLD_SECS: u64 = 300;
const LOCK_TIMEOUT_SECS: u64 = 5;
const REMOVE_TIMEOUT_SECS: u64 = 10;
const HARD_DETACH_TIMEOUT_STREAK: u8 = 3;
let mut removal_timeout_streak = HashMap::<u64, u8>::new();
loop {
tokio::time::sleep(Duration::from_secs(TICK_SECS)).await;
let now = match SystemTime::now().duration_since(UNIX_EPOCH) {
Ok(d) => d.as_secs(),
Err(_) => continue,
};
// Phase 1: collect zombie IDs under a short read-lock with timeout.
// The bool flags which threshold fired: true for the hard drain-age
// threshold, false for the soft overdue-deadline threshold.
let zombie_ids_with_meta: Vec<(u64, bool)> = {
let Ok(ws) = tokio::time::timeout(
Duration::from_secs(LOCK_TIMEOUT_SECS),
pool.writers.read(),
)
.await
else {
warn!("zombie_watchdog: writers read-lock timeout, skipping tick");
continue;
};
ws.iter()
.filter(|w| w.draining.load(std::sync::atomic::Ordering::Relaxed))
.filter_map(|w| {
let deadline = w
.drain_deadline_epoch_secs
.load(std::sync::atomic::Ordering::Relaxed);
if deadline == 0 {
return None;
}
let overdue = now.saturating_sub(deadline);
if overdue == 0 {
return None;
}
let started = w
.draining_started_at_epoch_secs
.load(std::sync::atomic::Ordering::Relaxed);
let drain_age = now.saturating_sub(started);
if drain_age > HARD_THRESHOLD_SECS {
return Some((w.id, true));
}
if overdue > SOFT_THRESHOLD_SECS {
return Some((w.id, false));
}
None
})
.collect()
};
// read lock released here
if zombie_ids_with_meta.is_empty() {
removal_timeout_streak.clear();
continue;
}
let mut active_zombie_ids = HashSet::<u64>::with_capacity(zombie_ids_with_meta.len());
for (writer_id, _) in &zombie_ids_with_meta {
active_zombie_ids.insert(*writer_id);
}
removal_timeout_streak.retain(|writer_id, _| active_zombie_ids.contains(writer_id));
warn!(
zombie_count = zombie_ids_with_meta.len(),
soft_threshold_secs = SOFT_THRESHOLD_SECS,
hard_threshold_secs = HARD_THRESHOLD_SECS,
"Zombie draining writers detected by watchdog, force-closing"
);
// Phase 2: remove each writer individually with a timeout.
// One stuck removal cannot block the rest.
for (writer_id, had_clients) in &zombie_ids_with_meta {
let result = tokio::time::timeout(
Duration::from_secs(REMOVE_TIMEOUT_SECS),
pool.remove_writer_and_close_clients(
*writer_id,
MeWriterTeardownReason::WatchdogStuckDraining,
),
)
.await;
match result {
Ok(true) => {
removal_timeout_streak.remove(writer_id);
pool.stats.increment_pool_force_close_total();
pool.stats
.increment_me_draining_writers_reap_progress_total();
info!(
writer_id,
had_clients,
"Zombie writer removed by watchdog"
);
}
Ok(false) => {
removal_timeout_streak.remove(writer_id);
debug!(
writer_id,
had_clients,
"Zombie writer watchdog removal became no-op"
);
}
Err(_) => {
pool.stats.increment_me_writer_teardown_timeout_total();
let streak = removal_timeout_streak
.entry(*writer_id)
.and_modify(|value| *value = value.saturating_add(1))
.or_insert(1);
warn!(
writer_id,
had_clients,
timeout_streak = *streak,
"Zombie writer removal timed out"
);
if *streak < HARD_DETACH_TIMEOUT_STREAK {
continue;
}
pool.stats.increment_me_writer_teardown_escalation_total();
let hard_detach = tokio::time::timeout(
Duration::from_secs(REMOVE_TIMEOUT_SECS),
pool.remove_draining_writer_hard_detach(
*writer_id,
MeWriterTeardownReason::WatchdogStuckDraining,
),
)
.await;
match hard_detach {
Ok(true) => {
removal_timeout_streak.remove(writer_id);
pool.stats.increment_pool_force_close_total();
pool.stats
.increment_me_draining_writers_reap_progress_total();
info!(
writer_id,
had_clients,
"Zombie writer hard-detached after repeated timeouts"
);
}
Ok(false) => {
removal_timeout_streak.remove(writer_id);
debug!(
writer_id,
had_clients,
"Zombie hard-detach skipped (writer already gone or no longer draining)"
);
}
Err(_) => {
pool.stats.increment_me_writer_teardown_timeout_total();
warn!(
writer_id,
had_clients,
"Zombie hard-detach timed out, will retry next tick"
);
}
}
}
}
}
}
}
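A hypothetical startup wiring sketch (the spawn site is outside this diff, and the health monitor's third argument is elided in the hunks above, shown here as `min_conns`): the monitor, the dedicated one-second drain-timeout enforcer, and the 30-second zombie watchdog run as independent tasks, so each safety net still fires if another one stalls.

// Assumed wiring: three independent tasks over the shared pool.
tokio::spawn(me_health_monitor(pool.clone(), rng.clone(), min_conns));
tokio::spawn(me_drain_timeout_enforcer(pool.clone()));
tokio::spawn(me_zombie_writer_watchdog(pool.clone()));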
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::collections::HashMap; use std::collections::HashMap;
@@ -1479,13 +1964,19 @@ mod tests {
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use super::reap_draining_writers; use super::{
FamilyCircuitState, apply_family_circuit_result, reap_draining_writers,
should_run_family_check,
};
use crate::config::{GeneralConfig, MeRouteNoWriterMode, MeSocksKdfPolicy, MeWriterPickMode}; use crate::config::{GeneralConfig, MeRouteNoWriterMode, MeSocksKdfPolicy, MeWriterPickMode};
use crate::crypto::SecureRandom; use crate::crypto::SecureRandom;
use crate::network::IpFamily;
use crate::network::probe::NetworkDecision; use crate::network::probe::NetworkDecision;
use crate::stats::Stats; use crate::stats::Stats;
use crate::transport::middle_proxy::codec::WriterCommand; use crate::transport::middle_proxy::codec::WriterCommand;
use crate::transport::middle_proxy::pool::{MePool, MeWriter, WriterContour}; use crate::transport::middle_proxy::pool::{
MeFamilyRuntimeState, MePool, MeWriter, WriterContour,
};
use crate::transport::middle_proxy::registry::ConnMeta; use crate::transport::middle_proxy::registry::ConnMeta;
async fn make_pool(me_pool_drain_threshold: u64) -> Arc<MePool> { async fn make_pool(me_pool_drain_threshold: u64) -> Arc<MePool> {
@@ -1544,6 +2035,7 @@ mod tests {
general.me_adaptive_floor_max_warm_writers_global, general.me_adaptive_floor_max_warm_writers_global,
general.hardswap, general.hardswap,
general.me_pool_drain_ttl_secs, general.me_pool_drain_ttl_secs,
general.me_instadrain,
general.me_pool_drain_threshold, general.me_pool_drain_threshold,
general.me_pool_drain_soft_evict_enabled, general.me_pool_drain_soft_evict_enabled,
general.me_pool_drain_soft_evict_grace_secs, general.me_pool_drain_soft_evict_grace_secs,
@@ -1662,4 +2154,47 @@ mod tests {
assert_eq!(pool.registry.get_writer(conn_b).await.unwrap().writer_id, 20); assert_eq!(pool.registry.get_writer(conn_b).await.unwrap().writer_id, 20);
assert_eq!(pool.registry.get_writer(conn_c).await.unwrap().writer_id, 30); assert_eq!(pool.registry.get_writer(conn_c).await.unwrap().writer_id, 30);
} }
#[tokio::test]
async fn suppressed_family_probe_skip_preserves_suppressed_state() {
let pool = make_pool(0).await;
let now = Instant::now();
let now_epoch_secs = MePool::now_epoch_secs();
let suppressed_until_epoch_secs = now_epoch_secs.saturating_add(60);
pool.set_family_runtime_state(
IpFamily::V6,
MeFamilyRuntimeState::Suppressed,
now_epoch_secs,
suppressed_until_epoch_secs,
7,
0,
);
let mut circuit = FamilyCircuitState {
state: MeFamilyRuntimeState::Suppressed,
state_since_at: now,
suppressed_until: Some(now + Duration::from_secs(60)),
next_probe_at: now + Duration::from_secs(5),
fail_streak: 7,
recover_success_streak: 0,
};
assert!(!should_run_family_check(&mut circuit, now));
assert!(!apply_family_circuit_result(
&pool,
IpFamily::V6,
&mut circuit,
None,
true,
now,
now_epoch_secs,
));
assert_eq!(circuit.state, MeFamilyRuntimeState::Suppressed);
assert_eq!(circuit.fail_streak, 7);
assert_eq!(circuit.recover_success_streak, 0);
assert_eq!(
pool.family_runtime_state(IpFamily::V6),
MeFamilyRuntimeState::Suppressed,
);
}
} }

View File

@@ -81,6 +81,7 @@ async fn make_pool(
general.me_adaptive_floor_max_warm_writers_global, general.me_adaptive_floor_max_warm_writers_global,
general.hardswap, general.hardswap,
general.me_pool_drain_ttl_secs, general.me_pool_drain_ttl_secs,
general.me_instadrain,
general.me_pool_drain_threshold, general.me_pool_drain_threshold,
general.me_pool_drain_soft_evict_enabled, general.me_pool_drain_soft_evict_enabled,
general.me_pool_drain_soft_evict_grace_secs, general.me_pool_drain_soft_evict_grace_secs,
@@ -213,7 +214,7 @@ async fn reap_draining_writers_respects_threshold_across_multiple_overflow_cycle
insert_draining_writer( insert_draining_writer(
&pool, &pool,
writer_id, writer_id,
now_epoch_secs.saturating_sub(600).saturating_add(writer_id), now_epoch_secs.saturating_sub(20),
1, 1,
0, 0,
) )
@@ -230,7 +231,7 @@ async fn reap_draining_writers_respects_threshold_across_multiple_overflow_cycle
} }
assert_eq!(writer_count(&pool).await, threshold as usize); assert_eq!(writer_count(&pool).await, threshold as usize);
assert_eq!(sorted_writer_ids(&pool).await, vec![58, 59, 60]); assert_eq!(sorted_writer_ids(&pool).await, vec![1, 2, 3]);
} }
#[tokio::test] #[tokio::test]
@@ -315,7 +316,12 @@ async fn reap_draining_writers_maintains_warn_state_subset_property_under_bulk_c
let ids = sorted_writer_ids(&pool).await; let ids = sorted_writer_ids(&pool).await;
for writer_id in ids.into_iter().take(3) { for writer_id in ids.into_iter().take(3) {
let _ = pool.remove_writer_and_close_clients(writer_id).await; let _ = pool
.remove_writer_and_close_clients(
writer_id,
crate::stats::MeWriterTeardownReason::ReapEmpty,
)
.await;
} }
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await; reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;

View File

@@ -80,6 +80,7 @@ async fn make_pool(
general.me_adaptive_floor_max_warm_writers_global, general.me_adaptive_floor_max_warm_writers_global,
general.hardswap, general.hardswap,
general.me_pool_drain_ttl_secs, general.me_pool_drain_ttl_secs,
general.me_instadrain,
general.me_pool_drain_threshold, general.me_pool_drain_threshold,
general.me_pool_drain_soft_evict_enabled, general.me_pool_drain_soft_evict_enabled,
general.me_pool_drain_soft_evict_grace_secs, general.me_pool_drain_soft_evict_grace_secs,

View File

@@ -4,6 +4,7 @@ use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, AtomicU64, Ordering}; use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, AtomicU64, Ordering};
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use bytes::Bytes;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
@@ -11,7 +12,9 @@ use super::codec::WriterCommand;
use super::health::{health_drain_close_budget, reap_draining_writers}; use super::health::{health_drain_close_budget, reap_draining_writers};
use super::pool::{MePool, MeWriter, WriterContour}; use super::pool::{MePool, MeWriter, WriterContour};
use super::registry::ConnMeta; use super::registry::ConnMeta;
use crate::config::{GeneralConfig, MeRouteNoWriterMode, MeSocksKdfPolicy, MeWriterPickMode}; use crate::config::{
GeneralConfig, MeBindStaleMode, MeRouteNoWriterMode, MeSocksKdfPolicy, MeWriterPickMode,
};
use crate::crypto::SecureRandom; use crate::crypto::SecureRandom;
use crate::network::probe::NetworkDecision; use crate::network::probe::NetworkDecision;
use crate::stats::Stats; use crate::stats::Stats;
@@ -73,6 +76,7 @@ async fn make_pool(me_pool_drain_threshold: u64) -> Arc<MePool> {
general.me_adaptive_floor_max_warm_writers_global, general.me_adaptive_floor_max_warm_writers_global,
general.hardswap, general.hardswap,
general.me_pool_drain_ttl_secs, general.me_pool_drain_ttl_secs,
general.me_instadrain,
general.me_pool_drain_threshold, general.me_pool_drain_threshold,
general.me_pool_drain_soft_evict_enabled, general.me_pool_drain_soft_evict_enabled,
general.me_pool_drain_soft_evict_grace_secs, general.me_pool_drain_soft_evict_grace_secs,
@@ -179,15 +183,23 @@ async fn current_writer_ids(pool: &Arc<MePool>) -> Vec<u64> {
async fn reap_draining_writers_drops_warn_state_for_removed_writer() { async fn reap_draining_writers_drops_warn_state_for_removed_writer() {
let pool = make_pool(128).await; let pool = make_pool(128).await;
let now_epoch_secs = MePool::now_epoch_secs(); let now_epoch_secs = MePool::now_epoch_secs();
let conn_ids = let conn_ids = insert_draining_writer(
insert_draining_writer(&pool, 7, now_epoch_secs.saturating_sub(180), 1, 0).await; &pool,
7,
now_epoch_secs.saturating_sub(180),
1,
now_epoch_secs.saturating_add(3_600),
)
.await;
let mut warn_next_allowed = HashMap::new(); let mut warn_next_allowed = HashMap::new();
let mut soft_evict_next_allowed = HashMap::new(); let mut soft_evict_next_allowed = HashMap::new();
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await; reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
assert!(warn_next_allowed.contains_key(&7)); assert!(warn_next_allowed.contains_key(&7));
let _ = pool.remove_writer_and_close_clients(7).await; let _ = pool
.remove_writer_and_close_clients(7, crate::stats::MeWriterTeardownReason::ReapEmpty)
.await;
assert!(pool.registry.get_writer(conn_ids[0]).await.is_none()); assert!(pool.registry.get_writer(conn_ids[0]).await.is_none());
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await; reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
@@ -209,6 +221,89 @@ async fn reap_draining_writers_removes_empty_draining_writers() {
assert_eq!(current_writer_ids(&pool).await, vec![3]); assert_eq!(current_writer_ids(&pool).await, vec![3]);
} }
#[tokio::test]
async fn reap_draining_writers_does_not_block_on_stuck_writer_close_signal() {
let pool = make_pool(128).await;
let now_epoch_secs = MePool::now_epoch_secs();
let (blocked_tx, blocked_rx) = mpsc::channel::<WriterCommand>(1);
assert!(
blocked_tx
.try_send(WriterCommand::Data(Bytes::from_static(b"stuck")))
.is_ok()
);
let blocked_rx_guard = tokio::spawn(async move {
let _hold_rx = blocked_rx;
tokio::time::sleep(Duration::from_secs(30)).await;
});
let blocked_writer_id = 90u64;
let blocked_writer = MeWriter {
id: blocked_writer_id,
addr: SocketAddr::new(
IpAddr::V4(Ipv4Addr::LOCALHOST),
4500 + blocked_writer_id as u16,
),
source_ip: IpAddr::V4(Ipv4Addr::LOCALHOST),
writer_dc: 2,
generation: 1,
contour: Arc::new(AtomicU8::new(WriterContour::Draining.as_u8())),
created_at: Instant::now() - Duration::from_secs(blocked_writer_id),
tx: blocked_tx.clone(),
cancel: CancellationToken::new(),
degraded: Arc::new(AtomicBool::new(false)),
rtt_ema_ms_x10: Arc::new(AtomicU32::new(0)),
draining: Arc::new(AtomicBool::new(true)),
draining_started_at_epoch_secs: Arc::new(AtomicU64::new(
now_epoch_secs.saturating_sub(120),
)),
drain_deadline_epoch_secs: Arc::new(AtomicU64::new(0)),
allow_drain_fallback: Arc::new(AtomicBool::new(false)),
};
pool.writers.write().await.push(blocked_writer);
pool.registry
.register_writer(blocked_writer_id, blocked_tx)
.await;
pool.conn_count.fetch_add(1, Ordering::Relaxed);
insert_draining_writer(&pool, 91, now_epoch_secs.saturating_sub(110), 0, 0).await;
let mut warn_next_allowed = HashMap::new();
let mut soft_evict_next_allowed = HashMap::new();
let reap_res = tokio::time::timeout(
Duration::from_millis(500),
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed),
)
.await;
blocked_rx_guard.abort();
assert!(reap_res.is_ok(), "reap should not block on close signal");
assert!(current_writer_ids(&pool).await.is_empty());
assert_eq!(pool.stats.get_me_writer_close_signal_drop_total(), 2);
assert_eq!(pool.stats.get_me_writer_close_signal_channel_full_total(), 1);
assert_eq!(pool.stats.get_me_draining_writers_reap_progress_total(), 2);
let activity = pool.registry.writer_activity_snapshot().await;
assert!(!activity.bound_clients_by_writer.contains_key(&blocked_writer_id));
assert!(!activity.bound_clients_by_writer.contains_key(&91));
let (probe_conn_id, _rx) = pool.registry.register().await;
assert!(
!pool.registry
.bind_writer(
probe_conn_id,
blocked_writer_id,
ConnMeta {
target_dc: 2,
client_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 6400),
our_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 443),
proto_flags: 0,
},
)
.await
);
let _ = pool.registry.unregister(probe_conn_id).await;
}
#[tokio::test] #[tokio::test]
async fn reap_draining_writers_overflow_closes_oldest_non_empty_writers() { async fn reap_draining_writers_overflow_closes_oldest_non_empty_writers() {
let pool = make_pool(2).await; let pool = make_pool(2).await;
@@ -247,17 +342,17 @@ async fn reap_draining_writers_deadline_force_close_applies_under_threshold() {
#[tokio::test] #[tokio::test]
async fn reap_draining_writers_limits_closes_per_health_tick() { async fn reap_draining_writers_limits_closes_per_health_tick() {
let pool = make_pool(128).await; let pool = make_pool(1).await;
let now_epoch_secs = MePool::now_epoch_secs(); let now_epoch_secs = MePool::now_epoch_secs();
let close_budget = health_drain_close_budget(); let close_budget = health_drain_close_budget();
let writer_total = close_budget.saturating_add(19); let writer_total = close_budget.saturating_add(20);
for writer_id in 1..=writer_total as u64 { for writer_id in 1..=writer_total as u64 {
insert_draining_writer( insert_draining_writer(
&pool, &pool,
writer_id, writer_id,
now_epoch_secs.saturating_sub(20), now_epoch_secs.saturating_sub(20),
1, 1,
now_epoch_secs.saturating_sub(1), 0,
) )
.await; .await;
} }
@@ -280,8 +375,8 @@ async fn reap_draining_writers_backlog_drains_across_ticks() {
&pool, &pool,
writer_id, writer_id,
now_epoch_secs.saturating_sub(20), now_epoch_secs.saturating_sub(20),
1, 0,
now_epoch_secs.saturating_sub(1), 0,
) )
.await; .await;
} }
@@ -309,7 +404,7 @@ async fn reap_draining_writers_threshold_backlog_converges_to_threshold() {
insert_draining_writer( insert_draining_writer(
&pool, &pool,
writer_id, writer_id,
now_epoch_secs.saturating_sub(200).saturating_add(writer_id), now_epoch_secs.saturating_sub(20),
1, 1,
0, 0,
) )
@@ -345,27 +440,27 @@ async fn reap_draining_writers_threshold_zero_preserves_non_expired_non_empty_wr
#[tokio::test] #[tokio::test]
async fn reap_draining_writers_prioritizes_force_close_before_empty_cleanup() { async fn reap_draining_writers_prioritizes_force_close_before_empty_cleanup() {
let pool = make_pool(128).await; let pool = make_pool(1).await;
let now_epoch_secs = MePool::now_epoch_secs(); let now_epoch_secs = MePool::now_epoch_secs();
let close_budget = health_drain_close_budget(); let close_budget = health_drain_close_budget();
for writer_id in 1..=close_budget as u64 { for writer_id in 1..=close_budget.saturating_add(1) as u64 {
insert_draining_writer( insert_draining_writer(
&pool, &pool,
writer_id, writer_id,
now_epoch_secs.saturating_sub(20), now_epoch_secs.saturating_sub(20),
1, 1,
now_epoch_secs.saturating_sub(1), 0,
) )
.await; .await;
} }
let empty_writer_id = close_budget as u64 + 1; let empty_writer_id = close_budget.saturating_add(2) as u64;
insert_draining_writer(&pool, empty_writer_id, now_epoch_secs.saturating_sub(20), 0, 0).await; insert_draining_writer(&pool, empty_writer_id, now_epoch_secs.saturating_sub(20), 0, 0).await;
let mut warn_next_allowed = HashMap::new(); let mut warn_next_allowed = HashMap::new();
let mut soft_evict_next_allowed = HashMap::new(); let mut soft_evict_next_allowed = HashMap::new();
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await; reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
assert_eq!(current_writer_ids(&pool).await, vec![empty_writer_id]); assert_eq!(current_writer_ids(&pool).await, vec![1, empty_writer_id]);
} }
#[tokio::test] #[tokio::test]
@@ -434,7 +529,12 @@ async fn reap_draining_writers_warn_state_never_exceeds_live_draining_population
let existing_writer_ids = current_writer_ids(&pool).await; let existing_writer_ids = current_writer_ids(&pool).await;
for writer_id in existing_writer_ids.into_iter().take(4) { for writer_id in existing_writer_ids.into_iter().take(4) {
let _ = pool.remove_writer_and_close_clients(writer_id).await; let _ = pool
.remove_writer_and_close_clients(
writer_id,
crate::stats::MeWriterTeardownReason::ReapEmpty,
)
.await;
} }
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await; reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
assert!(warn_next_allowed.len() <= pool.writers.read().await.len()); assert!(warn_next_allowed.len() <= pool.writers.read().await.len());
@@ -487,7 +587,14 @@ async fn reap_draining_writers_soft_evicts_stuck_writer_with_per_writer_cap() {
.store(1, Ordering::Relaxed); .store(1, Ordering::Relaxed);
let now_epoch_secs = MePool::now_epoch_secs(); let now_epoch_secs = MePool::now_epoch_secs();
insert_draining_writer(&pool, 77, now_epoch_secs.saturating_sub(240), 3, 0).await; insert_draining_writer(
&pool,
77,
now_epoch_secs.saturating_sub(240),
3,
now_epoch_secs.saturating_add(3_600),
)
.await;
let mut warn_next_allowed = HashMap::new(); let mut warn_next_allowed = HashMap::new();
let mut soft_evict_next_allowed = HashMap::new(); let mut soft_evict_next_allowed = HashMap::new();
@@ -511,7 +618,14 @@ async fn reap_draining_writers_soft_evict_respects_cooldown_per_writer() {
.store(60_000, Ordering::Relaxed); .store(60_000, Ordering::Relaxed);
let now_epoch_secs = MePool::now_epoch_secs(); let now_epoch_secs = MePool::now_epoch_secs();
insert_draining_writer(&pool, 88, now_epoch_secs.saturating_sub(240), 3, 0).await; insert_draining_writer(
&pool,
88,
now_epoch_secs.saturating_sub(240),
3,
now_epoch_secs.saturating_add(3_600),
)
.await;
let mut warn_next_allowed = HashMap::new(); let mut warn_next_allowed = HashMap::new();
let mut soft_evict_next_allowed = HashMap::new(); let mut soft_evict_next_allowed = HashMap::new();
@@ -524,12 +638,40 @@ async fn reap_draining_writers_soft_evict_respects_cooldown_per_writer() {
assert_eq!(pool.stats.get_pool_drain_soft_evict_writer_total(), 1); assert_eq!(pool.stats.get_pool_drain_soft_evict_writer_total(), 1);
} }
#[tokio::test]
async fn reap_draining_writers_instadrain_removes_non_expired_writers_immediately() {
let pool = make_pool(0).await;
pool.me_instadrain.store(true, Ordering::Relaxed);
let now_epoch_secs = MePool::now_epoch_secs();
insert_draining_writer(&pool, 101, now_epoch_secs.saturating_sub(5), 1, 0).await;
insert_draining_writer(&pool, 102, now_epoch_secs.saturating_sub(4), 1, 0).await;
let mut warn_next_allowed = HashMap::new();
let mut soft_evict_next_allowed = HashMap::new();
reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await;
assert!(current_writer_ids(&pool).await.is_empty());
}
#[test] #[test]
fn general_config_default_drain_threshold_remains_enabled() { fn general_config_default_drain_threshold_remains_enabled() {
assert_eq!(GeneralConfig::default().me_pool_drain_threshold, 128); assert_eq!(GeneralConfig::default().me_pool_drain_threshold, 32);
assert!(GeneralConfig::default().me_pool_drain_soft_evict_enabled); assert!(GeneralConfig::default().me_pool_drain_soft_evict_enabled);
assert_eq!( assert_eq!(
GeneralConfig::default().me_pool_drain_soft_evict_per_writer, GeneralConfig::default().me_pool_drain_soft_evict_grace_secs,
1 10
); );
assert_eq!(
GeneralConfig::default().me_pool_drain_soft_evict_per_writer,
2
);
assert_eq!(
GeneralConfig::default().me_pool_drain_soft_evict_budget_per_core,
16
);
assert_eq!(
GeneralConfig::default().me_pool_drain_soft_evict_cooldown_ms,
1000
);
assert_eq!(GeneralConfig::default().me_bind_stale_mode, MeBindStaleMode::Never);
} }

View File

@@ -30,7 +30,7 @@ mod health_adversarial_tests;
use bytes::Bytes; use bytes::Bytes;
pub use health::me_health_monitor; pub use health::{me_drain_timeout_enforcer, me_health_monitor, me_zombie_writer_watchdog};
#[allow(unused_imports)] #[allow(unused_imports)]
pub use ping::{run_me_ping, format_sample_line, format_me_route, MePingReport, MePingSample, MePingFamily}; pub use ping::{run_me_ping, format_sample_line, format_me_route, MePingReport, MePingSample, MePingFamily};
pub use pool::MePool; pub use pool::MePool;

View File

@@ -18,6 +18,8 @@ use crate::transport::UpstreamManager;
use super::ConnRegistry; use super::ConnRegistry;
use super::codec::WriterCommand; use super::codec::WriterCommand;
const ME_FORCE_CLOSE_SAFETY_FALLBACK_SECS: u64 = 300;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(super) struct RefillDcKey { pub(super) struct RefillDcKey {
pub dc: i32, pub dc: i32,
@@ -72,6 +74,64 @@ impl WriterContour {
} }
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub(crate) enum MeFamilyRuntimeState {
Healthy = 0,
Degraded = 1,
Suppressed = 2,
Recovering = 3,
}
impl MeFamilyRuntimeState {
pub(crate) fn from_u8(value: u8) -> Self {
match value {
1 => Self::Degraded,
2 => Self::Suppressed,
3 => Self::Recovering,
_ => Self::Healthy,
}
}
pub(crate) fn as_str(self) -> &'static str {
match self {
Self::Healthy => "healthy",
Self::Degraded => "degraded",
Self::Suppressed => "suppressed",
Self::Recovering => "recovering",
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub(crate) enum MeDrainGateReason {
Open = 0,
CoverageQuorum = 1,
Redundancy = 2,
SuppressionActive = 3,
}
impl MeDrainGateReason {
pub(crate) fn from_u8(value: u8) -> Self {
match value {
1 => Self::CoverageQuorum,
2 => Self::Redundancy,
3 => Self::SuppressionActive,
_ => Self::Open,
}
}
pub(crate) fn as_str(self) -> &'static str {
match self {
Self::Open => "open",
Self::CoverageQuorum => "coverage_quorum",
Self::Redundancy => "redundancy",
Self::SuppressionActive => "suppression_active",
}
}
}
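Both enums follow the same pattern: `repr(u8)` discriminants so the pool can store them in `AtomicU8` fields, `from_u8` decoding that degrades unknown values to the safe default instead of panicking, and `as_str` labels for export. A round-trip sketch:

use std::sync::atomic::{AtomicU8, Ordering};

fn family_state_label(cell: &AtomicU8) -> &'static str {
    MeFamilyRuntimeState::from_u8(cell.load(Ordering::Relaxed)).as_str()
}

#[test]
fn runtime_state_round_trip() {
    let cell = AtomicU8::new(MeFamilyRuntimeState::Suppressed as u8);
    assert_eq!(family_state_label(&cell), "suppressed");
    // Unknown values decode safely to Healthy rather than panicking.
    cell.store(250, Ordering::Relaxed);
    assert_eq!(family_state_label(&cell), "healthy");
}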
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct SecretSnapshot { pub struct SecretSnapshot {
pub epoch: u64, pub epoch: u64,
@@ -171,6 +231,7 @@ pub struct MePool {
pub(super) endpoint_quarantine: Arc<Mutex<HashMap<SocketAddr, Instant>>>, pub(super) endpoint_quarantine: Arc<Mutex<HashMap<SocketAddr, Instant>>>,
pub(super) kdf_material_fingerprint: Arc<RwLock<HashMap<SocketAddr, (u64, u16)>>>, pub(super) kdf_material_fingerprint: Arc<RwLock<HashMap<SocketAddr, (u64, u16)>>>,
pub(super) me_pool_drain_ttl_secs: AtomicU64, pub(super) me_pool_drain_ttl_secs: AtomicU64,
pub(super) me_instadrain: AtomicBool,
pub(super) me_pool_drain_threshold: AtomicU64, pub(super) me_pool_drain_threshold: AtomicU64,
pub(super) me_pool_drain_soft_evict_enabled: AtomicBool, pub(super) me_pool_drain_soft_evict_enabled: AtomicBool,
pub(super) me_pool_drain_soft_evict_grace_secs: AtomicU64, pub(super) me_pool_drain_soft_evict_grace_secs: AtomicU64,
@@ -200,6 +261,20 @@ pub struct MePool {
     pub(super) me_health_interval_ms_unhealthy: AtomicU64,
     pub(super) me_health_interval_ms_healthy: AtomicU64,
     pub(super) me_warn_rate_limit_ms: AtomicU64,
+    pub(super) me_family_v4_runtime_state: AtomicU8,
+    pub(super) me_family_v6_runtime_state: AtomicU8,
+    pub(super) me_family_v4_state_since_epoch_secs: AtomicU64,
+    pub(super) me_family_v6_state_since_epoch_secs: AtomicU64,
+    pub(super) me_family_v4_suppressed_until_epoch_secs: AtomicU64,
+    pub(super) me_family_v6_suppressed_until_epoch_secs: AtomicU64,
+    pub(super) me_family_v4_fail_streak: AtomicU32,
+    pub(super) me_family_v6_fail_streak: AtomicU32,
+    pub(super) me_family_v4_recover_success_streak: AtomicU32,
+    pub(super) me_family_v6_recover_success_streak: AtomicU32,
+    pub(super) me_last_drain_gate_route_quorum_ok: AtomicBool,
+    pub(super) me_last_drain_gate_redundancy_ok: AtomicBool,
+    pub(super) me_last_drain_gate_block_reason: AtomicU8,
+    pub(super) me_last_drain_gate_updated_at_epoch_secs: AtomicU64,
     pub(super) runtime_ready: AtomicBool,
     pool_size: usize,
     pub(super) preferred_endpoints_by_dc: Arc<RwLock<HashMap<i32, Vec<SocketAddr>>>>,
@@ -228,6 +303,14 @@ impl MePool {
             .as_secs()
     }
+    fn normalize_force_close_secs(force_close_secs: u64) -> u64 {
+        if force_close_secs == 0 {
+            ME_FORCE_CLOSE_SAFETY_FALLBACK_SECS
+        } else {
+            force_close_secs
+        }
+    }
     pub fn new(
         proxy_tag: Option<Vec<u8>>,
         proxy_secret: Vec<u8>,
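A small runnable sketch of the helper added above: under the force-close safety policy, 0 no longer disables force-close but falls back to the 300-second safety cap (ME_FORCE_CLOSE_SAFETY_FALLBACK_SECS):

const ME_FORCE_CLOSE_SAFETY_FALLBACK_SECS: u64 = 300;

// Mirrors normalize_force_close_secs above: 0 no longer means
// "never force-close"; it selects the safety fallback instead,
// so a stuck writer is always bounded in time.
fn normalize_force_close_secs(force_close_secs: u64) -> u64 {
    if force_close_secs == 0 {
        ME_FORCE_CLOSE_SAFETY_FALLBACK_SECS
    } else {
        force_close_secs
    }
}

fn main() {
    assert_eq!(normalize_force_close_secs(0), 300);
    assert_eq!(normalize_force_close_secs(120), 120);
}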
@@ -279,6 +362,7 @@ impl MePool {
         me_adaptive_floor_max_warm_writers_global: u32,
         hardswap: bool,
         me_pool_drain_ttl_secs: u64,
+        me_instadrain: bool,
         me_pool_drain_threshold: u64,
         me_pool_drain_soft_evict_enabled: bool,
         me_pool_drain_soft_evict_grace_secs: u64,
@@ -462,6 +546,7 @@ impl MePool {
             endpoint_quarantine: Arc::new(Mutex::new(HashMap::new())),
             kdf_material_fingerprint: Arc::new(RwLock::new(HashMap::new())),
             me_pool_drain_ttl_secs: AtomicU64::new(me_pool_drain_ttl_secs),
+            me_instadrain: AtomicBool::new(me_instadrain),
             me_pool_drain_threshold: AtomicU64::new(me_pool_drain_threshold),
             me_pool_drain_soft_evict_enabled: AtomicBool::new(me_pool_drain_soft_evict_enabled),
             me_pool_drain_soft_evict_grace_secs: AtomicU64::new(me_pool_drain_soft_evict_grace_secs),
@@ -474,7 +559,9 @@ impl MePool {
             me_pool_drain_soft_evict_cooldown_ms: AtomicU64::new(
                 me_pool_drain_soft_evict_cooldown_ms.max(1),
             ),
-            me_pool_force_close_secs: AtomicU64::new(me_pool_force_close_secs),
+            me_pool_force_close_secs: AtomicU64::new(Self::normalize_force_close_secs(
+                me_pool_force_close_secs,
+            )),
             me_pool_min_fresh_ratio_permille: AtomicU32::new(Self::ratio_to_permille(
                 me_pool_min_fresh_ratio,
             )),
@@ -503,6 +590,20 @@ impl MePool {
             me_health_interval_ms_unhealthy: AtomicU64::new(me_health_interval_ms_unhealthy.max(1)),
             me_health_interval_ms_healthy: AtomicU64::new(me_health_interval_ms_healthy.max(1)),
             me_warn_rate_limit_ms: AtomicU64::new(me_warn_rate_limit_ms.max(1)),
+            me_family_v4_runtime_state: AtomicU8::new(MeFamilyRuntimeState::Healthy as u8),
+            me_family_v6_runtime_state: AtomicU8::new(MeFamilyRuntimeState::Healthy as u8),
+            me_family_v4_state_since_epoch_secs: AtomicU64::new(Self::now_epoch_secs()),
+            me_family_v6_state_since_epoch_secs: AtomicU64::new(Self::now_epoch_secs()),
+            me_family_v4_suppressed_until_epoch_secs: AtomicU64::new(0),
+            me_family_v6_suppressed_until_epoch_secs: AtomicU64::new(0),
+            me_family_v4_fail_streak: AtomicU32::new(0),
+            me_family_v6_fail_streak: AtomicU32::new(0),
+            me_family_v4_recover_success_streak: AtomicU32::new(0),
+            me_family_v6_recover_success_streak: AtomicU32::new(0),
+            me_last_drain_gate_route_quorum_ok: AtomicBool::new(false),
+            me_last_drain_gate_redundancy_ok: AtomicBool::new(false),
+            me_last_drain_gate_block_reason: AtomicU8::new(MeDrainGateReason::Open as u8),
+            me_last_drain_gate_updated_at_epoch_secs: AtomicU64::new(Self::now_epoch_secs()),
             runtime_ready: AtomicBool::new(false),
             preferred_endpoints_by_dc: Arc::new(RwLock::new(preferred_endpoints_by_dc)),
         })
@@ -520,10 +621,158 @@ impl MePool {
         self.runtime_ready.load(Ordering::Relaxed)
     }
+    pub(super) fn set_family_runtime_state(
+        &self,
+        family: IpFamily,
+        state: MeFamilyRuntimeState,
+        state_since_epoch_secs: u64,
+        suppressed_until_epoch_secs: u64,
+        fail_streak: u32,
+        recover_success_streak: u32,
+    ) {
+        match family {
+            IpFamily::V4 => {
+                self.me_family_v4_runtime_state
+                    .store(state as u8, Ordering::Relaxed);
+                self.me_family_v4_state_since_epoch_secs
+                    .store(state_since_epoch_secs, Ordering::Relaxed);
+                self.me_family_v4_suppressed_until_epoch_secs
+                    .store(suppressed_until_epoch_secs, Ordering::Relaxed);
+                self.me_family_v4_fail_streak
+                    .store(fail_streak, Ordering::Relaxed);
+                self.me_family_v4_recover_success_streak
+                    .store(recover_success_streak, Ordering::Relaxed);
+            }
+            IpFamily::V6 => {
+                self.me_family_v6_runtime_state
+                    .store(state as u8, Ordering::Relaxed);
+                self.me_family_v6_state_since_epoch_secs
+                    .store(state_since_epoch_secs, Ordering::Relaxed);
+                self.me_family_v6_suppressed_until_epoch_secs
+                    .store(suppressed_until_epoch_secs, Ordering::Relaxed);
+                self.me_family_v6_fail_streak
+                    .store(fail_streak, Ordering::Relaxed);
+                self.me_family_v6_recover_success_streak
+                    .store(recover_success_streak, Ordering::Relaxed);
+            }
+        }
+    }
+    pub(crate) fn family_runtime_state(&self, family: IpFamily) -> MeFamilyRuntimeState {
+        match family {
+            IpFamily::V4 => MeFamilyRuntimeState::from_u8(
+                self.me_family_v4_runtime_state.load(Ordering::Relaxed),
+            ),
+            IpFamily::V6 => MeFamilyRuntimeState::from_u8(
+                self.me_family_v6_runtime_state.load(Ordering::Relaxed),
+            ),
+        }
+    }
+    pub(crate) fn family_runtime_state_since_epoch_secs(&self, family: IpFamily) -> u64 {
+        match family {
+            IpFamily::V4 => self
+                .me_family_v4_state_since_epoch_secs
+                .load(Ordering::Relaxed),
+            IpFamily::V6 => self
+                .me_family_v6_state_since_epoch_secs
+                .load(Ordering::Relaxed),
+        }
+    }
+    pub(crate) fn family_suppressed_until_epoch_secs(&self, family: IpFamily) -> u64 {
+        match family {
+            IpFamily::V4 => self
+                .me_family_v4_suppressed_until_epoch_secs
+                .load(Ordering::Relaxed),
+            IpFamily::V6 => self
+                .me_family_v6_suppressed_until_epoch_secs
+                .load(Ordering::Relaxed),
+        }
+    }
+    pub(crate) fn family_fail_streak(&self, family: IpFamily) -> u32 {
+        match family {
+            IpFamily::V4 => self.me_family_v4_fail_streak.load(Ordering::Relaxed),
+            IpFamily::V6 => self.me_family_v6_fail_streak.load(Ordering::Relaxed),
+        }
+    }
+    pub(crate) fn family_recover_success_streak(&self, family: IpFamily) -> u32 {
+        match family {
+            IpFamily::V4 => self
+                .me_family_v4_recover_success_streak
+                .load(Ordering::Relaxed),
+            IpFamily::V6 => self
+                .me_family_v6_recover_success_streak
+                .load(Ordering::Relaxed),
+        }
+    }
+    pub(crate) fn is_family_temporarily_suppressed(
+        &self,
+        family: IpFamily,
+        now_epoch_secs: u64,
+    ) -> bool {
+        self.family_suppressed_until_epoch_secs(family) > now_epoch_secs
+    }
+    pub(super) fn family_enabled_for_drain_coverage(
+        &self,
+        family: IpFamily,
+        now_epoch_secs: u64,
+    ) -> bool {
+        let configured = match family {
+            IpFamily::V4 => self.decision.ipv4_me,
+            IpFamily::V6 => self.decision.ipv6_me,
+        };
+        configured && !self.is_family_temporarily_suppressed(family, now_epoch_secs)
+    }
+    pub(super) fn set_last_drain_gate(
+        &self,
+        route_quorum_ok: bool,
+        redundancy_ok: bool,
+        block_reason: MeDrainGateReason,
+        updated_at_epoch_secs: u64,
+    ) {
+        self.me_last_drain_gate_route_quorum_ok
+            .store(route_quorum_ok, Ordering::Relaxed);
+        self.me_last_drain_gate_redundancy_ok
+            .store(redundancy_ok, Ordering::Relaxed);
+        self.me_last_drain_gate_block_reason
+            .store(block_reason as u8, Ordering::Relaxed);
+        self.me_last_drain_gate_updated_at_epoch_secs
+            .store(updated_at_epoch_secs, Ordering::Relaxed);
+    }
+    pub(crate) fn last_drain_gate_route_quorum_ok(&self) -> bool {
+        self.me_last_drain_gate_route_quorum_ok
+            .load(Ordering::Relaxed)
+    }
+    pub(crate) fn last_drain_gate_redundancy_ok(&self) -> bool {
+        self.me_last_drain_gate_redundancy_ok
+            .load(Ordering::Relaxed)
+    }
+    pub(crate) fn last_drain_gate_block_reason(&self) -> MeDrainGateReason {
+        MeDrainGateReason::from_u8(
+            self.me_last_drain_gate_block_reason
+                .load(Ordering::Relaxed),
+        )
+    }
+    pub(crate) fn last_drain_gate_updated_at_epoch_secs(&self) -> u64 {
+        self.me_last_drain_gate_updated_at_epoch_secs
+            .load(Ordering::Relaxed)
+    }
     pub fn update_runtime_reinit_policy(
         &self,
         hardswap: bool,
         drain_ttl_secs: u64,
+        instadrain: bool,
         pool_drain_threshold: u64,
         pool_drain_soft_evict_enabled: bool,
         pool_drain_soft_evict_grace_secs: u64,
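A condensed illustration of the eligibility rule the accessors above support: a family contributes to drain coverage only while it is configured and its suppression window (tracked in epoch seconds) has expired. This is a free-function sketch with stand-in parameters, not the pool method itself:

// Stand-in for family_enabled_for_drain_coverage above:
// `configured` plays the role of decision.ipv4_me / decision.ipv6_me.
fn family_enabled_for_drain_coverage(
    configured: bool,
    suppressed_until_epoch_secs: u64,
    now_epoch_secs: u64,
) -> bool {
    // Strict comparison: at now == suppressed_until the window has ended.
    let suppressed = suppressed_until_epoch_secs > now_epoch_secs;
    configured && !suppressed
}

fn main() {
    // Suppression active until t=1000: family excluded at t=900...
    assert!(!family_enabled_for_drain_coverage(true, 1000, 900));
    // ...and re-included once the window has elapsed.
    assert!(family_enabled_for_drain_coverage(true, 1000, 1001));
    // An unconfigured family never counts, suppressed or not.
    assert!(!family_enabled_for_drain_coverage(false, 0, 900));
}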
@@ -568,6 +817,7 @@ impl MePool {
         self.hardswap.store(hardswap, Ordering::Relaxed);
         self.me_pool_drain_ttl_secs
             .store(drain_ttl_secs, Ordering::Relaxed);
+        self.me_instadrain.store(instadrain, Ordering::Relaxed);
         self.me_pool_drain_threshold
             .store(pool_drain_threshold, Ordering::Relaxed);
         self.me_pool_drain_soft_evict_enabled
@@ -582,8 +832,10 @@ impl MePool {
         );
         self.me_pool_drain_soft_evict_cooldown_ms
             .store(pool_drain_soft_evict_cooldown_ms.max(1), Ordering::Relaxed);
-        self.me_pool_force_close_secs
-            .store(force_close_secs, Ordering::Relaxed);
+        self.me_pool_force_close_secs.store(
+            Self::normalize_force_close_secs(force_close_secs),
+            Ordering::Relaxed,
+        );
         self.me_pool_min_fresh_ratio_permille
             .store(Self::ratio_to_permille(min_fresh_ratio), Ordering::Relaxed);
         self.me_hardswap_warmup_delay_min_ms
@@ -728,13 +980,10 @@ impl MePool {
     }
     pub(super) fn force_close_timeout(&self) -> Option<Duration> {
-        let secs = self.me_pool_force_close_secs.load(Ordering::Relaxed);
-        if secs == 0 {
-            None
-        } else {
-            Some(Duration::from_secs(secs))
-        }
+        let secs =
+            Self::normalize_force_close_secs(self.me_pool_force_close_secs.load(Ordering::Relaxed));
+        Some(Duration::from_secs(secs))
     }
     pub(super) fn drain_soft_evict_enabled(&self) -> bool {
         self.me_pool_drain_soft_evict_enabled
@@ -1005,9 +1254,10 @@ impl MePool {
     }
     pub(super) async fn active_coverage_required_total(&self) -> usize {
+        let now_epoch_secs = Self::now_epoch_secs();
         let mut endpoints_by_dc = HashMap::<i32, HashSet<SocketAddr>>::new();
-        if self.decision.ipv4_me {
+        if self.family_enabled_for_drain_coverage(IpFamily::V4, now_epoch_secs) {
             let map = self.proxy_map_v4.read().await;
             for (dc, addrs) in map.iter() {
                 let entry = endpoints_by_dc.entry(*dc).or_default();
@@ -1017,7 +1267,7 @@ impl MePool {
             }
         }
-        if self.decision.ipv6_me {
+        if self.family_enabled_for_drain_coverage(IpFamily::V6, now_epoch_secs) {
             let map = self.proxy_map_v6.read().await;
             for (dc, addrs) in map.iter() {
                 let entry = endpoints_by_dc.entry(*dc).or_default();

View File

@@ -74,9 +74,8 @@ impl MePool {
             debug!(
                 %addr,
                 wait_ms = expiry.saturating_duration_since(now).as_millis(),
-                "All ME endpoints are quarantined for the DC group; retrying earliest one"
+                "All ME endpoints are quarantined for the DC group; waiting for quarantine expiry"
             );
-            return vec![addr];
         }
         Vec::new()
@@ -165,9 +164,10 @@ impl MePool {
     }
     async fn endpoints_for_dc(&self, target_dc: i32) -> Vec<SocketAddr> {
+        let now_epoch_secs = Self::now_epoch_secs();
         let mut endpoints = HashSet::<SocketAddr>::new();
-        if self.decision.ipv4_me {
+        if self.family_enabled_for_drain_coverage(IpFamily::V4, now_epoch_secs) {
             let map = self.proxy_map_v4.read().await;
             if let Some(addrs) = map.get(&target_dc) {
                 for (ip, port) in addrs {
@@ -176,7 +176,7 @@ impl MePool {
             }
         }
-        if self.decision.ipv6_me {
+        if self.family_enabled_for_drain_coverage(IpFamily::V6, now_epoch_secs) {
             let map = self.proxy_map_v6.read().await;
             if let Some(addrs) = map.get(&target_dc) {
                 for (ip, port) in addrs {

View File

@@ -11,8 +11,9 @@ use tracing::{debug, info, warn};
 use std::collections::hash_map::DefaultHasher;
 use crate::crypto::SecureRandom;
+use crate::network::IpFamily;
-use super::pool::{MePool, WriterContour};
+use super::pool::{MeDrainGateReason, MePool, WriterContour};
 const ME_HARDSWAP_PENDING_TTL_SECS: u64 = 1800;
@@ -120,9 +121,10 @@ impl MePool {
     }
     async fn desired_dc_endpoints(&self) -> HashMap<i32, HashSet<SocketAddr>> {
+        let now_epoch_secs = Self::now_epoch_secs();
         let mut out: HashMap<i32, HashSet<SocketAddr>> = HashMap::new();
-        if self.decision.ipv4_me {
+        if self.family_enabled_for_drain_coverage(IpFamily::V4, now_epoch_secs) {
             let map_v4 = self.proxy_map_v4.read().await.clone();
             for (dc, addrs) in map_v4 {
                 let entry = out.entry(dc).or_default();
@@ -132,7 +134,7 @@ impl MePool {
             }
         }
-        if self.decision.ipv6_me {
+        if self.family_enabled_for_drain_coverage(IpFamily::V6, now_epoch_secs) {
             let map_v6 = self.proxy_map_v6.read().await.clone();
             for (dc, addrs) in map_v6 {
                 let entry = out.entry(dc).or_default();
@@ -313,13 +315,23 @@ impl MePool {
     pub async fn zero_downtime_reinit_after_map_change(self: &Arc<Self>, rng: &SecureRandom) {
         let desired_by_dc = self.desired_dc_endpoints().await;
+        let now_epoch_secs = Self::now_epoch_secs();
+        let v4_suppressed = self.is_family_temporarily_suppressed(IpFamily::V4, now_epoch_secs);
+        let v6_suppressed = self.is_family_temporarily_suppressed(IpFamily::V6, now_epoch_secs);
         if desired_by_dc.is_empty() {
             warn!("ME endpoint map is empty; skipping stale writer drain");
+            let reason = if (self.decision.ipv4_me && v4_suppressed)
+                || (self.decision.ipv6_me && v6_suppressed)
+            {
+                MeDrainGateReason::SuppressionActive
+            } else {
+                MeDrainGateReason::CoverageQuorum
+            };
+            self.set_last_drain_gate(false, false, reason, now_epoch_secs);
             return;
         }
         let desired_map_hash = Self::desired_map_hash(&desired_by_dc);
-        let now_epoch_secs = Self::now_epoch_secs();
         let previous_generation = self.current_generation();
         let hardswap = self.hardswap.load(Ordering::Relaxed);
         let generation = if hardswap {
@@ -390,7 +402,17 @@ impl MePool {
                 .load(Ordering::Relaxed),
         );
         let (coverage_ratio, missing_dc) = Self::coverage_ratio(&desired_by_dc, &active_writer_addrs);
+        let mut route_quorum_ok = coverage_ratio >= min_ratio;
+        let mut redundancy_ok = missing_dc.is_empty();
+        let mut redundancy_missing_dc = missing_dc.clone();
+        let mut gate_coverage_ratio = coverage_ratio;
         if !hardswap && coverage_ratio < min_ratio {
+            self.set_last_drain_gate(
+                false,
+                redundancy_ok,
+                MeDrainGateReason::CoverageQuorum,
+                now_epoch_secs,
+            );
             warn!(
                 previous_generation,
                 generation,
@@ -411,7 +433,17 @@ impl MePool {
             .collect();
         let (fresh_coverage_ratio, fresh_missing_dc) =
             Self::coverage_ratio(&desired_by_dc, &fresh_writer_addrs);
-        if !fresh_missing_dc.is_empty() {
+        route_quorum_ok = fresh_coverage_ratio >= min_ratio;
+        redundancy_ok = fresh_missing_dc.is_empty();
+        redundancy_missing_dc = fresh_missing_dc.clone();
+        gate_coverage_ratio = fresh_coverage_ratio;
+        if fresh_coverage_ratio < min_ratio {
+            self.set_last_drain_gate(
+                false,
+                redundancy_ok,
+                MeDrainGateReason::CoverageQuorum,
+                now_epoch_secs,
+            );
             warn!(
                 previous_generation,
                 generation,
@@ -421,13 +453,16 @@ impl MePool {
                 );
                 return;
             }
-        } else if !missing_dc.is_empty() {
+        }
+        self.set_last_drain_gate(route_quorum_ok, redundancy_ok, MeDrainGateReason::Open, now_epoch_secs);
+        if !redundancy_ok {
             warn!(
-                missing_dc = ?missing_dc,
-                // Keep stale writers alive when fresh coverage is incomplete.
-                "ME reinit coverage incomplete; keeping stale writers"
+                missing_dc = ?redundancy_missing_dc,
+                coverage_ratio = format_args!("{gate_coverage_ratio:.3}"),
+                min_ratio = format_args!("{min_ratio:.3}"),
+                "ME reinit proceeds with weighted quorum while some DC groups remain uncovered"
             );
-            return;
         }
         if hardswap {
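The gating in this function reduces to: failing the coverage quorum blocks the drain and keeps stale writers, while missing per-DC redundancy merely warns and lets the reinit proceed. A condensed sketch with hypothetical types invented for illustration:

// Hypothetical condensed model of the drain-gate decision above.
#[derive(Debug, PartialEq)]
enum Gate {
    Blocked,         // coverage below quorum: keep stale writers
    OpenWithWarning, // quorum met, some DC groups still uncovered
    Open,            // full coverage and redundancy
}

fn drain_gate(coverage_ratio: f64, min_ratio: f64, missing_dc: &[i32]) -> Gate {
    if coverage_ratio < min_ratio {
        Gate::Blocked
    } else if !missing_dc.is_empty() {
        Gate::OpenWithWarning
    } else {
        Gate::Open
    }
}

fn main() {
    assert_eq!(drain_gate(0.40, 0.66, &[2]), Gate::Blocked);
    assert_eq!(drain_gate(0.80, 0.66, &[4]), Gate::OpenWithWarning);
    assert_eq!(drain_gate(1.00, 0.66, &[]), Gate::Open);
}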

View File

@@ -1,7 +1,7 @@
 use std::collections::HashMap;
 use std::time::Instant;
-use super::pool::{MePool, RefillDcKey};
+use super::pool::{MeDrainGateReason, MePool, RefillDcKey};
 use crate::network::IpFamily;
 #[derive(Clone, Debug)]
@@ -36,6 +36,24 @@ pub(crate) struct MeApiNatStunSnapshot {
     pub stun_backoff_remaining_ms: Option<u64>,
 }
+#[derive(Clone, Debug)]
+pub(crate) struct MeApiFamilyStateSnapshot {
+    pub family: &'static str,
+    pub state: &'static str,
+    pub state_since_epoch_secs: u64,
+    pub suppressed_until_epoch_secs: Option<u64>,
+    pub fail_streak: u32,
+    pub recover_success_streak: u32,
+}
+#[derive(Clone, Debug)]
+pub(crate) struct MeApiDrainGateSnapshot {
+    pub route_quorum_ok: bool,
+    pub redundancy_ok: bool,
+    pub block_reason: &'static str,
+    pub updated_at_epoch_secs: u64,
+}
 impl MePool {
     pub(crate) async fn api_refill_snapshot(&self) -> MeApiRefillSnapshot {
         let inflight_endpoints_total = self.refill_inflight.lock().await.len();
@@ -125,4 +143,35 @@ impl MePool {
             stun_backoff_remaining_ms,
         }
     }
+    pub(crate) fn api_family_state_snapshot(&self) -> Vec<MeApiFamilyStateSnapshot> {
+        [IpFamily::V4, IpFamily::V6]
+            .into_iter()
+            .map(|family| {
+                let state = self.family_runtime_state(family);
+                let suppressed_until = self.family_suppressed_until_epoch_secs(family);
+                MeApiFamilyStateSnapshot {
+                    family: match family {
+                        IpFamily::V4 => "v4",
+                        IpFamily::V6 => "v6",
+                    },
+                    state: state.as_str(),
+                    state_since_epoch_secs: self.family_runtime_state_since_epoch_secs(family),
+                    suppressed_until_epoch_secs: (suppressed_until != 0).then_some(suppressed_until),
+                    fail_streak: self.family_fail_streak(family),
+                    recover_success_streak: self.family_recover_success_streak(family),
+                }
+            })
+            .collect()
+    }
+    pub(crate) fn api_drain_gate_snapshot(&self) -> MeApiDrainGateSnapshot {
+        let reason: MeDrainGateReason = self.last_drain_gate_block_reason();
+        MeApiDrainGateSnapshot {
+            route_quorum_ok: self.last_drain_gate_route_quorum_ok(),
+            redundancy_ok: self.last_drain_gate_redundancy_ok(),
+            block_reason: reason.as_str(),
+            updated_at_epoch_secs: self.last_drain_gate_updated_at_epoch_secs(),
+        }
+    }
 }

View File

@@ -126,6 +126,7 @@ pub(crate) struct MeApiRuntimeSnapshot {
     pub me_reconnect_backoff_cap_ms: u64,
     pub me_reconnect_fast_retry_count: u32,
     pub me_pool_drain_ttl_secs: u64,
+    pub me_instadrain: bool,
     pub me_pool_drain_soft_evict_enabled: bool,
     pub me_pool_drain_soft_evict_grace_secs: u64,
     pub me_pool_drain_soft_evict_per_writer: u8,
@@ -583,6 +584,7 @@ impl MePool {
             me_reconnect_backoff_cap_ms: self.me_reconnect_backoff_cap.as_millis() as u64,
             me_reconnect_fast_retry_count: self.me_reconnect_fast_retry_count,
             me_pool_drain_ttl_secs: self.me_pool_drain_ttl_secs.load(Ordering::Relaxed),
+            me_instadrain: self.me_instadrain.load(Ordering::Relaxed),
             me_pool_drain_soft_evict_enabled: self
                 .me_pool_drain_soft_evict_enabled
                 .load(Ordering::Relaxed),

View File

@@ -8,6 +8,7 @@ use bytes::Bytes;
 use bytes::BytesMut;
 use rand::Rng;
 use tokio::sync::mpsc;
+use tokio::sync::mpsc::error::TrySendError;
 use tokio_util::sync::CancellationToken;
 use tracing::{debug, info, warn};
@@ -15,11 +16,13 @@ use crate::config::MeBindStaleMode;
 use crate::crypto::SecureRandom;
 use crate::error::{ProxyError, Result};
 use crate::protocol::constants::{RPC_CLOSE_EXT_U32, RPC_PING_U32};
+use crate::stats::{
+    MeWriterCleanupSideEffectStep, MeWriterTeardownMode, MeWriterTeardownReason,
+};
 use super::codec::{RpcWriter, WriterCommand};
 use super::pool::{MePool, MeWriter, WriterContour};
 use super::reader::reader_loop;
-use super::registry::BoundConn;
 use super::wire::build_proxy_req_payload;
 const ME_ACTIVE_PING_SECS: u64 = 25;
@@ -27,6 +30,12 @@ const ME_ACTIVE_PING_JITTER_SECS: i64 = 5;
 const ME_IDLE_KEEPALIVE_MAX_SECS: u64 = 5;
 const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700;
+#[derive(Clone, Copy)]
+enum WriterRemoveGuardMode {
+    Any,
+    DrainingOnly,
+}
 fn is_me_peer_closed_error(error: &ProxyError) -> bool {
     matches!(error, ProxyError::Io(ioe) if ioe.kind() == ErrorKind::UnexpectedEof)
 }
@@ -43,9 +52,16 @@ impl MePool {
         for writer_id in closed_writer_ids {
             if self.registry.is_writer_empty(writer_id).await {
-                let _ = self.remove_writer_only(writer_id).await;
+                let _ = self
+                    .remove_writer_only(writer_id, MeWriterTeardownReason::PruneClosedWriter)
+                    .await;
             } else {
-                let _ = self.remove_writer_and_close_clients(writer_id).await;
+                let _ = self
+                    .remove_writer_and_close_clients(
+                        writer_id,
+                        MeWriterTeardownReason::PruneClosedWriter,
+                    )
+                    .await;
             }
         }
     }
@@ -142,6 +158,9 @@ impl MePool {
             crc_mode: hs.crc_mode,
         };
         let cancel_wr = cancel.clone();
+        let cleanup_done = Arc::new(AtomicBool::new(false));
+        let cleanup_for_writer = cleanup_done.clone();
+        let pool_writer_task = Arc::downgrade(self);
         tokio::spawn(async move {
             loop {
                 tokio::select! {
@@ -159,6 +178,20 @@ impl MePool {
                     _ = cancel_wr.cancelled() => break,
                 }
             }
+            if cleanup_for_writer
+                .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
+                .is_ok()
+            {
+                if let Some(pool) = pool_writer_task.upgrade() {
+                    pool.remove_writer_and_close_clients(
+                        writer_id,
+                        MeWriterTeardownReason::WriterTaskExit,
+                    )
+                    .await;
+                } else {
+                    cancel_wr.cancel();
+                }
+            }
         });
         let writer = MeWriter {
             id: writer_id,
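The writer task now shares the same cleanup_done flag as the reader and ping tasks; a minimal sketch of the once-only claim via compare_exchange:

use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};

// Whichever task exits first wins the compare_exchange and runs
// teardown exactly once; every later task observes `true` and skips it.
fn try_claim_cleanup(done: &AtomicBool) -> bool {
    done.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
        .is_ok()
}

fn main() {
    let done = Arc::new(AtomicBool::new(false));
    assert!(try_claim_cleanup(&done));  // first exiting task claims cleanup
    assert!(!try_claim_cleanup(&done)); // later tasks see it as already done
}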
@@ -195,7 +228,6 @@ impl MePool {
         let cancel_ping = cancel.clone();
         let tx_ping = tx.clone();
         let ping_tracker_ping = ping_tracker.clone();
-        let cleanup_done = Arc::new(AtomicBool::new(false));
         let cleanup_for_reader = cleanup_done.clone();
         let cleanup_for_ping = cleanup_done.clone();
         let keepalive_enabled = self.me_keepalive_enabled;
@@ -241,21 +273,29 @@ impl MePool {
                 stats_reader_close.increment_me_idle_close_by_peer_total();
                 info!(writer_id, "ME socket closed by peer on idle writer");
             }
-            if let Some(pool) = pool.upgrade()
-                && cleanup_for_reader
-                    .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
-                    .is_ok()
+            if cleanup_for_reader
+                .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
+                .is_ok()
             {
-                pool.remove_writer_and_close_clients(writer_id).await;
+                if let Some(pool) = pool.upgrade() {
+                    pool.remove_writer_and_close_clients(
+                        writer_id,
+                        MeWriterTeardownReason::ReaderExit,
+                    )
+                    .await;
+                } else {
+                    // Fallback for shutdown races: make writer task exit quickly so stale
+                    // channels are observable by periodic prune.
+                    cancel_reader_token.cancel();
+                }
             }
             if let Err(e) = res {
                 if !idle_close_by_peer {
                     warn!(error = %e, "ME reader ended");
                 }
             }
-            let mut ws = writers_arc.write().await;
-            ws.retain(|w| w.id != writer_id);
-            info!(remaining = ws.len(), "Dead ME writer removed from pool");
+            let remaining = writers_arc.read().await.len();
+            debug!(writer_id, remaining, "ME reader task finished");
         });
         let pool_ping = Arc::downgrade(self);
@@ -350,7 +390,11 @@ impl MePool {
                 .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
                 .is_ok()
             {
-                pool.remove_writer_and_close_clients(writer_id).await;
+                pool.remove_writer_and_close_clients(
+                    writer_id,
+                    MeWriterTeardownReason::PingSendFail,
+                )
+                .await;
             }
             break;
         }
@@ -443,7 +487,11 @@ impl MePool {
                 .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
                 .is_ok()
             {
-                pool.remove_writer_and_close_clients(writer_id).await;
+                pool.remove_writer_and_close_clients(
+                    writer_id,
+                    MeWriterTeardownReason::SignalSendFail,
+                )
+                .await;
             }
             break;
         }
@@ -477,7 +525,11 @@ impl MePool {
                 .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
                 .is_ok()
             {
-                pool.remove_writer_and_close_clients(writer_id).await;
+                pool.remove_writer_and_close_clients(
+                    writer_id,
+                    MeWriterTeardownReason::SignalSendFail,
+                )
+                .await;
             }
             break;
         }
@@ -490,23 +542,83 @@ impl MePool {
         Ok(())
     }
-    pub(crate) async fn remove_writer_and_close_clients(self: &Arc<Self>, writer_id: u64) {
-        let conns = self.remove_writer_only(writer_id).await;
-        for bound in conns {
-            let _ = self.registry.route(bound.conn_id, super::MeResponse::Close).await;
-            let _ = self.registry.unregister(bound.conn_id).await;
-        }
+    pub(crate) async fn remove_writer_and_close_clients(
+        self: &Arc<Self>,
+        writer_id: u64,
+        reason: MeWriterTeardownReason,
+    ) -> bool {
+        // Full client cleanup now happens inside `registry.writer_lost` to keep
+        // writer reap/remove paths strictly non-blocking per connection.
+        self.remove_writer_with_mode(
+            writer_id,
+            reason,
+            MeWriterTeardownMode::Normal,
+            WriterRemoveGuardMode::Any,
+        )
+        .await
     }
-    async fn remove_writer_only(self: &Arc<Self>, writer_id: u64) -> Vec<BoundConn> {
+    pub(super) async fn remove_draining_writer_hard_detach(
+        self: &Arc<Self>,
+        writer_id: u64,
+        reason: MeWriterTeardownReason,
+    ) -> bool {
+        self.remove_writer_with_mode(
+            writer_id,
+            reason,
+            MeWriterTeardownMode::HardDetach,
+            WriterRemoveGuardMode::DrainingOnly,
+        )
+        .await
+    }
+    async fn remove_writer_only(
+        self: &Arc<Self>,
+        writer_id: u64,
+        reason: MeWriterTeardownReason,
+    ) -> bool {
+        self.remove_writer_with_mode(
+            writer_id,
+            reason,
+            MeWriterTeardownMode::Normal,
+            WriterRemoveGuardMode::Any,
+        )
+        .await
+    }
+    // Authoritative teardown primitive shared by normal cleanup and watchdog path.
+    // Lock-order invariant:
+    //   1) mutate `writers` under pool write lock,
+    //   2) release pool lock,
+    //   3) run registry/metrics/refill side effects.
+    // `registry.writer_lost` must never run while `writers` lock is held.
+    async fn remove_writer_with_mode(
+        self: &Arc<Self>,
+        writer_id: u64,
+        reason: MeWriterTeardownReason,
+        mode: MeWriterTeardownMode,
+        guard_mode: WriterRemoveGuardMode,
+    ) -> bool {
+        let started_at = Instant::now();
+        self.stats
+            .increment_me_writer_teardown_attempt_total(reason, mode);
         let mut close_tx: Option<mpsc::Sender<WriterCommand>> = None;
         let mut removed_addr: Option<SocketAddr> = None;
         let mut removed_dc: Option<i32> = None;
         let mut removed_uptime: Option<Duration> = None;
         let mut trigger_refill = false;
+        let mut removed = false;
         {
             let mut ws = self.writers.write().await;
             if let Some(pos) = ws.iter().position(|w| w.id == writer_id) {
+                if matches!(guard_mode, WriterRemoveGuardMode::DrainingOnly)
+                    && !ws[pos].draining.load(Ordering::Relaxed)
+                {
+                    self.stats.increment_me_writer_teardown_noop_total();
+                    self.stats
+                        .observe_me_writer_teardown_duration(mode, started_at.elapsed());
+                    return false;
+                }
                 let w = ws.remove(pos);
                 let was_draining = w.draining.load(Ordering::Relaxed);
                 if was_draining {
@@ -523,27 +635,65 @@ impl MePool {
                 }
                 close_tx = Some(w.tx.clone());
                 self.conn_count.fetch_sub(1, Ordering::Relaxed);
+                removed = true;
             }
         }
-        let conns = self.registry.writer_lost(writer_id).await;
+        // State invariant:
+        // - writer is removed from `self.writers` (pool visibility),
+        // - writer is removed from registry routing/binding maps via `writer_lost`.
+        // The close command below is only a best-effort accelerator for task shutdown.
+        // Cleanup progress must never depend on command-channel availability.
+        let _ = self.registry.writer_lost(writer_id).await;
         {
             let mut tracker = self.ping_tracker.lock().await;
             tracker.retain(|_, (_, wid)| *wid != writer_id);
         }
         self.rtt_stats.lock().await.remove(&writer_id);
         if let Some(tx) = close_tx {
-            let _ = tx.send(WriterCommand::Close).await;
+            match tx.try_send(WriterCommand::Close) {
+                Ok(()) => {}
+                Err(TrySendError::Full(_)) => {
+                    self.stats.increment_me_writer_close_signal_drop_total();
+                    self.stats
+                        .increment_me_writer_close_signal_channel_full_total();
+                    self.stats.increment_me_writer_cleanup_side_effect_failures_total(
+                        MeWriterCleanupSideEffectStep::CloseSignalChannelFull,
+                    );
+                    debug!(
+                        writer_id,
+                        "Skipping close signal for removed writer: command channel is full"
+                    );
+                }
+                Err(TrySendError::Closed(_)) => {
+                    self.stats.increment_me_writer_close_signal_drop_total();
+                    self.stats.increment_me_writer_cleanup_side_effect_failures_total(
+                        MeWriterCleanupSideEffectStep::CloseSignalChannelClosed,
+                    );
+                    debug!(
+                        writer_id,
+                        "Skipping close signal for removed writer: command channel is closed"
+                    );
+                }
+            }
         }
-        if trigger_refill
-            && let Some(addr) = removed_addr
-            && let Some(writer_dc) = removed_dc
-        {
+        if let Some(addr) = removed_addr {
             if let Some(uptime) = removed_uptime {
                 self.maybe_quarantine_flapping_endpoint(addr, uptime).await;
             }
+            if trigger_refill
+                && let Some(writer_dc) = removed_dc
+            {
                 self.trigger_immediate_refill_for_dc(addr, writer_dc);
+            }
         }
-        conns
+        if removed {
+            self.stats.increment_me_writer_teardown_success_total(mode);
+        } else {
+            self.stats.increment_me_writer_teardown_noop_total();
+        }
+        self.stats
+            .observe_me_writer_teardown_duration(mode, started_at.elapsed());
+        removed
     }
     pub(crate) async fn mark_writer_draining_with_timeout(
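Sketch of the non-blocking close-signal pattern introduced above: teardown must not await a full command channel, so both try_send failure modes are treated as best-effort drops (the stats counters are stubbed here with eprintln!; assumes tokio with the rt, macros and sync features):

use tokio::sync::mpsc;
use tokio::sync::mpsc::error::TrySendError;

// Simplified stand-in for the writer command channel.
#[derive(Debug)]
enum WriterCommand {
    Close,
}

fn signal_close(tx: &mpsc::Sender<WriterCommand>) {
    match tx.try_send(WriterCommand::Close) {
        Ok(()) => {}
        Err(TrySendError::Full(_)) => {
            // Never block teardown on a congested channel; just record it.
            eprintln!("close signal dropped: command channel is full");
        }
        Err(TrySendError::Closed(_)) => {
            eprintln!("close signal dropped: command channel is closed");
        }
    }
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = mpsc::channel::<WriterCommand>(1);
    signal_close(&tx); // fits in the buffer
    signal_close(&tx); // buffer full: dropped without awaiting
    assert!(matches!(rx.recv().await, Some(WriterCommand::Close)));
}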

View File

@@ -8,6 +8,7 @@ use bytes::{Bytes, BytesMut};
 use tokio::io::AsyncReadExt;
 use tokio::net::TcpStream;
 use tokio::sync::{Mutex, mpsc};
+use tokio::sync::mpsc::error::TrySendError;
 use tokio_util::sync::CancellationToken;
 use tracing::{debug, trace, warn};
@@ -173,12 +174,12 @@ pub(crate) async fn reader_loop(
             } else if pt == RPC_CLOSE_EXT_U32 && body.len() >= 8 {
                 let cid = u64::from_le_bytes(body[0..8].try_into().unwrap());
                 debug!(cid, "RPC_CLOSE_EXT from ME");
-                reg.route(cid, MeResponse::Close).await;
+                let _ = reg.route_nowait(cid, MeResponse::Close).await;
                 reg.unregister(cid).await;
             } else if pt == RPC_CLOSE_CONN_U32 && body.len() >= 8 {
                 let cid = u64::from_le_bytes(body[0..8].try_into().unwrap());
                 debug!(cid, "RPC_CLOSE_CONN from ME");
-                reg.route(cid, MeResponse::Close).await;
+                let _ = reg.route_nowait(cid, MeResponse::Close).await;
                 reg.unregister(cid).await;
             } else if pt == RPC_PING_U32 && body.len() >= 8 {
                 let ping_id = i64::from_le_bytes(body[0..8].try_into().unwrap());
@@ -186,14 +187,16 @@ pub(crate) async fn reader_loop(
                 let mut pong = Vec::with_capacity(12);
                 pong.extend_from_slice(&RPC_PONG_U32.to_le_bytes());
                 pong.extend_from_slice(&ping_id.to_le_bytes());
-                if tx
-                    .send(WriterCommand::DataAndFlush(Bytes::from(pong)))
-                    .await
-                    .is_err()
-                {
-                    warn!("PONG send failed");
-                    break;
-                }
+                match tx.try_send(WriterCommand::DataAndFlush(Bytes::from(pong))) {
+                    Ok(()) => {}
+                    Err(TrySendError::Full(_)) => {
+                        debug!(ping_id, "PONG dropped: writer command channel is full");
+                    }
+                    Err(TrySendError::Closed(_)) => {
+                        warn!("PONG send failed: writer channel closed");
+                        break;
+                    }
+                }
             } else if pt == RPC_PONG_U32 && body.len() >= 8 {
                 let ping_id = i64::from_le_bytes(body[0..8].try_into().unwrap());
                 stats.increment_me_keepalive_pong();
@@ -232,6 +235,13 @@ async fn send_close_conn(tx: &mpsc::Sender<WriterCommand>, conn_id: u64) {
     let mut p = Vec::with_capacity(12);
     p.extend_from_slice(&RPC_CLOSE_CONN_U32.to_le_bytes());
     p.extend_from_slice(&conn_id.to_le_bytes());
-    let _ = tx.send(WriterCommand::DataAndFlush(Bytes::from(p))).await;
+    match tx.try_send(WriterCommand::DataAndFlush(Bytes::from(p))) {
+        Ok(()) => {}
+        Err(TrySendError::Full(_)) => {
+            debug!(conn_id, "ME close_conn signal skipped: writer command channel is full");
+        }
+        Err(TrySendError::Closed(_)) => {
+            debug!(conn_id, "ME close_conn signal skipped: writer command channel is closed");
+        }
+    }
 }

View File

@@ -169,6 +169,7 @@ impl ConnRegistry {
         None
     }
+    #[allow(dead_code)]
     pub async fn route(&self, id: u64, resp: MeResponse) -> RouteResult {
         let tx = {
             let inner = self.inner.read().await;
@@ -445,6 +446,9 @@ impl ConnRegistry {
     }
     pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> {
+        let mut close_txs = Vec::<mpsc::Sender<MeResponse>>::new();
+        let mut out = Vec::new();
+        {
             let mut inner = self.inner.write().await;
             inner.writers.remove(&writer_id);
             inner.last_meta_for_writer.remove(&writer_id);
@@ -456,19 +460,24 @@ impl ConnRegistry {
                 .into_iter()
                 .collect::<Vec<_>>();
-        let mut out = Vec::new();
             for conn_id in conns {
                 if inner.writer_for_conn.get(&conn_id).copied() != Some(writer_id) {
                     continue;
                 }
                 inner.writer_for_conn.remove(&conn_id);
-                if let Some(m) = inner.meta.get(&conn_id) {
-                    out.push(BoundConn {
-                        conn_id,
-                        meta: m.clone(),
-                    });
+                if let Some(client_tx) = inner.map.remove(&conn_id) {
+                    close_txs.push(client_tx);
+                }
+                if let Some(meta) = inner.meta.remove(&conn_id) {
+                    out.push(BoundConn { conn_id, meta });
                 }
             }
+        }
+        for client_tx in close_txs {
+            let _ = client_tx.try_send(MeResponse::Close);
+        }
         out
     }
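Sketch of the lock discipline writer_lost now follows: collect the client senders while the registry write lock is held, then signal Close only after the lock is dropped, using a non-blocking try_send (simplified stand-in types, not the crate's registry):

use std::collections::HashMap;
use tokio::sync::{RwLock, mpsc};

#[derive(Debug)]
enum MeResponse {
    Close,
}

async fn writer_lost(
    map: &RwLock<HashMap<u64, mpsc::Sender<MeResponse>>>,
    conn_ids: &[u64],
) {
    let mut close_txs = Vec::new();
    {
        let mut inner = map.write().await;
        for conn_id in conn_ids {
            if let Some(tx) = inner.remove(conn_id) {
                close_txs.push(tx);
            }
        }
    } // write lock dropped here, before any signaling
    for tx in close_txs {
        // Best-effort: a full or closed client channel must not stall cleanup.
        let _ = tx.try_send(MeResponse::Close);
    }
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = mpsc::channel(1);
    let map = RwLock::new(HashMap::from([(7u64, tx)]));
    writer_lost(&map, &[7]).await;
    assert!(matches!(rx.recv().await, Some(MeResponse::Close)));
}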
@@ -491,6 +500,7 @@ impl ConnRegistry {
 #[cfg(test)]
 mod tests {
     use std::net::{IpAddr, Ipv4Addr, SocketAddr};
+    use std::time::Duration;
     use super::ConnMeta;
     use super::ConnRegistry;
@@ -663,6 +673,39 @@ mod tests {
         assert!(registry.is_writer_empty(20).await);
     }
+    #[tokio::test]
+    async fn writer_lost_removes_bound_conn_from_registry_and_signals_close() {
+        let registry = ConnRegistry::new();
+        let (conn_id, mut rx) = registry.register().await;
+        let (writer_tx, _writer_rx) = tokio::sync::mpsc::channel(8);
+        registry.register_writer(10, writer_tx).await;
+        let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 443);
+        assert!(
+            registry
+                .bind_writer(
+                    conn_id,
+                    10,
+                    ConnMeta {
+                        target_dc: 2,
+                        client_addr: addr,
+                        our_addr: addr,
+                        proto_flags: 0,
+                    },
+                )
+                .await
+        );
+        let lost = registry.writer_lost(10).await;
+        assert_eq!(lost.len(), 1);
+        assert_eq!(lost[0].conn_id, conn_id);
+        assert!(registry.get_writer(conn_id).await.is_none());
+        assert!(registry.get_meta(conn_id).await.is_none());
+        assert_eq!(registry.unregister(conn_id).await, None);
+        let close = tokio::time::timeout(Duration::from_millis(50), rx.recv()).await;
+        assert!(matches!(close, Ok(Some(MeResponse::Close))));
+    }
     #[tokio::test]
     async fn bind_writer_rejects_unregistered_writer() {
         let registry = ConnRegistry::new();

View File

@@ -14,6 +14,7 @@ use crate::config::{MeRouteNoWriterMode, MeWriterPickMode};
 use crate::error::{ProxyError, Result};
 use crate::network::IpFamily;
 use crate::protocol::constants::{RPC_CLOSE_CONN_U32, RPC_CLOSE_EXT_U32};
+use crate::stats::MeWriterTeardownReason;
 use super::MePool;
 use super::codec::WriterCommand;
@@ -134,7 +135,11 @@ impl MePool {
             Ok(()) => return Ok(()),
             Err(TimedSendError::Closed(_)) => {
                 warn!(writer_id = current.writer_id, "ME writer channel closed");
-                self.remove_writer_and_close_clients(current.writer_id).await;
+                self.remove_writer_and_close_clients(
+                    current.writer_id,
+                    MeWriterTeardownReason::RouteChannelClosed,
+                )
+                .await;
                 continue;
             }
             Err(TimedSendError::Timeout(_)) => {
@@ -151,7 +156,11 @@ impl MePool {
             }
             Err(TrySendError::Closed(_)) => {
                 warn!(writer_id = current.writer_id, "ME writer channel closed");
-                self.remove_writer_and_close_clients(current.writer_id).await;
+                self.remove_writer_and_close_clients(
+                    current.writer_id,
+                    MeWriterTeardownReason::RouteChannelClosed,
+                )
+                .await;
                 continue;
             }
         }
@@ -458,7 +467,11 @@ impl MePool {
             Err(TrySendError::Closed(_)) => {
                 self.stats.increment_me_writer_pick_closed_total(pick_mode);
                 warn!(writer_id = w.id, "ME writer channel closed");
-                self.remove_writer_and_close_clients(w.id).await;
+                self.remove_writer_and_close_clients(
+                    w.id,
+                    MeWriterTeardownReason::RouteChannelClosed,
+                )
+                .await;
                 continue;
             }
         }
@@ -503,7 +516,11 @@ impl MePool {
             Err(TimedSendError::Closed(_)) => {
                 self.stats.increment_me_writer_pick_closed_total(pick_mode);
                 warn!(writer_id = w.id, "ME writer channel closed (blocking)");
-                self.remove_writer_and_close_clients(w.id).await;
+                self.remove_writer_and_close_clients(
+                    w.id,
+                    MeWriterTeardownReason::RouteChannelClosed,
+                )
+                .await;
             }
             Err(TimedSendError::Timeout(_)) => {
                 self.stats.increment_me_writer_pick_full_total(pick_mode);
@@ -643,13 +660,23 @@ impl MePool {
             let mut p = Vec::with_capacity(12);
             p.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
             p.extend_from_slice(&conn_id.to_le_bytes());
-            if w.tx
-                .send(WriterCommand::DataAndFlush(Bytes::from(p)))
-                .await
-                .is_err()
-            {
-                debug!("ME close write failed");
-                self.remove_writer_and_close_clients(w.writer_id).await;
-            }
+            match w.tx.try_send(WriterCommand::DataAndFlush(Bytes::from(p))) {
+                Ok(()) => {}
+                Err(TrySendError::Full(_)) => {
+                    debug!(
+                        conn_id,
+                        writer_id = w.writer_id,
+                        "ME close skipped: writer command channel is full"
+                    );
+                }
+                Err(TrySendError::Closed(_)) => {
+                    debug!("ME close write failed");
+                    self.remove_writer_and_close_clients(
+                        w.writer_id,
+                        MeWriterTeardownReason::CloseRpcChannelClosed,
+                    )
+                    .await;
+                }
+            }
         } else {
             debug!(conn_id, "ME close skipped (writer missing)");
@@ -666,8 +693,12 @@ impl MePool {
             p.extend_from_slice(&conn_id.to_le_bytes());
             match w.tx.try_send(WriterCommand::DataAndFlush(Bytes::from(p))) {
                 Ok(()) => {}
-                Err(TrySendError::Full(cmd)) => {
-                    let _ = tokio::time::timeout(Duration::from_millis(50), w.tx.send(cmd)).await;
+                Err(TrySendError::Full(_)) => {
+                    debug!(
+                        conn_id,
+                        writer_id = w.writer_id,
+                        "ME close_conn skipped: writer command channel is full"
+                    );
                 }
                 Err(TrySendError::Closed(_)) => {
                     debug!(conn_id, "ME close_conn skipped: writer channel closed");