diff --git a/Cargo.lock b/Cargo.lock index a943322..20e7dc1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -69,15 +69,15 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anyhow" -version = "1.0.101" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "arc-swap" @@ -147,7 +147,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -197,9 +197,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.10.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] name = "blake3" @@ -235,9 +235,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "byte_string" @@ -268,9 +268,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.55" +version = "1.2.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" +checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" dependencies = [ "find-msvc-tools", "shlex", @@ -320,9 +320,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.43" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", "js-sys", @@ -372,18 +372,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.58" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.58" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ "anstyle", "clap_lex", @@ -391,9 +391,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "const-oid" @@ -622,7 +622,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -642,7 +642,7 @@ checksum = "0b0713d5c1d52e774c5cd7bb8b043d7c0fc4f921abfb678556140bfbe6ab2364" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -670,7 +670,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -750,9 +750,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -765,9 +765,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -775,15 +775,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -792,38 +792,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -833,7 +833,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] @@ -869,20 +868,20 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 6.0.0", "wasip2", "wasip3", ] @@ -1137,7 +1136,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.2", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -1304,7 +1303,7 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd5b3eaf1a28b758ac0faa5a4254e8ab2705605496f1b1f3fbbc3988ad73d199" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "inotify-sys", "libc", ] @@ -1342,9 +1341,9 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.11.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "ipnetwork" @@ -1387,15 +1386,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "js-sys" -version = "0.3.85" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" dependencies = [ "once_cell", "wasm-bindgen", @@ -1435,26 +1434,27 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "libc" -version = "0.2.181" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "459427e2af2b9c839b132acb702a1c654d95e10f8c326bfc2ad11310e458b1c5" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libredox" -version = "0.1.12" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" +checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "libc", - "redox_syscall 0.7.1", + "plain", + "redox_syscall 0.7.3", ] [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" @@ -1585,7 +1585,7 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cfg-if", "cfg_aliases 0.1.1", "libc", @@ -1608,7 +1608,7 @@ version = "6.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "crossbeam-channel", "filetime", "fsevent-sys", @@ -1627,7 +1627,7 @@ version = "8.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d3d07927151ff8575b7087f245456e549fea62edf0ec4e565a5ee50c8402bc3" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "fsevent-sys", "inotify 0.11.1", "kqueue", @@ -1645,7 +1645,7 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42b8cfee0e339a0337359f3c88165702ac6e600dc01c0cc9579a92d62b08477a" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", ] [[package]] @@ -1702,9 +1702,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" dependencies = [ "critical-section", "portable-atomic", @@ -1768,14 +1768,14 @@ checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "pin-project-lite" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "pin-utils" @@ -1793,6 +1793,12 @@ dependencies = [ "spki", ] +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + [[package]] name = "plotters" version = "0.3.7" @@ -1881,7 +1887,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -1901,7 +1907,7 @@ checksum = "37566cb3fdacef14c0737f9546df7cfeadbfbc9fef10991038bf5015d0c80532" dependencies = [ "bit-set", "bit-vec", - "bitflags 2.10.0", + "bitflags 2.11.0", "num-traits", "rand", "rand_chacha", @@ -1931,7 +1937,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.6.2", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -1940,9 +1946,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.13" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ "bytes", "getrandom 0.3.4", @@ -1968,16 +1974,16 @@ dependencies = [ "cfg_aliases 0.2.1", "libc", "once_cell", - "socket2 0.6.2", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.44" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -1988,6 +1994,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" version = "0.9.2" @@ -2061,16 +2073,16 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", ] [[package]] name = "redox_syscall" -version = "0.7.1" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35985aa610addc02e24fc232012c86fd11f14111180f902b67e2d5331f8ebf2b" +checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", ] [[package]] @@ -2098,9 +2110,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.9" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "reqwest" @@ -2199,11 +2211,11 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "errno", "libc", "linux-raw-sys", @@ -2212,9 +2224,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.36" +version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ "once_cell", "ring", @@ -2292,7 +2304,7 @@ checksum = "22f968c5ea23d555e670b449c1c5e7b2fc399fdaec1d304a17cd48e288abc107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2338,7 +2350,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2426,7 +2438,7 @@ dependencies = [ "serde_json", "serde_urlencoded", "shadowsocks-crypto", - "socket2 0.6.2", + "socket2 0.6.3", "spin", "thiserror 2.0.18", "tokio", @@ -2510,12 +2522,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -2562,9 +2574,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.114" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -2600,7 +2612,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2611,10 +2623,11 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" [[package]] name = "telemt" -version = "3.3.20" +version = "3.3.27" dependencies = [ "aes", "anyhow", + "arc-swap", "base64", "bytes", "cbc", @@ -2668,12 +2681,12 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.25.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.4.1", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys 0.61.2", @@ -2705,7 +2718,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2716,7 +2729,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2781,9 +2794,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -2796,9 +2809,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.49.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ "bytes", "libc", @@ -2806,7 +2819,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.2", + "socket2 0.6.3", "tokio-macros", "tracing", "windows-sys 0.61.2", @@ -2814,13 +2827,13 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2867,7 +2880,7 @@ dependencies = [ "log", "once_cell", "pin-project", - "socket2 0.6.2", + "socket2 0.6.3", "tokio", "windows-sys 0.60.2", ] @@ -2951,7 +2964,7 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "bytes", "futures-util", "http", @@ -2994,7 +3007,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -3020,9 +3033,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.22" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "matchers", "nu-ansi-term", @@ -3044,7 +3057,7 @@ checksum = "70977707304198400eb4835a78f6a9f928bf41bba420deb8fdb175cd965d77a7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -3067,9 +3080,9 @@ checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "unicode-ident" -version = "1.0.23" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-xid" @@ -3117,7 +3130,7 @@ version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ - "getrandom 0.4.1", + "getrandom 0.4.2", "js-sys", "wasm-bindgen", ] @@ -3188,9 +3201,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.108" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ "cfg-if", "once_cell", @@ -3201,9 +3214,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.58" +version = "0.4.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" dependencies = [ "cfg-if", "futures-util", @@ -3215,9 +3228,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.108" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3225,22 +3238,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.108" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.108" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" dependencies = [ "unicode-ident", ] @@ -3273,7 +3286,7 @@ version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "hashbrown 0.15.5", "indexmap", "semver", @@ -3281,9 +3294,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.85" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" dependencies = [ "js-sys", "wasm-bindgen", @@ -3353,7 +3366,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -3364,7 +3377,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -3615,9 +3628,9 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.14" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" dependencies = [ "memchr", ] @@ -3662,7 +3675,7 @@ dependencies = [ "heck", "indexmap", "prettyplease", - "syn 2.0.114", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -3678,7 +3691,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -3690,7 +3703,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", - "bitflags 2.10.0", + "bitflags 2.11.0", "indexmap", "log", "serde", @@ -3762,28 +3775,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "synstructure 0.13.2", ] [[package]] name = "zerocopy" -version = "0.8.39" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" +checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.39" +version = "0.8.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" +checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -3803,7 +3816,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "synstructure 0.13.2", ] @@ -3824,7 +3837,7 @@ checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -3857,11 +3870,11 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "zmij" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4de98dfa5d5b7fef4ee834d0073d560c9ca7b6c46a71d058c48db7960f8cfaf7" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index bdad8eb..ecaa156 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "telemt" -version = "3.3.24" +version = "3.3.27" edition = "2024" [dependencies] @@ -41,6 +41,7 @@ tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } parking_lot = "0.12" dashmap = "5.5" +arc-swap = "1.7" lru = "0.16" rand = "0.9" chrono = { version = "0.4", features = ["serde"] } diff --git a/Dockerfile b/Dockerfile index 7abe548..15a4900 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,6 +38,7 @@ USER telemt EXPOSE 443 EXPOSE 9090 +EXPOSE 9091 ENTRYPOINT ["/app/telemt"] CMD ["config.toml"] diff --git a/README.md b/README.md index 2102a3a..5980885 100644 --- a/README.md +++ b/README.md @@ -19,9 +19,9 @@ ### πŸ‡·πŸ‡Ί RU -#### Π Π΅Π»ΠΈΠ· 3.3.15 Semistable +#### О Ρ€Π΅Π»ΠΈΠ·Π°Ρ… -[3.3.15](https://github.com/telemt/telemt/releases/tag/3.3.15) ΠΏΠΎ ΠΈΡ‚ΠΎΠ³Π°ΠΌ Ρ€Π°Π±ΠΎΡ‚Ρ‹ Π² ΠΏΡ€ΠΎΠ΄Π°ΠΊΡˆΠ½ ΠΏΡ€ΠΈΠ·Π½Π°Π½ ΠΎΠ΄Π½ΠΈΠΌ ΠΈΠ· самых ΡΡ‚Π°Π±ΠΈΠ»ΡŒΠ½Ρ‹Ρ… ΠΈ рСкомСндуСтся ΠΊ использованию, ΠΊΠΎΠ³Π΄Π° cutting-edge Ρ„ΠΈΡ‡ΠΈ Π½Π΅ΠΊΡ€ΠΈΡ‚ΠΈΡ‡Π½Ρ‹! +[3.3.27](https://github.com/telemt/telemt/releases/tag/3.3.27) Π΄Π°Ρ‘Ρ‚ баланс ΡΡ‚Π°Π±ΠΈΠ»ΡŒΠ½ΠΎΡΡ‚ΠΈ ΠΈ ΠΏΠ΅Ρ€Π΅Π΄ΠΎΠ²ΠΎΠ³ΠΎ Ρ„ΡƒΠ½ΠΊΡ†ΠΈΠΎΠ½Π°Π»Π°, Π° Ρ‚Π°ΠΊ ΠΆΠ΅ послСдниС исправлСния ΠΏΠΎ бСзопасности ΠΈ Π±Π°Π³Π°ΠΌ Π‘ΡƒΠ΄Π΅ΠΌ Ρ€Π°Π΄Ρ‹ Π²Π°ΡˆΠ΅ΠΌΡƒ Ρ„ΠΈΠ΄Π±Π΅ΠΊΡƒ ΠΈ прСдлоТСниям ΠΏΠΎ ΡƒΠ»ΡƒΡ‡ΡˆΠ΅Π½ΠΈΡŽ β€” особСнно Π² части **API**, **статистики**, **UX** @@ -40,9 +40,9 @@ ### πŸ‡¬πŸ‡§ EN -#### Release 3.3.15 Semistable +#### About releases -[3.3.15](https://github.com/telemt/telemt/releases/tag/3.3.15) is, based on the results of his work in production, recognized as one of the most stable and recommended for use when cutting-edge features are not so necessary! +[3.3.27](https://github.com/telemt/telemt/releases/tag/3.3.27) provides a balance of stability and advanced functionality, as well as the latest security and bug fixes We are looking forward to your feedback and improvement proposals β€” especially regarding **API**, **statistics**, **UX** diff --git a/docker-compose.yml b/docker-compose.yml index 5866c4d..815ba24 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,6 +7,7 @@ services: ports: - "443:443" - "127.0.0.1:9090:9090" + - "127.0.0.1:9091:9091" # Allow caching 'proxy-secret' in read-only container working_dir: /run/telemt volumes: diff --git a/docs/QUICK_START_GUIDE.en.md b/docs/QUICK_START_GUIDE.en.md index c43a2dc..ffb387f 100644 --- a/docs/QUICK_START_GUIDE.en.md +++ b/docs/QUICK_START_GUIDE.en.md @@ -181,6 +181,8 @@ docker compose down docker build -t telemt:local . docker run --name telemt --restart unless-stopped \ -p 443:443 \ + -p 9090:9090 \ + -p 9091:9091 \ -e RUST_LOG=info \ -v "$PWD/config.toml:/app/config.toml:ro" \ --read-only \ diff --git a/docs/QUICK_START_GUIDE.ru.md b/docs/QUICK_START_GUIDE.ru.md index 35fbd27..e4c5005 100644 --- a/docs/QUICK_START_GUIDE.ru.md +++ b/docs/QUICK_START_GUIDE.ru.md @@ -183,6 +183,8 @@ docker compose down docker build -t telemt:local . docker run --name telemt --restart unless-stopped \ -p 443:443 \ + -p 9090:9090 \ + -p 9091:9091 \ -e RUST_LOG=info \ -v "$PWD/config.toml:/app/config.toml:ro" \ --read-only \ diff --git a/install.sh b/install.sh index 330bc3e..90c28f4 100644 --- a/install.sh +++ b/install.sh @@ -1,154 +1,190 @@ #!/bin/sh set -eu -# --- Global Configurations --- REPO="${REPO:-telemt/telemt}" BIN_NAME="${BIN_NAME:-telemt}" INSTALL_DIR="${INSTALL_DIR:-/bin}" CONFIG_DIR="${CONFIG_DIR:-/etc/telemt}" CONFIG_FILE="${CONFIG_FILE:-${CONFIG_DIR}/telemt.toml}" WORK_DIR="${WORK_DIR:-/opt/telemt}" +TLS_DOMAIN="${TLS_DOMAIN:-petrovich.ru}" SERVICE_NAME="telemt" TEMP_DIR="" SUDO="" +CONFIG_PARENT_DIR="" +SERVICE_START_FAILED=0 -# --- Argument Parsing --- ACTION="install" TARGET_VERSION="${VERSION:-latest}" while [ $# -gt 0 ]; do case "$1" in - -h|--help) - ACTION="help" - shift - ;; + -h|--help) ACTION="help"; shift ;; uninstall|--uninstall) - [ "$ACTION" != "purge" ] && ACTION="uninstall" - shift - ;; - --purge) - ACTION="purge" - shift - ;; - install|--install) - ACTION="install" - shift - ;; - -*) - printf '[ERROR] Unknown option: %s\n' "$1" >&2 - exit 1 - ;; + if [ "$ACTION" != "purge" ]; then ACTION="uninstall"; fi + shift ;; + purge|--purge) ACTION="purge"; shift ;; + install|--install) ACTION="install"; shift ;; + -*) printf '[ERROR] Unknown option: %s\n' "$1" >&2; exit 1 ;; *) - if [ "$ACTION" = "install" ]; then - TARGET_VERSION="$1" - fi - shift - ;; + if [ "$ACTION" = "install" ]; then TARGET_VERSION="$1" + else printf '[WARNING] Ignoring extra argument: %s\n' "$1" >&2; fi + shift ;; esac done -# --- Core Functions --- -say() { printf '[INFO] %s\n' "$*"; } +say() { + if [ "$#" -eq 0 ] || [ -z "${1:-}" ]; then + printf '\n' + else + printf '[INFO] %s\n' "$*" + fi +} die() { printf '[ERROR] %s\n' "$*" >&2; exit 1; } +write_root() { $SUDO sh -c 'cat > "$1"' _ "$1"; } + cleanup() { if [ -n "${TEMP_DIR:-}" ] && [ -d "$TEMP_DIR" ]; then rm -rf -- "$TEMP_DIR" fi } - trap cleanup EXIT INT TERM show_help() { - say "Usage: $0 [version | install | uninstall | --purge | --help]" - say " version Install specific version (e.g. 1.0.0, default: latest)" - say " uninstall Remove the binary and service (keeps config)" - say " --purge Remove everything including configuration" + say "Usage: $0 [ | install | uninstall | purge | --help ]" + say " Install specific version (e.g. 3.3.15, default: latest)" + say " install Install the latest version" + say " uninstall Remove the binary and service (keeps config and user)" + say " purge Remove everything including configuration, data, and user" exit 0 } -user_exists() { - if command -v getent >/dev/null 2>&1; then - getent passwd "$1" >/dev/null 2>&1 +check_os_entity() { + if command -v getent >/dev/null 2>&1; then getent "$1" "$2" >/dev/null 2>&1 + else grep -q "^${2}:" "/etc/$1" 2>/dev/null; fi +} + +normalize_path() { + printf '%s\n' "$1" | tr -s '/' | sed 's|/$||; s|^$|/|' +} + +get_realpath() { + path_in="$1" + case "$path_in" in /*) ;; *) path_in="$(pwd)/$path_in" ;; esac + + if command -v realpath >/dev/null 2>&1; then + if realpath_out="$(realpath -m "$path_in" 2>/dev/null)"; then + printf '%s\n' "$realpath_out" + return + fi + fi + + if command -v readlink >/dev/null 2>&1; then + resolved_path="$(readlink -f "$path_in" 2>/dev/null || true)" + if [ -n "$resolved_path" ]; then + printf '%s\n' "$resolved_path" + return + fi + fi + + d="${path_in%/*}"; b="${path_in##*/}" + if [ -z "$d" ]; then d="/"; fi + if [ "$d" = "$path_in" ]; then d="/"; b="$path_in"; fi + + if [ -d "$d" ]; then + abs_d="$(cd "$d" >/dev/null 2>&1 && pwd || true)" + if [ -n "$abs_d" ]; then + if [ "$b" = "." ] || [ -z "$b" ]; then printf '%s\n' "$abs_d" + elif [ "$abs_d" = "/" ]; then printf '/%s\n' "$b" + else printf '%s/%s\n' "$abs_d" "$b"; fi + else + normalize_path "$path_in" + fi else - grep -q "^${1}:" /etc/passwd 2>/dev/null + normalize_path "$path_in" fi } -group_exists() { - if command -v getent >/dev/null 2>&1; then - getent group "$1" >/dev/null 2>&1 - else - grep -q "^${1}:" /etc/group 2>/dev/null - fi +get_svc_mgr() { + if command -v systemctl >/dev/null 2>&1 && [ -d /run/systemd/system ]; then echo "systemd" + elif command -v rc-service >/dev/null 2>&1; then echo "openrc" + else echo "none"; fi } verify_common() { - [ -z "$BIN_NAME" ] && die "BIN_NAME cannot be empty." - [ -z "$INSTALL_DIR" ] && die "INSTALL_DIR cannot be empty." - [ -z "$CONFIG_DIR" ] && die "CONFIG_DIR cannot be empty." + [ -n "$BIN_NAME" ] || die "BIN_NAME cannot be empty." + [ -n "$INSTALL_DIR" ] || die "INSTALL_DIR cannot be empty." + [ -n "$CONFIG_DIR" ] || die "CONFIG_DIR cannot be empty." + [ -n "$CONFIG_FILE" ] || die "CONFIG_FILE cannot be empty." + + case "${INSTALL_DIR}${CONFIG_DIR}${WORK_DIR}${CONFIG_FILE}" in + *[!a-zA-Z0-9_./-]*) die "Invalid characters in paths. Only alphanumeric, _, ., -, and / allowed." ;; + esac + + case "$TARGET_VERSION" in *[!a-zA-Z0-9_.-]*) die "Invalid characters in version." ;; esac + case "$BIN_NAME" in *[!a-zA-Z0-9_-]*) die "Invalid characters in BIN_NAME." ;; esac + + INSTALL_DIR="$(get_realpath "$INSTALL_DIR")" + CONFIG_DIR="$(get_realpath "$CONFIG_DIR")" + WORK_DIR="$(get_realpath "$WORK_DIR")" + CONFIG_FILE="$(get_realpath "$CONFIG_FILE")" + + CONFIG_PARENT_DIR="${CONFIG_FILE%/*}" + if [ -z "$CONFIG_PARENT_DIR" ]; then CONFIG_PARENT_DIR="/"; fi + if [ "$CONFIG_PARENT_DIR" = "$CONFIG_FILE" ]; then CONFIG_PARENT_DIR="."; fi if [ "$(id -u)" -eq 0 ]; then SUDO="" else - if ! command -v sudo >/dev/null 2>&1; then - die "This script requires root or sudo. Neither found." - fi + command -v sudo >/dev/null 2>&1 || die "This script requires root or sudo. Neither found." SUDO="sudo" - say "sudo is available. Caching credentials..." - if ! sudo -v; then - die "Failed to cache sudo credentials" + if ! sudo -n true 2>/dev/null; then + if ! [ -t 0 ]; then + die "sudo requires a password, but no TTY detected. Aborting to prevent hang." + fi fi fi - case "${INSTALL_DIR}${CONFIG_DIR}${WORK_DIR}" in - *[!a-zA-Z0-9_./-]*) - die "Invalid characters in path variables. Only alphanumeric, _, ., -, and / are allowed." - ;; - esac - - case "$BIN_NAME" in - *[!a-zA-Z0-9_-]*) die "Invalid characters in BIN_NAME: $BIN_NAME" ;; - esac - - for path in "$CONFIG_DIR" "$WORK_DIR"; do - check_path="$path" - - while [ "$check_path" != "/" ] && [ "${check_path%"/"}" != "$check_path" ]; do - check_path="${check_path%"/"}" - done - [ -z "$check_path" ] && check_path="/" + if [ -n "$SUDO" ]; then + if $SUDO sh -c '[ -d "$1" ]' _ "$CONFIG_FILE"; then + die "Safety check failed: CONFIG_FILE '$CONFIG_FILE' is a directory." + fi + elif [ -d "$CONFIG_FILE" ]; then + die "Safety check failed: CONFIG_FILE '$CONFIG_FILE' is a directory." + fi + for path in "$CONFIG_DIR" "$CONFIG_PARENT_DIR" "$WORK_DIR"; do + check_path="$(get_realpath "$path")" case "$check_path" in - /|/bin|/sbin|/usr|/usr/bin|/usr/local|/etc|/opt|/var|/home|/root|/tmp) - die "Safety check failed: '$path' is a critical system directory." - ;; + /|/bin|/sbin|/usr|/usr/bin|/usr/sbin|/usr/local|/usr/local/bin|/usr/local/sbin|/usr/local/etc|/usr/local/share|/etc|/var|/var/lib|/var/log|/var/run|/home|/root|/tmp|/lib|/lib64|/opt|/run|/boot|/dev|/sys|/proc) + die "Safety check failed: '$path' (resolved to '$check_path') is a critical system directory." ;; esac done - for cmd in uname grep find rm chown chmod mv head mktemp; do + check_install_dir="$(get_realpath "$INSTALL_DIR")" + case "$check_install_dir" in + /|/etc|/var|/home|/root|/tmp|/usr|/usr/local|/opt|/boot|/dev|/sys|/proc|/run) + die "Safety check failed: INSTALL_DIR '$INSTALL_DIR' is a critical system directory." ;; + esac + + for cmd in id uname grep find rm chown chmod mv mktemp mkdir tr dd sed ps head sleep cat tar gzip rmdir; do command -v "$cmd" >/dev/null 2>&1 || die "Required command not found: $cmd" done } verify_install_deps() { - if ! command -v curl >/dev/null 2>&1 && ! command -v wget >/dev/null 2>&1; then - die "Neither curl nor wget is installed." - fi - command -v tar >/dev/null 2>&1 || die "Required command not found: tar" - command -v gzip >/dev/null 2>&1 || die "Required command not found: gzip" + command -v curl >/dev/null 2>&1 || command -v wget >/dev/null 2>&1 || die "Neither curl nor wget is installed." command -v cp >/dev/null 2>&1 || command -v install >/dev/null 2>&1 || die "Need cp or install" if ! command -v setcap >/dev/null 2>&1; then - say "setcap is missing. Installing required capability tools..." if command -v apk >/dev/null 2>&1; then - $SUDO apk add --no-cache libcap || die "Failed to install libcap" + $SUDO apk add --no-cache libcap-utils >/dev/null 2>&1 || $SUDO apk add --no-cache libcap >/dev/null 2>&1 || true elif command -v apt-get >/dev/null 2>&1; then - $SUDO apt-get update -qq && $SUDO apt-get install -y -qq libcap2-bin || die "Failed to install libcap2-bin" - elif command -v dnf >/dev/null 2>&1 || command -v yum >/dev/null 2>&1; then - $SUDO ${YUM_CMD:-yum} install -y -q libcap || die "Failed to install libcap" - else - die "Cannot install 'setcap'. Package manager not found. Please install libcap manually." + $SUDO apt-get update -q >/dev/null 2>&1 || true + $SUDO apt-get install -y -q libcap2-bin >/dev/null 2>&1 || true + elif command -v dnf >/dev/null 2>&1; then $SUDO dnf install -y -q libcap >/dev/null 2>&1 || true + elif command -v yum >/dev/null 2>&1; then $SUDO yum install -y -q libcap >/dev/null 2>&1 || true fi fi } @@ -163,122 +199,96 @@ detect_arch() { } detect_libc() { - if command -v ldd >/dev/null 2>&1 && ldd --version 2>&1 | grep -qi musl; then - echo "musl"; return 0 - fi - - if grep -q '^ID=alpine' /etc/os-release 2>/dev/null || grep -q '^ID="alpine"' /etc/os-release 2>/dev/null; then - echo "musl"; return 0 - fi for f in /lib/ld-musl-*.so.* /lib64/ld-musl-*.so.*; do - if [ -e "$f" ]; then - echo "musl"; return 0 - fi + if [ -e "$f" ]; then echo "musl"; return 0; fi done + if grep -qE '^ID="?alpine"?' /etc/os-release 2>/dev/null; then echo "musl"; return 0; fi + if command -v ldd >/dev/null 2>&1 && (ldd --version 2>&1 || true) | grep -qi musl; then echo "musl"; return 0; fi echo "gnu" } fetch_file() { - fetch_url="$1" - fetch_out="$2" - - if command -v curl >/dev/null 2>&1; then - curl -fsSL "$fetch_url" -o "$fetch_out" || return 1 - elif command -v wget >/dev/null 2>&1; then - wget -qO "$fetch_out" "$fetch_url" || return 1 - else - die "curl or wget required" - fi + if command -v curl >/dev/null 2>&1; then curl -fsSL "$1" -o "$2" + else wget -q -O "$2" "$1"; fi } ensure_user_group() { - nologin_bin="/bin/false" + nologin_bin="$(command -v nologin 2>/dev/null || command -v false 2>/dev/null || echo /bin/false)" - cmd_nologin="$(command -v nologin 2>/dev/null || true)" - if [ -n "$cmd_nologin" ] && [ -x "$cmd_nologin" ]; then - nologin_bin="$cmd_nologin" - else - for bin in /sbin/nologin /usr/sbin/nologin; do - if [ -x "$bin" ]; then - nologin_bin="$bin" - break - fi - done + if ! check_os_entity group telemt; then + if command -v groupadd >/dev/null 2>&1; then $SUDO groupadd -r telemt + elif command -v addgroup >/dev/null 2>&1; then $SUDO addgroup -S telemt + else die "Cannot create group"; fi fi - if ! group_exists telemt; then - if command -v groupadd >/dev/null 2>&1; then - $SUDO groupadd -r telemt || die "Failed to create group via groupadd" - elif command -v addgroup >/dev/null 2>&1; then - $SUDO addgroup -S telemt || die "Failed to create group via addgroup" - else - die "Cannot create group: neither groupadd nor addgroup found" - fi - fi - - if ! user_exists telemt; then + if ! check_os_entity passwd telemt; then if command -v useradd >/dev/null 2>&1; then - $SUDO useradd -r -g telemt -d "$WORK_DIR" -s "$nologin_bin" -c "Telemt Proxy" telemt || die "Failed to create user via useradd" + $SUDO useradd -r -g telemt -d "$WORK_DIR" -s "$nologin_bin" -c "Telemt Proxy" telemt elif command -v adduser >/dev/null 2>&1; then - $SUDO adduser -S -D -H -h "$WORK_DIR" -s "$nologin_bin" -G telemt telemt || die "Failed to create user via adduser" - else - die "Cannot create user: neither useradd nor adduser found" - fi + if adduser --help 2>&1 | grep -q -- '-S'; then + $SUDO adduser -S -D -H -h "$WORK_DIR" -s "$nologin_bin" -G telemt telemt + else + $SUDO adduser --system --home "$WORK_DIR" --shell "$nologin_bin" --no-create-home --ingroup telemt --disabled-password telemt + fi + else die "Cannot create user"; fi fi } setup_dirs() { - say "Setting up directories..." - $SUDO mkdir -p "$WORK_DIR" "$CONFIG_DIR" || die "Failed to create directories" - $SUDO chown telemt:telemt "$WORK_DIR" || die "Failed to set owner on WORK_DIR" - $SUDO chmod 750 "$WORK_DIR" || die "Failed to set permissions on WORK_DIR" + $SUDO mkdir -p "$WORK_DIR" "$CONFIG_DIR" "$CONFIG_PARENT_DIR" || die "Failed to create directories" + + $SUDO chown telemt:telemt "$WORK_DIR" && $SUDO chmod 750 "$WORK_DIR" + $SUDO chown root:telemt "$CONFIG_DIR" && $SUDO chmod 750 "$CONFIG_DIR" + + if [ "$CONFIG_PARENT_DIR" != "$CONFIG_DIR" ] && [ "$CONFIG_PARENT_DIR" != "." ] && [ "$CONFIG_PARENT_DIR" != "/" ]; then + $SUDO chown root:telemt "$CONFIG_PARENT_DIR" && $SUDO chmod 750 "$CONFIG_PARENT_DIR" + fi } stop_service() { - say "Stopping service if running..." - if command -v systemctl >/dev/null 2>&1 && [ -d /run/systemd/system ]; then + svc="$(get_svc_mgr)" + if [ "$svc" = "systemd" ] && systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then $SUDO systemctl stop "$SERVICE_NAME" 2>/dev/null || true - elif command -v rc-service >/dev/null 2>&1; then + elif [ "$svc" = "openrc" ] && rc-service "$SERVICE_NAME" status >/dev/null 2>&1; then $SUDO rc-service "$SERVICE_NAME" stop 2>/dev/null || true fi } install_binary() { - bin_src="$1" - bin_dst="$2" + bin_src="$1"; bin_dst="$2" + if [ -e "$INSTALL_DIR" ] && [ ! -d "$INSTALL_DIR" ]; then + die "'$INSTALL_DIR' is not a directory." + fi $SUDO mkdir -p "$INSTALL_DIR" || die "Failed to create install directory" if command -v install >/dev/null 2>&1; then $SUDO install -m 0755 "$bin_src" "$bin_dst" || die "Failed to install binary" else - $SUDO rm -f "$bin_dst" - $SUDO cp "$bin_src" "$bin_dst" || die "Failed to copy binary" - $SUDO chmod 0755 "$bin_dst" || die "Failed to set permissions" + $SUDO rm -f "$bin_dst" 2>/dev/null || true + $SUDO cp "$bin_src" "$bin_dst" && $SUDO chmod 0755 "$bin_dst" || die "Failed to copy binary" fi - if [ ! -x "$bin_dst" ]; then - die "Failed to install binary or it is not executable: $bin_dst" - fi + $SUDO sh -c '[ -x "$1" ]' _ "$bin_dst" || die "Binary not executable: $bin_dst" - say "Granting network bind capabilities to bind port 443..." - if ! $SUDO setcap cap_net_bind_service=+ep "$bin_dst" 2>/dev/null; then - say "[WARNING] Failed to apply setcap. The service will NOT be able to open port 443!" - say "[WARNING] This usually happens inside unprivileged Docker/LXC containers." + if command -v setcap >/dev/null 2>&1; then + $SUDO setcap cap_net_bind_service=+ep "$bin_dst" 2>/dev/null || true fi } generate_secret() { - if command -v openssl >/dev/null 2>&1; then - secret="$(openssl rand -hex 16 2>/dev/null)" && [ -n "$secret" ] && { echo "$secret"; return 0; } + secret="$(command -v openssl >/dev/null 2>&1 && openssl rand -hex 16 2>/dev/null || true)" + if [ -z "$secret" ] || [ "${#secret}" -ne 32 ]; then + if command -v od >/dev/null 2>&1; then secret="$(dd if=/dev/urandom bs=16 count=1 2>/dev/null | od -An -tx1 | tr -d ' \n')" + elif command -v hexdump >/dev/null 2>&1; then secret="$(dd if=/dev/urandom bs=16 count=1 2>/dev/null | hexdump -e '1/1 "%02x"')" + elif command -v xxd >/dev/null 2>&1; then secret="$(dd if=/dev/urandom bs=16 count=1 2>/dev/null | xxd -p | tr -d '\n')" + fi fi - if command -v xxd >/dev/null 2>&1; then - secret="$(dd if=/dev/urandom bs=1 count=16 2>/dev/null | xxd -p | tr -d '\n')" && [ -n "$secret" ] && { echo "$secret"; return 0; } - fi - secret="$(dd if=/dev/urandom bs=1 count=16 2>/dev/null | od -An -tx1 | tr -d ' \n')" && [ -n "$secret" ] && { echo "$secret"; return 0; } - return 1 + if [ "${#secret}" -eq 32 ]; then echo "$secret"; else return 1; fi } generate_config_content() { + escaped_tls_domain="$(printf '%s\n' "$TLS_DOMAIN" | tr -d '[:cntrl:]' | sed 's/\\/\\\\/g; s/"/\\"/g')" + cat </dev/null && config_exists=1 || true - else - [ -f "$CONFIG_FILE" ] && config_exists=1 || true - fi - - if [ "$config_exists" -eq 1 ]; then - say "Config already exists, skipping generation." + if $SUDO sh -c '[ -f "$1" ]' _ "$CONFIG_FILE"; then + say " -> Config already exists at $CONFIG_FILE. Skipping creation." + return 0 + fi + elif [ -f "$CONFIG_FILE" ]; then + say " -> Config already exists at $CONFIG_FILE. Skipping creation." return 0 fi - toml_secret="$(generate_secret)" || die "Failed to generate secret" - say "Creating config at $CONFIG_FILE..." + toml_secret="$(generate_secret)" || die "Failed to generate secret." - tmp_conf="$(mktemp "${TEMP_DIR:-/tmp}/telemt_conf.XXXXXX")" || die "Failed to create temp config" - generate_config_content "$toml_secret" > "$tmp_conf" || die "Failed to write temp config" + generate_config_content "$toml_secret" | write_root "$CONFIG_FILE" || die "Failed to install config" + $SUDO chown root:telemt "$CONFIG_FILE" && $SUDO chmod 640 "$CONFIG_FILE" - $SUDO mv "$tmp_conf" "$CONFIG_FILE" || die "Failed to install config file" - $SUDO chown root:telemt "$CONFIG_FILE" || die "Failed to set owner" - $SUDO chmod 640 "$CONFIG_FILE" || die "Failed to set config permissions" - - say "Secret for user 'hello': $toml_secret" + say " -> Config created successfully." + say " -> Generated secret for default user 'hello': $toml_secret" } generate_systemd_content() { cat </dev/null 2>&1 && [ -d /run/systemd/system ]; then - say "Installing systemd service..." - tmp_svc="$(mktemp "${TEMP_DIR:-/tmp}/${SERVICE_NAME}.service.XXXXXX")" || die "Failed to create temp service" - generate_systemd_content > "$tmp_svc" || die "Failed to generate service content" + svc="$(get_svc_mgr)" + if [ "$svc" = "systemd" ]; then + generate_systemd_content | write_root "/etc/systemd/system/${SERVICE_NAME}.service" + $SUDO chown root:root "/etc/systemd/system/${SERVICE_NAME}.service" && $SUDO chmod 644 "/etc/systemd/system/${SERVICE_NAME}.service" - $SUDO mv "$tmp_svc" "/etc/systemd/system/${SERVICE_NAME}.service" || die "Failed to move service file" - $SUDO chown root:root "/etc/systemd/system/${SERVICE_NAME}.service" - $SUDO chmod 644 "/etc/systemd/system/${SERVICE_NAME}.service" + $SUDO systemctl daemon-reload || true + $SUDO systemctl enable "$SERVICE_NAME" || true + + if ! $SUDO systemctl start "$SERVICE_NAME"; then + say "[WARNING] Failed to start service" + SERVICE_START_FAILED=1 + fi + elif [ "$svc" = "openrc" ]; then + generate_openrc_content | write_root "/etc/init.d/${SERVICE_NAME}" + $SUDO chown root:root "/etc/init.d/${SERVICE_NAME}" && $SUDO chmod 0755 "/etc/init.d/${SERVICE_NAME}" - $SUDO systemctl daemon-reload || die "Failed to reload systemd" - $SUDO systemctl enable "$SERVICE_NAME" || die "Failed to enable service" - $SUDO systemctl start "$SERVICE_NAME" || die "Failed to start service" - - elif command -v rc-update >/dev/null 2>&1; then - say "Installing OpenRC service..." - tmp_svc="$(mktemp "${TEMP_DIR:-/tmp}/${SERVICE_NAME}.init.XXXXXX")" || die "Failed to create temp file" - generate_openrc_content > "$tmp_svc" || die "Failed to generate init content" - - $SUDO mv "$tmp_svc" "/etc/init.d/${SERVICE_NAME}" || die "Failed to move service file" - $SUDO chown root:root "/etc/init.d/${SERVICE_NAME}" - $SUDO chmod 0755 "/etc/init.d/${SERVICE_NAME}" - - $SUDO rc-update add "$SERVICE_NAME" default 2>/dev/null || die "Failed to register service" - $SUDO rc-service "$SERVICE_NAME" start 2>/dev/null || die "Failed to start OpenRC service" + $SUDO rc-update add "$SERVICE_NAME" default 2>/dev/null || true + + if ! $SUDO rc-service "$SERVICE_NAME" start 2>/dev/null; then + say "[WARNING] Failed to start service" + SERVICE_START_FAILED=1 + fi else - say "No service manager found. You can start it manually with:" - if [ -n "$SUDO" ]; then - say " sudo -u telemt ${INSTALL_DIR}/${BIN_NAME} ${CONFIG_FILE}" - else - say " su -s /bin/sh telemt -c '${INSTALL_DIR}/${BIN_NAME} ${CONFIG_FILE}'" + cmd="\"${INSTALL_DIR}/${BIN_NAME}\" \"${CONFIG_FILE}\"" + if [ -n "$SUDO" ]; then + say " -> Service manager not found. Start manually: sudo -u telemt $cmd" + else + say " -> Service manager not found. Start manually: su -s /bin/sh telemt -c '$cmd'" fi fi } kill_user_procs() { - say "Ensuring $BIN_NAME processes are killed..." - - if pkill_cmd="$(command -v pkill 2>/dev/null)"; then - $SUDO "$pkill_cmd" -u telemt "$BIN_NAME" 2>/dev/null || true + if command -v pkill >/dev/null 2>&1; then + $SUDO pkill -u telemt "$BIN_NAME" 2>/dev/null || true sleep 1 - $SUDO "$pkill_cmd" -9 -u telemt "$BIN_NAME" 2>/dev/null || true - elif killall_cmd="$(command -v killall 2>/dev/null)"; then - $SUDO "$killall_cmd" "$BIN_NAME" 2>/dev/null || true - sleep 1 - $SUDO "$killall_cmd" -9 "$BIN_NAME" 2>/dev/null || true + $SUDO pkill -9 -u telemt "$BIN_NAME" 2>/dev/null || true + else + if command -v pgrep >/dev/null 2>&1; then + pids="$(pgrep -u telemt 2>/dev/null || true)" + else + pids="$(ps -u telemt -o pid= 2>/dev/null || true)" + fi + + if [ -n "$pids" ]; then + for pid in $pids; do + case "$pid" in ''|*[!0-9]*) continue ;; *) $SUDO kill "$pid" 2>/dev/null || true ;; esac + done + sleep 1 + for pid in $pids; do + case "$pid" in ''|*[!0-9]*) continue ;; *) $SUDO kill -9 "$pid" 2>/dev/null || true ;; esac + done + fi fi } uninstall() { - purge_data=0 - [ "$ACTION" = "purge" ] && purge_data=1 + say "Starting uninstallation of $BIN_NAME..." - say "Uninstalling $BIN_NAME..." + say ">>> Stage 1: Stopping services" stop_service - if command -v systemctl >/dev/null 2>&1 && [ -d /run/systemd/system ]; then + say ">>> Stage 2: Removing service configuration" + svc="$(get_svc_mgr)" + if [ "$svc" = "systemd" ]; then $SUDO systemctl disable "$SERVICE_NAME" 2>/dev/null || true $SUDO rm -f "/etc/systemd/system/${SERVICE_NAME}.service" - $SUDO systemctl daemon-reload || true - elif command -v rc-update >/dev/null 2>&1; then + $SUDO systemctl daemon-reload 2>/dev/null || true + elif [ "$svc" = "openrc" ]; then $SUDO rc-update del "$SERVICE_NAME" 2>/dev/null || true $SUDO rm -f "/etc/init.d/${SERVICE_NAME}" fi + say ">>> Stage 3: Terminating user processes" kill_user_procs + say ">>> Stage 4: Removing binary" $SUDO rm -f "${INSTALL_DIR}/${BIN_NAME}" - $SUDO userdel telemt 2>/dev/null || $SUDO deluser telemt 2>/dev/null || true - $SUDO groupdel telemt 2>/dev/null || $SUDO delgroup telemt 2>/dev/null || true - - if [ "$purge_data" -eq 1 ]; then - say "Purging configuration and data..." + if [ "$ACTION" = "purge" ]; then + say ">>> Stage 5: Purging configuration, data, and user" $SUDO rm -rf "$CONFIG_DIR" "$WORK_DIR" + $SUDO rm -f "$CONFIG_FILE" + if [ "$CONFIG_PARENT_DIR" != "$CONFIG_DIR" ] && [ "$CONFIG_PARENT_DIR" != "." ] && [ "$CONFIG_PARENT_DIR" != "/" ]; then + $SUDO rmdir "$CONFIG_PARENT_DIR" 2>/dev/null || true + fi + $SUDO userdel telemt 2>/dev/null || $SUDO deluser telemt 2>/dev/null || true + $SUDO groupdel telemt 2>/dev/null || $SUDO delgroup telemt 2>/dev/null || true else - say "Note: Configuration in $CONFIG_DIR was kept. Run with '--purge' to remove it." + say "Note: Configuration and user kept. Run with 'purge' to remove completely." fi - - say "Uninstallation complete." + + printf '\n====================================================================\n' + printf ' UNINSTALLATION COMPLETE\n' + printf '====================================================================\n\n' exit 0 } -# ============================================================================ -# Main Entry Point -# ============================================================================ - case "$ACTION" in - help) - show_help - ;; - uninstall|purge) - verify_common - uninstall - ;; + help) show_help ;; + uninstall|purge) verify_common; uninstall ;; install) - say "Starting installation..." - verify_common - verify_install_deps + say "Starting installation of $BIN_NAME (Version: $TARGET_VERSION)" - ARCH="$(detect_arch)" - LIBC="$(detect_libc)" - say "Detected system: $ARCH-linux-$LIBC" + say ">>> Stage 1: Verifying environment and dependencies" + verify_common; verify_install_deps + if [ "$TARGET_VERSION" != "latest" ]; then + TARGET_VERSION="${TARGET_VERSION#v}" + fi + + ARCH="$(detect_arch)"; LIBC="$(detect_libc)" FILE_NAME="${BIN_NAME}-${ARCH}-linux-${LIBC}.tar.gz" - FILE_NAME="$(printf '%s' "$FILE_NAME" | tr -d ' \t\n\r')" - + if [ "$TARGET_VERSION" = "latest" ]; then DL_URL="https://github.com/${REPO}/releases/latest/download/${FILE_NAME}" - else + else DL_URL="https://github.com/${REPO}/releases/download/${TARGET_VERSION}/${FILE_NAME}" fi - TEMP_DIR="$(mktemp -d)" || die "Failed to create temp directory" + say ">>> Stage 2: Downloading archive" + TEMP_DIR="$(mktemp -d)" || die "Temp directory creation failed" if [ -z "$TEMP_DIR" ] || [ ! -d "$TEMP_DIR" ]; then - die "Temp directory creation failed" + die "Temp directory is invalid or was not created" fi - say "Downloading from $DL_URL..." - fetch_file "$DL_URL" "${TEMP_DIR}/archive.tar.gz" || die "Download failed (check version or network)" + fetch_file "$DL_URL" "${TEMP_DIR}/${FILE_NAME}" || die "Download failed" - gzip -dc "${TEMP_DIR}/archive.tar.gz" | tar -xf - -C "$TEMP_DIR" || die "Extraction failed" + say ">>> Stage 3: Extracting archive" + if ! gzip -dc "${TEMP_DIR}/${FILE_NAME}" | tar -xf - -C "$TEMP_DIR" 2>/dev/null; then + die "Extraction failed (downloaded archive might be invalid or 404)." + fi - EXTRACTED_BIN="$(find "$TEMP_DIR" -type f -name "$BIN_NAME" -print 2>/dev/null | head -n 1)" - [ -z "$EXTRACTED_BIN" ] && die "Binary '$BIN_NAME' not found in archive" + EXTRACTED_BIN="$(find "$TEMP_DIR" -type f -name "$BIN_NAME" -print 2>/dev/null | head -n 1 || true)" + [ -n "$EXTRACTED_BIN" ] || die "Binary '$BIN_NAME' not found in archive" - ensure_user_group - setup_dirs - stop_service - - say "Installing binary..." + say ">>> Stage 4: Setting up environment (User, Group, Directories)" + ensure_user_group; setup_dirs; stop_service + + say ">>> Stage 5: Installing binary" install_binary "$EXTRACTED_BIN" "${INSTALL_DIR}/${BIN_NAME}" - + + say ">>> Stage 6: Generating configuration" install_config + + say ">>> Stage 7: Installing and starting service" install_service - say "" - say "=============================================" - say "Installation complete!" - say "=============================================" - if command -v systemctl >/dev/null 2>&1 && [ -d /run/systemd/system ]; then - say "To check the logs, run:" - say " journalctl -u $SERVICE_NAME -f" - say "" - fi - say "To get user connection links, run:" - if command -v jq >/dev/null 2>&1; then - say " curl -s http://127.0.0.1:9091/v1/users | jq -r '.data[] | \"User: \\(.username)\\n\\(.links.tls[0] // empty)\"'" + if [ "${SERVICE_START_FAILED:-0}" -eq 1 ]; then + printf '\n====================================================================\n' + printf ' INSTALLATION COMPLETED WITH WARNINGS\n' + printf '====================================================================\n\n' + printf 'The service was installed but failed to start automatically.\n' + printf 'Please check the logs to determine the issue.\n\n' else - say " curl -s http://127.0.0.1:9091/v1/users" - say " (Note: Install 'jq' package to see the links nicely formatted)" + printf '\n====================================================================\n' + printf ' INSTALLATION SUCCESS\n' + printf '====================================================================\n\n' fi + + svc="$(get_svc_mgr)" + if [ "$svc" = "systemd" ]; then + printf 'To check the status of your proxy service, run:\n' + printf ' systemctl status %s\n\n' "$SERVICE_NAME" + elif [ "$svc" = "openrc" ]; then + printf 'To check the status of your proxy service, run:\n' + printf ' rc-service %s status\n\n' "$SERVICE_NAME" + fi + + printf 'To get your user connection links (for Telegram), run:\n' + if command -v jq >/dev/null 2>&1; then + printf ' curl -s http://127.0.0.1:9091/v1/users | jq -r '\''.data[] | "User: \\(.username)\\n\\(.links.tls[0] // empty)\\n"'\''\n' + else + printf ' curl -s http://127.0.0.1:9091/v1/users\n' + printf ' (Tip: Install '\''jq'\'' for a much cleaner output)\n' + fi + + printf '\n====================================================================\n' ;; esac diff --git a/src/api/model.rs b/src/api/model.rs index 5e64d2d..6578d35 100644 --- a/src/api/model.rs +++ b/src/api/model.rs @@ -206,6 +206,16 @@ pub(super) struct ZeroPoolData { pub(super) refill_failed_total: u64, pub(super) writer_restored_same_endpoint_total: u64, pub(super) writer_restored_fallback_total: u64, + pub(super) teardown_attempt_total_normal: u64, + pub(super) teardown_attempt_total_hard_detach: u64, + pub(super) teardown_success_total_normal: u64, + pub(super) teardown_success_total_hard_detach: u64, + pub(super) teardown_timeout_total: u64, + pub(super) teardown_escalation_total: u64, + pub(super) teardown_noop_total: u64, + pub(super) teardown_cleanup_side_effect_failures_total: u64, + pub(super) teardown_duration_count_total: u64, + pub(super) teardown_duration_sum_seconds_total: f64, } #[derive(Serialize, Clone)] @@ -365,6 +375,7 @@ pub(super) struct MinimalMeRuntimeData { pub(super) me_reconnect_backoff_cap_ms: u64, pub(super) me_reconnect_fast_retry_count: u32, pub(super) me_pool_drain_ttl_secs: u64, + pub(super) me_instadrain: bool, pub(super) me_pool_drain_soft_evict_enabled: bool, pub(super) me_pool_drain_soft_evict_grace_secs: u64, pub(super) me_pool_drain_soft_evict_per_writer: u8, diff --git a/src/api/runtime_min.rs b/src/api/runtime_min.rs index b217ad0..5ff12cc 100644 --- a/src/api/runtime_min.rs +++ b/src/api/runtime_min.rs @@ -4,6 +4,9 @@ use std::time::{SystemTime, UNIX_EPOCH}; use serde::Serialize; use crate::config::ProxyConfig; +use crate::stats::{ + MeWriterCleanupSideEffectStep, MeWriterTeardownMode, MeWriterTeardownReason, Stats, +}; use super::ApiShared; @@ -98,6 +101,50 @@ pub(super) struct RuntimeMeQualityCountersData { pub(super) reconnect_success_total: u64, } +#[derive(Serialize)] +pub(super) struct RuntimeMeQualityTeardownAttemptData { + pub(super) reason: &'static str, + pub(super) mode: &'static str, + pub(super) total: u64, +} + +#[derive(Serialize)] +pub(super) struct RuntimeMeQualityTeardownSuccessData { + pub(super) mode: &'static str, + pub(super) total: u64, +} + +#[derive(Serialize)] +pub(super) struct RuntimeMeQualityTeardownSideEffectData { + pub(super) step: &'static str, + pub(super) total: u64, +} + +#[derive(Serialize)] +pub(super) struct RuntimeMeQualityTeardownDurationBucketData { + pub(super) le_seconds: &'static str, + pub(super) total: u64, +} + +#[derive(Serialize)] +pub(super) struct RuntimeMeQualityTeardownDurationData { + pub(super) mode: &'static str, + pub(super) count: u64, + pub(super) sum_seconds: f64, + pub(super) buckets: Vec, +} + +#[derive(Serialize)] +pub(super) struct RuntimeMeQualityTeardownData { + pub(super) attempts: Vec, + pub(super) success: Vec, + pub(super) timeout_total: u64, + pub(super) escalation_total: u64, + pub(super) noop_total: u64, + pub(super) cleanup_side_effect_failures: Vec, + pub(super) duration: Vec, +} + #[derive(Serialize)] pub(super) struct RuntimeMeQualityRouteDropData { pub(super) no_conn_total: u64, @@ -120,6 +167,7 @@ pub(super) struct RuntimeMeQualityDcRttData { #[derive(Serialize)] pub(super) struct RuntimeMeQualityPayload { pub(super) counters: RuntimeMeQualityCountersData, + pub(super) teardown: RuntimeMeQualityTeardownData, pub(super) route_drops: RuntimeMeQualityRouteDropData, pub(super) dc_rtt: Vec, } @@ -375,6 +423,7 @@ pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> Runtime reconnect_attempt_total: shared.stats.get_me_reconnect_attempts(), reconnect_success_total: shared.stats.get_me_reconnect_success(), }, + teardown: build_runtime_me_teardown_data(shared), route_drops: RuntimeMeQualityRouteDropData { no_conn_total: shared.stats.get_me_route_drop_no_conn(), channel_closed_total: shared.stats.get_me_route_drop_channel_closed(), @@ -398,6 +447,81 @@ pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> Runtime } } +fn build_runtime_me_teardown_data(shared: &ApiShared) -> RuntimeMeQualityTeardownData { + let attempts = MeWriterTeardownReason::ALL + .iter() + .copied() + .flat_map(|reason| { + MeWriterTeardownMode::ALL + .iter() + .copied() + .map(move |mode| RuntimeMeQualityTeardownAttemptData { + reason: reason.as_str(), + mode: mode.as_str(), + total: shared.stats.get_me_writer_teardown_attempt_total(reason, mode), + }) + }) + .collect(); + + let success = MeWriterTeardownMode::ALL + .iter() + .copied() + .map(|mode| RuntimeMeQualityTeardownSuccessData { + mode: mode.as_str(), + total: shared.stats.get_me_writer_teardown_success_total(mode), + }) + .collect(); + + let cleanup_side_effect_failures = MeWriterCleanupSideEffectStep::ALL + .iter() + .copied() + .map(|step| RuntimeMeQualityTeardownSideEffectData { + step: step.as_str(), + total: shared + .stats + .get_me_writer_cleanup_side_effect_failures_total(step), + }) + .collect(); + + let duration = MeWriterTeardownMode::ALL + .iter() + .copied() + .map(|mode| { + let count = shared.stats.get_me_writer_teardown_duration_count(mode); + let mut buckets: Vec = Stats::me_writer_teardown_duration_bucket_labels() + .iter() + .enumerate() + .map(|(bucket_idx, label)| RuntimeMeQualityTeardownDurationBucketData { + le_seconds: label, + total: shared + .stats + .get_me_writer_teardown_duration_bucket_total(mode, bucket_idx), + }) + .collect(); + buckets.push(RuntimeMeQualityTeardownDurationBucketData { + le_seconds: "+Inf", + total: count, + }); + RuntimeMeQualityTeardownDurationData { + mode: mode.as_str(), + count, + sum_seconds: shared.stats.get_me_writer_teardown_duration_sum_seconds(mode), + buckets, + } + }) + .collect(); + + RuntimeMeQualityTeardownData { + attempts, + success, + timeout_total: shared.stats.get_me_writer_teardown_timeout_total(), + escalation_total: shared.stats.get_me_writer_teardown_escalation_total(), + noop_total: shared.stats.get_me_writer_teardown_noop_total(), + cleanup_side_effect_failures, + duration, + } +} + pub(super) async fn build_runtime_upstream_quality_data( shared: &ApiShared, ) -> RuntimeUpstreamQualityData { diff --git a/src/api/runtime_stats.rs b/src/api/runtime_stats.rs index b1bc9e3..999e2cf 100644 --- a/src/api/runtime_stats.rs +++ b/src/api/runtime_stats.rs @@ -1,9 +1,9 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use crate::config::ApiConfig; -use crate::stats::Stats; -use crate::transport::UpstreamRouteKind; +use crate::stats::{MeWriterTeardownMode, Stats}; use crate::transport::upstream::IpPreference; +use crate::transport::UpstreamRouteKind; use super::ApiShared; use super::model::{ @@ -106,6 +106,29 @@ pub(super) fn build_zero_all_data(stats: &Stats, configured_users: usize) -> Zer refill_failed_total: stats.get_me_refill_failed_total(), writer_restored_same_endpoint_total: stats.get_me_writer_restored_same_endpoint_total(), writer_restored_fallback_total: stats.get_me_writer_restored_fallback_total(), + teardown_attempt_total_normal: stats + .get_me_writer_teardown_attempt_total_by_mode(MeWriterTeardownMode::Normal), + teardown_attempt_total_hard_detach: stats + .get_me_writer_teardown_attempt_total_by_mode(MeWriterTeardownMode::HardDetach), + teardown_success_total_normal: stats + .get_me_writer_teardown_success_total(MeWriterTeardownMode::Normal), + teardown_success_total_hard_detach: stats + .get_me_writer_teardown_success_total(MeWriterTeardownMode::HardDetach), + teardown_timeout_total: stats.get_me_writer_teardown_timeout_total(), + teardown_escalation_total: stats.get_me_writer_teardown_escalation_total(), + teardown_noop_total: stats.get_me_writer_teardown_noop_total(), + teardown_cleanup_side_effect_failures_total: stats + .get_me_writer_cleanup_side_effect_failures_total_all(), + teardown_duration_count_total: stats + .get_me_writer_teardown_duration_count(MeWriterTeardownMode::Normal) + .saturating_add( + stats.get_me_writer_teardown_duration_count(MeWriterTeardownMode::HardDetach), + ), + teardown_duration_sum_seconds_total: stats + .get_me_writer_teardown_duration_sum_seconds(MeWriterTeardownMode::Normal) + + stats.get_me_writer_teardown_duration_sum_seconds( + MeWriterTeardownMode::HardDetach, + ), }, desync: ZeroDesyncData { secure_padding_invalid_total: stats.get_secure_padding_invalid(), @@ -429,6 +452,7 @@ async fn get_minimal_payload_cached( me_reconnect_backoff_cap_ms: runtime.me_reconnect_backoff_cap_ms, me_reconnect_fast_retry_count: runtime.me_reconnect_fast_retry_count, me_pool_drain_ttl_secs: runtime.me_pool_drain_ttl_secs, + me_instadrain: runtime.me_instadrain, me_pool_drain_soft_evict_enabled: runtime.me_pool_drain_soft_evict_enabled, me_pool_drain_soft_evict_grace_secs: runtime.me_pool_drain_soft_evict_grace_secs, me_pool_drain_soft_evict_per_writer: runtime.me_pool_drain_soft_evict_per_writer, diff --git a/src/cli.rs b/src/cli.rs index a1182a7..b6e2d92 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -198,8 +198,15 @@ desync_all_full = false update_every = 43200 hardswap = false me_pool_drain_ttl_secs = 90 +me_instadrain = false +me_pool_drain_threshold = 32 +me_pool_drain_soft_evict_grace_secs = 10 +me_pool_drain_soft_evict_per_writer = 2 +me_pool_drain_soft_evict_budget_per_core = 16 +me_pool_drain_soft_evict_cooldown_ms = 1000 +me_bind_stale_mode = "never" me_pool_min_fresh_ratio = 0.8 -me_reinit_drain_timeout_secs = 120 +me_reinit_drain_timeout_secs = 90 [network] ipv4 = true diff --git a/src/config/defaults.rs b/src/config/defaults.rs index 54a53b3..fea8305 100644 --- a/src/config/defaults.rs +++ b/src/config/defaults.rs @@ -40,10 +40,10 @@ const DEFAULT_ME_ROUTE_HYBRID_MAX_WAIT_MS: u64 = 3000; const DEFAULT_ME_ROUTE_BLOCKING_SEND_TIMEOUT_MS: u64 = 250; const DEFAULT_ME_C2ME_SEND_TIMEOUT_MS: u64 = 4000; const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_ENABLED: bool = true; -const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_GRACE_SECS: u64 = 30; -const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_PER_WRITER: u8 = 1; -const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_BUDGET_PER_CORE: u16 = 8; -const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_COOLDOWN_MS: u64 = 5000; +const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_GRACE_SECS: u64 = 10; +const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_PER_WRITER: u8 = 2; +const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_BUDGET_PER_CORE: u16 = 16; +const DEFAULT_ME_POOL_DRAIN_SOFT_EVICT_COOLDOWN_MS: u64 = 1000; const DEFAULT_USER_MAX_UNIQUE_IPS_WINDOW_SECS: u64 = 30; const DEFAULT_ACCEPT_PERMIT_TIMEOUT_MS: u64 = 250; const DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS: u32 = 2; @@ -606,15 +606,19 @@ pub(crate) fn default_proxy_secret_len_max() -> usize { } pub(crate) fn default_me_reinit_drain_timeout_secs() -> u64 { - 120 + 90 } pub(crate) fn default_me_pool_drain_ttl_secs() -> u64 { 90 } +pub(crate) fn default_me_instadrain() -> bool { + false +} + pub(crate) fn default_me_pool_drain_threshold() -> u64 { - 128 + 32 } pub(crate) fn default_me_pool_drain_soft_evict_enabled() -> bool { diff --git a/src/config/hot_reload.rs b/src/config/hot_reload.rs index 7b94999..1315f9c 100644 --- a/src/config/hot_reload.rs +++ b/src/config/hot_reload.rs @@ -56,6 +56,7 @@ pub struct HotFields { pub me_reinit_coalesce_window_ms: u64, pub hardswap: bool, pub me_pool_drain_ttl_secs: u64, + pub me_instadrain: bool, pub me_pool_drain_threshold: u64, pub me_pool_drain_soft_evict_enabled: bool, pub me_pool_drain_soft_evict_grace_secs: u64, @@ -143,6 +144,7 @@ impl HotFields { me_reinit_coalesce_window_ms: cfg.general.me_reinit_coalesce_window_ms, hardswap: cfg.general.hardswap, me_pool_drain_ttl_secs: cfg.general.me_pool_drain_ttl_secs, + me_instadrain: cfg.general.me_instadrain, me_pool_drain_threshold: cfg.general.me_pool_drain_threshold, me_pool_drain_soft_evict_enabled: cfg.general.me_pool_drain_soft_evict_enabled, me_pool_drain_soft_evict_grace_secs: cfg.general.me_pool_drain_soft_evict_grace_secs, @@ -477,6 +479,7 @@ fn overlay_hot_fields(old: &ProxyConfig, new: &ProxyConfig) -> ProxyConfig { cfg.general.me_reinit_coalesce_window_ms = new.general.me_reinit_coalesce_window_ms; cfg.general.hardswap = new.general.hardswap; cfg.general.me_pool_drain_ttl_secs = new.general.me_pool_drain_ttl_secs; + cfg.general.me_instadrain = new.general.me_instadrain; cfg.general.me_pool_drain_threshold = new.general.me_pool_drain_threshold; cfg.general.me_pool_drain_soft_evict_enabled = new.general.me_pool_drain_soft_evict_enabled; cfg.general.me_pool_drain_soft_evict_grace_secs = @@ -869,6 +872,12 @@ fn log_changes( old_hot.me_pool_drain_ttl_secs, new_hot.me_pool_drain_ttl_secs, ); } + if old_hot.me_instadrain != new_hot.me_instadrain { + info!( + "config reload: me_instadrain: {} β†’ {}", + old_hot.me_instadrain, new_hot.me_instadrain, + ); + } if old_hot.me_pool_drain_threshold != new_hot.me_pool_drain_threshold { info!( diff --git a/src/config/load.rs b/src/config/load.rs index 5be4d3d..c04fce8 100644 --- a/src/config/load.rs +++ b/src/config/load.rs @@ -2081,6 +2081,45 @@ mod tests { let _ = std::fs::remove_file(path); } + #[test] + fn force_close_default_matches_drain_ttl() { + let toml = r#" + [censorship] + tls_domain = "example.com" + + [access.users] + user = "00000000000000000000000000000000" + "#; + let dir = std::env::temp_dir(); + let path = dir.join("telemt_force_close_default_test.toml"); + std::fs::write(&path, toml).unwrap(); + let cfg = ProxyConfig::load(&path).unwrap(); + assert_eq!(cfg.general.me_reinit_drain_timeout_secs, 90); + assert_eq!(cfg.general.effective_me_pool_force_close_secs(), 90); + let _ = std::fs::remove_file(path); + } + + #[test] + fn force_close_zero_uses_runtime_safety_fallback() { + let toml = r#" + [general] + me_reinit_drain_timeout_secs = 0 + + [censorship] + tls_domain = "example.com" + + [access.users] + user = "00000000000000000000000000000000" + "#; + let dir = std::env::temp_dir(); + let path = dir.join("telemt_force_close_zero_fallback_test.toml"); + std::fs::write(&path, toml).unwrap(); + let cfg = ProxyConfig::load(&path).unwrap(); + assert_eq!(cfg.general.me_reinit_drain_timeout_secs, 0); + assert_eq!(cfg.general.effective_me_pool_force_close_secs(), 300); + let _ = std::fs::remove_file(path); + } + #[test] fn force_close_bumped_when_below_drain_ttl() { let toml = r#" diff --git a/src/config/types.rs b/src/config/types.rs index e41f422..e5040f6 100644 --- a/src/config/types.rs +++ b/src/config/types.rs @@ -135,8 +135,8 @@ impl MeSocksKdfPolicy { #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] #[serde(rename_all = "lowercase")] pub enum MeBindStaleMode { - Never, #[default] + Never, Ttl, Always, } @@ -812,6 +812,10 @@ pub struct GeneralConfig { #[serde(default = "default_me_pool_drain_ttl_secs")] pub me_pool_drain_ttl_secs: u64, + /// Force-remove any draining writer on the next cleanup tick, regardless of age/deadline. + #[serde(default = "default_me_instadrain")] + pub me_instadrain: bool, + /// Maximum allowed number of draining ME writers before oldest ones are force-closed in batches. /// Set to 0 to disable threshold-based draining cleanup and keep timeout-only behavior. #[serde(default = "default_me_pool_drain_threshold")] @@ -851,7 +855,7 @@ pub struct GeneralConfig { pub me_pool_min_fresh_ratio: f32, /// Drain timeout in seconds for stale ME writers after endpoint map changes. - /// Set to 0 to keep stale writers draining indefinitely (no force-close). + /// Set to 0 to use the runtime safety fallback timeout. #[serde(default = "default_me_reinit_drain_timeout_secs")] pub me_reinit_drain_timeout_secs: u64, @@ -1036,6 +1040,7 @@ impl Default for GeneralConfig { me_secret_atomic_snapshot: default_me_secret_atomic_snapshot(), proxy_secret_len_max: default_proxy_secret_len_max(), me_pool_drain_ttl_secs: default_me_pool_drain_ttl_secs(), + me_instadrain: default_me_instadrain(), me_pool_drain_threshold: default_me_pool_drain_threshold(), me_pool_drain_soft_evict_enabled: default_me_pool_drain_soft_evict_enabled(), me_pool_drain_soft_evict_grace_secs: default_me_pool_drain_soft_evict_grace_secs(), @@ -1081,8 +1086,13 @@ impl GeneralConfig { /// Resolve force-close timeout for stale writers. /// `me_reinit_drain_timeout_secs` remains backward-compatible alias. + /// A configured `0` uses the runtime safety fallback (300s). pub fn effective_me_pool_force_close_secs(&self) -> u64 { - self.me_reinit_drain_timeout_secs + if self.me_reinit_drain_timeout_secs == 0 { + 300 + } else { + self.me_reinit_drain_timeout_secs + } } } diff --git a/src/maestro/me_startup.rs b/src/maestro/me_startup.rs index 827b00c..eb45cc4 100644 --- a/src/maestro/me_startup.rs +++ b/src/maestro/me_startup.rs @@ -237,6 +237,7 @@ pub(crate) async fn initialize_me_pool( config.general.me_adaptive_floor_max_warm_writers_global, config.general.hardswap, config.general.me_pool_drain_ttl_secs, + config.general.me_instadrain, config.general.me_pool_drain_threshold, config.general.me_pool_drain_soft_evict_enabled, config.general.me_pool_drain_soft_evict_grace_secs, @@ -331,18 +332,76 @@ pub(crate) async fn initialize_me_pool( "Middle-End pool initialized successfully" ); - let pool_health = pool_bg.clone(); - let rng_health = rng_bg.clone(); - let min_conns = pool_size; - tokio::spawn(async move { - crate::transport::middle_proxy::me_health_monitor( - pool_health, - rng_health, - min_conns, - ) - .await; - }); - break; + // ── Supervised background tasks ────────────────── + // Each task runs inside a nested tokio::spawn so + // that a panic is caught via JoinHandle and the + // outer loop restarts the task automatically. + let pool_health = pool_bg.clone(); + let rng_health = rng_bg.clone(); + let min_conns = pool_size; + tokio::spawn(async move { + loop { + let p = pool_health.clone(); + let r = rng_health.clone(); + let res = tokio::spawn(async move { + crate::transport::middle_proxy::me_health_monitor( + p, r, min_conns, + ) + .await; + }) + .await; + match res { + Ok(()) => warn!("me_health_monitor exited unexpectedly, restarting"), + Err(e) => { + error!(error = %e, "me_health_monitor panicked, restarting in 1s"); + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + } + }); + let pool_drain_enforcer = pool_bg.clone(); + tokio::spawn(async move { + loop { + let p = pool_drain_enforcer.clone(); + let res = tokio::spawn(async move { + crate::transport::middle_proxy::me_drain_timeout_enforcer(p).await; + }) + .await; + match res { + Ok(()) => warn!("me_drain_timeout_enforcer exited unexpectedly, restarting"), + Err(e) => { + error!(error = %e, "me_drain_timeout_enforcer panicked, restarting in 1s"); + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + } + }); + let pool_watchdog = pool_bg.clone(); + tokio::spawn(async move { + loop { + let p = pool_watchdog.clone(); + let res = tokio::spawn(async move { + crate::transport::middle_proxy::me_zombie_writer_watchdog(p).await; + }) + .await; + match res { + Ok(()) => warn!("me_zombie_writer_watchdog exited unexpectedly, restarting"), + Err(e) => { + error!(error = %e, "me_zombie_writer_watchdog panicked, restarting in 1s"); + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + } + }); + // CRITICAL: keep the current-thread runtime + // alive. Without this, block_on() returns, + // the Runtime is dropped, and ALL spawned + // background tasks (health monitor, drain + // enforcer, zombie watchdog) are silently + // cancelled β€” causing the draining-writer + // leak that brought us here. + std::future::pending::<()>().await; + unreachable!(); } Err(e) => { startup_tracker_bg.set_me_last_error(Some(e.to_string())).await; @@ -400,16 +459,65 @@ pub(crate) async fn initialize_me_pool( "Middle-End pool initialized successfully" ); - let pool_clone = pool.clone(); - let rng_clone = rng.clone(); - let min_conns = pool_size; - tokio::spawn(async move { - crate::transport::middle_proxy::me_health_monitor( - pool_clone, rng_clone, min_conns, - ) - .await; - }); - + // ── Supervised background tasks ────────────────── + let pool_clone = pool.clone(); + let rng_clone = rng.clone(); + let min_conns = pool_size; + tokio::spawn(async move { + loop { + let p = pool_clone.clone(); + let r = rng_clone.clone(); + let res = tokio::spawn(async move { + crate::transport::middle_proxy::me_health_monitor( + p, r, min_conns, + ) + .await; + }) + .await; + match res { + Ok(()) => warn!("me_health_monitor exited unexpectedly, restarting"), + Err(e) => { + error!(error = %e, "me_health_monitor panicked, restarting in 1s"); + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + } + }); + let pool_drain_enforcer = pool.clone(); + tokio::spawn(async move { + loop { + let p = pool_drain_enforcer.clone(); + let res = tokio::spawn(async move { + crate::transport::middle_proxy::me_drain_timeout_enforcer(p).await; + }) + .await; + match res { + Ok(()) => warn!("me_drain_timeout_enforcer exited unexpectedly, restarting"), + Err(e) => { + error!(error = %e, "me_drain_timeout_enforcer panicked, restarting in 1s"); + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + } + }); + let pool_watchdog = pool.clone(); + tokio::spawn(async move { + loop { + let p = pool_watchdog.clone(); + let res = tokio::spawn(async move { + crate::transport::middle_proxy::me_zombie_writer_watchdog(p).await; + }) + .await; + match res { + Ok(()) => warn!("me_zombie_writer_watchdog exited unexpectedly, restarting"), + Err(e) => { + error!(error = %e, "me_zombie_writer_watchdog panicked, restarting in 1s"); + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + } + }); + break Some(pool); } Err(e) => { diff --git a/src/metrics.rs b/src/metrics.rs index 4f7f4b6..b7272b2 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -16,7 +16,9 @@ use tracing::{info, warn, debug}; use crate::config::ProxyConfig; use crate::ip_tracker::UserIpTracker; use crate::stats::beobachten::BeobachtenStore; -use crate::stats::Stats; +use crate::stats::{ + MeWriterCleanupSideEffectStep, MeWriterTeardownMode, MeWriterTeardownReason, Stats, +}; use crate::transport::{ListenOptions, create_listener}; pub async fn serve( @@ -1770,6 +1772,169 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp } ); + let _ = writeln!( + out, + "# HELP telemt_me_writer_teardown_attempt_total ME writer teardown attempts by reason and mode" + ); + let _ = writeln!(out, "# TYPE telemt_me_writer_teardown_attempt_total counter"); + for reason in MeWriterTeardownReason::ALL { + for mode in MeWriterTeardownMode::ALL { + let _ = writeln!( + out, + "telemt_me_writer_teardown_attempt_total{{reason=\"{}\",mode=\"{}\"}} {}", + reason.as_str(), + mode.as_str(), + if me_allows_normal { + stats.get_me_writer_teardown_attempt_total(reason, mode) + } else { + 0 + } + ); + } + } + + let _ = writeln!( + out, + "# HELP telemt_me_writer_teardown_success_total ME writer teardown successes by mode" + ); + let _ = writeln!(out, "# TYPE telemt_me_writer_teardown_success_total counter"); + for mode in MeWriterTeardownMode::ALL { + let _ = writeln!( + out, + "telemt_me_writer_teardown_success_total{{mode=\"{}\"}} {}", + mode.as_str(), + if me_allows_normal { + stats.get_me_writer_teardown_success_total(mode) + } else { + 0 + } + ); + } + + let _ = writeln!( + out, + "# HELP telemt_me_writer_teardown_timeout_total Teardown operations that timed out" + ); + let _ = writeln!(out, "# TYPE telemt_me_writer_teardown_timeout_total counter"); + let _ = writeln!( + out, + "telemt_me_writer_teardown_timeout_total {}", + if me_allows_normal { + stats.get_me_writer_teardown_timeout_total() + } else { + 0 + } + ); + + let _ = writeln!( + out, + "# HELP telemt_me_writer_teardown_escalation_total Watchdog teardown escalations to hard detach" + ); + let _ = writeln!( + out, + "# TYPE telemt_me_writer_teardown_escalation_total counter" + ); + let _ = writeln!( + out, + "telemt_me_writer_teardown_escalation_total {}", + if me_allows_normal { + stats.get_me_writer_teardown_escalation_total() + } else { + 0 + } + ); + + let _ = writeln!( + out, + "# HELP telemt_me_writer_teardown_noop_total Teardown operations that became no-op" + ); + let _ = writeln!(out, "# TYPE telemt_me_writer_teardown_noop_total counter"); + let _ = writeln!( + out, + "telemt_me_writer_teardown_noop_total {}", + if me_allows_normal { + stats.get_me_writer_teardown_noop_total() + } else { + 0 + } + ); + + let _ = writeln!( + out, + "# HELP telemt_me_writer_teardown_duration_seconds ME writer teardown latency histogram by mode" + ); + let _ = writeln!( + out, + "# TYPE telemt_me_writer_teardown_duration_seconds histogram" + ); + let bucket_labels = Stats::me_writer_teardown_duration_bucket_labels(); + for mode in MeWriterTeardownMode::ALL { + for (bucket_idx, label) in bucket_labels.iter().enumerate() { + let _ = writeln!( + out, + "telemt_me_writer_teardown_duration_seconds_bucket{{mode=\"{}\",le=\"{}\"}} {}", + mode.as_str(), + label, + if me_allows_normal { + stats.get_me_writer_teardown_duration_bucket_total(mode, bucket_idx) + } else { + 0 + } + ); + } + let _ = writeln!( + out, + "telemt_me_writer_teardown_duration_seconds_bucket{{mode=\"{}\",le=\"+Inf\"}} {}", + mode.as_str(), + if me_allows_normal { + stats.get_me_writer_teardown_duration_count(mode) + } else { + 0 + } + ); + let _ = writeln!( + out, + "telemt_me_writer_teardown_duration_seconds_sum{{mode=\"{}\"}} {:.6}", + mode.as_str(), + if me_allows_normal { + stats.get_me_writer_teardown_duration_sum_seconds(mode) + } else { + 0.0 + } + ); + let _ = writeln!( + out, + "telemt_me_writer_teardown_duration_seconds_count{{mode=\"{}\"}} {}", + mode.as_str(), + if me_allows_normal { + stats.get_me_writer_teardown_duration_count(mode) + } else { + 0 + } + ); + } + + let _ = writeln!( + out, + "# HELP telemt_me_writer_cleanup_side_effect_failures_total Failed cleanup side effects by step" + ); + let _ = writeln!( + out, + "# TYPE telemt_me_writer_cleanup_side_effect_failures_total counter" + ); + for step in MeWriterCleanupSideEffectStep::ALL { + let _ = writeln!( + out, + "telemt_me_writer_cleanup_side_effect_failures_total{{step=\"{}\"}} {}", + step.as_str(), + if me_allows_normal { + stats.get_me_writer_cleanup_side_effect_failures_total(step) + } else { + 0 + } + ); + } + let _ = writeln!(out, "# HELP telemt_me_refill_triggered_total Immediate ME refill runs started"); let _ = writeln!(out, "# TYPE telemt_me_refill_triggered_total counter"); let _ = writeln!( @@ -2175,6 +2340,17 @@ mod tests { assert!(output.contains("# TYPE telemt_me_rpc_proxy_req_signal_sent_total counter")); assert!(output.contains("# TYPE telemt_me_idle_close_by_peer_total counter")); assert!(output.contains("# TYPE telemt_me_writer_removed_total counter")); + assert!(output.contains("# TYPE telemt_me_writer_teardown_attempt_total counter")); + assert!(output.contains("# TYPE telemt_me_writer_teardown_success_total counter")); + assert!(output.contains("# TYPE telemt_me_writer_teardown_timeout_total counter")); + assert!(output.contains("# TYPE telemt_me_writer_teardown_escalation_total counter")); + assert!(output.contains("# TYPE telemt_me_writer_teardown_noop_total counter")); + assert!(output.contains( + "# TYPE telemt_me_writer_teardown_duration_seconds histogram" + )); + assert!(output.contains( + "# TYPE telemt_me_writer_cleanup_side_effect_failures_total counter" + )); assert!(output.contains("# TYPE telemt_me_writer_close_signal_drop_total counter")); assert!(output.contains( "# TYPE telemt_me_writer_close_signal_channel_full_total counter" diff --git a/src/stats/mod.rs b/src/stats/mod.rs index ad1d16b..0df4dc0 100644 --- a/src/stats/mod.rs +++ b/src/stats/mod.rs @@ -19,6 +19,137 @@ use tracing::debug; use crate::config::{MeTelemetryLevel, MeWriterPickMode}; use self::telemetry::TelemetryPolicy; +const ME_WRITER_TEARDOWN_MODE_COUNT: usize = 2; +const ME_WRITER_TEARDOWN_REASON_COUNT: usize = 11; +const ME_WRITER_CLEANUP_SIDE_EFFECT_STEP_COUNT: usize = 2; +const ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT: usize = 12; +const ME_WRITER_TEARDOWN_DURATION_BUCKET_BOUNDS_MICROS: [u64; ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT] = [ + 1_000, + 5_000, + 10_000, + 25_000, + 50_000, + 100_000, + 250_000, + 500_000, + 1_000_000, + 2_500_000, + 5_000_000, + 10_000_000, +]; +const ME_WRITER_TEARDOWN_DURATION_BUCKET_LABELS: [&str; ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT] = [ + "0.001", + "0.005", + "0.01", + "0.025", + "0.05", + "0.1", + "0.25", + "0.5", + "1", + "2.5", + "5", + "10", +]; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum MeWriterTeardownMode { + Normal = 0, + HardDetach = 1, +} + +impl MeWriterTeardownMode { + pub const ALL: [Self; ME_WRITER_TEARDOWN_MODE_COUNT] = + [Self::Normal, Self::HardDetach]; + + pub const fn as_str(self) -> &'static str { + match self { + Self::Normal => "normal", + Self::HardDetach => "hard_detach", + } + } + + const fn idx(self) -> usize { + self as usize + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum MeWriterTeardownReason { + ReaderExit = 0, + WriterTaskExit = 1, + PingSendFail = 2, + SignalSendFail = 3, + RouteChannelClosed = 4, + CloseRpcChannelClosed = 5, + PruneClosedWriter = 6, + ReapTimeoutExpired = 7, + ReapThresholdForce = 8, + ReapEmpty = 9, + WatchdogStuckDraining = 10, +} + +impl MeWriterTeardownReason { + pub const ALL: [Self; ME_WRITER_TEARDOWN_REASON_COUNT] = [ + Self::ReaderExit, + Self::WriterTaskExit, + Self::PingSendFail, + Self::SignalSendFail, + Self::RouteChannelClosed, + Self::CloseRpcChannelClosed, + Self::PruneClosedWriter, + Self::ReapTimeoutExpired, + Self::ReapThresholdForce, + Self::ReapEmpty, + Self::WatchdogStuckDraining, + ]; + + pub const fn as_str(self) -> &'static str { + match self { + Self::ReaderExit => "reader_exit", + Self::WriterTaskExit => "writer_task_exit", + Self::PingSendFail => "ping_send_fail", + Self::SignalSendFail => "signal_send_fail", + Self::RouteChannelClosed => "route_channel_closed", + Self::CloseRpcChannelClosed => "close_rpc_channel_closed", + Self::PruneClosedWriter => "prune_closed_writer", + Self::ReapTimeoutExpired => "reap_timeout_expired", + Self::ReapThresholdForce => "reap_threshold_force", + Self::ReapEmpty => "reap_empty", + Self::WatchdogStuckDraining => "watchdog_stuck_draining", + } + } + + const fn idx(self) -> usize { + self as usize + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum MeWriterCleanupSideEffectStep { + CloseSignalChannelFull = 0, + CloseSignalChannelClosed = 1, +} + +impl MeWriterCleanupSideEffectStep { + pub const ALL: [Self; ME_WRITER_CLEANUP_SIDE_EFFECT_STEP_COUNT] = + [Self::CloseSignalChannelFull, Self::CloseSignalChannelClosed]; + + pub const fn as_str(self) -> &'static str { + match self { + Self::CloseSignalChannelFull => "close_signal_channel_full", + Self::CloseSignalChannelClosed => "close_signal_channel_closed", + } + } + + const fn idx(self) -> usize { + self as usize + } +} + // ============= Stats ============= #[derive(Default)] @@ -128,6 +259,18 @@ pub struct Stats { me_draining_writers_reap_progress_total: AtomicU64, me_writer_removed_total: AtomicU64, me_writer_removed_unexpected_total: AtomicU64, + me_writer_teardown_attempt_total: + [[AtomicU64; ME_WRITER_TEARDOWN_MODE_COUNT]; ME_WRITER_TEARDOWN_REASON_COUNT], + me_writer_teardown_success_total: [AtomicU64; ME_WRITER_TEARDOWN_MODE_COUNT], + me_writer_teardown_timeout_total: AtomicU64, + me_writer_teardown_escalation_total: AtomicU64, + me_writer_teardown_noop_total: AtomicU64, + me_writer_cleanup_side_effect_failures_total: + [AtomicU64; ME_WRITER_CLEANUP_SIDE_EFFECT_STEP_COUNT], + me_writer_teardown_duration_bucket_hits: + [[AtomicU64; ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT + 1]; ME_WRITER_TEARDOWN_MODE_COUNT], + me_writer_teardown_duration_sum_micros: [AtomicU64; ME_WRITER_TEARDOWN_MODE_COUNT], + me_writer_teardown_duration_count: [AtomicU64; ME_WRITER_TEARDOWN_MODE_COUNT], me_refill_triggered_total: AtomicU64, me_refill_skipped_inflight_total: AtomicU64, me_refill_failed_total: AtomicU64, @@ -765,6 +908,74 @@ impl Stats { self.me_writer_removed_unexpected_total.fetch_add(1, Ordering::Relaxed); } } + pub fn increment_me_writer_teardown_attempt_total( + &self, + reason: MeWriterTeardownReason, + mode: MeWriterTeardownMode, + ) { + if self.telemetry_me_allows_normal() { + self.me_writer_teardown_attempt_total[reason.idx()][mode.idx()] + .fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_writer_teardown_success_total(&self, mode: MeWriterTeardownMode) { + if self.telemetry_me_allows_normal() { + self.me_writer_teardown_success_total[mode.idx()].fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_writer_teardown_timeout_total(&self) { + if self.telemetry_me_allows_normal() { + self.me_writer_teardown_timeout_total + .fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_writer_teardown_escalation_total(&self) { + if self.telemetry_me_allows_normal() { + self.me_writer_teardown_escalation_total + .fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_writer_teardown_noop_total(&self) { + if self.telemetry_me_allows_normal() { + self.me_writer_teardown_noop_total + .fetch_add(1, Ordering::Relaxed); + } + } + pub fn increment_me_writer_cleanup_side_effect_failures_total( + &self, + step: MeWriterCleanupSideEffectStep, + ) { + if self.telemetry_me_allows_normal() { + self.me_writer_cleanup_side_effect_failures_total[step.idx()] + .fetch_add(1, Ordering::Relaxed); + } + } + pub fn observe_me_writer_teardown_duration( + &self, + mode: MeWriterTeardownMode, + duration: Duration, + ) { + if !self.telemetry_me_allows_normal() { + return; + } + let duration_micros = duration.as_micros().min(u64::MAX as u128) as u64; + let mut bucket_idx = ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT; + for (idx, upper_bound_micros) in ME_WRITER_TEARDOWN_DURATION_BUCKET_BOUNDS_MICROS + .iter() + .copied() + .enumerate() + { + if duration_micros <= upper_bound_micros { + bucket_idx = idx; + break; + } + } + self.me_writer_teardown_duration_bucket_hits[mode.idx()][bucket_idx] + .fetch_add(1, Ordering::Relaxed); + self.me_writer_teardown_duration_sum_micros[mode.idx()] + .fetch_add(duration_micros, Ordering::Relaxed); + self.me_writer_teardown_duration_count[mode.idx()].fetch_add(1, Ordering::Relaxed); + } pub fn increment_me_refill_triggered_total(&self) { if self.telemetry_me_allows_debug() { self.me_refill_triggered_total.fetch_add(1, Ordering::Relaxed); @@ -1297,6 +1508,79 @@ impl Stats { pub fn get_me_writer_removed_unexpected_total(&self) -> u64 { self.me_writer_removed_unexpected_total.load(Ordering::Relaxed) } + pub fn get_me_writer_teardown_attempt_total( + &self, + reason: MeWriterTeardownReason, + mode: MeWriterTeardownMode, + ) -> u64 { + self.me_writer_teardown_attempt_total[reason.idx()][mode.idx()] + .load(Ordering::Relaxed) + } + pub fn get_me_writer_teardown_attempt_total_by_mode(&self, mode: MeWriterTeardownMode) -> u64 { + MeWriterTeardownReason::ALL + .iter() + .copied() + .map(|reason| self.get_me_writer_teardown_attempt_total(reason, mode)) + .sum() + } + pub fn get_me_writer_teardown_success_total(&self, mode: MeWriterTeardownMode) -> u64 { + self.me_writer_teardown_success_total[mode.idx()].load(Ordering::Relaxed) + } + pub fn get_me_writer_teardown_timeout_total(&self) -> u64 { + self.me_writer_teardown_timeout_total.load(Ordering::Relaxed) + } + pub fn get_me_writer_teardown_escalation_total(&self) -> u64 { + self.me_writer_teardown_escalation_total + .load(Ordering::Relaxed) + } + pub fn get_me_writer_teardown_noop_total(&self) -> u64 { + self.me_writer_teardown_noop_total.load(Ordering::Relaxed) + } + pub fn get_me_writer_cleanup_side_effect_failures_total( + &self, + step: MeWriterCleanupSideEffectStep, + ) -> u64 { + self.me_writer_cleanup_side_effect_failures_total[step.idx()] + .load(Ordering::Relaxed) + } + pub fn get_me_writer_cleanup_side_effect_failures_total_all(&self) -> u64 { + MeWriterCleanupSideEffectStep::ALL + .iter() + .copied() + .map(|step| self.get_me_writer_cleanup_side_effect_failures_total(step)) + .sum() + } + pub fn me_writer_teardown_duration_bucket_labels( + ) -> &'static [&'static str; ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT] { + &ME_WRITER_TEARDOWN_DURATION_BUCKET_LABELS + } + pub fn get_me_writer_teardown_duration_bucket_hits( + &self, + mode: MeWriterTeardownMode, + bucket_idx: usize, + ) -> u64 { + self.me_writer_teardown_duration_bucket_hits[mode.idx()][bucket_idx] + .load(Ordering::Relaxed) + } + pub fn get_me_writer_teardown_duration_bucket_total( + &self, + mode: MeWriterTeardownMode, + bucket_idx: usize, + ) -> u64 { + let capped_idx = bucket_idx.min(ME_WRITER_TEARDOWN_DURATION_BUCKET_COUNT); + let mut total = 0u64; + for idx in 0..=capped_idx { + total = total.saturating_add(self.get_me_writer_teardown_duration_bucket_hits(mode, idx)); + } + total + } + pub fn get_me_writer_teardown_duration_count(&self, mode: MeWriterTeardownMode) -> u64 { + self.me_writer_teardown_duration_count[mode.idx()].load(Ordering::Relaxed) + } + pub fn get_me_writer_teardown_duration_sum_seconds(&self, mode: MeWriterTeardownMode) -> f64 { + self.me_writer_teardown_duration_sum_micros[mode.idx()].load(Ordering::Relaxed) as f64 + / 1_000_000.0 + } pub fn get_me_refill_triggered_total(&self) -> u64 { self.me_refill_triggered_total.load(Ordering::Relaxed) } @@ -1800,6 +2084,79 @@ mod tests { assert_eq!(stats.get_me_keepalive_sent(), 0); assert_eq!(stats.get_me_route_drop_queue_full(), 0); } + + #[test] + fn test_teardown_counters_and_duration() { + let stats = Stats::new(); + stats.increment_me_writer_teardown_attempt_total( + MeWriterTeardownReason::ReaderExit, + MeWriterTeardownMode::Normal, + ); + stats.increment_me_writer_teardown_success_total(MeWriterTeardownMode::Normal); + stats.observe_me_writer_teardown_duration( + MeWriterTeardownMode::Normal, + Duration::from_millis(3), + ); + stats.increment_me_writer_cleanup_side_effect_failures_total( + MeWriterCleanupSideEffectStep::CloseSignalChannelFull, + ); + + assert_eq!( + stats.get_me_writer_teardown_attempt_total( + MeWriterTeardownReason::ReaderExit, + MeWriterTeardownMode::Normal + ), + 1 + ); + assert_eq!( + stats.get_me_writer_teardown_success_total(MeWriterTeardownMode::Normal), + 1 + ); + assert_eq!( + stats.get_me_writer_teardown_duration_count(MeWriterTeardownMode::Normal), + 1 + ); + assert!( + stats.get_me_writer_teardown_duration_sum_seconds(MeWriterTeardownMode::Normal) > 0.0 + ); + assert_eq!( + stats.get_me_writer_cleanup_side_effect_failures_total( + MeWriterCleanupSideEffectStep::CloseSignalChannelFull + ), + 1 + ); + } + + #[test] + fn test_teardown_counters_respect_me_silent() { + let stats = Stats::new(); + stats.apply_telemetry_policy(TelemetryPolicy { + core_enabled: true, + user_enabled: true, + me_level: MeTelemetryLevel::Silent, + }); + stats.increment_me_writer_teardown_attempt_total( + MeWriterTeardownReason::ReaderExit, + MeWriterTeardownMode::Normal, + ); + stats.increment_me_writer_teardown_timeout_total(); + stats.observe_me_writer_teardown_duration( + MeWriterTeardownMode::Normal, + Duration::from_millis(1), + ); + assert_eq!( + stats.get_me_writer_teardown_attempt_total( + MeWriterTeardownReason::ReaderExit, + MeWriterTeardownMode::Normal + ), + 0 + ); + assert_eq!(stats.get_me_writer_teardown_timeout_total(), 0); + assert_eq!( + stats.get_me_writer_teardown_duration_count(MeWriterTeardownMode::Normal), + 0 + ); + } #[test] fn test_replay_checker_basic() { diff --git a/src/transport/middle_proxy/config_updater.rs b/src/transport/middle_proxy/config_updater.rs index 43a3569..26ec497 100644 --- a/src/transport/middle_proxy/config_updater.rs +++ b/src/transport/middle_proxy/config_updater.rs @@ -298,6 +298,7 @@ async fn run_update_cycle( pool.update_runtime_reinit_policy( cfg.general.hardswap, cfg.general.me_pool_drain_ttl_secs, + cfg.general.me_instadrain, cfg.general.me_pool_drain_threshold, cfg.general.me_pool_drain_soft_evict_enabled, cfg.general.me_pool_drain_soft_evict_grace_secs, @@ -530,6 +531,7 @@ pub async fn me_config_updater( pool.update_runtime_reinit_policy( cfg.general.hardswap, cfg.general.me_pool_drain_ttl_secs, + cfg.general.me_instadrain, cfg.general.me_pool_drain_threshold, cfg.general.me_pool_drain_soft_evict_enabled, cfg.general.me_pool_drain_soft_evict_grace_secs, diff --git a/src/transport/middle_proxy/health.rs b/src/transport/middle_proxy/health.rs index 6d0af64..30e562b 100644 --- a/src/transport/middle_proxy/health.rs +++ b/src/transport/middle_proxy/health.rs @@ -10,8 +10,10 @@ use tracing::{debug, info, warn}; use crate::config::MeFloorMode; use crate::crypto::SecureRandom; use crate::network::IpFamily; +use crate::stats::MeWriterTeardownReason; use super::MePool; +use super::pool::MeWriter; const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff #[allow(dead_code)] @@ -30,6 +32,8 @@ const HEALTH_DRAIN_CLOSE_BUDGET_MIN: usize = 16; const HEALTH_DRAIN_CLOSE_BUDGET_MAX: usize = 256; const HEALTH_DRAIN_SOFT_EVICT_BUDGET_MIN: usize = 8; const HEALTH_DRAIN_SOFT_EVICT_BUDGET_MAX: usize = 256; +const HEALTH_DRAIN_REAP_OPPORTUNISTIC_INTERVAL_SECS: u64 = 1; +const HEALTH_DRAIN_TIMEOUT_ENFORCER_INTERVAL_SECS: u64 = 1; #[derive(Debug, Clone)] struct DcFloorPlanEntry { @@ -99,6 +103,8 @@ pub async fn me_health_monitor(pool: Arc, rng: Arc, _min_c &mut adaptive_idle_since, &mut adaptive_recover_until, &mut floor_warn_next_allowed, + &mut drain_warn_next_allowed, + &mut drain_soft_evict_next_allowed, ) .await; let v6_degraded = check_family( @@ -116,12 +122,63 @@ pub async fn me_health_monitor(pool: Arc, rng: Arc, _min_c &mut adaptive_idle_since, &mut adaptive_recover_until, &mut floor_warn_next_allowed, + &mut drain_warn_next_allowed, + &mut drain_soft_evict_next_allowed, ) .await; degraded_interval = v4_degraded || v6_degraded; } } +pub async fn me_drain_timeout_enforcer(pool: Arc) { + let mut drain_warn_next_allowed: HashMap = HashMap::new(); + let mut drain_soft_evict_next_allowed: HashMap = HashMap::new(); + loop { + tokio::time::sleep(Duration::from_secs( + HEALTH_DRAIN_TIMEOUT_ENFORCER_INTERVAL_SECS, + )) + .await; + reap_draining_writers( + &pool, + &mut drain_warn_next_allowed, + &mut drain_soft_evict_next_allowed, + ) + .await; + } +} + +fn draining_writer_timeout_expired( + pool: &MePool, + writer: &MeWriter, + now_epoch_secs: u64, + drain_ttl_secs: u64, +) -> bool { + if pool + .me_instadrain + .load(std::sync::atomic::Ordering::Relaxed) + { + return true; + } + + let deadline_epoch_secs = writer + .drain_deadline_epoch_secs + .load(std::sync::atomic::Ordering::Relaxed); + if deadline_epoch_secs != 0 { + return now_epoch_secs >= deadline_epoch_secs; + } + + if drain_ttl_secs == 0 { + return false; + } + let drain_started_at_epoch_secs = writer + .draining_started_at_epoch_secs + .load(std::sync::atomic::Ordering::Relaxed); + if drain_started_at_epoch_secs == 0 { + return false; + } + now_epoch_secs.saturating_sub(drain_started_at_epoch_secs) > drain_ttl_secs +} + pub(super) async fn reap_draining_writers( pool: &Arc, warn_next_allowed: &mut HashMap, @@ -137,11 +194,16 @@ pub(super) async fn reap_draining_writers( let activity = pool.registry.writer_activity_snapshot().await; let mut draining_writers = Vec::new(); let mut empty_writer_ids = Vec::::new(); + let mut timeout_expired_writer_ids = Vec::::new(); let mut force_close_writer_ids = Vec::::new(); for writer in writers { if !writer.draining.load(std::sync::atomic::Ordering::Relaxed) { continue; } + if draining_writer_timeout_expired(pool, &writer, now_epoch_secs, drain_ttl_secs) { + timeout_expired_writer_ids.push(writer.id); + continue; + } if activity .bound_clients_by_writer .get(&writer.id) @@ -207,14 +269,6 @@ pub(super) async fn reap_draining_writers( "ME draining writer remains non-empty past drain TTL" ); } - let deadline_epoch_secs = writer - .drain_deadline_epoch_secs - .load(std::sync::atomic::Ordering::Relaxed); - if deadline_epoch_secs != 0 && now_epoch_secs >= deadline_epoch_secs { - warn!(writer_id = writer.id, "Drain timeout, force-closing"); - force_close_writer_ids.push(writer.id); - active_draining_writer_ids.remove(&writer.id); - } } warn_next_allowed.retain(|writer_id, _| active_draining_writer_ids.contains(writer_id)); @@ -299,11 +353,22 @@ pub(super) async fn reap_draining_writers( } } - let close_budget = health_drain_close_budget(); + let mut closed_writer_ids = HashSet::::new(); + for writer_id in timeout_expired_writer_ids { + if !closed_writer_ids.insert(writer_id) { + continue; + } + pool.stats.increment_pool_force_close_total(); + pool.remove_writer_and_close_clients(writer_id, MeWriterTeardownReason::ReapTimeoutExpired) + .await; + pool.stats + .increment_me_draining_writers_reap_progress_total(); + } + let requested_force_close = force_close_writer_ids.len(); let requested_empty_close = empty_writer_ids.len(); let requested_close_total = requested_force_close.saturating_add(requested_empty_close); - let mut closed_writer_ids = HashSet::::new(); + let close_budget = health_drain_close_budget(); let mut closed_total = 0usize; for writer_id in force_close_writer_ids { if closed_total >= close_budget { @@ -313,7 +378,8 @@ pub(super) async fn reap_draining_writers( continue; } pool.stats.increment_pool_force_close_total(); - pool.remove_writer_and_close_clients(writer_id).await; + pool.remove_writer_and_close_clients(writer_id, MeWriterTeardownReason::ReapThresholdForce) + .await; pool.stats .increment_me_draining_writers_reap_progress_total(); closed_total = closed_total.saturating_add(1); @@ -325,7 +391,8 @@ pub(super) async fn reap_draining_writers( if !closed_writer_ids.insert(writer_id) { continue; } - pool.remove_writer_and_close_clients(writer_id).await; + pool.remove_writer_and_close_clients(writer_id, MeWriterTeardownReason::ReapEmpty) + .await; pool.stats .increment_me_draining_writers_reap_progress_total(); closed_total = closed_total.saturating_add(1); @@ -396,6 +463,8 @@ async fn check_family( adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>, adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>, floor_warn_next_allowed: &mut HashMap<(i32, IpFamily), Instant>, + drain_warn_next_allowed: &mut HashMap, + drain_soft_evict_next_allowed: &mut HashMap, ) -> bool { let enabled = match family { IpFamily::V4 => pool.decision.ipv4_me, @@ -476,8 +545,15 @@ async fn check_family( floor_plan.active_writers_current, floor_plan.warm_writers_current, ); + let mut next_drain_reap_at = Instant::now(); for (dc, endpoints) in dc_endpoints { + if Instant::now() >= next_drain_reap_at { + reap_draining_writers(pool, drain_warn_next_allowed, drain_soft_evict_next_allowed) + .await; + next_drain_reap_at = Instant::now() + + Duration::from_secs(HEALTH_DRAIN_REAP_OPPORTUNISTIC_INTERVAL_SECS); + } if endpoints.is_empty() { continue; } @@ -621,6 +697,12 @@ async fn check_family( let mut restored = 0usize; for _ in 0..missing { + if Instant::now() >= next_drain_reap_at { + reap_draining_writers(pool, drain_warn_next_allowed, drain_soft_evict_next_allowed) + .await; + next_drain_reap_at = Instant::now() + + Duration::from_secs(HEALTH_DRAIN_REAP_OPPORTUNISTIC_INTERVAL_SECS); + } if reconnect_budget == 0 { break; } @@ -1472,6 +1554,187 @@ async fn maybe_rotate_single_endpoint_shadow( ); } +/// Last-resort safety net for draining writers stuck past their deadline. +/// +/// Runs every `TICK_SECS` and force-closes any draining writer whose +/// `drain_deadline_epoch_secs` has been exceeded by more than a threshold. +/// +/// Two thresholds: +/// - `SOFT_THRESHOLD_SECS` (60s): writers with no bound clients +/// - `HARD_THRESHOLD_SECS` (300s): writers WITH bound clients (unconditional) +/// +/// Intentionally kept trivial and independent of pool config to minimise +/// the probability of panicking itself. Uses `SystemTime` directly +/// as a fallback clock source and timeouts on every lock acquisition +/// and writer removal so one stuck writer cannot block the rest. +pub async fn me_zombie_writer_watchdog(pool: Arc) { + use std::time::{SystemTime, UNIX_EPOCH}; + + const TICK_SECS: u64 = 30; + const SOFT_THRESHOLD_SECS: u64 = 60; + const HARD_THRESHOLD_SECS: u64 = 300; + const LOCK_TIMEOUT_SECS: u64 = 5; + const REMOVE_TIMEOUT_SECS: u64 = 10; + const HARD_DETACH_TIMEOUT_STREAK: u8 = 3; + + let mut removal_timeout_streak = HashMap::::new(); + + loop { + tokio::time::sleep(Duration::from_secs(TICK_SECS)).await; + + let now = match SystemTime::now().duration_since(UNIX_EPOCH) { + Ok(d) => d.as_secs(), + Err(_) => continue, + }; + + // Phase 1: collect zombie IDs under a short read-lock with timeout. + let zombie_ids_with_meta: Vec<(u64, bool)> = { + let Ok(ws) = tokio::time::timeout( + Duration::from_secs(LOCK_TIMEOUT_SECS), + pool.writers.read(), + ) + .await + else { + warn!("zombie_watchdog: writers read-lock timeout, skipping tick"); + continue; + }; + ws.iter() + .filter(|w| w.draining.load(std::sync::atomic::Ordering::Relaxed)) + .filter_map(|w| { + let deadline = w + .drain_deadline_epoch_secs + .load(std::sync::atomic::Ordering::Relaxed); + if deadline == 0 { + return None; + } + let overdue = now.saturating_sub(deadline); + if overdue == 0 { + return None; + } + let started = w + .draining_started_at_epoch_secs + .load(std::sync::atomic::Ordering::Relaxed); + let drain_age = now.saturating_sub(started); + if drain_age > HARD_THRESHOLD_SECS { + return Some((w.id, true)); + } + if overdue > SOFT_THRESHOLD_SECS { + return Some((w.id, false)); + } + None + }) + .collect() + }; + // read lock released here + + if zombie_ids_with_meta.is_empty() { + removal_timeout_streak.clear(); + continue; + } + + let mut active_zombie_ids = HashSet::::with_capacity(zombie_ids_with_meta.len()); + for (writer_id, _) in &zombie_ids_with_meta { + active_zombie_ids.insert(*writer_id); + } + removal_timeout_streak.retain(|writer_id, _| active_zombie_ids.contains(writer_id)); + + warn!( + zombie_count = zombie_ids_with_meta.len(), + soft_threshold_secs = SOFT_THRESHOLD_SECS, + hard_threshold_secs = HARD_THRESHOLD_SECS, + "Zombie draining writers detected by watchdog, force-closing" + ); + + // Phase 2: remove each writer individually with a timeout. + // One stuck removal cannot block the rest. + for (writer_id, had_clients) in &zombie_ids_with_meta { + let result = tokio::time::timeout( + Duration::from_secs(REMOVE_TIMEOUT_SECS), + pool.remove_writer_and_close_clients( + *writer_id, + MeWriterTeardownReason::WatchdogStuckDraining, + ), + ) + .await; + match result { + Ok(true) => { + removal_timeout_streak.remove(writer_id); + pool.stats.increment_pool_force_close_total(); + pool.stats + .increment_me_draining_writers_reap_progress_total(); + info!( + writer_id, + had_clients, + "Zombie writer removed by watchdog" + ); + } + Ok(false) => { + removal_timeout_streak.remove(writer_id); + debug!( + writer_id, + had_clients, + "Zombie writer watchdog removal became no-op" + ); + } + Err(_) => { + pool.stats.increment_me_writer_teardown_timeout_total(); + let streak = removal_timeout_streak + .entry(*writer_id) + .and_modify(|value| *value = value.saturating_add(1)) + .or_insert(1); + warn!( + writer_id, + had_clients, + timeout_streak = *streak, + "Zombie writer removal timed out" + ); + if *streak < HARD_DETACH_TIMEOUT_STREAK { + continue; + } + pool.stats.increment_me_writer_teardown_escalation_total(); + + let hard_detach = tokio::time::timeout( + Duration::from_secs(REMOVE_TIMEOUT_SECS), + pool.remove_draining_writer_hard_detach( + *writer_id, + MeWriterTeardownReason::WatchdogStuckDraining, + ), + ) + .await; + match hard_detach { + Ok(true) => { + removal_timeout_streak.remove(writer_id); + pool.stats.increment_pool_force_close_total(); + pool.stats + .increment_me_draining_writers_reap_progress_total(); + info!( + writer_id, + had_clients, + "Zombie writer hard-detached after repeated timeouts" + ); + } + Ok(false) => { + removal_timeout_streak.remove(writer_id); + debug!( + writer_id, + had_clients, + "Zombie hard-detach skipped (writer already gone or no longer draining)" + ); + } + Err(_) => { + pool.stats.increment_me_writer_teardown_timeout_total(); + warn!( + writer_id, + had_clients, + "Zombie hard-detach timed out, will retry next tick" + ); + } + } + } + } + } + } +} #[cfg(test)] mod tests { use std::collections::HashMap; @@ -1548,6 +1811,7 @@ mod tests { general.me_adaptive_floor_max_warm_writers_global, general.hardswap, general.me_pool_drain_ttl_secs, + general.me_instadrain, general.me_pool_drain_threshold, general.me_pool_drain_soft_evict_enabled, general.me_pool_drain_soft_evict_grace_secs, diff --git a/src/transport/middle_proxy/health_adversarial_tests.rs b/src/transport/middle_proxy/health_adversarial_tests.rs index 3f182e4..93b1d2b 100644 --- a/src/transport/middle_proxy/health_adversarial_tests.rs +++ b/src/transport/middle_proxy/health_adversarial_tests.rs @@ -81,6 +81,7 @@ async fn make_pool( general.me_adaptive_floor_max_warm_writers_global, general.hardswap, general.me_pool_drain_ttl_secs, + general.me_instadrain, general.me_pool_drain_threshold, general.me_pool_drain_soft_evict_enabled, general.me_pool_drain_soft_evict_grace_secs, @@ -213,7 +214,7 @@ async fn reap_draining_writers_respects_threshold_across_multiple_overflow_cycle insert_draining_writer( &pool, writer_id, - now_epoch_secs.saturating_sub(600).saturating_add(writer_id), + now_epoch_secs.saturating_sub(20), 1, 0, ) @@ -230,7 +231,7 @@ async fn reap_draining_writers_respects_threshold_across_multiple_overflow_cycle } assert_eq!(writer_count(&pool).await, threshold as usize); - assert_eq!(sorted_writer_ids(&pool).await, vec![58, 59, 60]); + assert_eq!(sorted_writer_ids(&pool).await, vec![1, 2, 3]); } #[tokio::test] @@ -315,7 +316,12 @@ async fn reap_draining_writers_maintains_warn_state_subset_property_under_bulk_c let ids = sorted_writer_ids(&pool).await; for writer_id in ids.into_iter().take(3) { - let _ = pool.remove_writer_and_close_clients(writer_id).await; + let _ = pool + .remove_writer_and_close_clients( + writer_id, + crate::stats::MeWriterTeardownReason::ReapEmpty, + ) + .await; } reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await; diff --git a/src/transport/middle_proxy/health_integration_tests.rs b/src/transport/middle_proxy/health_integration_tests.rs index 7f99d2a..fbbffce 100644 --- a/src/transport/middle_proxy/health_integration_tests.rs +++ b/src/transport/middle_proxy/health_integration_tests.rs @@ -80,6 +80,7 @@ async fn make_pool( general.me_adaptive_floor_max_warm_writers_global, general.hardswap, general.me_pool_drain_ttl_secs, + general.me_instadrain, general.me_pool_drain_threshold, general.me_pool_drain_soft_evict_enabled, general.me_pool_drain_soft_evict_grace_secs, diff --git a/src/transport/middle_proxy/health_regression_tests.rs b/src/transport/middle_proxy/health_regression_tests.rs index 565ac74..3c7b919 100644 --- a/src/transport/middle_proxy/health_regression_tests.rs +++ b/src/transport/middle_proxy/health_regression_tests.rs @@ -12,7 +12,9 @@ use super::codec::WriterCommand; use super::health::{health_drain_close_budget, reap_draining_writers}; use super::pool::{MePool, MeWriter, WriterContour}; use super::registry::ConnMeta; -use crate::config::{GeneralConfig, MeRouteNoWriterMode, MeSocksKdfPolicy, MeWriterPickMode}; +use crate::config::{ + GeneralConfig, MeBindStaleMode, MeRouteNoWriterMode, MeSocksKdfPolicy, MeWriterPickMode, +}; use crate::crypto::SecureRandom; use crate::network::probe::NetworkDecision; use crate::stats::Stats; @@ -74,6 +76,7 @@ async fn make_pool(me_pool_drain_threshold: u64) -> Arc { general.me_adaptive_floor_max_warm_writers_global, general.hardswap, general.me_pool_drain_ttl_secs, + general.me_instadrain, general.me_pool_drain_threshold, general.me_pool_drain_soft_evict_enabled, general.me_pool_drain_soft_evict_grace_secs, @@ -180,15 +183,23 @@ async fn current_writer_ids(pool: &Arc) -> Vec { async fn reap_draining_writers_drops_warn_state_for_removed_writer() { let pool = make_pool(128).await; let now_epoch_secs = MePool::now_epoch_secs(); - let conn_ids = - insert_draining_writer(&pool, 7, now_epoch_secs.saturating_sub(180), 1, 0).await; + let conn_ids = insert_draining_writer( + &pool, + 7, + now_epoch_secs.saturating_sub(180), + 1, + now_epoch_secs.saturating_add(3_600), + ) + .await; let mut warn_next_allowed = HashMap::new(); let mut soft_evict_next_allowed = HashMap::new(); reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await; assert!(warn_next_allowed.contains_key(&7)); - let _ = pool.remove_writer_and_close_clients(7).await; + let _ = pool + .remove_writer_and_close_clients(7, crate::stats::MeWriterTeardownReason::ReapEmpty) + .await; assert!(pool.registry.get_writer(conn_ids[0]).await.is_none()); reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await; @@ -331,17 +342,17 @@ async fn reap_draining_writers_deadline_force_close_applies_under_threshold() { #[tokio::test] async fn reap_draining_writers_limits_closes_per_health_tick() { - let pool = make_pool(128).await; + let pool = make_pool(1).await; let now_epoch_secs = MePool::now_epoch_secs(); let close_budget = health_drain_close_budget(); - let writer_total = close_budget.saturating_add(19); + let writer_total = close_budget.saturating_add(20); for writer_id in 1..=writer_total as u64 { insert_draining_writer( &pool, writer_id, now_epoch_secs.saturating_sub(20), 1, - now_epoch_secs.saturating_sub(1), + 0, ) .await; } @@ -364,8 +375,8 @@ async fn reap_draining_writers_backlog_drains_across_ticks() { &pool, writer_id, now_epoch_secs.saturating_sub(20), - 1, - now_epoch_secs.saturating_sub(1), + 0, + 0, ) .await; } @@ -393,7 +404,7 @@ async fn reap_draining_writers_threshold_backlog_converges_to_threshold() { insert_draining_writer( &pool, writer_id, - now_epoch_secs.saturating_sub(200).saturating_add(writer_id), + now_epoch_secs.saturating_sub(20), 1, 0, ) @@ -429,27 +440,27 @@ async fn reap_draining_writers_threshold_zero_preserves_non_expired_non_empty_wr #[tokio::test] async fn reap_draining_writers_prioritizes_force_close_before_empty_cleanup() { - let pool = make_pool(128).await; + let pool = make_pool(1).await; let now_epoch_secs = MePool::now_epoch_secs(); let close_budget = health_drain_close_budget(); - for writer_id in 1..=close_budget as u64 { + for writer_id in 1..=close_budget.saturating_add(1) as u64 { insert_draining_writer( &pool, writer_id, now_epoch_secs.saturating_sub(20), 1, - now_epoch_secs.saturating_sub(1), + 0, ) .await; } - let empty_writer_id = close_budget as u64 + 1; + let empty_writer_id = close_budget.saturating_add(2) as u64; insert_draining_writer(&pool, empty_writer_id, now_epoch_secs.saturating_sub(20), 0, 0).await; let mut warn_next_allowed = HashMap::new(); let mut soft_evict_next_allowed = HashMap::new(); reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await; - assert_eq!(current_writer_ids(&pool).await, vec![empty_writer_id]); + assert_eq!(current_writer_ids(&pool).await, vec![1, empty_writer_id]); } #[tokio::test] @@ -518,7 +529,12 @@ async fn reap_draining_writers_warn_state_never_exceeds_live_draining_population let existing_writer_ids = current_writer_ids(&pool).await; for writer_id in existing_writer_ids.into_iter().take(4) { - let _ = pool.remove_writer_and_close_clients(writer_id).await; + let _ = pool + .remove_writer_and_close_clients( + writer_id, + crate::stats::MeWriterTeardownReason::ReapEmpty, + ) + .await; } reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await; assert!(warn_next_allowed.len() <= pool.writers.read().await.len()); @@ -571,7 +587,14 @@ async fn reap_draining_writers_soft_evicts_stuck_writer_with_per_writer_cap() { .store(1, Ordering::Relaxed); let now_epoch_secs = MePool::now_epoch_secs(); - insert_draining_writer(&pool, 77, now_epoch_secs.saturating_sub(240), 3, 0).await; + insert_draining_writer( + &pool, + 77, + now_epoch_secs.saturating_sub(240), + 3, + now_epoch_secs.saturating_add(3_600), + ) + .await; let mut warn_next_allowed = HashMap::new(); let mut soft_evict_next_allowed = HashMap::new(); @@ -595,7 +618,14 @@ async fn reap_draining_writers_soft_evict_respects_cooldown_per_writer() { .store(60_000, Ordering::Relaxed); let now_epoch_secs = MePool::now_epoch_secs(); - insert_draining_writer(&pool, 88, now_epoch_secs.saturating_sub(240), 3, 0).await; + insert_draining_writer( + &pool, + 88, + now_epoch_secs.saturating_sub(240), + 3, + now_epoch_secs.saturating_add(3_600), + ) + .await; let mut warn_next_allowed = HashMap::new(); let mut soft_evict_next_allowed = HashMap::new(); @@ -608,12 +638,40 @@ async fn reap_draining_writers_soft_evict_respects_cooldown_per_writer() { assert_eq!(pool.stats.get_pool_drain_soft_evict_writer_total(), 1); } +#[tokio::test] +async fn reap_draining_writers_instadrain_removes_non_expired_writers_immediately() { + let pool = make_pool(0).await; + pool.me_instadrain.store(true, Ordering::Relaxed); + let now_epoch_secs = MePool::now_epoch_secs(); + insert_draining_writer(&pool, 101, now_epoch_secs.saturating_sub(5), 1, 0).await; + insert_draining_writer(&pool, 102, now_epoch_secs.saturating_sub(4), 1, 0).await; + let mut warn_next_allowed = HashMap::new(); + let mut soft_evict_next_allowed = HashMap::new(); + + reap_draining_writers(&pool, &mut warn_next_allowed, &mut soft_evict_next_allowed).await; + + assert!(current_writer_ids(&pool).await.is_empty()); +} + #[test] fn general_config_default_drain_threshold_remains_enabled() { - assert_eq!(GeneralConfig::default().me_pool_drain_threshold, 128); + assert_eq!(GeneralConfig::default().me_pool_drain_threshold, 32); assert!(GeneralConfig::default().me_pool_drain_soft_evict_enabled); assert_eq!( - GeneralConfig::default().me_pool_drain_soft_evict_per_writer, - 1 + GeneralConfig::default().me_pool_drain_soft_evict_grace_secs, + 10 ); + assert_eq!( + GeneralConfig::default().me_pool_drain_soft_evict_per_writer, + 2 + ); + assert_eq!( + GeneralConfig::default().me_pool_drain_soft_evict_budget_per_core, + 16 + ); + assert_eq!( + GeneralConfig::default().me_pool_drain_soft_evict_cooldown_ms, + 1000 + ); + assert_eq!(GeneralConfig::default().me_bind_stale_mode, MeBindStaleMode::Never); } diff --git a/src/transport/middle_proxy/mod.rs b/src/transport/middle_proxy/mod.rs index 590c996..8c57717 100644 --- a/src/transport/middle_proxy/mod.rs +++ b/src/transport/middle_proxy/mod.rs @@ -30,7 +30,7 @@ mod health_adversarial_tests; use bytes::Bytes; -pub use health::me_health_monitor; +pub use health::{me_drain_timeout_enforcer, me_health_monitor, me_zombie_writer_watchdog}; #[allow(unused_imports)] pub use ping::{run_me_ping, format_sample_line, format_me_route, MePingReport, MePingSample, MePingFamily}; pub use pool::MePool; diff --git a/src/transport/middle_proxy/pool.rs b/src/transport/middle_proxy/pool.rs index d09f07c..f825058 100644 --- a/src/transport/middle_proxy/pool.rs +++ b/src/transport/middle_proxy/pool.rs @@ -18,6 +18,8 @@ use crate::transport::UpstreamManager; use super::ConnRegistry; use super::codec::WriterCommand; +const ME_FORCE_CLOSE_SAFETY_FALLBACK_SECS: u64 = 300; + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub(super) struct RefillDcKey { pub dc: i32, @@ -171,6 +173,7 @@ pub struct MePool { pub(super) endpoint_quarantine: Arc>>, pub(super) kdf_material_fingerprint: Arc>>, pub(super) me_pool_drain_ttl_secs: AtomicU64, + pub(super) me_instadrain: AtomicBool, pub(super) me_pool_drain_threshold: AtomicU64, pub(super) me_pool_drain_soft_evict_enabled: AtomicBool, pub(super) me_pool_drain_soft_evict_grace_secs: AtomicU64, @@ -228,6 +231,14 @@ impl MePool { .as_secs() } + fn normalize_force_close_secs(force_close_secs: u64) -> u64 { + if force_close_secs == 0 { + ME_FORCE_CLOSE_SAFETY_FALLBACK_SECS + } else { + force_close_secs + } + } + pub fn new( proxy_tag: Option>, proxy_secret: Vec, @@ -279,6 +290,7 @@ impl MePool { me_adaptive_floor_max_warm_writers_global: u32, hardswap: bool, me_pool_drain_ttl_secs: u64, + me_instadrain: bool, me_pool_drain_threshold: u64, me_pool_drain_soft_evict_enabled: bool, me_pool_drain_soft_evict_grace_secs: u64, @@ -462,6 +474,7 @@ impl MePool { endpoint_quarantine: Arc::new(Mutex::new(HashMap::new())), kdf_material_fingerprint: Arc::new(RwLock::new(HashMap::new())), me_pool_drain_ttl_secs: AtomicU64::new(me_pool_drain_ttl_secs), + me_instadrain: AtomicBool::new(me_instadrain), me_pool_drain_threshold: AtomicU64::new(me_pool_drain_threshold), me_pool_drain_soft_evict_enabled: AtomicBool::new(me_pool_drain_soft_evict_enabled), me_pool_drain_soft_evict_grace_secs: AtomicU64::new(me_pool_drain_soft_evict_grace_secs), @@ -474,7 +487,9 @@ impl MePool { me_pool_drain_soft_evict_cooldown_ms: AtomicU64::new( me_pool_drain_soft_evict_cooldown_ms.max(1), ), - me_pool_force_close_secs: AtomicU64::new(me_pool_force_close_secs), + me_pool_force_close_secs: AtomicU64::new(Self::normalize_force_close_secs( + me_pool_force_close_secs, + )), me_pool_min_fresh_ratio_permille: AtomicU32::new(Self::ratio_to_permille( me_pool_min_fresh_ratio, )), @@ -524,6 +539,7 @@ impl MePool { &self, hardswap: bool, drain_ttl_secs: u64, + instadrain: bool, pool_drain_threshold: u64, pool_drain_soft_evict_enabled: bool, pool_drain_soft_evict_grace_secs: u64, @@ -568,6 +584,7 @@ impl MePool { self.hardswap.store(hardswap, Ordering::Relaxed); self.me_pool_drain_ttl_secs .store(drain_ttl_secs, Ordering::Relaxed); + self.me_instadrain.store(instadrain, Ordering::Relaxed); self.me_pool_drain_threshold .store(pool_drain_threshold, Ordering::Relaxed); self.me_pool_drain_soft_evict_enabled @@ -582,8 +599,10 @@ impl MePool { ); self.me_pool_drain_soft_evict_cooldown_ms .store(pool_drain_soft_evict_cooldown_ms.max(1), Ordering::Relaxed); - self.me_pool_force_close_secs - .store(force_close_secs, Ordering::Relaxed); + self.me_pool_force_close_secs.store( + Self::normalize_force_close_secs(force_close_secs), + Ordering::Relaxed, + ); self.me_pool_min_fresh_ratio_permille .store(Self::ratio_to_permille(min_fresh_ratio), Ordering::Relaxed); self.me_hardswap_warmup_delay_min_ms @@ -728,12 +747,9 @@ impl MePool { } pub(super) fn force_close_timeout(&self) -> Option { - let secs = self.me_pool_force_close_secs.load(Ordering::Relaxed); - if secs == 0 { - None - } else { - Some(Duration::from_secs(secs)) - } + let secs = + Self::normalize_force_close_secs(self.me_pool_force_close_secs.load(Ordering::Relaxed)); + Some(Duration::from_secs(secs)) } pub(super) fn drain_soft_evict_enabled(&self) -> bool { diff --git a/src/transport/middle_proxy/pool_refill.rs b/src/transport/middle_proxy/pool_refill.rs index fc916f4..e4fb95f 100644 --- a/src/transport/middle_proxy/pool_refill.rs +++ b/src/transport/middle_proxy/pool_refill.rs @@ -74,9 +74,8 @@ impl MePool { debug!( %addr, wait_ms = expiry.saturating_duration_since(now).as_millis(), - "All ME endpoints are quarantined for the DC group; retrying earliest one" + "All ME endpoints are quarantined for the DC group; waiting for quarantine expiry" ); - return vec![addr]; } Vec::new() diff --git a/src/transport/middle_proxy/pool_status.rs b/src/transport/middle_proxy/pool_status.rs index 214ee49..5fe45cb 100644 --- a/src/transport/middle_proxy/pool_status.rs +++ b/src/transport/middle_proxy/pool_status.rs @@ -126,6 +126,7 @@ pub(crate) struct MeApiRuntimeSnapshot { pub me_reconnect_backoff_cap_ms: u64, pub me_reconnect_fast_retry_count: u32, pub me_pool_drain_ttl_secs: u64, + pub me_instadrain: bool, pub me_pool_drain_soft_evict_enabled: bool, pub me_pool_drain_soft_evict_grace_secs: u64, pub me_pool_drain_soft_evict_per_writer: u8, @@ -583,6 +584,7 @@ impl MePool { me_reconnect_backoff_cap_ms: self.me_reconnect_backoff_cap.as_millis() as u64, me_reconnect_fast_retry_count: self.me_reconnect_fast_retry_count, me_pool_drain_ttl_secs: self.me_pool_drain_ttl_secs.load(Ordering::Relaxed), + me_instadrain: self.me_instadrain.load(Ordering::Relaxed), me_pool_drain_soft_evict_enabled: self .me_pool_drain_soft_evict_enabled .load(Ordering::Relaxed), diff --git a/src/transport/middle_proxy/pool_writer.rs b/src/transport/middle_proxy/pool_writer.rs index 7d78b84..b0ba776 100644 --- a/src/transport/middle_proxy/pool_writer.rs +++ b/src/transport/middle_proxy/pool_writer.rs @@ -16,11 +16,13 @@ use crate::config::MeBindStaleMode; use crate::crypto::SecureRandom; use crate::error::{ProxyError, Result}; use crate::protocol::constants::{RPC_CLOSE_EXT_U32, RPC_PING_U32}; +use crate::stats::{ + MeWriterCleanupSideEffectStep, MeWriterTeardownMode, MeWriterTeardownReason, +}; use super::codec::{RpcWriter, WriterCommand}; use super::pool::{MePool, MeWriter, WriterContour}; use super::reader::reader_loop; -use super::registry::BoundConn; use super::wire::build_proxy_req_payload; const ME_ACTIVE_PING_SECS: u64 = 25; @@ -28,6 +30,12 @@ const ME_ACTIVE_PING_JITTER_SECS: i64 = 5; const ME_IDLE_KEEPALIVE_MAX_SECS: u64 = 5; const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700; +#[derive(Clone, Copy)] +enum WriterRemoveGuardMode { + Any, + DrainingOnly, +} + fn is_me_peer_closed_error(error: &ProxyError) -> bool { matches!(error, ProxyError::Io(ioe) if ioe.kind() == ErrorKind::UnexpectedEof) } @@ -44,9 +52,16 @@ impl MePool { for writer_id in closed_writer_ids { if self.registry.is_writer_empty(writer_id).await { - let _ = self.remove_writer_only(writer_id).await; + let _ = self + .remove_writer_only(writer_id, MeWriterTeardownReason::PruneClosedWriter) + .await; } else { - let _ = self.remove_writer_and_close_clients(writer_id).await; + let _ = self + .remove_writer_and_close_clients( + writer_id, + MeWriterTeardownReason::PruneClosedWriter, + ) + .await; } } } @@ -143,6 +158,9 @@ impl MePool { crc_mode: hs.crc_mode, }; let cancel_wr = cancel.clone(); + let cleanup_done = Arc::new(AtomicBool::new(false)); + let cleanup_for_writer = cleanup_done.clone(); + let pool_writer_task = Arc::downgrade(self); tokio::spawn(async move { loop { tokio::select! { @@ -160,6 +178,20 @@ impl MePool { _ = cancel_wr.cancelled() => break, } } + if cleanup_for_writer + .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed) + .is_ok() + { + if let Some(pool) = pool_writer_task.upgrade() { + pool.remove_writer_and_close_clients( + writer_id, + MeWriterTeardownReason::WriterTaskExit, + ) + .await; + } else { + cancel_wr.cancel(); + } + } }); let writer = MeWriter { id: writer_id, @@ -196,7 +228,6 @@ impl MePool { let cancel_ping = cancel.clone(); let tx_ping = tx.clone(); let ping_tracker_ping = ping_tracker.clone(); - let cleanup_done = Arc::new(AtomicBool::new(false)); let cleanup_for_reader = cleanup_done.clone(); let cleanup_for_ping = cleanup_done.clone(); let keepalive_enabled = self.me_keepalive_enabled; @@ -242,21 +273,29 @@ impl MePool { stats_reader_close.increment_me_idle_close_by_peer_total(); info!(writer_id, "ME socket closed by peer on idle writer"); } - if let Some(pool) = pool.upgrade() - && cleanup_for_reader - .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed) - .is_ok() + if cleanup_for_reader + .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed) + .is_ok() { - pool.remove_writer_and_close_clients(writer_id).await; + if let Some(pool) = pool.upgrade() { + pool.remove_writer_and_close_clients( + writer_id, + MeWriterTeardownReason::ReaderExit, + ) + .await; + } else { + // Fallback for shutdown races: make writer task exit quickly so stale + // channels are observable by periodic prune. + cancel_reader_token.cancel(); + } } if let Err(e) = res { if !idle_close_by_peer { warn!(error = %e, "ME reader ended"); } } - let mut ws = writers_arc.write().await; - ws.retain(|w| w.id != writer_id); - info!(remaining = ws.len(), "Dead ME writer removed from pool"); + let remaining = writers_arc.read().await.len(); + debug!(writer_id, remaining, "ME reader task finished"); }); let pool_ping = Arc::downgrade(self); @@ -351,7 +390,11 @@ impl MePool { .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed) .is_ok() { - pool.remove_writer_and_close_clients(writer_id).await; + pool.remove_writer_and_close_clients( + writer_id, + MeWriterTeardownReason::PingSendFail, + ) + .await; } break; } @@ -444,7 +487,11 @@ impl MePool { .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed) .is_ok() { - pool.remove_writer_and_close_clients(writer_id).await; + pool.remove_writer_and_close_clients( + writer_id, + MeWriterTeardownReason::SignalSendFail, + ) + .await; } break; } @@ -478,7 +525,11 @@ impl MePool { .compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed) .is_ok() { - pool.remove_writer_and_close_clients(writer_id).await; + pool.remove_writer_and_close_clients( + writer_id, + MeWriterTeardownReason::SignalSendFail, + ) + .await; } break; } @@ -491,21 +542,83 @@ impl MePool { Ok(()) } - pub(crate) async fn remove_writer_and_close_clients(self: &Arc, writer_id: u64) { + pub(crate) async fn remove_writer_and_close_clients( + self: &Arc, + writer_id: u64, + reason: MeWriterTeardownReason, + ) -> bool { // Full client cleanup now happens inside `registry.writer_lost` to keep // writer reap/remove paths strictly non-blocking per connection. - let _ = self.remove_writer_only(writer_id).await; + self.remove_writer_with_mode( + writer_id, + reason, + MeWriterTeardownMode::Normal, + WriterRemoveGuardMode::Any, + ) + .await } - async fn remove_writer_only(self: &Arc, writer_id: u64) -> Vec { + pub(super) async fn remove_draining_writer_hard_detach( + self: &Arc, + writer_id: u64, + reason: MeWriterTeardownReason, + ) -> bool { + self.remove_writer_with_mode( + writer_id, + reason, + MeWriterTeardownMode::HardDetach, + WriterRemoveGuardMode::DrainingOnly, + ) + .await + } + + async fn remove_writer_only( + self: &Arc, + writer_id: u64, + reason: MeWriterTeardownReason, + ) -> bool { + self.remove_writer_with_mode( + writer_id, + reason, + MeWriterTeardownMode::Normal, + WriterRemoveGuardMode::Any, + ) + .await + } + + // Authoritative teardown primitive shared by normal cleanup and watchdog path. + // Lock-order invariant: + // 1) mutate `writers` under pool write lock, + // 2) release pool lock, + // 3) run registry/metrics/refill side effects. + // `registry.writer_lost` must never run while `writers` lock is held. + async fn remove_writer_with_mode( + self: &Arc, + writer_id: u64, + reason: MeWriterTeardownReason, + mode: MeWriterTeardownMode, + guard_mode: WriterRemoveGuardMode, + ) -> bool { + let started_at = Instant::now(); + self.stats + .increment_me_writer_teardown_attempt_total(reason, mode); let mut close_tx: Option> = None; let mut removed_addr: Option = None; let mut removed_dc: Option = None; let mut removed_uptime: Option = None; let mut trigger_refill = false; + let mut removed = false; { let mut ws = self.writers.write().await; if let Some(pos) = ws.iter().position(|w| w.id == writer_id) { + if matches!(guard_mode, WriterRemoveGuardMode::DrainingOnly) + && !ws[pos].draining.load(Ordering::Relaxed) + { + self.stats.increment_me_writer_teardown_noop_total(); + self.stats + .observe_me_writer_teardown_duration(mode, started_at.elapsed()); + return false; + } let w = ws.remove(pos); let was_draining = w.draining.load(Ordering::Relaxed); if was_draining { @@ -522,6 +635,7 @@ impl MePool { } close_tx = Some(w.tx.clone()); self.conn_count.fetch_sub(1, Ordering::Relaxed); + removed = true; } } // State invariant: @@ -529,7 +643,7 @@ impl MePool { // - writer is removed from registry routing/binding maps via `writer_lost`. // The close command below is only a best-effort accelerator for task shutdown. // Cleanup progress must never depend on command-channel availability. - let conns = self.registry.writer_lost(writer_id).await; + let _ = self.registry.writer_lost(writer_id).await; { let mut tracker = self.ping_tracker.lock().await; tracker.retain(|_, (_, wid)| *wid != writer_id); @@ -542,6 +656,9 @@ impl MePool { self.stats.increment_me_writer_close_signal_drop_total(); self.stats .increment_me_writer_close_signal_channel_full_total(); + self.stats.increment_me_writer_cleanup_side_effect_failures_total( + MeWriterCleanupSideEffectStep::CloseSignalChannelFull, + ); debug!( writer_id, "Skipping close signal for removed writer: command channel is full" @@ -549,6 +666,9 @@ impl MePool { } Err(TrySendError::Closed(_)) => { self.stats.increment_me_writer_close_signal_drop_total(); + self.stats.increment_me_writer_cleanup_side_effect_failures_total( + MeWriterCleanupSideEffectStep::CloseSignalChannelClosed, + ); debug!( writer_id, "Skipping close signal for removed writer: command channel is closed" @@ -556,16 +676,24 @@ impl MePool { } } } - if trigger_refill - && let Some(addr) = removed_addr - && let Some(writer_dc) = removed_dc - { + if let Some(addr) = removed_addr { if let Some(uptime) = removed_uptime { self.maybe_quarantine_flapping_endpoint(addr, uptime).await; } - self.trigger_immediate_refill_for_dc(addr, writer_dc); + if trigger_refill + && let Some(writer_dc) = removed_dc + { + self.trigger_immediate_refill_for_dc(addr, writer_dc); + } } - conns + if removed { + self.stats.increment_me_writer_teardown_success_total(mode); + } else { + self.stats.increment_me_writer_teardown_noop_total(); + } + self.stats + .observe_me_writer_teardown_duration(mode, started_at.elapsed()); + removed } pub(crate) async fn mark_writer_draining_with_timeout( diff --git a/src/transport/middle_proxy/send.rs b/src/transport/middle_proxy/send.rs index 6791064..82118d8 100644 --- a/src/transport/middle_proxy/send.rs +++ b/src/transport/middle_proxy/send.rs @@ -14,6 +14,7 @@ use crate::config::{MeRouteNoWriterMode, MeWriterPickMode}; use crate::error::{ProxyError, Result}; use crate::network::IpFamily; use crate::protocol::constants::{RPC_CLOSE_CONN_U32, RPC_CLOSE_EXT_U32}; +use crate::stats::MeWriterTeardownReason; use super::MePool; use super::codec::WriterCommand; @@ -134,7 +135,11 @@ impl MePool { Ok(()) => return Ok(()), Err(TimedSendError::Closed(_)) => { warn!(writer_id = current.writer_id, "ME writer channel closed"); - self.remove_writer_and_close_clients(current.writer_id).await; + self.remove_writer_and_close_clients( + current.writer_id, + MeWriterTeardownReason::RouteChannelClosed, + ) + .await; continue; } Err(TimedSendError::Timeout(_)) => { @@ -151,7 +156,11 @@ impl MePool { } Err(TrySendError::Closed(_)) => { warn!(writer_id = current.writer_id, "ME writer channel closed"); - self.remove_writer_and_close_clients(current.writer_id).await; + self.remove_writer_and_close_clients( + current.writer_id, + MeWriterTeardownReason::RouteChannelClosed, + ) + .await; continue; } } @@ -458,7 +467,11 @@ impl MePool { Err(TrySendError::Closed(_)) => { self.stats.increment_me_writer_pick_closed_total(pick_mode); warn!(writer_id = w.id, "ME writer channel closed"); - self.remove_writer_and_close_clients(w.id).await; + self.remove_writer_and_close_clients( + w.id, + MeWriterTeardownReason::RouteChannelClosed, + ) + .await; continue; } } @@ -503,7 +516,11 @@ impl MePool { Err(TimedSendError::Closed(_)) => { self.stats.increment_me_writer_pick_closed_total(pick_mode); warn!(writer_id = w.id, "ME writer channel closed (blocking)"); - self.remove_writer_and_close_clients(w.id).await; + self.remove_writer_and_close_clients( + w.id, + MeWriterTeardownReason::RouteChannelClosed, + ) + .await; } Err(TimedSendError::Timeout(_)) => { self.stats.increment_me_writer_pick_full_total(pick_mode); @@ -654,7 +671,11 @@ impl MePool { } Err(TrySendError::Closed(_)) => { debug!("ME close write failed"); - self.remove_writer_and_close_clients(w.writer_id).await; + self.remove_writer_and_close_clients( + w.writer_id, + MeWriterTeardownReason::CloseRpcChannelClosed, + ) + .await; } } } else {