Mirror of https://github.com/telemt/telemt.git (synced 2026-04-15 17:44:11 +03:00)

Compare commits (34 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 03ce267865 | |
| | a6bfa3309e | |
| | 79a3720fd5 | |
| | 89543aed35 | |
| | 06292ff833 | |
| | 427294b103 | |
| | fed9346444 | |
| | f40b645c05 | |
| | a66d5d56bb | |
| | 1b1bdfe99a | |
| | 49fc11ddfa | |
| | 5558900c44 | |
| | 5b1d976392 | |
| | 206f87fe64 | |
| | 5a09d30e1c | |
| | f83e23c521 | |
| | f9e9ddd0f7 | |
| | 6b8619d3c9 | |
| | 618b7a1837 | |
| | 16f166cec8 | |
| | 6efcbe9bbf | |
| | e5ad27e26e | |
| | 53ec96b040 | |
| | c6c3d71b08 | |
| | e9a4281015 | |
| | 866c2fbd96 | |
| | 086c85d851 | |
| | ce4e21c996 | |
| | 25ab79406f | |
| | 7538967d3c | |
| | 4a95f6d195 | |
| | 7d7ef84868 | |
| | 692d9476b9 | |
| | b00b87032b | |
.gitignore (vendored, 6 lines changed)

@@ -19,7 +19,5 @@ target
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
-*.rs
-target
-Cargo.lock
-src
+
+proxy-secret
Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "telemt"
-version = "3.0.14"
+version = "3.1.0"
 edition = "2024"

 [dependencies]
README.md (44 lines changed)

@@ -10,28 +10,18 @@

 ### 🇷🇺 RU

-#### LTS Drafting and Current Improvements
+#### Release 3.0.15 — February 25

-Starting February 21, we began preparing the LTS version.
+On February 25, we released version **3.0.15**

-We are carefully analyzing all available feedback.
-Our goal is to make the LTS candidates as stable as possible, thoroughly debugged, and ready for long-run and high-load production scenarios.
+We expect it to become the final release of the 3.0 generation, and we already consider it an **LTS candidate** for version **3.1.0**!

 ---

+After several days of detailed analysis of Middle-End behavior, we designed and implemented a carefully engineered **ME Writer rotation** mode. This mode sustains consistently high performance in long-run scenarios without errors caused by incorrect proxy configuration.

-#### Improvements from February 23

-On February 23, performance improvements were made to the **DC** and **Middle-End (ME)** modes, focusing on the reverse channel (the client → DC / ME path).

-Additionally, a number of changes were made to improve system resilience:

-- Mitigating network instability
-- Increasing resilience to cryptographic desynchronization
-- Reducing session drift under adverse conditions
-- Improving error handling in edge-case transport scenarios
+We would welcome your feedback and improvement proposals — especially regarding **statistics** and **UX**

 Release:
-[3.0.12](https://github.com/telemt/telemt/releases/tag/3.0.12)
+[3.0.15](https://github.com/telemt/telemt/releases/tag/3.0.15)

 ---

@@ -48,28 +38,18 @@

 ### 🇬🇧 EN

-#### LTS Drafting and Ongoing Improvements
+#### Release 3.0.15 — February 25

-Starting February 21, we began drafting the upcoming LTS version.
+On February 25, we released version **3.0.15**

-We are carefully reviewing and analyzing all available feedback.
-The goal is to ensure that LTS candidates are maximally stable, thoroughly debugged, and ready for long-run and high-load production scenarios.
+We expect this to become the final release of the 3.0 generation and, at this point, we already see it as a strong **LTS candidate** for the upcoming **3.1.0** release!

 ---

+After several days of deep analysis of Middle-End behavior, we designed and implemented a well-engineered **ME Writer rotation mode**. This mode enables sustained high throughput in long-run scenarios while preventing errors caused by proxy misconfiguration.

-#### February 23 Improvements

-On February 23, we introduced performance improvements for both **DC** and **Middle-End (ME)** modes, specifically optimizing the reverse channel (client → DC / ME data path).

-Additionally, we implemented a set of robustness enhancements designed to:

-- Mitigate network-related instability
-- Improve resilience against cryptographic desynchronization
-- Reduce session drift under adverse conditions
-- Improve error handling in edge-case transport scenarios
+We are looking forward to your feedback and improvement proposals — especially regarding **statistics** and **UX**

 Release:
-[3.0.12](https://github.com/telemt/telemt/releases/tag/3.0.12)
+[3.0.15](https://github.com/telemt/telemt/releases/tag/3.0.15)

 ---
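The ME Writer rotation described above can be pictured with a small sketch. This is not telemt's implementation (the real `me_rotation_task` appears in src/main.rs further down and is driven by the config watch channel); it is only a minimal illustration of rotating one pooled writer at a time on a jittered interval, with `MePool` and `rotate_oldest` as hypothetical stand-ins.

```rust
use std::sync::Arc;
use std::time::Duration;
use rand::Rng;

// Hypothetical pool handle; telemt's real ME pool API is not shown in this diff.
struct MePool;

impl MePool {
    /// Handshake a fresh writer, then atomically swap out the oldest one,
    /// so the pool never drops below full DC coverage during rotation.
    async fn rotate_oldest(&self) { /* connect + handshake + swap */ }
}

/// Rotate a single writer per tick; the jitter keeps writer lifecycles
/// de-phased, which is the long-run stability property the notes describe.
async fn rotation_task(pool: Arc<MePool>, base: Duration) {
    loop {
        let jitter = rand::thread_rng().gen_range(0..30);
        tokio::time::sleep(base + Duration::from_secs(jitter)).await;
        pool.rotate_oldest().await;
    }
}
```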
config.full.toml (new file, 204 lines)

@@ -0,0 +1,204 @@
# Telemt full config with default values.
# Examples are kept in comments after '#'.

# Top-level legacy field.
show_link = [] # example: "*" or ["alice", "bob"]
# default_dc = 2 # example: default DC for unmapped non-standard DCs

[general]
fast_mode = true
use_middle_proxy = false
# ad_tag = "00000000000000000000000000000000" # example
# proxy_secret_path = "proxy-secret" # example custom path
# middle_proxy_nat_ip = "203.0.113.10" # example public NAT IP override
middle_proxy_nat_probe = true
# middle_proxy_nat_stun = "stun.l.google.com:19302" # example
# middle_proxy_nat_stun_servers = [] # example: ["stun1.l.google.com:19302", "stun2.l.google.com:19302"]
middle_proxy_pool_size = 8
middle_proxy_warm_standby = 16
me_keepalive_enabled = true
me_keepalive_interval_secs = 25
me_keepalive_jitter_secs = 5
me_keepalive_payload_random = true
crypto_pending_buffer = 262144
max_client_frame = 16777216
desync_all_full = false
beobachten = true
beobachten_minutes = 10
beobachten_flush_secs = 15
beobachten_file = "cache/beobachten.txt"
hardswap = true
me_warmup_stagger_enabled = true
me_warmup_step_delay_ms = 500
me_warmup_step_jitter_ms = 300
me_reconnect_max_concurrent_per_dc = 8
me_reconnect_backoff_base_ms = 500
me_reconnect_backoff_cap_ms = 30000
me_reconnect_fast_retry_count = 12
stun_iface_mismatch_ignore = false
unknown_dc_log_path = "unknown-dc.txt" # to disable: set to null
log_level = "normal" # debug | verbose | normal | silent
disable_colors = false
fast_mode_min_tls_record = 0
update_every = 300
me_reinit_every_secs = 900
me_hardswap_warmup_delay_min_ms = 1000
me_hardswap_warmup_delay_max_ms = 2000
me_hardswap_warmup_extra_passes = 3
me_hardswap_warmup_pass_backoff_base_ms = 500
me_config_stable_snapshots = 2
me_config_apply_cooldown_secs = 300
proxy_secret_stable_snapshots = 2
proxy_secret_rotate_runtime = true
proxy_secret_len_max = 256
me_pool_drain_ttl_secs = 90
me_pool_min_fresh_ratio = 0.8
me_reinit_drain_timeout_secs = 120
# Legacy compatibility fields used when update_every is omitted.
proxy_secret_auto_reload_secs = 3600
proxy_config_auto_reload_secs = 3600
ntp_check = true
ntp_servers = ["pool.ntp.org"] # example: ["pool.ntp.org", "time.cloudflare.com"]
auto_degradation_enabled = true
degradation_min_unavailable_dc_groups = 2

[general.modes]
classic = false
secure = false
tls = true

[general.links]
show = "*" # example: "*" or ["alice", "bob"]
# public_host = "proxy.example.com" # example explicit host/IP for tg:// links
# public_port = 443 # example explicit port for tg:// links

[network]
ipv4 = true
ipv6 = false # set true to enable IPv6
prefer = 4 # 4 or 6
multipath = false
stun_servers = [
    "stun.l.google.com:5349",
    "stun1.l.google.com:3478",
    "stun.gmx.net:3478",
    "stun.l.google.com:19302",
    "stun.1und1.de:3478",
    "stun1.l.google.com:19302",
    "stun2.l.google.com:19302",
    "stun3.l.google.com:19302",
    "stun4.l.google.com:19302",
    "stun.services.mozilla.com:3478",
    "stun.stunprotocol.org:3478",
    "stun.nextcloud.com:3478",
    "stun.voip.eutelia.it:3478",
]
stun_tcp_fallback = true
http_ip_detect_urls = ["https://ifconfig.me/ip", "https://api.ipify.org"]
cache_public_ip_path = "cache/public_ip.txt"

[server]
port = 443
listen_addr_ipv4 = "0.0.0.0"
listen_addr_ipv6 = "::"
# listen_unix_sock = "/var/run/telemt.sock" # example
# listen_unix_sock_perm = "0660" # example unix socket mode
# listen_tcp = true # example explicit override (auto-detected when omitted)
proxy_protocol = false
# metrics_port = 9090 # example
metrics_whitelist = ["127.0.0.1/32", "::1/128"]
# Example explicit listeners (default: omitted, auto-generated from listen_addr_*):
# [[server.listeners]]
# ip = "0.0.0.0"
# announce = "proxy-v4.example.com"
# # announce_ip = "203.0.113.10" # deprecated alias
# proxy_protocol = false
# reuse_allow = false
#
# [[server.listeners]]
# ip = "::"
# announce = "proxy-v6.example.com"
# proxy_protocol = false
# reuse_allow = false

[timeouts]
client_handshake = 15
tg_connect = 10
client_keepalive = 60
client_ack = 300
me_one_retry = 3
me_one_timeout_ms = 1500

[censorship]
tls_domain = "petrovich.ru"
# tls_domains = ["example.com", "cdn.example.net"] # Additional domains for EE links
mask = true
# mask_host = "www.google.com" # example, defaults to tls_domain when both mask_host/mask_unix_sock are unset
# mask_unix_sock = "/var/run/nginx.sock" # example, mutually exclusive with mask_host
mask_port = 443
fake_cert_len = 2048 # if tls_emulation=false and default value is used, loader may randomize this value at runtime
tls_emulation = true
tls_front_dir = "tlsfront"
server_hello_delay_min_ms = 0
server_hello_delay_max_ms = 0
tls_new_session_tickets = 0
tls_full_cert_ttl_secs = 90
alpn_enforce = true

[access]
replay_check_len = 65536
replay_window_secs = 1800
ignore_time_skew = false

[access.users]
# format: "username" = "32_hex_chars_secret"
hello = "00000000000000000000000000000000"
# alice = "11111111111111111111111111111111" # example

[access.user_max_tcp_conns]
# alice = 100 # example

[access.user_expirations]
# alice = "2078-01-01T00:00:00Z" # example

[access.user_data_quota]
# hello = 10737418240 # example bytes
# alice = 10737418240 # example bytes

[access.user_max_unique_ips]
# hello = 10 # example
# alice = 100 # example

# Default behavior if [[upstreams]] is omitted: loader injects one direct upstream.
# Example explicit upstreams:
# [[upstreams]]
# type = "direct"
# interface = "eth0"
# bind_addresses = ["192.0.2.10"]
# weight = 1
# enabled = true
# scopes = "*"
#
# [[upstreams]]
# type = "socks4"
# address = "198.51.100.20:1080"
# interface = "eth0"
# user_id = "telemt"
# weight = 1
# enabled = true
# scopes = "*"
#
# [[upstreams]]
# type = "socks5"
# address = "198.51.100.30:1080"
# interface = "eth0"
# username = "proxy-user"
# password = "proxy-pass"
# weight = 1
# enabled = true
# scopes = "*"

# === DC Address Overrides ===
# [dc_overrides]
# "201" = "149.154.175.50:443" # example
# "202" = ["149.154.167.51:443", "149.154.175.100:443"] # example
# "203" = "91.105.192.100:443" # loader auto-adds this one when omitted
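Since this file ships every default explicitly, it should round-trip through the loader unchanged. A minimal sketch of checking that, reusing `ProxyConfig::load` as it appears in the config tests later in this diff; the `telemt::config` module path is an assumption.

```rust
// Sketch: parse config.full.toml through the same loader the tests below use.
use telemt::config::ProxyConfig; // assumed module path

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let path = std::path::PathBuf::from("config.full.toml");
    let cfg = ProxyConfig::load(&path)?;
    // Spot-check a few of the defaults documented above.
    assert_eq!(cfg.general.middle_proxy_pool_size, 8);
    assert_eq!(cfg.general.me_reinit_every_secs, 900);
    assert_eq!(cfg.server.port, 443);
    println!("config.full.toml parsed and validated");
    Ok(())
}
```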
config.toml (128 lines changed)

@@ -1,11 +1,7 @@
# === General Settings ===
[general]
fast_mode = true
use_middle_proxy = true
# ad_tag = "00000000000000000000000000000000"
# Path to proxy-secret binary (auto-downloaded if missing).
proxy_secret_path = "proxy-secret"
# disable_colors = false # Disable colored output in logs (useful for files/systemd)

# === Log Level ===
# Log level: debug | verbose | normal | silent

@@ -13,145 +9,29 @@ proxy_secret_path = "proxy-secret"
# RUST_LOG env var takes absolute priority over all of these
log_level = "normal"

# === Middle Proxy - ME ===
# Public IP override for ME KDF when behind NAT; leave unset to auto-detect.
# middle_proxy_nat_ip = "203.0.113.10"
# Enable STUN probing to discover public IP:port for ME.
middle_proxy_nat_probe = true
# Primary STUN server (host:port); defaults to Telegram STUN when empty.
middle_proxy_nat_stun = "stun.l.google.com:19302"
# Optional fallback STUN servers list.
middle_proxy_nat_stun_servers = ["stun1.l.google.com:19302", "stun2.l.google.com:19302"]
# Desired number of concurrent ME writers in pool.
middle_proxy_pool_size = 8
# Pre-initialized warm-standby ME connections kept idle.
middle_proxy_warm_standby = 8
# Ignore STUN/interface mismatch and keep ME enabled even if IP differs.
stun_iface_mismatch_ignore = false
# Keepalive padding frames - fl==4
me_keepalive_enabled = true
me_keepalive_interval_secs = 25 # Period between keepalives
me_keepalive_jitter_secs = 5 # Jitter added to interval
me_keepalive_payload_random = true # Randomize 4-byte payload (vs zeros)
# Stagger extra ME connections on warmup to de-phase lifecycles.
me_warmup_stagger_enabled = true
me_warmup_step_delay_ms = 500 # Base delay between extra connects
me_warmup_step_jitter_ms = 300 # Jitter for warmup delay
# Reconnect policy knobs.
me_reconnect_max_concurrent_per_dc = 4 # Parallel reconnects per DC - EXPERIMENTAL! UNSTABLE!
me_reconnect_backoff_base_ms = 500 # Backoff start
me_reconnect_backoff_cap_ms = 30000 # Backoff cap
me_reconnect_fast_retry_count = 11 # Quick retries before backoff
update_every = 7200 # Resolve the active updater interval for ME infrastructure refresh tasks.
crypto_pending_buffer = 262144 # Max pending ciphertext buffer per client writer (bytes). Controls FakeTLS backpressure vs throughput.
max_client_frame = 16777216 # Maximum allowed client MTProto frame size (bytes).
desync_all_full = false # Emit full crypto-desync forensic logs for every event. When false, full forensic details are emitted once per key window.
auto_degradation_enabled = true # Enable auto-degradation from ME to Direct-DC.
degradation_min_unavailable_dc_groups = 2 # Minimum unavailable ME DC groups before degrading.
hardswap = true # Enable C-like hard-swap for ME pool generations. When true, Telemt prewarms a new generation and switches once full coverage is reached.
me_pool_drain_ttl_secs = 90 # Drain-TTL in seconds for stale ME writers after endpoint map changes. During TTL, stale writers may be used only as fallback for new bindings.
me_pool_min_fresh_ratio = 0.8 # Minimum desired-DC coverage ratio required before draining stale writers. Range: 0.0..=1.0.
me_reinit_drain_timeout_secs = 120 # Drain timeout in seconds for stale ME writers after endpoint map changes. Set to 0 to keep stale writers draining indefinitely (no force-close).

[general.modes]
classic = false
secure = false
tls = true

[general.links]
show = "*"
# show = ["alice", "bob"] # Only show links for alice and bob
# show = "*" # Show links for all users
# public_host = "proxy.example.com" # Host (IP or domain) for tg:// links
# public_port = 443 # Port for tg:// links (default: server.port)

# === Network Parameters ===
[network]
# Enable/disable families: true/false/auto(None)
ipv4 = true
ipv6 = false # UNSTABLE WITH ME
# prefer = 4 or 6
prefer = 4
multipath = false # EXPERIMENTAL!

# === Server Binding ===
[server]
port = 443
listen_addr_ipv4 = "0.0.0.0"
listen_addr_ipv6 = "::"
# listen_unix_sock = "/var/run/telemt.sock" # Unix socket
# listen_unix_sock_perm = "0666" # Socket file permissions
# proxy_protocol = false # Enable if behind HAProxy/nginx with PROXY protocol
# metrics_port = 9090
# metrics_whitelist = ["127.0.0.1", "::1"]
metrics_port = 9090
metrics_whitelist = ["127.0.0.1", "::1", "0.0.0.0/0"]

# Listen on multiple interfaces/IPs - IPv4
[[server.listeners]]
ip = "0.0.0.0"

# Listen on multiple interfaces/IPs - IPv6
[[server.listeners]]
ip = "::"

# === Timeouts (in seconds) ===
[timeouts]
client_handshake = 30
tg_connect = 10
client_keepalive = 60
client_ack = 300
# Quick ME reconnects for single-address DCs (count and per-attempt timeout, ms).
me_one_retry = 12
me_one_timeout_ms = 1200

# === Anti-Censorship & Masking ===
[censorship]
tls_domain = "petrovich.ru"
# tls_domains = ["example.com", "cdn.example.net"] # Additional domains for EE links
mask = true
mask_port = 443
# mask_host = "petrovich.ru" # Defaults to tls_domain if not set
# mask_unix_sock = "/var/run/nginx.sock" # Unix socket (mutually exclusive with mask_host)
fake_cert_len = 2048
# tls_emulation = false # Fetch real cert lengths and emulate TLS records
# tls_front_dir = "tlsfront" # Cache directory for TLS emulation

# === Access Control & Users ===
[access]
replay_check_len = 65536
replay_window_secs = 1800
ignore_time_skew = false
tls_emulation = true # Fetch real cert lengths and emulate TLS records
tls_front_dir = "tlsfront" # Cache directory for TLS emulation

[access.users]
# format: "username" = "32_hex_chars_secret"
hello = "00000000000000000000000000000000"

# [access.user_max_tcp_conns]
# hello = 50

# [access.user_max_unique_ips]
# hello = 5

# [access.user_data_quota]
# hello = 1073741824 # 1 GB

# [access.user_expirations]
# format: username = "[year]-[month]-[day]T[hour]:[minute]:[second]Z" UTC
# hello = "2027-01-01T00:00:00Z"

# === Upstreams & Routing ===
[[upstreams]]
type = "direct"
enabled = true
weight = 10
# interface = "192.168.1.100" # Bind outgoing to specific IP or iface name
# bind_addresses = ["192.168.1.100"] # List for round-robin binding (family must match target)

# [[upstreams]]
# type = "socks5"
# address = "127.0.0.1:1080"
# enabled = false
# weight = 1

# === DC Address Overrides ===
# [dc_overrides]
# "203" = "91.105.192.100:443"
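The `[access.users]` entries above take a 32-hex-character secret. One quick way to mint one is sketched below; it uses the `rand` crate directly rather than telemt's internal `SecureRandom`, whose API this diff does not show.

```rust
use rand::RngCore;

// Generate 16 random bytes and print them as the 32-hex-char secret
// format expected by [access.users].
fn main() {
    let mut buf = [0u8; 16];
    rand::thread_rng().fill_bytes(&mut buf);
    let secret: String = buf.iter().map(|b| format!("{:02x}", b)).collect();
    println!("alice = \"{}\"", secret);
}
```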
proxy-secret (binary file deleted)

@@ -1 +0,0 @@
-(binary content)
@@ -3,6 +3,15 @@ use ipnetwork::IpNetwork;
 use serde::Deserialize;

+// Helper defaults kept private to the config module.
+const DEFAULT_NETWORK_IPV6: Option<bool> = Some(false);
+const DEFAULT_STUN_TCP_FALLBACK: bool = true;
+const DEFAULT_MIDDLE_PROXY_WARM_STANDBY: usize = 16;
+const DEFAULT_ME_RECONNECT_MAX_CONCURRENT_PER_DC: u32 = 8;
+const DEFAULT_ME_RECONNECT_FAST_RETRY_COUNT: u32 = 12;
+const DEFAULT_LISTEN_ADDR_IPV6: &str = "::";
+const DEFAULT_ACCESS_USER: &str = "default";
+const DEFAULT_ACCESS_SECRET: &str = "00000000000000000000000000000000";
+
 pub(crate) fn default_true() -> bool {
     true
 }

@@ -77,6 +86,14 @@ pub(crate) fn default_prefer_4() -> u8 {
     4
 }

+pub(crate) fn default_network_ipv6() -> Option<bool> {
+    DEFAULT_NETWORK_IPV6
+}
+
+pub(crate) fn default_stun_tcp_fallback() -> bool {
+    DEFAULT_STUN_TCP_FALLBACK
+}
+
 pub(crate) fn default_unknown_dc_log_path() -> Option<String> {
     Some("unknown-dc.txt".to_string())
 }

@@ -85,6 +102,10 @@ pub(crate) fn default_pool_size() -> usize {
     8
 }

+pub(crate) fn default_middle_proxy_warm_standby() -> usize {
+    DEFAULT_MIDDLE_PROXY_WARM_STANDBY
+}
+
 pub(crate) fn default_keepalive_interval() -> u64 {
     25
 }

@@ -109,6 +130,14 @@ pub(crate) fn default_reconnect_backoff_cap_ms() -> u64 {
     30_000
 }

+pub(crate) fn default_me_reconnect_max_concurrent_per_dc() -> u32 {
+    DEFAULT_ME_RECONNECT_MAX_CONCURRENT_PER_DC
+}
+
+pub(crate) fn default_me_reconnect_fast_retry_count() -> u32 {
+    DEFAULT_ME_RECONNECT_FAST_RETRY_COUNT
+}
+
 pub(crate) fn default_crypto_pending_buffer() -> usize {
     256 * 1024
 }

@@ -121,6 +150,18 @@ pub(crate) fn default_desync_all_full() -> bool {
     false
 }

+pub(crate) fn default_beobachten_minutes() -> u64 {
+    10
+}
+
+pub(crate) fn default_beobachten_flush_secs() -> u64 {
+    15
+}
+
+pub(crate) fn default_beobachten_file() -> String {
+    "cache/beobachten.txt".to_string()
+}
+
 pub(crate) fn default_tls_new_session_tickets() -> u8 {
     0
 }

@@ -179,7 +220,31 @@ pub(crate) fn default_proxy_config_reload_secs() -> u64 {
 }

 pub(crate) fn default_update_every_secs() -> u64 {
-    30 * 60
+    5 * 60
 }

+pub(crate) fn default_update_every() -> Option<u64> {
+    Some(default_update_every_secs())
+}
+
+pub(crate) fn default_me_reinit_every_secs() -> u64 {
+    15 * 60
+}
+
+pub(crate) fn default_me_hardswap_warmup_delay_min_ms() -> u64 {
+    1000
+}
+
+pub(crate) fn default_me_hardswap_warmup_delay_max_ms() -> u64 {
+    2000
+}
+
+pub(crate) fn default_me_hardswap_warmup_extra_passes() -> u8 {
+    3
+}
+
+pub(crate) fn default_me_hardswap_warmup_pass_backoff_base_ms() -> u64 {
+    500
+}
+
 pub(crate) fn default_me_config_stable_snapshots() -> u8 {

@@ -234,6 +299,17 @@ pub(crate) fn default_degradation_min_unavailable_dc_groups() -> u8 {
     2
 }

+pub(crate) fn default_listen_addr_ipv6() -> String {
+    DEFAULT_LISTEN_ADDR_IPV6.to_string()
+}
+
+pub(crate) fn default_access_users() -> HashMap<String, String> {
+    HashMap::from([(
+        DEFAULT_ACCESS_USER.to_string(),
+        DEFAULT_ACCESS_SECRET.to_string(),
+    )])
+}
+
 // Custom deserializer helpers

 #[derive(Deserialize)]
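The helper functions above exist because serde's `default` attribute takes a function path rather than a literal, and hoisting the literals into shared `const`s lets the serde defaults and the hand-written `Default` impls stay in sync. A self-contained illustration of the pattern; the `Example` struct is invented for this sketch:

```rust
use serde::Deserialize;

const DEFAULT_POOL_SIZE: usize = 8;

fn default_true() -> bool {
    true
}

fn default_pool_size() -> usize {
    DEFAULT_POOL_SIZE
}

// Invented example: each field falls back to its helper when the key is
// absent from the TOML, exactly like the GeneralConfig fields below.
#[derive(Deserialize, Debug)]
struct Example {
    #[serde(default = "default_true")]
    fast_mode: bool,
    #[serde(default = "default_pool_size")]
    pool_size: usize,
}

fn main() {
    let ex: Example = toml::from_str("").unwrap();
    assert!(ex.fast_mode);
    assert_eq!(ex.pool_size, DEFAULT_POOL_SIZE);
}
```

The test `impl_defaults_are_sourced_from_default_helpers` further down locks in exactly this single-source-of-truth property.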
@@ -147,6 +147,56 @@ impl ProxyConfig {
             }
         }

+        if config.general.me_reinit_every_secs == 0 {
+            return Err(ProxyError::Config(
+                "general.me_reinit_every_secs must be > 0".to_string(),
+            ));
+        }
+
+        if config.general.beobachten_minutes == 0 {
+            return Err(ProxyError::Config(
+                "general.beobachten_minutes must be > 0".to_string(),
+            ));
+        }
+
+        if config.general.beobachten_flush_secs == 0 {
+            return Err(ProxyError::Config(
+                "general.beobachten_flush_secs must be > 0".to_string(),
+            ));
+        }
+
+        if config.general.beobachten_file.trim().is_empty() {
+            return Err(ProxyError::Config(
+                "general.beobachten_file cannot be empty".to_string(),
+            ));
+        }
+
+        if config.general.me_hardswap_warmup_delay_max_ms == 0 {
+            return Err(ProxyError::Config(
+                "general.me_hardswap_warmup_delay_max_ms must be > 0".to_string(),
+            ));
+        }
+
+        if config.general.me_hardswap_warmup_delay_min_ms
+            > config.general.me_hardswap_warmup_delay_max_ms
+        {
+            return Err(ProxyError::Config(
+                "general.me_hardswap_warmup_delay_min_ms must be <= general.me_hardswap_warmup_delay_max_ms".to_string(),
+            ));
+        }
+
+        if config.general.me_hardswap_warmup_extra_passes > 10 {
+            return Err(ProxyError::Config(
+                "general.me_hardswap_warmup_extra_passes must be within [0, 10]".to_string(),
+            ));
+        }
+
+        if config.general.me_hardswap_warmup_pass_backoff_base_ms == 0 {
+            return Err(ProxyError::Config(
+                "general.me_hardswap_warmup_pass_backoff_base_ms must be > 0".to_string(),
+            ));
+        }
+
         if config.general.me_config_stable_snapshots == 0 {
             return Err(ProxyError::Config(
                 "general.me_config_stable_snapshots must be > 0".to_string(),
@@ -377,6 +427,55 @@ impl ProxyConfig {
 mod tests {
     use super::*;

+    #[test]
+    fn serde_defaults_remain_unchanged_for_present_sections() {
+        let toml = r#"
+[network]
+[general]
+[server]
+[access]
+"#;
+        let cfg: ProxyConfig = toml::from_str(toml).unwrap();
+
+        assert_eq!(cfg.network.ipv6, None);
+        assert!(!cfg.network.stun_tcp_fallback);
+        assert_eq!(cfg.general.middle_proxy_warm_standby, 0);
+        assert_eq!(cfg.general.me_reconnect_max_concurrent_per_dc, 0);
+        assert_eq!(cfg.general.me_reconnect_fast_retry_count, 0);
+        assert_eq!(cfg.general.update_every, None);
+        assert_eq!(cfg.server.listen_addr_ipv4, None);
+        assert_eq!(cfg.server.listen_addr_ipv6, None);
+        assert!(cfg.access.users.is_empty());
+    }
+
+    #[test]
+    fn impl_defaults_are_sourced_from_default_helpers() {
+        let network = NetworkConfig::default();
+        assert_eq!(network.ipv6, default_network_ipv6());
+        assert_eq!(network.stun_tcp_fallback, default_stun_tcp_fallback());
+
+        let general = GeneralConfig::default();
+        assert_eq!(
+            general.middle_proxy_warm_standby,
+            default_middle_proxy_warm_standby()
+        );
+        assert_eq!(
+            general.me_reconnect_max_concurrent_per_dc,
+            default_me_reconnect_max_concurrent_per_dc()
+        );
+        assert_eq!(
+            general.me_reconnect_fast_retry_count,
+            default_me_reconnect_fast_retry_count()
+        );
+        assert_eq!(general.update_every, default_update_every());
+
+        let server = ServerConfig::default();
+        assert_eq!(server.listen_addr_ipv6, Some(default_listen_addr_ipv6()));
+
+        let access = AccessConfig::default();
+        assert_eq!(access.users, default_access_users());
+    }
+
     #[test]
     fn dc_overrides_allow_string_and_array() {
         let toml = r#"

@@ -480,6 +579,161 @@ mod tests {
         let _ = std::fs::remove_file(path);
     }

+    #[test]
+    fn me_reinit_every_default_is_set() {
+        let toml = r#"
+[censorship]
+tls_domain = "example.com"
+
+[access.users]
+user = "00000000000000000000000000000000"
+"#;
+        let dir = std::env::temp_dir();
+        let path = dir.join("telemt_me_reinit_every_default_test.toml");
+        std::fs::write(&path, toml).unwrap();
+        let cfg = ProxyConfig::load(&path).unwrap();
+        assert_eq!(
+            cfg.general.me_reinit_every_secs,
+            default_me_reinit_every_secs()
+        );
+        let _ = std::fs::remove_file(path);
+    }
+
+    #[test]
+    fn me_reinit_every_zero_is_rejected() {
+        let toml = r#"
+[general]
+me_reinit_every_secs = 0
+
+[censorship]
+tls_domain = "example.com"
+
+[access.users]
+user = "00000000000000000000000000000000"
+"#;
+        let dir = std::env::temp_dir();
+        let path = dir.join("telemt_me_reinit_every_zero_test.toml");
+        std::fs::write(&path, toml).unwrap();
+        let err = ProxyConfig::load(&path).unwrap_err().to_string();
+        assert!(err.contains("general.me_reinit_every_secs must be > 0"));
+        let _ = std::fs::remove_file(path);
+    }
+
+    #[test]
+    fn me_hardswap_warmup_defaults_are_set() {
+        let toml = r#"
+[censorship]
+tls_domain = "example.com"
+
+[access.users]
+user = "00000000000000000000000000000000"
+"#;
+        let dir = std::env::temp_dir();
+        let path = dir.join("telemt_me_hardswap_warmup_defaults_test.toml");
+        std::fs::write(&path, toml).unwrap();
+        let cfg = ProxyConfig::load(&path).unwrap();
+        assert_eq!(
+            cfg.general.me_hardswap_warmup_delay_min_ms,
+            default_me_hardswap_warmup_delay_min_ms()
+        );
+        assert_eq!(
+            cfg.general.me_hardswap_warmup_delay_max_ms,
+            default_me_hardswap_warmup_delay_max_ms()
+        );
+        assert_eq!(
+            cfg.general.me_hardswap_warmup_extra_passes,
+            default_me_hardswap_warmup_extra_passes()
+        );
+        assert_eq!(
+            cfg.general.me_hardswap_warmup_pass_backoff_base_ms,
+            default_me_hardswap_warmup_pass_backoff_base_ms()
+        );
+        let _ = std::fs::remove_file(path);
+    }
+
+    #[test]
+    fn me_hardswap_warmup_delay_range_is_validated() {
+        let toml = r#"
+[general]
+me_hardswap_warmup_delay_min_ms = 2001
+me_hardswap_warmup_delay_max_ms = 2000
+
+[censorship]
+tls_domain = "example.com"
+
+[access.users]
+user = "00000000000000000000000000000000"
+"#;
+        let dir = std::env::temp_dir();
+        let path = dir.join("telemt_me_hardswap_warmup_delay_range_test.toml");
+        std::fs::write(&path, toml).unwrap();
+        let err = ProxyConfig::load(&path).unwrap_err().to_string();
+        assert!(err.contains(
+            "general.me_hardswap_warmup_delay_min_ms must be <= general.me_hardswap_warmup_delay_max_ms"
+        ));
+        let _ = std::fs::remove_file(path);
+    }
+
+    #[test]
+    fn me_hardswap_warmup_delay_max_zero_is_rejected() {
+        let toml = r#"
+[general]
+me_hardswap_warmup_delay_max_ms = 0
+
+[censorship]
+tls_domain = "example.com"
+
+[access.users]
+user = "00000000000000000000000000000000"
+"#;
+        let dir = std::env::temp_dir();
+        let path = dir.join("telemt_me_hardswap_warmup_delay_max_zero_test.toml");
+        std::fs::write(&path, toml).unwrap();
+        let err = ProxyConfig::load(&path).unwrap_err().to_string();
+        assert!(err.contains("general.me_hardswap_warmup_delay_max_ms must be > 0"));
+        let _ = std::fs::remove_file(path);
+    }
+
+    #[test]
+    fn me_hardswap_warmup_extra_passes_out_of_range_is_rejected() {
+        let toml = r#"
+[general]
+me_hardswap_warmup_extra_passes = 11
+
+[censorship]
+tls_domain = "example.com"
+
+[access.users]
+user = "00000000000000000000000000000000"
+"#;
+        let dir = std::env::temp_dir();
+        let path = dir.join("telemt_me_hardswap_warmup_extra_passes_test.toml");
+        std::fs::write(&path, toml).unwrap();
+        let err = ProxyConfig::load(&path).unwrap_err().to_string();
+        assert!(err.contains("general.me_hardswap_warmup_extra_passes must be within [0, 10]"));
+        let _ = std::fs::remove_file(path);
+    }
+
+    #[test]
+    fn me_hardswap_warmup_pass_backoff_zero_is_rejected() {
+        let toml = r#"
+[general]
+me_hardswap_warmup_pass_backoff_base_ms = 0
+
+[censorship]
+tls_domain = "example.com"
+
+[access.users]
+user = "00000000000000000000000000000000"
+"#;
+        let dir = std::env::temp_dir();
+        let path = dir.join("telemt_me_hardswap_warmup_backoff_zero_test.toml");
+        std::fs::write(&path, toml).unwrap();
+        let err = ProxyConfig::load(&path).unwrap_err().to_string();
+        assert!(err.contains("general.me_hardswap_warmup_pass_backoff_base_ms must be > 0"));
+        let _ = std::fs::remove_file(path);
+    }
+
+    #[test]
+    fn me_config_stable_snapshots_zero_is_rejected() {
+        let toml = r#"
@@ -76,7 +76,7 @@ impl Default for ProxyModes {
         Self {
             classic: false,
             secure: false,
-            tls: true,
+            tls: default_true(),
         }
     }
 }

@@ -117,12 +117,12 @@ pub struct NetworkConfig {
 impl Default for NetworkConfig {
     fn default() -> Self {
         Self {
-            ipv4: true,
-            ipv6: Some(false),
-            prefer: 4,
+            ipv4: default_true(),
+            ipv6: default_network_ipv6(),
+            prefer: default_prefer_4(),
             multipath: false,
             stun_servers: default_stun_servers(),
-            stun_tcp_fallback: true,
+            stun_tcp_fallback: default_stun_tcp_fallback(),
             http_ip_detect_urls: default_http_ip_detect_urls(),
             cache_public_ip_path: default_cache_public_ip_path(),
         }

@@ -206,6 +206,22 @@ pub struct GeneralConfig {
     #[serde(default = "default_desync_all_full")]
     pub desync_all_full: bool,

+    /// Enable per-IP forensic observation buckets for scanners and handshake failures.
+    #[serde(default)]
+    pub beobachten: bool,
+
+    /// Observation retention window in minutes for per-IP forensic buckets.
+    #[serde(default = "default_beobachten_minutes")]
+    pub beobachten_minutes: u64,
+
+    /// Snapshot flush interval in seconds for beob output file.
+    #[serde(default = "default_beobachten_flush_secs")]
+    pub beobachten_flush_secs: u64,
+
+    /// Snapshot file path for beob output.
+    #[serde(default = "default_beobachten_file")]
+    pub beobachten_file: String,
+
     /// Enable C-like hard-swap for ME pool generations.
     /// When true, Telemt prewarms a new generation and switches once full coverage is reached.
     #[serde(default = "default_hardswap")]

@@ -267,6 +283,26 @@ pub struct GeneralConfig {
     #[serde(default)]
     pub update_every: Option<u64>,

+    /// Periodic ME pool reinitialization interval in seconds.
+    #[serde(default = "default_me_reinit_every_secs")]
+    pub me_reinit_every_secs: u64,
+
+    /// Minimum delay in ms between hardswap warmup connect attempts.
+    #[serde(default = "default_me_hardswap_warmup_delay_min_ms")]
+    pub me_hardswap_warmup_delay_min_ms: u64,
+
+    /// Maximum delay in ms between hardswap warmup connect attempts.
+    #[serde(default = "default_me_hardswap_warmup_delay_max_ms")]
+    pub me_hardswap_warmup_delay_max_ms: u64,
+
+    /// Additional warmup passes in the same hardswap cycle after the base pass.
+    #[serde(default = "default_me_hardswap_warmup_extra_passes")]
+    pub me_hardswap_warmup_extra_passes: u8,
+
+    /// Base backoff in ms between hardswap warmup passes when floor is still incomplete.
+    #[serde(default = "default_me_hardswap_warmup_pass_backoff_base_ms")]
+    pub me_hardswap_warmup_pass_backoff_base_ms: u64,
+
     /// Number of identical getProxyConfig snapshots required before applying ME map updates.
     #[serde(default = "default_me_config_stable_snapshots")]
     pub me_config_stable_snapshots: u8,

@@ -334,27 +370,27 @@ impl Default for GeneralConfig {
         Self {
             modes: ProxyModes::default(),
             prefer_ipv6: false,
-            fast_mode: true,
+            fast_mode: default_true(),
             use_middle_proxy: false,
             ad_tag: None,
             proxy_secret_path: None,
             middle_proxy_nat_ip: None,
-            middle_proxy_nat_probe: false,
+            middle_proxy_nat_probe: true,
             middle_proxy_nat_stun: None,
             middle_proxy_nat_stun_servers: Vec::new(),
             middle_proxy_pool_size: default_pool_size(),
-            middle_proxy_warm_standby: 16,
-            me_keepalive_enabled: true,
+            middle_proxy_warm_standby: default_middle_proxy_warm_standby(),
+            me_keepalive_enabled: default_true(),
             me_keepalive_interval_secs: default_keepalive_interval(),
             me_keepalive_jitter_secs: default_keepalive_jitter(),
-            me_keepalive_payload_random: true,
-            me_warmup_stagger_enabled: true,
+            me_keepalive_payload_random: default_true(),
+            me_warmup_stagger_enabled: default_true(),
             me_warmup_step_delay_ms: default_warmup_step_delay_ms(),
             me_warmup_step_jitter_ms: default_warmup_step_jitter_ms(),
-            me_reconnect_max_concurrent_per_dc: 8,
+            me_reconnect_max_concurrent_per_dc: default_me_reconnect_max_concurrent_per_dc(),
             me_reconnect_backoff_base_ms: default_reconnect_backoff_base_ms(),
             me_reconnect_backoff_cap_ms: default_reconnect_backoff_cap_ms(),
-            me_reconnect_fast_retry_count: 8,
+            me_reconnect_fast_retry_count: default_me_reconnect_fast_retry_count(),
             stun_iface_mismatch_ignore: false,
             unknown_dc_log_path: default_unknown_dc_log_path(),
             log_level: LogLevel::Normal,

@@ -363,9 +399,18 @@ impl Default for GeneralConfig {
             crypto_pending_buffer: default_crypto_pending_buffer(),
             max_client_frame: default_max_client_frame(),
             desync_all_full: default_desync_all_full(),
+            beobachten: true,
+            beobachten_minutes: default_beobachten_minutes(),
+            beobachten_flush_secs: default_beobachten_flush_secs(),
+            beobachten_file: default_beobachten_file(),
             hardswap: default_hardswap(),
             fast_mode_min_tls_record: default_fast_mode_min_tls_record(),
-            update_every: Some(default_update_every_secs()),
+            update_every: default_update_every(),
+            me_reinit_every_secs: default_me_reinit_every_secs(),
+            me_hardswap_warmup_delay_min_ms: default_me_hardswap_warmup_delay_min_ms(),
+            me_hardswap_warmup_delay_max_ms: default_me_hardswap_warmup_delay_max_ms(),
+            me_hardswap_warmup_extra_passes: default_me_hardswap_warmup_extra_passes(),
+            me_hardswap_warmup_pass_backoff_base_ms: default_me_hardswap_warmup_pass_backoff_base_ms(),
             me_config_stable_snapshots: default_me_config_stable_snapshots(),
             me_config_apply_cooldown_secs: default_me_config_apply_cooldown_secs(),
             proxy_secret_stable_snapshots: default_proxy_secret_stable_snapshots(),

@@ -378,7 +423,7 @@ impl Default for GeneralConfig {
             proxy_config_auto_reload_secs: default_proxy_config_reload_secs(),
             ntp_check: default_ntp_check(),
             ntp_servers: default_ntp_servers(),
-            auto_degradation_enabled: true,
+            auto_degradation_enabled: default_true(),
             degradation_min_unavailable_dc_groups: default_degradation_min_unavailable_dc_groups(),
         }
     }

@@ -392,6 +437,11 @@ impl GeneralConfig {
             .unwrap_or_else(|| self.proxy_secret_auto_reload_secs.min(self.proxy_config_auto_reload_secs))
     }

+    /// Resolve periodic zero-downtime reinit interval for ME writers.
+    pub fn effective_me_reinit_every_secs(&self) -> u64 {
+        self.me_reinit_every_secs
+    }
+
     /// Resolve force-close timeout for stale writers.
     /// `me_reinit_drain_timeout_secs` remains backward-compatible alias.
     pub fn effective_me_pool_force_close_secs(&self) -> u64 {

@@ -460,7 +510,7 @@ impl Default for ServerConfig {
         Self {
             port: default_port(),
             listen_addr_ipv4: Some(default_listen_addr()),
-            listen_addr_ipv6: Some("::".to_string()),
+            listen_addr_ipv6: Some(default_listen_addr_ipv6()),
             listen_unix_sock: None,
             listen_unix_sock_perm: None,
             listen_tcp: None,

@@ -568,12 +618,12 @@ impl Default for AntiCensorshipConfig {
         Self {
             tls_domain: default_tls_domain(),
             tls_domains: Vec::new(),
-            mask: true,
+            mask: default_true(),
             mask_host: None,
             mask_port: default_mask_port(),
             mask_unix_sock: None,
             fake_cert_len: default_fake_cert_len(),
-            tls_emulation: false,
+            tls_emulation: true,
             tls_front_dir: default_tls_front_dir(),
             server_hello_delay_min_ms: default_server_hello_delay_min_ms(),
             server_hello_delay_max_ms: default_server_hello_delay_max_ms(),

@@ -613,13 +663,8 @@ pub struct AccessConfig {

 impl Default for AccessConfig {
     fn default() -> Self {
-        let mut users = HashMap::new();
-        users.insert(
-            "default".to_string(),
-            "00000000000000000000000000000000".to_string(),
-        );
         Self {
-            users,
+            users: default_access_users(),
             user_max_tcp_conns: HashMap::new(),
             user_expirations: HashMap::new(),
             user_data_quota: HashMap::new(),
src/main.rs (88 lines changed)

@@ -35,6 +35,7 @@ use crate::crypto::SecureRandom;
 use crate::ip_tracker::UserIpTracker;
 use crate::network::probe::{decide_network_capabilities, log_probe_result, run_probe};
 use crate::proxy::ClientHandler;
+use crate::stats::beobachten::BeobachtenStore;
 use crate::stats::{ReplayChecker, Stats};
 use crate::stream::BufferPool;
 use crate::transport::middle_proxy::{

@@ -159,6 +160,15 @@ fn print_proxy_links(host: &str, port: u16, config: &ProxyConfig) {
     info!(target: "telemt::links", "------------------------");
 }

+async fn write_beobachten_snapshot(path: &str, payload: &str) -> std::io::Result<()> {
+    if let Some(parent) = std::path::Path::new(path).parent()
+        && !parent.as_os_str().is_empty()
+    {
+        tokio::fs::create_dir_all(parent).await?;
+    }
+    tokio::fs::write(path, payload).await
+}
+
 #[tokio::main]
 async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
     let (config_path, cli_silent, cli_log_level) = parse_cli();

@@ -193,14 +203,14 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
     };

     let (filter_layer, filter_handle) = reload::Layer::new(EnvFilter::new("info"));

     // Configure color output based on config
     let fmt_layer = if config.general.disable_colors {
         fmt::Layer::default().with_ansi(false)
     } else {
         fmt::Layer::default().with_ansi(true)
     };

     tracing_subscriber::registry()
         .with(filter_layer)
         .with(fmt_layer)

@@ -256,6 +266,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
     let prefer_ipv6 = decision.prefer_ipv6();
     let mut use_middle_proxy = config.general.use_middle_proxy && (decision.ipv4_me || decision.ipv6_me);
     let stats = Arc::new(Stats::new());
+    let beobachten = Arc::new(BeobachtenStore::new());
     let rng = Arc::new(SecureRandom::new());

     // IP Tracker initialization

@@ -373,6 +384,10 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
         config.general.me_pool_drain_ttl_secs,
         config.general.effective_me_pool_force_close_secs(),
         config.general.me_pool_min_fresh_ratio,
+        config.general.me_hardswap_warmup_delay_min_ms,
+        config.general.me_hardswap_warmup_delay_max_ms,
+        config.general.me_hardswap_warmup_extra_passes,
+        config.general.me_hardswap_warmup_pass_backoff_base_ms,
     );

     let pool_size = config.general.middle_proxy_pool_size.max(1);

@@ -391,18 +406,6 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
             .await;
         });

-        // Periodic ME connection rotation
-        let pool_clone_rot = pool.clone();
-        let rng_clone_rot = rng.clone();
-        tokio::spawn(async move {
-            crate::transport::middle_proxy::me_rotation_task(
-                pool_clone_rot,
-                rng_clone_rot,
-                std::time::Duration::from_secs(1800),
-            )
-            .await;
-        });
-
         Some(pool)
     }
     Err(e) => {

@@ -603,9 +606,9 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
         info!("  IPv4 in use / IPv6 is fallback");
     }
 } else if v6_works && !v4_works {
-    info!("  IPv6 only / IPv4 unavailable)");
+    info!("  IPv6 only / IPv4 unavailable");
 } else if v4_works && !v6_works {
-    info!("  IPv4 only / IPv6 unavailable)");
+    info!("  IPv4 only / IPv6 unavailable");
 } else if !v6_works && !v4_works {
     info!("  No DC connectivity");
 }

@@ -674,14 +677,8 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
         rc_clone.run_periodic_cleanup().await;
     });

-    let detected_ip_v4: Option<std::net::IpAddr> = probe
-        .reflected_ipv4
-        .map(|s| s.ip())
-        .or_else(|| probe.detected_ipv4.map(std::net::IpAddr::V4));
-    let detected_ip_v6: Option<std::net::IpAddr> = probe
-        .reflected_ipv6
-        .map(|s| s.ip())
-        .or_else(|| probe.detected_ipv6.map(std::net::IpAddr::V6));
+    let detected_ip_v4: Option<std::net::IpAddr> = probe.detected_ipv4.map(std::net::IpAddr::V4);
+    let detected_ip_v6: Option<std::net::IpAddr> = probe.detected_ipv6.map(std::net::IpAddr::V6);
     debug!(
         "Detected IPs: v4={:?} v6={:?}",
         detected_ip_v4, detected_ip_v6

@@ -700,6 +697,26 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
         detected_ip_v6,
     );

+    let beobachten_writer = beobachten.clone();
+    let config_rx_beobachten = config_rx.clone();
+    tokio::spawn(async move {
+        loop {
+            let cfg = config_rx_beobachten.borrow().clone();
+            let sleep_secs = cfg.general.beobachten_flush_secs.max(1);
+
+            if cfg.general.beobachten {
+                let ttl = Duration::from_secs(cfg.general.beobachten_minutes.saturating_mul(60));
+                let path = cfg.general.beobachten_file.clone();
+                let snapshot = beobachten_writer.snapshot_text(ttl);
+                if let Err(e) = write_beobachten_snapshot(&path, &snapshot).await {
+                    warn!(error = %e, path = %path, "Failed to flush beobachten snapshot");
+                }
+            }
+
+            tokio::time::sleep(Duration::from_secs(sleep_secs)).await;
+        }
+    });
+
     if let Some(ref pool) = me_pool {
         let pool_clone = pool.clone();
         let rng_clone = rng.clone();

@@ -712,6 +729,18 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
             )
             .await;
         });

+        let pool_clone_rot = pool.clone();
+        let rng_clone_rot = rng.clone();
+        let config_rx_clone_rot = config_rx.clone();
+        tokio::spawn(async move {
+            crate::transport::middle_proxy::me_rotation_task(
+                pool_clone_rot,
+                rng_clone_rot,
+                config_rx_clone_rot,
+            )
+            .await;
+        });
     }

     let mut listeners = Vec::new();

@@ -856,6 +885,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
         let me_pool = me_pool.clone();
         let tls_cache = tls_cache.clone();
         let ip_tracker = ip_tracker.clone();
+        let beobachten = beobachten.clone();
         let max_connections_unix = max_connections.clone();

         tokio::spawn(async move {

@@ -883,6 +913,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
         let me_pool = me_pool.clone();
         let tls_cache = tls_cache.clone();
         let ip_tracker = ip_tracker.clone();
+        let beobachten = beobachten.clone();
         let proxy_protocol_enabled = config.server.proxy_protocol;

         tokio::spawn(async move {

@@ -890,7 +921,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
             if let Err(e) = crate::proxy::client::handle_client_stream(
                 stream, fake_peer, config, stats,
                 upstream_manager, replay_checker, buffer_pool, rng,
-                me_pool, tls_cache, ip_tracker, proxy_protocol_enabled,
+                me_pool, tls_cache, ip_tracker, beobachten, proxy_protocol_enabled,
             ).await {
                 debug!(error = %e, "Unix socket connection error");
             }

@@ -938,9 +969,11 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
     if let Some(port) = config.server.metrics_port {
         let stats = stats.clone();
+        let beobachten = beobachten.clone();
+        let config_rx_metrics = config_rx.clone();
         let whitelist = config.server.metrics_whitelist.clone();
         tokio::spawn(async move {
-            metrics::serve(port, stats, whitelist).await;
+            metrics::serve(port, stats, beobachten, config_rx_metrics, whitelist).await;
         });
     }

@@ -954,6 +987,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
         let me_pool = me_pool.clone();
         let tls_cache = tls_cache.clone();
         let ip_tracker = ip_tracker.clone();
+        let beobachten = beobachten.clone();
         let max_connections_tcp = max_connections.clone();

         tokio::spawn(async move {

@@ -976,6 +1010,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
         let me_pool = me_pool.clone();
         let tls_cache = tls_cache.clone();
         let ip_tracker = ip_tracker.clone();
+        let beobachten = beobachten.clone();
         let proxy_protocol_enabled = listener_proxy_protocol;

         tokio::spawn(async move {

@@ -992,6 +1027,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
             me_pool,
             tls_cache,
             ip_tracker,
+            beobachten,
             proxy_protocol_enabled,
         )
         .run()
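Several of the spawned loops above re-read `config_rx.borrow().clone()` on each iteration instead of capturing the config once; that is what makes the flush interval and the beobachten toggle hot-reloadable. A stripped-down sketch of the pattern, with `Cfg` invented for the example:

```rust
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::watch;

// Invented stand-in for Arc<ProxyConfig>.
struct Cfg {
    flush_secs: u64,
    enabled: bool,
}

// Re-borrow the latest snapshot every tick so a reloaded config takes
// effect on the next iteration without restarting the task.
async fn flush_loop(rx: watch::Receiver<Arc<Cfg>>) {
    loop {
        let cfg = rx.borrow().clone();
        if cfg.enabled {
            // ... write the snapshot here ...
        }
        tokio::time::sleep(Duration::from_secs(cfg.flush_secs.max(1))).await;
    }
}

#[tokio::main]
async fn main() {
    let (tx, rx) = watch::channel(Arc::new(Cfg { flush_secs: 15, enabled: true }));
    tokio::spawn(flush_loop(rx));
    // A config reload elsewhere just publishes a new snapshot:
    let _ = tx.send(Arc::new(Cfg { flush_secs: 5, enabled: false }));
    tokio::time::sleep(Duration::from_secs(1)).await;
}
```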
177
src/metrics.rs
177
src/metrics.rs
@@ -1,6 +1,7 @@
|
||||
use std::convert::Infallible;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use http_body_util::Full;
|
||||
use hyper::body::Bytes;
|
||||
@@ -11,9 +12,17 @@ use ipnetwork::IpNetwork;
|
||||
use tokio::net::TcpListener;
|
||||
use tracing::{info, warn, debug};
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::stats::beobachten::BeobachtenStore;
|
||||
use crate::stats::Stats;
|
||||
|
||||
pub async fn serve(port: u16, stats: Arc<Stats>, whitelist: Vec<IpNetwork>) {
|
||||
pub async fn serve(
|
||||
port: u16,
|
||||
stats: Arc<Stats>,
|
||||
beobachten: Arc<BeobachtenStore>,
|
||||
config_rx: tokio::sync::watch::Receiver<Arc<ProxyConfig>>,
|
||||
whitelist: Vec<IpNetwork>,
|
||||
) {
|
||||
let addr = SocketAddr::from(([0, 0, 0, 0], port));
|
||||
let listener = match TcpListener::bind(addr).await {
|
||||
Ok(l) => l,
|
||||
@@ -22,7 +31,7 @@ pub async fn serve(port: u16, stats: Arc<Stats>, whitelist: Vec<IpNetwork>) {
|
||||
return;
|
||||
}
|
||||
};
|
||||
info!("Metrics endpoint: http://{}/metrics", addr);
|
||||
info!("Metrics endpoint: http://{}/metrics and /beobachten", addr);
|
||||
|
||||
loop {
|
||||
let (stream, peer) = match listener.accept().await {
|
||||
@@ -39,10 +48,14 @@ pub async fn serve(port: u16, stats: Arc<Stats>, whitelist: Vec<IpNetwork>) {
|
||||
}
|
||||
|
||||
let stats = stats.clone();
|
||||
let beobachten = beobachten.clone();
|
||||
let config_rx_conn = config_rx.clone();
|
||||
tokio::spawn(async move {
|
||||
let svc = service_fn(move |req| {
|
||||
let stats = stats.clone();
|
||||
async move { handle(req, &stats) }
|
||||
let beobachten = beobachten.clone();
|
||||
let config = config_rx_conn.borrow().clone();
|
||||
async move { handle(req, &stats, &beobachten, &config) }
|
||||
});
|
||||
if let Err(e) = http1::Builder::new()
|
||||
.serve_connection(hyper_util::rt::TokioIo::new(stream), svc)
|
||||
@@ -54,24 +67,48 @@ pub async fn serve(port: u16, stats: Arc<Stats>, whitelist: Vec<IpNetwork>) {
|
||||
}
|
||||
}
|
||||
|
||||
fn handle<B>(req: Request<B>, stats: &Stats) -> Result<Response<Full<Bytes>>, Infallible> {
|
||||
if req.uri().path() != "/metrics" {
|
||||
fn handle<B>(
|
||||
req: Request<B>,
|
||||
stats: &Stats,
|
||||
beobachten: &BeobachtenStore,
|
||||
config: &ProxyConfig,
|
||||
) -> Result<Response<Full<Bytes>>, Infallible> {
|
||||
if req.uri().path() == "/metrics" {
|
||||
let body = render_metrics(stats);
|
||||
let resp = Response::builder()
|
||||
.status(StatusCode::NOT_FOUND)
|
||||
.body(Full::new(Bytes::from("Not Found\n")))
|
||||
.status(StatusCode::OK)
|
||||
.header("content-type", "text/plain; version=0.0.4; charset=utf-8")
|
||||
.body(Full::new(Bytes::from(body)))
|
||||
.unwrap();
|
||||
return Ok(resp);
|
||||
}
|
||||
|
||||
if req.uri().path() == "/beobachten" {
|
||||
let body = render_beobachten(beobachten, config);
|
||||
let resp = Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header("content-type", "text/plain; charset=utf-8")
|
||||
.body(Full::new(Bytes::from(body)))
|
||||
.unwrap();
|
||||
return Ok(resp);
|
||||
}
|
||||
|
||||
let body = render_metrics(stats);
|
||||
let resp = Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header("content-type", "text/plain; version=0.0.4; charset=utf-8")
|
||||
.body(Full::new(Bytes::from(body)))
|
||||
.status(StatusCode::NOT_FOUND)
|
||||
.body(Full::new(Bytes::from("Not Found\n")))
|
||||
.unwrap();
|
||||
Ok(resp)
|
||||
}
|
||||
|
||||
fn render_beobachten(beobachten: &BeobachtenStore, config: &ProxyConfig) -> String {
|
||||
if !config.general.beobachten {
|
||||
return "beobachten disabled\n".to_string();
|
||||
}
|
||||
|
||||
let ttl = Duration::from_secs(config.general.beobachten_minutes.saturating_mul(60));
|
||||
beobachten.snapshot_text(ttl)
|
||||
}
|
||||
|
||||
fn render_metrics(stats: &Stats) -> String {
|
||||
use std::fmt::Write;
|
||||
let mut out = String::with_capacity(4096);
|
||||
@@ -199,6 +236,95 @@ fn render_metrics(stats: &Stats) -> String {
        stats.get_pool_stale_pick_total()
    );

    let _ = writeln!(out, "# HELP telemt_me_writer_removed_total Total ME writer removals");
    let _ = writeln!(out, "# TYPE telemt_me_writer_removed_total counter");
    let _ = writeln!(
        out,
        "telemt_me_writer_removed_total {}",
        stats.get_me_writer_removed_total()
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_writer_removed_unexpected_total Unexpected ME writer removals that triggered refill"
    );
    let _ = writeln!(out, "# TYPE telemt_me_writer_removed_unexpected_total counter");
    let _ = writeln!(
        out,
        "telemt_me_writer_removed_unexpected_total {}",
        stats.get_me_writer_removed_unexpected_total()
    );

    let _ = writeln!(out, "# HELP telemt_me_refill_triggered_total Immediate ME refill runs started");
    let _ = writeln!(out, "# TYPE telemt_me_refill_triggered_total counter");
    let _ = writeln!(
        out,
        "telemt_me_refill_triggered_total {}",
        stats.get_me_refill_triggered_total()
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_refill_skipped_inflight_total Immediate ME refill skips due to inflight dedup"
    );
    let _ = writeln!(out, "# TYPE telemt_me_refill_skipped_inflight_total counter");
    let _ = writeln!(
        out,
        "telemt_me_refill_skipped_inflight_total {}",
        stats.get_me_refill_skipped_inflight_total()
    );

    let _ = writeln!(out, "# HELP telemt_me_refill_failed_total Immediate ME refill failures");
    let _ = writeln!(out, "# TYPE telemt_me_refill_failed_total counter");
    let _ = writeln!(
        out,
        "telemt_me_refill_failed_total {}",
        stats.get_me_refill_failed_total()
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_writer_restored_same_endpoint_total Refilled ME writer restored on the same endpoint"
    );
    let _ = writeln!(out, "# TYPE telemt_me_writer_restored_same_endpoint_total counter");
    let _ = writeln!(
        out,
        "telemt_me_writer_restored_same_endpoint_total {}",
        stats.get_me_writer_restored_same_endpoint_total()
    );

    let _ = writeln!(
        out,
        "# HELP telemt_me_writer_restored_fallback_total Refilled ME writer restored via fallback endpoint"
    );
    let _ = writeln!(out, "# TYPE telemt_me_writer_restored_fallback_total counter");
    let _ = writeln!(
        out,
        "telemt_me_writer_restored_fallback_total {}",
        stats.get_me_writer_restored_fallback_total()
    );

    let unresolved_writer_losses = stats
        .get_me_writer_removed_unexpected_total()
        .saturating_sub(
            stats
                .get_me_writer_restored_same_endpoint_total()
                .saturating_add(stats.get_me_writer_restored_fallback_total()),
        );
    let _ = writeln!(
        out,
        "# HELP telemt_me_writer_removed_unexpected_minus_restored_total Unexpected writer removals not yet compensated by restore"
    );
    let _ = writeln!(
        out,
        "# TYPE telemt_me_writer_removed_unexpected_minus_restored_total gauge"
    );
    let _ = writeln!(
        out,
        "telemt_me_writer_removed_unexpected_minus_restored_total {}",
        unresolved_writer_losses
    );
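    // Worked example of the derived gauge (hypothetical figures): with
    // removed_unexpected = 7, restored_same_endpoint = 4 and
    // restored_fallback = 2, the gauge reads 7 - (4 + 2) = 1, i.e. one writer
    // loss not yet compensated. saturating_sub pins it at 0 if restores ever
    // outrun the recorded losses (the three counters are read independently,
    // so brief inversions are possible during concurrent updates).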

    let _ = writeln!(out, "# HELP telemt_user_connections_total Per-user total connections");
    let _ = writeln!(out, "# TYPE telemt_user_connections_total counter");
    let _ = writeln!(out, "# HELP telemt_user_connections_current Per-user active connections");
@@ -229,6 +355,7 @@ fn render_metrics(stats: &Stats) -> String {
#[cfg(test)]
mod tests {
    use super::*;
    use std::net::IpAddr;
    use http_body_util::BodyExt;

    #[test]
@@ -277,11 +404,17 @@ mod tests {
        assert!(output.contains("# TYPE telemt_connections_total counter"));
        assert!(output.contains("# TYPE telemt_connections_bad_total counter"));
        assert!(output.contains("# TYPE telemt_handshake_timeouts_total counter"));
        assert!(output.contains("# TYPE telemt_me_writer_removed_total counter"));
        assert!(output.contains(
            "# TYPE telemt_me_writer_removed_unexpected_minus_restored_total gauge"
        ));
    }

    #[tokio::test]
    async fn test_endpoint_integration() {
        let stats = Arc::new(Stats::new());
        let beobachten = Arc::new(BeobachtenStore::new());
        let mut config = ProxyConfig::default();
        stats.increment_connects_all();
        stats.increment_connects_all();
        stats.increment_connects_all();
@@ -290,16 +423,34 @@ mod tests {
            .uri("/metrics")
            .body(())
            .unwrap();
-        let resp = handle(req, &stats).unwrap();
+        let resp = handle(req, &stats, &beobachten, &config).unwrap();
        assert_eq!(resp.status(), StatusCode::OK);
        let body = resp.into_body().collect().await.unwrap().to_bytes();
        assert!(std::str::from_utf8(body.as_ref()).unwrap().contains("telemt_connections_total 3"));

        config.general.beobachten = true;
        config.general.beobachten_minutes = 10;
        beobachten.record(
            "TLS-scanner",
            "203.0.113.10".parse::<IpAddr>().unwrap(),
            Duration::from_secs(600),
        );
        let req_beob = Request::builder()
            .uri("/beobachten")
            .body(())
            .unwrap();
        let resp_beob = handle(req_beob, &stats, &beobachten, &config).unwrap();
        assert_eq!(resp_beob.status(), StatusCode::OK);
        let body_beob = resp_beob.into_body().collect().await.unwrap().to_bytes();
        let beob_text = std::str::from_utf8(body_beob.as_ref()).unwrap();
        assert!(beob_text.contains("[TLS-scanner]"));
        assert!(beob_text.contains("203.0.113.10-1"));

        let req404 = Request::builder()
            .uri("/other")
            .body(())
            .unwrap();
-        let resp404 = handle(req404, &stats).unwrap();
+        let resp404 = handle(req404, &stats, &beobachten, &config).unwrap();
        assert_eq!(resp404.status(), StatusCode::NOT_FOUND);
    }
}
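// For reference, a /beobachten response produced by snapshot_text looks like
// this (illustrative values, matching the assertions above):
//
//     [TLS-scanner]
//     203.0.113.10-1
//
// Classes are grouped in brackets and each line is "<ip>-<tries>", sorted by
// tries in descending order.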
@@ -1,7 +1,7 @@
//! Client Handler

use std::future::Future;
-use std::net::SocketAddr;
+use std::net::{IpAddr, SocketAddr};
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
@@ -27,6 +27,7 @@ use crate::error::{HandshakeResult, ProxyError, Result};
use crate::ip_tracker::UserIpTracker;
use crate::protocol::constants::*;
use crate::protocol::tls;
use crate::stats::beobachten::BeobachtenStore;
use crate::stats::{ReplayChecker, Stats};
use crate::stream::{BufferPool, CryptoReader, CryptoWriter};
use crate::transport::middle_proxy::MePool;
@@ -39,6 +40,36 @@ use crate::proxy::handshake::{HandshakeSuccess, handle_mtproto_handshake, handle
use crate::proxy::masking::handle_bad_client;
use crate::proxy::middle_relay::handle_via_middle_proxy;

fn beobachten_ttl(config: &ProxyConfig) -> Duration {
    Duration::from_secs(config.general.beobachten_minutes.saturating_mul(60))
}

fn record_beobachten_class(
    beobachten: &BeobachtenStore,
    config: &ProxyConfig,
    peer_ip: IpAddr,
    class: &str,
) {
    if !config.general.beobachten {
        return;
    }
    beobachten.record(class, peer_ip, beobachten_ttl(config));
}

fn record_handshake_failure_class(
    beobachten: &BeobachtenStore,
    config: &ProxyConfig,
    peer_ip: IpAddr,
    error: &ProxyError,
) {
    let class = if error.to_string().contains("expected 64 bytes, got 0") {
        "expected_64_got_0"
    } else {
        "other"
    };
    record_beobachten_class(beobachten, config, peer_ip, class);
}
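// Classification sketch: a peer that connects and immediately closes yields
// "expected 64 bytes, got 0" from the handshake read and lands in the
// "expected_64_got_0" bucket; every other handshake error is bucketed as
// "other". Matching on the rendered error string keeps this classifier
// decoupled from the concrete ProxyError variants.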

pub async fn handle_client_stream<S>(
    mut stream: S,
    peer: SocketAddr,
@@ -51,6 +82,7 @@ pub async fn handle_client_stream<S>(
    me_pool: Option<Arc<MePool>>,
    tls_cache: Option<Arc<TlsFrontCache>>,
    ip_tracker: Arc<UserIpTracker>,
    beobachten: Arc<BeobachtenStore>,
    proxy_protocol_enabled: bool,
) -> Result<()>
where
@@ -73,6 +105,7 @@ where
        Err(e) => {
            stats.increment_connects_bad();
            warn!(peer = %peer, error = %e, "Invalid PROXY protocol header");
            record_beobachten_class(&beobachten, &config, peer.ip(), "other");
            return Err(e);
        }
    }
@@ -82,6 +115,9 @@ where

    let handshake_timeout = Duration::from_secs(config.timeouts.client_handshake);
    let stats_for_timeout = stats.clone();
    let config_for_timeout = config.clone();
    let beobachten_for_timeout = beobachten.clone();
    let peer_for_timeout = real_peer.ip();

    // For non-TCP streams, use a synthetic local address
    let local_addr: SocketAddr = format!("0.0.0.0:{}", config.server.port)
@@ -103,7 +139,15 @@ where
        debug!(peer = %real_peer, tls_len = tls_len, "TLS handshake too short");
        stats.increment_connects_bad();
        let (reader, writer) = tokio::io::split(stream);
-        handle_bad_client(reader, writer, &first_bytes, &config).await;
+        handle_bad_client(
+            reader,
+            writer,
+            &first_bytes,
+            real_peer.ip(),
+            &config,
+            &beobachten,
+        )
+        .await;
        return Ok(HandshakeOutcome::Handled);
    }

@@ -120,7 +164,15 @@ where
        HandshakeResult::Success(result) => result,
        HandshakeResult::BadClient { reader, writer } => {
            stats.increment_connects_bad();
-            handle_bad_client(reader, writer, &handshake, &config).await;
+            handle_bad_client(
+                reader,
+                writer,
+                &handshake,
+                real_peer.ip(),
+                &config,
+                &beobachten,
+            )
+            .await;
            return Ok(HandshakeOutcome::Handled);
        }
        HandshakeResult::Error(e) => return Err(e),
@@ -156,7 +208,15 @@ where
        debug!(peer = %real_peer, "Non-TLS modes disabled");
        stats.increment_connects_bad();
        let (reader, writer) = tokio::io::split(stream);
-        handle_bad_client(reader, writer, &first_bytes, &config).await;
+        handle_bad_client(
+            reader,
+            writer,
+            &first_bytes,
+            real_peer.ip(),
+            &config,
+            &beobachten,
+        )
+        .await;
        return Ok(HandshakeOutcome::Handled);
    }

@@ -173,7 +233,15 @@ where
        HandshakeResult::Success(result) => result,
        HandshakeResult::BadClient { reader, writer } => {
            stats.increment_connects_bad();
-            handle_bad_client(reader, writer, &handshake, &config).await;
+            handle_bad_client(
+                reader,
+                writer,
+                &handshake,
+                real_peer.ip(),
+                &config,
+                &beobachten,
+            )
+            .await;
            return Ok(HandshakeOutcome::Handled);
        }
        HandshakeResult::Error(e) => return Err(e),
@@ -200,11 +268,23 @@ where
        Ok(Ok(outcome)) => outcome,
        Ok(Err(e)) => {
            debug!(peer = %peer, error = %e, "Handshake failed");
            record_handshake_failure_class(
                &beobachten_for_timeout,
                &config_for_timeout,
                peer_for_timeout,
                &e,
            );
            return Err(e);
        }
        Err(_) => {
            stats_for_timeout.increment_handshake_timeouts();
            debug!(peer = %peer, "Handshake timeout");
            record_beobachten_class(
                &beobachten_for_timeout,
                &config_for_timeout,
                peer_for_timeout,
                "other",
            );
            return Err(ProxyError::TgHandshakeTimeout);
        }
    };
@@ -230,6 +310,7 @@ pub struct RunningClientHandler {
    me_pool: Option<Arc<MePool>>,
    tls_cache: Option<Arc<TlsFrontCache>>,
    ip_tracker: Arc<UserIpTracker>,
    beobachten: Arc<BeobachtenStore>,
    proxy_protocol_enabled: bool,
}

@@ -246,6 +327,7 @@ impl ClientHandler {
        me_pool: Option<Arc<MePool>>,
        tls_cache: Option<Arc<TlsFrontCache>>,
        ip_tracker: Arc<UserIpTracker>,
        beobachten: Arc<BeobachtenStore>,
        proxy_protocol_enabled: bool,
    ) -> RunningClientHandler {
        RunningClientHandler {
@@ -260,6 +342,7 @@ impl ClientHandler {
            me_pool,
            tls_cache,
            ip_tracker,
            beobachten,
            proxy_protocol_enabled,
        }
    }
@@ -284,17 +367,32 @@ impl RunningClientHandler {

        let handshake_timeout = Duration::from_secs(self.config.timeouts.client_handshake);
        let stats = self.stats.clone();
        let config_for_timeout = self.config.clone();
        let beobachten_for_timeout = self.beobachten.clone();
        let peer_for_timeout = peer.ip();

        // Phase 1: handshake (with timeout)
        let outcome = match timeout(handshake_timeout, self.do_handshake()).await {
            Ok(Ok(outcome)) => outcome,
            Ok(Err(e)) => {
                debug!(peer = %peer, error = %e, "Handshake failed");
                record_handshake_failure_class(
                    &beobachten_for_timeout,
                    &config_for_timeout,
                    peer_for_timeout,
                    &e,
                );
                return Err(e);
            }
            Err(_) => {
                stats.increment_handshake_timeouts();
                debug!(peer = %peer, "Handshake timeout");
                record_beobachten_class(
                    &beobachten_for_timeout,
                    &config_for_timeout,
                    peer_for_timeout,
                    "other",
                );
                return Err(ProxyError::TgHandshakeTimeout);
            }
        };
@@ -321,6 +419,12 @@ impl RunningClientHandler {
            Err(e) => {
                self.stats.increment_connects_bad();
                warn!(peer = %self.peer, error = %e, "Invalid PROXY protocol header");
                record_beobachten_class(
                    &self.beobachten,
                    &self.config,
                    self.peer.ip(),
                    "other",
                );
                return Err(e);
            }
        }
@@ -354,7 +458,15 @@ impl RunningClientHandler {
        debug!(peer = %peer, tls_len = tls_len, "TLS handshake too short");
        self.stats.increment_connects_bad();
        let (reader, writer) = self.stream.into_split();
-        handle_bad_client(reader, writer, &first_bytes, &self.config).await;
+        handle_bad_client(
+            reader,
+            writer,
+            &first_bytes,
+            peer.ip(),
+            &self.config,
+            &self.beobachten,
+        )
+        .await;
        return Ok(HandshakeOutcome::Handled);
    }

@@ -385,7 +497,15 @@ impl RunningClientHandler {
        HandshakeResult::Success(result) => result,
        HandshakeResult::BadClient { reader, writer } => {
            stats.increment_connects_bad();
-            handle_bad_client(reader, writer, &handshake, &config).await;
+            handle_bad_client(
+                reader,
+                writer,
+                &handshake,
+                peer.ip(),
+                &config,
+                &self.beobachten,
+            )
+            .await;
            return Ok(HandshakeOutcome::Handled);
        }
        HandshakeResult::Error(e) => return Err(e),
@@ -446,7 +566,15 @@ impl RunningClientHandler {
        debug!(peer = %peer, "Non-TLS modes disabled");
        self.stats.increment_connects_bad();
        let (reader, writer) = self.stream.into_split();
-        handle_bad_client(reader, writer, &first_bytes, &self.config).await;
+        handle_bad_client(
+            reader,
+            writer,
+            &first_bytes,
+            peer.ip(),
+            &self.config,
+            &self.beobachten,
+        )
+        .await;
        return Ok(HandshakeOutcome::Handled);
    }

@@ -476,7 +604,15 @@ impl RunningClientHandler {
        HandshakeResult::Success(result) => result,
        HandshakeResult::BadClient { reader, writer } => {
            stats.increment_connects_bad();
-            handle_bad_client(reader, writer, &handshake, &config).await;
+            handle_bad_client(
+                reader,
+                writer,
+                &handshake,
+                peer.ip(),
+                &config,
+                &self.beobachten,
+            )
+            .await;
            return Ok(HandshakeOutcome::Handled);
        }
        HandshakeResult::Error(e) => return Err(e),
@@ -1,6 +1,7 @@
//! Masking - forward unrecognized traffic to mask host

use std::str;
+use std::net::IpAddr;
use std::time::Duration;
use tokio::net::TcpStream;
#[cfg(unix)]
@@ -9,6 +10,7 @@ use tokio::io::{AsyncRead, AsyncWrite, AsyncReadExt, AsyncWriteExt};
use tokio::time::timeout;
use tracing::debug;
use crate::config::ProxyConfig;
+use crate::stats::beobachten::BeobachtenStore;

const MASK_TIMEOUT: Duration = Duration::from_secs(5);
/// Maximum duration for the entire masking relay.
@@ -50,20 +52,26 @@ pub async fn handle_bad_client<R, W>(
    reader: R,
    writer: W,
    initial_data: &[u8],
+    peer_ip: IpAddr,
    config: &ProxyConfig,
+    beobachten: &BeobachtenStore,
)
where
    R: AsyncRead + Unpin + Send + 'static,
    W: AsyncWrite + Unpin + Send + 'static,
{
+    let client_type = detect_client_type(initial_data);
+    if config.general.beobachten {
+        let ttl = Duration::from_secs(config.general.beobachten_minutes.saturating_mul(60));
+        beobachten.record(client_type, peer_ip, ttl);
+    }

    if !config.censorship.mask {
        // Masking disabled, just consume data
        consume_client_data(reader).await;
        return;
    }

-    let client_type = detect_client_type(initial_data);

    // Connect via Unix socket or TCP
    #[cfg(unix)]
    if let Some(ref sock_path) = config.censorship.mask_unix_sock {
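// Note on ordering in handle_bad_client: the client class is recorded into
// the beobachten store before the mask check, so probes are counted even when
// masking is disabled and the data is merely consumed.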
src/stats/beobachten.rs (new file, 117 lines)
@@ -0,0 +1,117 @@
//! Per-IP forensic buckets for scanner and handshake failure observation.

use std::collections::{BTreeMap, HashMap};
use std::net::IpAddr;
use std::time::{Duration, Instant};

use parking_lot::Mutex;

const CLEANUP_INTERVAL: Duration = Duration::from_secs(30);

#[derive(Default)]
struct BeobachtenInner {
    entries: HashMap<(String, IpAddr), BeobachtenEntry>,
    last_cleanup: Option<Instant>,
}

#[derive(Clone, Copy)]
struct BeobachtenEntry {
    tries: u64,
    last_seen: Instant,
}

/// In-memory, TTL-scoped per-IP counters keyed by source class.
pub struct BeobachtenStore {
    inner: Mutex<BeobachtenInner>,
}

impl Default for BeobachtenStore {
    fn default() -> Self {
        Self::new()
    }
}

impl BeobachtenStore {
    pub fn new() -> Self {
        Self {
            inner: Mutex::new(BeobachtenInner::default()),
        }
    }

    pub fn record(&self, class: &str, ip: IpAddr, ttl: Duration) {
        if class.is_empty() || ttl.is_zero() {
            return;
        }

        let now = Instant::now();
        let mut guard = self.inner.lock();
        Self::cleanup_if_needed(&mut guard, now, ttl);

        let key = (class.to_string(), ip);
        let entry = guard.entries.entry(key).or_insert(BeobachtenEntry {
            tries: 0,
            last_seen: now,
        });
        entry.tries = entry.tries.saturating_add(1);
        entry.last_seen = now;
    }

    pub fn snapshot_text(&self, ttl: Duration) -> String {
        if ttl.is_zero() {
            return "beobachten disabled\n".to_string();
        }

        let now = Instant::now();
        let mut guard = self.inner.lock();
        Self::cleanup(&mut guard, now, ttl);
        guard.last_cleanup = Some(now);

        let mut grouped = BTreeMap::<String, Vec<(IpAddr, u64)>>::new();
        for ((class, ip), entry) in &guard.entries {
            grouped
                .entry(class.clone())
                .or_default()
                .push((*ip, entry.tries));
        }

        if grouped.is_empty() {
            return "empty\n".to_string();
        }

        let mut out = String::with_capacity(grouped.len() * 64);
        for (class, entries) in &mut grouped {
            out.push('[');
            out.push_str(class);
            out.push_str("]\n");

            entries.sort_by(|(ip_a, tries_a), (ip_b, tries_b)| {
                tries_b
                    .cmp(tries_a)
                    .then_with(|| ip_a.to_string().cmp(&ip_b.to_string()))
            });

            for (ip, tries) in entries {
                out.push_str(&format!("{ip}-{tries}\n"));
            }
        }

        out
    }

    fn cleanup_if_needed(inner: &mut BeobachtenInner, now: Instant, ttl: Duration) {
        let should_cleanup = match inner.last_cleanup {
            Some(last) => now.saturating_duration_since(last) >= CLEANUP_INTERVAL,
            None => true,
        };
        if should_cleanup {
            Self::cleanup(inner, now, ttl);
            inner.last_cleanup = Some(now);
        }
    }

    fn cleanup(inner: &mut BeobachtenInner, now: Instant, ttl: Duration) {
        inner.entries.retain(|_, entry| {
            now.saturating_duration_since(entry.last_seen) <= ttl
        });
    }
}
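// A minimal usage sketch of BeobachtenStore (hypothetical values; the real
// call sites are the client handler and masking paths shown above):
//
//     let store = BeobachtenStore::new();
//     let ttl = Duration::from_secs(600);
//     let ip: IpAddr = "203.0.113.10".parse().unwrap();
//     store.record("TLS-scanner", ip, ttl);
//     store.record("TLS-scanner", ip, ttl);
//     // snapshot_text groups by class and sorts by tries, descending:
//     //   [TLS-scanner]
//     //   203.0.113.10-2
//     assert!(store.snapshot_text(ttl).contains("203.0.113.10-2"));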

@@ -2,6 +2,8 @@

#![allow(dead_code)]

pub mod beobachten;

use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{Instant, Duration};
use dashmap::DashMap;
@@ -43,6 +45,13 @@ pub struct Stats {
    pool_drain_active: AtomicU64,
    pool_force_close_total: AtomicU64,
    pool_stale_pick_total: AtomicU64,
    me_writer_removed_total: AtomicU64,
    me_writer_removed_unexpected_total: AtomicU64,
    me_refill_triggered_total: AtomicU64,
    me_refill_skipped_inflight_total: AtomicU64,
    me_refill_failed_total: AtomicU64,
    me_writer_restored_same_endpoint_total: AtomicU64,
    me_writer_restored_fallback_total: AtomicU64,
    user_stats: DashMap<String, UserStats>,
    start_time: parking_lot::RwLock<Option<Instant>>,
}
@@ -142,6 +151,27 @@ impl Stats {
    pub fn increment_pool_stale_pick_total(&self) {
        self.pool_stale_pick_total.fetch_add(1, Ordering::Relaxed);
    }
    pub fn increment_me_writer_removed_total(&self) {
        self.me_writer_removed_total.fetch_add(1, Ordering::Relaxed);
    }
    pub fn increment_me_writer_removed_unexpected_total(&self) {
        self.me_writer_removed_unexpected_total.fetch_add(1, Ordering::Relaxed);
    }
    pub fn increment_me_refill_triggered_total(&self) {
        self.me_refill_triggered_total.fetch_add(1, Ordering::Relaxed);
    }
    pub fn increment_me_refill_skipped_inflight_total(&self) {
        self.me_refill_skipped_inflight_total.fetch_add(1, Ordering::Relaxed);
    }
    pub fn increment_me_refill_failed_total(&self) {
        self.me_refill_failed_total.fetch_add(1, Ordering::Relaxed);
    }
    pub fn increment_me_writer_restored_same_endpoint_total(&self) {
        self.me_writer_restored_same_endpoint_total.fetch_add(1, Ordering::Relaxed);
    }
    pub fn increment_me_writer_restored_fallback_total(&self) {
        self.me_writer_restored_fallback_total.fetch_add(1, Ordering::Relaxed);
    }
    pub fn get_connects_all(&self) -> u64 { self.connects_all.load(Ordering::Relaxed) }
    pub fn get_connects_bad(&self) -> u64 { self.connects_bad.load(Ordering::Relaxed) }
    pub fn get_me_keepalive_sent(&self) -> u64 { self.me_keepalive_sent.load(Ordering::Relaxed) }
@@ -195,6 +225,27 @@ impl Stats {
    pub fn get_pool_stale_pick_total(&self) -> u64 {
        self.pool_stale_pick_total.load(Ordering::Relaxed)
    }
    pub fn get_me_writer_removed_total(&self) -> u64 {
        self.me_writer_removed_total.load(Ordering::Relaxed)
    }
    pub fn get_me_writer_removed_unexpected_total(&self) -> u64 {
        self.me_writer_removed_unexpected_total.load(Ordering::Relaxed)
    }
    pub fn get_me_refill_triggered_total(&self) -> u64 {
        self.me_refill_triggered_total.load(Ordering::Relaxed)
    }
    pub fn get_me_refill_skipped_inflight_total(&self) -> u64 {
        self.me_refill_skipped_inflight_total.load(Ordering::Relaxed)
    }
    pub fn get_me_refill_failed_total(&self) -> u64 {
        self.me_refill_failed_total.load(Ordering::Relaxed)
    }
    pub fn get_me_writer_restored_same_endpoint_total(&self) -> u64 {
        self.me_writer_restored_same_endpoint_total.load(Ordering::Relaxed)
    }
    pub fn get_me_writer_restored_fallback_total(&self) -> u64 {
        self.me_writer_restored_fallback_total.load(Ordering::Relaxed)
    }

    pub fn increment_user_connects(&self, user: &str) {
        self.user_stats.entry(user.to_string()).or_default()
@@ -228,6 +228,10 @@ async fn run_update_cycle(
        cfg.general.me_pool_drain_ttl_secs,
        cfg.general.effective_me_pool_force_close_secs(),
        cfg.general.me_pool_min_fresh_ratio,
        cfg.general.me_hardswap_warmup_delay_min_ms,
        cfg.general.me_hardswap_warmup_delay_max_ms,
        cfg.general.me_hardswap_warmup_extra_passes,
        cfg.general.me_hardswap_warmup_pass_backoff_base_ms,
    );

    let required_cfg_snapshots = cfg.general.me_config_stable_snapshots.max(1);
@@ -407,6 +411,10 @@ pub async fn me_config_updater(
        cfg.general.me_pool_drain_ttl_secs,
        cfg.general.effective_me_pool_force_close_secs(),
        cfg.general.me_pool_min_fresh_ratio,
        cfg.general.me_hardswap_warmup_delay_min_ms,
        cfg.general.me_hardswap_warmup_delay_max_ms,
        cfg.general.me_hardswap_warmup_extra_passes,
        cfg.general.me_hardswap_warmup_pass_backoff_base_ms,
    );
    let new_secs = cfg.general.effective_update_every_secs().max(1);
    if new_secs == update_every_secs {
@@ -1,10 +1,9 @@
-use std::collections::{HashMap, HashSet};
+use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::{Duration, Instant};

use tracing::{debug, info, warn};
-use rand::seq::SliceRandom;
use rand::Rng;

use crate::crypto::SecureRandom;
@@ -64,31 +63,43 @@ async fn check_family(
        IpFamily::V4 => pool.proxy_map_v4.read().await.clone(),
        IpFamily::V6 => pool.proxy_map_v6.read().await.clone(),
    };
-    let writer_addrs: HashSet<SocketAddr> = pool

+    let mut dc_endpoints = HashMap::<i32, Vec<SocketAddr>>::new();
+    for (dc, addrs) in map {
+        let entry = dc_endpoints.entry(dc.abs()).or_default();
+        for (ip, port) in addrs {
+            entry.push(SocketAddr::new(ip, port));
+        }
+    }
+    for endpoints in dc_endpoints.values_mut() {
+        endpoints.sort_unstable();
+        endpoints.dedup();
+    }

+    let mut live_addr_counts = HashMap::<SocketAddr, usize>::new();
+    for writer in pool
        .writers
        .read()
        .await
        .iter()
        .filter(|w| !w.draining.load(std::sync::atomic::Ordering::Relaxed))
-        .map(|w| w.addr)
-        .collect();
+    {
+        *live_addr_counts.entry(writer.addr).or_insert(0) += 1;
+    }

-    let entries: Vec<(i32, Vec<SocketAddr>)> = map
-        .iter()
-        .map(|(dc, addrs)| {
-            let list = addrs
-                .iter()
-                .map(|(ip, port)| SocketAddr::new(*ip, *port))
-                .collect::<Vec<_>>();
-            (*dc, list)
-        })
-        .collect();

-    for (dc, dc_addrs) in entries {
-        let has_coverage = dc_addrs.iter().any(|a| writer_addrs.contains(a));
-        if has_coverage {
+    for (dc, endpoints) in dc_endpoints {
+        if endpoints.is_empty() {
            continue;
        }
+        let required = MePool::required_writers_for_dc(endpoints.len());
+        let alive = endpoints
+            .iter()
+            .map(|addr| *live_addr_counts.get(addr).unwrap_or(&0))
+            .sum::<usize>();
+        if alive >= required {
+            continue;
+        }
+        let missing = required - alive;

        let key = (dc, family);
        let now = Instant::now();
@@ -104,32 +115,45 @@ async fn check_family(
        }
        *inflight.entry(key).or_insert(0) += 1;

-        let mut shuffled = dc_addrs.clone();
-        shuffled.shuffle(&mut rand::rng());
-        let mut success = false;
-        for addr in shuffled {
-            let res = tokio::time::timeout(pool.me_one_timeout, pool.connect_one(addr, rng.as_ref())).await;
+        let mut restored = 0usize;
+        for _ in 0..missing {
+            let res = tokio::time::timeout(
+                pool.me_one_timeout,
+                pool.connect_endpoints_round_robin(&endpoints, rng.as_ref()),
+            )
+            .await;
            match res {
-                Ok(Ok(())) => {
-                    info!(%addr, dc = %dc, ?family, "ME reconnected for DC coverage");
+                Ok(true) => {
+                    restored += 1;
                    pool.stats.increment_me_reconnect_success();
-                    backoff.insert(key, pool.me_reconnect_backoff_base.as_millis() as u64);
-                    let jitter = pool.me_reconnect_backoff_base.as_millis() as u64 / JITTER_FRAC_NUM;
-                    let wait = pool.me_reconnect_backoff_base
-                        + Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
-                    next_attempt.insert(key, now + wait);
-                    success = true;
-                    break;
                }
-                Ok(Err(e)) => {
+                Ok(false) => {
                    pool.stats.increment_me_reconnect_attempt();
-                    debug!(%addr, dc = %dc, error = %e, ?family, "ME reconnect failed")
+                    debug!(dc = %dc, ?family, "ME round-robin reconnect failed")
                }
+                Err(_) => {
+                    pool.stats.increment_me_reconnect_attempt();
+                    debug!(dc = %dc, ?family, "ME reconnect timed out");
+                }
-                Err(_) => debug!(%addr, dc = %dc, ?family, "ME reconnect timed out"),
            }
        }
-        if !success {
-            pool.stats.increment_me_reconnect_attempt();

+        let now_alive = alive + restored;
+        if now_alive >= required {
+            info!(
+                dc = %dc,
+                ?family,
+                alive = now_alive,
+                required,
+                endpoint_count = endpoints.len(),
+                "ME writer floor restored for DC"
+            );
+            backoff.insert(key, pool.me_reconnect_backoff_base.as_millis() as u64);
+            let jitter = pool.me_reconnect_backoff_base.as_millis() as u64 / JITTER_FRAC_NUM;
+            let wait = pool.me_reconnect_backoff_base
+                + Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
+            next_attempt.insert(key, now + wait);
        } else {
            let curr = *backoff.get(&key).unwrap_or(&(pool.me_reconnect_backoff_base.as_millis() as u64));
            let next_ms = (curr.saturating_mul(2)).min(pool.me_reconnect_backoff_cap.as_millis() as u64);
            backoff.insert(key, next_ms);
@@ -137,7 +161,15 @@ async fn check_family(
            let wait = Duration::from_millis(next_ms)
                + Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
            next_attempt.insert(key, now + wait);
-            warn!(dc = %dc, backoff_ms = next_ms, ?family, "DC has no ME coverage, scheduled reconnect");
+            warn!(
+                dc = %dc,
+                ?family,
+                alive = now_alive,
+                required,
+                endpoint_count = endpoints.len(),
+                backoff_ms = next_ms,
+                "DC writer floor is below required level, scheduled reconnect"
+            );
        }
        if let Some(v) = inflight.get_mut(&key) {
            *v = v.saturating_sub(1);
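// Worked example of the writer-floor accounting above (hypothetical
// endpoints): a DC with two endpoints A and B has required = max(2, 3) = 3;
// with live_addr_counts = {A: 1, B: 1} the alive sum is 2, so missing = 1 and
// exactly one round-robin reconnect attempt is scheduled for that DC in this
// pass.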
@@ -75,6 +75,7 @@ pub struct MePool {
    pub(super) rtt_stats: Arc<Mutex<HashMap<u64, (f64, f64)>>>,
    pub(super) nat_reflection_cache: Arc<Mutex<NatReflectionCache>>,
    pub(super) writer_available: Arc<Notify>,
    pub(super) refill_inflight: Arc<Mutex<HashSet<SocketAddr>>>,
    pub(super) conn_count: AtomicUsize,
    pub(super) stats: Arc<crate::stats::Stats>,
    pub(super) generation: AtomicU64,
@@ -82,6 +83,10 @@ pub struct MePool {
    pub(super) me_pool_drain_ttl_secs: AtomicU64,
    pub(super) me_pool_force_close_secs: AtomicU64,
    pub(super) me_pool_min_fresh_ratio_permille: AtomicU32,
    pub(super) me_hardswap_warmup_delay_min_ms: AtomicU64,
    pub(super) me_hardswap_warmup_delay_max_ms: AtomicU64,
    pub(super) me_hardswap_warmup_extra_passes: AtomicU32,
    pub(super) me_hardswap_warmup_pass_backoff_base_ms: AtomicU64,
    pool_size: usize,
}

@@ -139,6 +144,10 @@ impl MePool {
        me_pool_drain_ttl_secs: u64,
        me_pool_force_close_secs: u64,
        me_pool_min_fresh_ratio: f32,
        me_hardswap_warmup_delay_min_ms: u64,
        me_hardswap_warmup_delay_max_ms: u64,
        me_hardswap_warmup_extra_passes: u8,
        me_hardswap_warmup_pass_backoff_base_ms: u64,
    ) -> Arc<Self> {
        Arc::new(Self {
            registry: Arc::new(ConnRegistry::new()),
@@ -180,12 +189,17 @@ impl MePool {
            rtt_stats: Arc::new(Mutex::new(HashMap::new())),
            nat_reflection_cache: Arc::new(Mutex::new(NatReflectionCache::default())),
            writer_available: Arc::new(Notify::new()),
            refill_inflight: Arc::new(Mutex::new(HashSet::new())),
            conn_count: AtomicUsize::new(0),
            generation: AtomicU64::new(1),
            hardswap: AtomicBool::new(hardswap),
            me_pool_drain_ttl_secs: AtomicU64::new(me_pool_drain_ttl_secs),
            me_pool_force_close_secs: AtomicU64::new(me_pool_force_close_secs),
            me_pool_min_fresh_ratio_permille: AtomicU32::new(Self::ratio_to_permille(me_pool_min_fresh_ratio)),
            me_hardswap_warmup_delay_min_ms: AtomicU64::new(me_hardswap_warmup_delay_min_ms),
            me_hardswap_warmup_delay_max_ms: AtomicU64::new(me_hardswap_warmup_delay_max_ms),
            me_hardswap_warmup_extra_passes: AtomicU32::new(me_hardswap_warmup_extra_passes as u32),
            me_hardswap_warmup_pass_backoff_base_ms: AtomicU64::new(me_hardswap_warmup_pass_backoff_base_ms),
        })
    }

@@ -203,6 +217,10 @@ impl MePool {
        drain_ttl_secs: u64,
        force_close_secs: u64,
        min_fresh_ratio: f32,
        hardswap_warmup_delay_min_ms: u64,
        hardswap_warmup_delay_max_ms: u64,
        hardswap_warmup_extra_passes: u8,
        hardswap_warmup_pass_backoff_base_ms: u64,
    ) {
        self.hardswap.store(hardswap, Ordering::Relaxed);
        self.me_pool_drain_ttl_secs.store(drain_ttl_secs, Ordering::Relaxed);
@@ -210,6 +228,14 @@ impl MePool {
            .store(force_close_secs, Ordering::Relaxed);
        self.me_pool_min_fresh_ratio_permille
            .store(Self::ratio_to_permille(min_fresh_ratio), Ordering::Relaxed);
        self.me_hardswap_warmup_delay_min_ms
            .store(hardswap_warmup_delay_min_ms, Ordering::Relaxed);
        self.me_hardswap_warmup_delay_max_ms
            .store(hardswap_warmup_delay_max_ms, Ordering::Relaxed);
        self.me_hardswap_warmup_extra_passes
            .store(hardswap_warmup_extra_passes as u32, Ordering::Relaxed);
        self.me_hardswap_warmup_pass_backoff_base_ms
            .store(hardswap_warmup_pass_backoff_base_ms, Ordering::Relaxed);
    }

    pub fn reset_stun_state(&self) {
@@ -324,36 +350,172 @@ impl MePool {
        out
    }

    pub(super) fn required_writers_for_dc(endpoint_count: usize) -> usize {
        endpoint_count.max(3)
    }
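    // Floor examples: 1 endpoint -> 3 writers, 2 endpoints -> 3 writers,
    // 5 endpoints -> 5 writers. The max(3) keeps a small DC from running on a
    // single connection.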

    fn hardswap_warmup_connect_delay_ms(&self) -> u64 {
        let min_ms = self
            .me_hardswap_warmup_delay_min_ms
            .load(Ordering::Relaxed);
        let max_ms = self
            .me_hardswap_warmup_delay_max_ms
            .load(Ordering::Relaxed);
        let (min_ms, max_ms) = if min_ms <= max_ms {
            (min_ms, max_ms)
        } else {
            (max_ms, min_ms)
        };
        if min_ms == max_ms {
            return min_ms;
        }
        rand::rng().random_range(min_ms..=max_ms)
    }

    fn hardswap_warmup_backoff_ms(&self, pass_idx: usize) -> u64 {
        let base_ms = self
            .me_hardswap_warmup_pass_backoff_base_ms
            .load(Ordering::Relaxed);
        let cap_ms = (self.me_reconnect_backoff_cap.as_millis() as u64).max(base_ms);
        let shift = (pass_idx as u32).min(20);
        let scaled = base_ms.saturating_mul(1u64 << shift);
        let core = scaled.min(cap_ms);
        let jitter = (core / 2).max(1);
        core.saturating_add(rand::rng().random_range(0..=jitter))
    }
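    // Backoff example (hypothetical base_ms = 500, cap = 30_000): pass 0 ->
    // core 500, pass 1 -> 1_000, pass 2 -> 2_000, doubling until the cap;
    // each result then gains up to +50% random jitter so concurrent warmup
    // passes de-synchronize.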

    async fn fresh_writer_count_for_endpoints(
        &self,
        generation: u64,
        endpoints: &HashSet<SocketAddr>,
    ) -> usize {
        let ws = self.writers.read().await;
        ws.iter()
            .filter(|w| !w.draining.load(Ordering::Relaxed))
            .filter(|w| w.generation == generation)
            .filter(|w| endpoints.contains(&w.addr))
            .count()
    }

    pub(super) async fn connect_endpoints_round_robin(
        self: &Arc<Self>,
        endpoints: &[SocketAddr],
        rng: &SecureRandom,
    ) -> bool {
        if endpoints.is_empty() {
            return false;
        }
        let start = (self.rr.fetch_add(1, Ordering::Relaxed) as usize) % endpoints.len();
        for offset in 0..endpoints.len() {
            let idx = (start + offset) % endpoints.len();
            let addr = endpoints[idx];
            match self.connect_one(addr, rng).await {
                Ok(()) => return true,
                Err(e) => debug!(%addr, error = %e, "ME connect failed during round-robin warmup"),
            }
        }
        false
    }
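    // The shared self.rr counter moves the starting index on every call, so
    // successive refills spread across endpoints instead of always hammering
    // endpoints[0]; a full cycle returns false only after every endpoint has
    // failed once.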

    async fn warmup_generation_for_all_dcs(
        self: &Arc<Self>,
        rng: &SecureRandom,
        generation: u64,
        desired_by_dc: &HashMap<i32, HashSet<SocketAddr>>,
    ) {
-        for endpoints in desired_by_dc.values() {
+        let extra_passes = self
+            .me_hardswap_warmup_extra_passes
+            .load(Ordering::Relaxed)
+            .min(10) as usize;
+        let total_passes = 1 + extra_passes;

+        for (dc, endpoints) in desired_by_dc {
            if endpoints.is_empty() {
                continue;
            }

-            let has_fresh = {
-                let ws = self.writers.read().await;
-                ws.iter().any(|w| {
-                    !w.draining.load(Ordering::Relaxed)
-                        && w.generation == generation
-                        && endpoints.contains(&w.addr)
-                })
-            };
+            let mut endpoint_list: Vec<SocketAddr> = endpoints.iter().copied().collect();
+            endpoint_list.sort_unstable();
+            let required = Self::required_writers_for_dc(endpoint_list.len());
+            let mut completed = false;
+            let mut last_fresh_count = self
+                .fresh_writer_count_for_endpoints(generation, endpoints)
+                .await;

-            if has_fresh {
-                continue;
-            }

-            let mut shuffled: Vec<SocketAddr> = endpoints.iter().copied().collect();
-            shuffled.shuffle(&mut rand::rng());
-            for addr in shuffled {
-                if self.connect_one(addr, rng).await.is_ok() {
+            for pass_idx in 0..total_passes {
+                if last_fresh_count >= required {
+                    completed = true;
+                    break;
+                }

+                let missing = required.saturating_sub(last_fresh_count);
+                debug!(
+                    dc = *dc,
+                    pass = pass_idx + 1,
+                    total_passes,
+                    fresh_count = last_fresh_count,
+                    required,
+                    missing,
+                    endpoint_count = endpoint_list.len(),
+                    "ME hardswap warmup pass started"
+                );

+                for attempt_idx in 0..missing {
+                    let delay_ms = self.hardswap_warmup_connect_delay_ms();
+                    tokio::time::sleep(Duration::from_millis(delay_ms)).await;

+                    let connected = self.connect_endpoints_round_robin(&endpoint_list, rng).await;
+                    debug!(
+                        dc = *dc,
+                        pass = pass_idx + 1,
+                        total_passes,
+                        attempt = attempt_idx + 1,
+                        delay_ms,
+                        connected,
+                        "ME hardswap warmup connect attempt finished"
+                    );
+                }

+                last_fresh_count = self
+                    .fresh_writer_count_for_endpoints(generation, endpoints)
+                    .await;
+                if last_fresh_count >= required {
+                    completed = true;
+                    info!(
+                        dc = *dc,
+                        pass = pass_idx + 1,
+                        total_passes,
+                        fresh_count = last_fresh_count,
+                        required,
+                        "ME hardswap warmup floor reached for DC"
+                    );
+                    break;
+                }

+                if pass_idx + 1 < total_passes {
+                    let backoff_ms = self.hardswap_warmup_backoff_ms(pass_idx);
+                    debug!(
+                        dc = *dc,
+                        pass = pass_idx + 1,
+                        total_passes,
+                        fresh_count = last_fresh_count,
+                        required,
+                        backoff_ms,
+                        "ME hardswap warmup pass incomplete, delaying next pass"
+                    );
+                    tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
+                }
+            }

+            if !completed {
+                warn!(
+                    dc = *dc,
+                    fresh_count = last_fresh_count,
+                    required,
+                    endpoint_count = endpoint_list.len(),
+                    total_passes,
+                    "ME warmup stopped: unable to reach required writer floor for DC"
+                );
+            }
        }
    }
@@ -364,7 +526,7 @@ impl MePool {
    ) {
        let desired_by_dc = self.desired_dc_endpoints().await;
        if desired_by_dc.is_empty() {
-            warn!("ME endpoint map is empty after update; skipping stale writer drain");
+            warn!("ME endpoint map is empty; skipping stale writer drain");
            return;
        }

@@ -403,19 +565,26 @@ impl MePool {
        }

        if hardswap {
-            let fresh_writer_addrs: HashSet<SocketAddr> = writers
-                .iter()
-                .filter(|w| !w.draining.load(Ordering::Relaxed))
-                .filter(|w| w.generation == generation)
-                .map(|w| w.addr)
-                .collect();
-            let (fresh_ratio, fresh_missing_dc) =
-                Self::coverage_ratio(&desired_by_dc, &fresh_writer_addrs);
+            let mut fresh_missing_dc = Vec::<(i32, usize, usize)>::new();
+            for (dc, endpoints) in &desired_by_dc {
+                if endpoints.is_empty() {
+                    continue;
+                }
+                let required = Self::required_writers_for_dc(endpoints.len());
+                let fresh_count = writers
+                    .iter()
+                    .filter(|w| !w.draining.load(Ordering::Relaxed))
+                    .filter(|w| w.generation == generation)
+                    .filter(|w| endpoints.contains(&w.addr))
+                    .count();
+                if fresh_count < required {
+                    fresh_missing_dc.push((*dc, fresh_count, required));
+                }
+            }
            if !fresh_missing_dc.is_empty() {
                warn!(
                    previous_generation,
                    generation,
-                    fresh_ratio = format_args!("{fresh_ratio:.3}"),
                    missing_dc = ?fresh_missing_dc,
                    "ME hardswap pending: fresh generation coverage incomplete"
                );
@@ -425,7 +594,7 @@ impl MePool {
            warn!(
                missing_dc = ?missing_dc,
                // Keep stale writers alive when fresh coverage is incomplete.
-                "ME reinit coverage incomplete after map update; keeping stale writers"
+                "ME reinit coverage incomplete; keeping stale writers"
            );
            return;
        }
@@ -450,7 +619,7 @@ impl MePool {
        drop(writers);

        if stale_writer_ids.is_empty() {
-            debug!("ME map update completed with no stale writers");
+            debug!("ME reinit cycle completed with no stale writers");
            return;
        }

@@ -464,7 +633,7 @@ impl MePool {
            coverage_ratio = format_args!("{coverage_ratio:.3}"),
            min_ratio = format_args!("{min_ratio:.3}"),
            drain_timeout_secs,
-            "ME map update covered; draining stale writers"
+            "ME reinit cycle covered; draining stale writers"
        );
        self.stats.increment_pool_swap_total();
        for writer_id in stale_writer_ids {
@@ -473,6 +642,140 @@ impl MePool {
        }
    }

    /// Entry point used by the periodic reinit task; reuses the same
    /// zero-downtime swap path as a map change.
    pub async fn zero_downtime_reinit_periodic(
        self: &Arc<Self>,
        rng: &SecureRandom,
    ) {
        self.zero_downtime_reinit_after_map_change(rng).await;
    }

    /// Collect every endpoint belonging to the same DC as `addr`, across both
    /// address families; `dc` and `-dc` keys are merged (the proxy map mirrors
    /// a DC under its negative id).
    async fn endpoints_for_same_dc(&self, addr: SocketAddr) -> Vec<SocketAddr> {
        let mut target_dc = HashSet::<i32>::new();
        let mut endpoints = HashSet::<SocketAddr>::new();

        if self.decision.ipv4_me {
            let map = self.proxy_map_v4.read().await.clone();
            for (dc, addrs) in &map {
                if addrs
                    .iter()
                    .any(|(ip, port)| SocketAddr::new(*ip, *port) == addr)
                {
                    target_dc.insert(dc.abs());
                }
            }
            for dc in &target_dc {
                for key in [*dc, -*dc] {
                    if let Some(addrs) = map.get(&key) {
                        for (ip, port) in addrs {
                            endpoints.insert(SocketAddr::new(*ip, *port));
                        }
                    }
                }
            }
        }

        if self.decision.ipv6_me {
            let map = self.proxy_map_v6.read().await.clone();
            for (dc, addrs) in &map {
                if addrs
                    .iter()
                    .any(|(ip, port)| SocketAddr::new(*ip, *port) == addr)
                {
                    target_dc.insert(dc.abs());
                }
            }
            for dc in &target_dc {
                for key in [*dc, -*dc] {
                    if let Some(addrs) = map.get(&key) {
                        for (ip, port) in addrs {
                            endpoints.insert(SocketAddr::new(*ip, *port));
                        }
                    }
                }
            }
        }

        let mut sorted: Vec<SocketAddr> = endpoints.into_iter().collect();
        sorted.sort_unstable();
        sorted
    }

    /// Two-phase restore after an unexpected writer loss: first retry the
    /// exact lost endpoint, then fall back to round-robin over every endpoint
    /// of the same DC. Only when both phases fail is
    /// `me_refill_failed_total` incremented.
    async fn refill_writer_after_loss(self: &Arc<Self>, addr: SocketAddr) -> bool {
        let fast_retries = self.me_reconnect_fast_retry_count.max(1);

        for attempt in 0..fast_retries {
            self.stats.increment_me_reconnect_attempt();
            match self.connect_one(addr, self.rng.as_ref()).await {
                Ok(()) => {
                    self.stats.increment_me_reconnect_success();
                    self.stats.increment_me_writer_restored_same_endpoint_total();
                    info!(
                        %addr,
                        attempt = attempt + 1,
                        "ME writer restored on the same endpoint"
                    );
                    return true;
                }
                Err(e) => {
                    debug!(
                        %addr,
                        attempt = attempt + 1,
                        error = %e,
                        "ME immediate same-endpoint reconnect failed"
                    );
                }
            }
        }

        let dc_endpoints = self.endpoints_for_same_dc(addr).await;
        if dc_endpoints.is_empty() {
            self.stats.increment_me_refill_failed_total();
            return false;
        }

        for attempt in 0..fast_retries {
            self.stats.increment_me_reconnect_attempt();
            if self
                .connect_endpoints_round_robin(&dc_endpoints, self.rng.as_ref())
                .await
            {
                self.stats.increment_me_reconnect_success();
                self.stats.increment_me_writer_restored_fallback_total();
                info!(
                    %addr,
                    attempt = attempt + 1,
                    "ME writer restored via DC fallback endpoint"
                );
                return true;
            }
        }

        self.stats.increment_me_refill_failed_total();
        false
    }

    /// Spawn a background refill for a lost writer. The `refill_inflight` set
    /// deduplicates concurrent losses of the same endpoint: the second task
    /// fails the insert, bumps `me_refill_skipped_inflight_total` and exits,
    /// so at most one refill per address runs at a time.
    pub(crate) fn trigger_immediate_refill(self: &Arc<Self>, addr: SocketAddr) {
        let pool = Arc::clone(self);
        tokio::spawn(async move {
            {
                let mut guard = pool.refill_inflight.lock().await;
                if !guard.insert(addr) {
                    pool.stats.increment_me_refill_skipped_inflight_total();
                    return;
                }
            }
            pool.stats.increment_me_refill_triggered_total();

            let restored = pool.refill_writer_after_loss(addr).await;
            if !restored {
                warn!(%addr, "ME immediate refill failed");
            }

            let mut guard = pool.refill_inflight.lock().await;
            guard.remove(&addr);
        });
    }

    pub async fn update_proxy_maps(
        &self,
        new_v4: HashMap<i32, Vec<(IpAddr, u16)>>,
@@ -880,16 +1183,25 @@ impl MePool {
        }
    }

-    async fn remove_writer_only(&self, writer_id: u64) -> Vec<BoundConn> {
+    async fn remove_writer_only(self: &Arc<Self>, writer_id: u64) -> Vec<BoundConn> {
        let mut close_tx: Option<mpsc::Sender<WriterCommand>> = None;
        let mut removed_addr: Option<SocketAddr> = None;
        let mut trigger_refill = false;
        {
            let mut ws = self.writers.write().await;
            if let Some(pos) = ws.iter().position(|w| w.id == writer_id) {
                let w = ws.remove(pos);
-                if w.draining.load(Ordering::Relaxed) {
+                let was_draining = w.draining.load(Ordering::Relaxed);
+                if was_draining {
                    self.stats.decrement_pool_drain_active();
                }
                self.stats.increment_me_writer_removed_total();
                w.cancel.cancel();
                removed_addr = Some(w.addr);
                // A writer that was not draining died unexpectedly; count it
                // and kick off an immediate refill below.
                trigger_refill = !was_draining;
                if trigger_refill {
                    self.stats.increment_me_writer_removed_unexpected_total();
                }
                close_tx = Some(w.tx.clone());
                self.conn_count.fetch_sub(1, Ordering::Relaxed);
            }
@@ -897,6 +1209,11 @@ impl MePool {
        if let Some(tx) = close_tx {
            let _ = tx.send(WriterCommand::Close).await;
        }
        if trigger_refill
            && let Some(addr) = removed_addr
        {
            self.trigger_immediate_refill(addr);
        }
        self.rtt_stats.lock().await.remove(&writer_id);
        self.registry.writer_lost(writer_id).await
    }

@@ -1,50 +1,87 @@
use std::sync::Arc;
-use std::sync::atomic::Ordering;
use std::time::Duration;

+use tokio::sync::watch;
use tracing::{info, warn};

+use crate::config::ProxyConfig;
use crate::crypto::SecureRandom;

use super::MePool;

-/// Periodically refresh ME connections to avoid long-lived degradation.
-pub async fn me_rotation_task(pool: Arc<MePool>, rng: Arc<SecureRandom>, interval: Duration) {
-    let interval = interval.max(Duration::from_secs(600));
+/// Periodically reinitialize ME generations and swap them after full warmup.
+pub async fn me_rotation_task(
+    pool: Arc<MePool>,
+    rng: Arc<SecureRandom>,
+    mut config_rx: watch::Receiver<Arc<ProxyConfig>>,
+) {
+    let mut interval_secs = config_rx
+        .borrow()
+        .general
+        .effective_me_reinit_every_secs()
+        .max(1);
+    let mut interval = Duration::from_secs(interval_secs);
+    let mut next_tick = tokio::time::Instant::now() + interval;

+    info!(interval_secs, "ME periodic reinit task started");

    loop {
-        tokio::time::sleep(interval).await;
+        let sleep = tokio::time::sleep_until(next_tick);
+        tokio::pin!(sleep);

-        let candidate = {
-            let ws = pool.writers.read().await;
-            if ws.is_empty() {
-                None
-            } else {
-                let idx = (pool.rr.load(std::sync::atomic::Ordering::Relaxed) as usize) % ws.len();
-                ws.get(idx).cloned()
-            }
-        };

-        let Some(w) = candidate else {
-            continue;
-        };

-        info!(addr = %w.addr, writer_id = w.id, "Rotating ME connection");
-        match pool.connect_one(w.addr, rng.as_ref()).await {
-            Ok(()) => {
-                tokio::time::sleep(Duration::from_secs(2)).await;
-                let ws = pool.writers.read().await;
-                let new_alive = ws.iter().any(|nw|
-                    nw.id != w.id && nw.addr == w.addr && !nw.degraded.load(Ordering::Relaxed) && !nw.draining.load(Ordering::Relaxed)
-                );
-                drop(ws);
-                if new_alive {
-                    pool.mark_writer_draining(w.id).await;
-                } else {
-                    warn!(addr = %w.addr, writer_id = w.id, "New writer died, keeping old");
+        tokio::select! {
+            _ = &mut sleep => {
+                pool.zero_downtime_reinit_periodic(rng.as_ref()).await;
+                let refreshed_secs = config_rx
+                    .borrow()
+                    .general
+                    .effective_me_reinit_every_secs()
+                    .max(1);
+                if refreshed_secs != interval_secs {
+                    info!(
+                        old_me_reinit_every_secs = interval_secs,
+                        new_me_reinit_every_secs = refreshed_secs,
+                        "ME periodic reinit interval changed"
+                    );
+                    interval_secs = refreshed_secs;
+                    interval = Duration::from_secs(interval_secs);
                }
+                next_tick = tokio::time::Instant::now() + interval;
            }
-            Err(e) => {
-                warn!(addr = %w.addr, writer_id = w.id, error = %e, "ME rotation connect failed");
+            changed = config_rx.changed() => {
+                if changed.is_err() {
+                    warn!("ME periodic reinit task stopped: config channel closed");
+                    break;
+                }
+                let new_secs = config_rx
+                    .borrow()
+                    .general
+                    .effective_me_reinit_every_secs()
+                    .max(1);
+                if new_secs == interval_secs {
+                    continue;
+                }

+                if new_secs < interval_secs {
+                    info!(
+                        old_me_reinit_every_secs = interval_secs,
+                        new_me_reinit_every_secs = new_secs,
+                        "ME periodic reinit interval decreased, running immediate reinit"
+                    );
+                    interval_secs = new_secs;
+                    interval = Duration::from_secs(interval_secs);
+                    pool.zero_downtime_reinit_periodic(rng.as_ref()).await;
+                    next_tick = tokio::time::Instant::now() + interval;
+                } else {
+                    info!(
+                        old_me_reinit_every_secs = interval_secs,
+                        new_me_reinit_every_secs = new_secs,
+                        "ME periodic reinit interval increased"
+                    );
+                    interval_secs = new_secs;
+                    interval = Duration::from_secs(interval_secs);
+                    next_tick = tokio::time::Instant::now() + interval;
+                }
+            }
        }
    }
}
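// Wiring sketch for the reinit task (assumed names; the actual spawn site is
// not part of this diff). The config updater keeps the watch sender and the
// task reacts to live interval changes without restarting:
//
//     let (config_tx, config_rx) =
//         tokio::sync::watch::channel(Arc::new(initial_config));
//     tokio::spawn(me_rotation_task(pool.clone(), rng.clone(), config_rx));
//     // on reload elsewhere:
//     let _ = config_tx.send(Arc::new(new_config));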