mirror of
https://github.com/telemt/telemt.git
synced 2026-04-14 17:14:09 +03:00
Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
286662fc51 | ||
|
|
c5390baaf1 | ||
|
|
1cd1e96079 | ||
|
|
2b995c31b0 | ||
|
|
442320302d | ||
|
|
ac0dde567b | ||
|
|
b2fe9b78d8 | ||
|
|
f039ce1827 | ||
|
|
5f5a3e3fa0 | ||
|
|
f9e54ee739 | ||
|
|
d477d6ee29 | ||
|
|
1383dfcbb1 | ||
|
|
107a7cc758 | ||
|
|
4f3193fdaa | ||
|
|
d6be691c67 | ||
|
|
0b0be07a9c |
@@ -2,6 +2,8 @@
|
||||
|
||||
   [](https://t.me/telemtrs)
|
||||
|
||||
[🇷🇺 README на русском](https://github.com/telemt/telemt/blob/main/README.ru.md)
|
||||
|
||||
***Löst Probleme, bevor andere überhaupt wissen, dass sie existieren*** / ***It solves problems before others even realize they exist***
|
||||
|
||||
> [!NOTE]
|
||||
@@ -25,6 +27,7 @@ curl -fsSL https://raw.githubusercontent.com/telemt/telemt/main/install.sh | sh
|
||||
- [Quick Start Guide](docs/Quick_start/QUICK_START_GUIDE.en.md)
|
||||
- [Инструкция по быстрому запуску](docs/Quick_start/QUICK_START_GUIDE.ru.md)
|
||||
|
||||
## Features
|
||||
Our implementation of **TLS-fronting** is one of the most deeply debugged, focused, advanced and *almost* **"behaviorally consistent to real"**: we are confident we have it right - [see evidence on our validation and traces](docs/FAQ.en.md#recognizability-for-dpi-and-crawler)
|
||||
|
||||
Our ***Middle-End Pool*** is fastest by design in standard scenarios, compared to other implementations of connecting to the Middle-End Proxy: non dramatically, but usual
|
||||
|
||||
23
README.ru.md
23
README.ru.md
@@ -54,7 +54,6 @@ curl -fsSL https://raw.githubusercontent.com/telemt/telemt/main/install.sh | sh
|
||||
- [FAQ EN](docs/FAQ.en.md)
|
||||
|
||||
## Сборка
|
||||
|
||||
```bash
|
||||
# Клонируйте репозиторий
|
||||
git clone https://github.com/telemt/telemt
|
||||
@@ -63,7 +62,6 @@ cd telemt
|
||||
# Начните процесс сборки
|
||||
cargo build --release
|
||||
|
||||
# Устройства с небольшим объёмом оперативной памяти (1 ГБ, например NanoPi Neo3 / Raspberry Pi Zero 2):
|
||||
# В текущем release-профиле используется lto = "fat" для максимальной оптимизации (см. Cargo.toml).
|
||||
# На системах с малым объёмом RAM (~1 ГБ) можно переопределить это значение на "thin".
|
||||
|
||||
@@ -87,4 +85,25 @@ telemt config.toml
|
||||
- Безопасность памяти;
|
||||
- Асинхронная архитектура Tokio.
|
||||
|
||||
## Поддержать Telemt
|
||||
|
||||
Telemt — это бесплатное программное обеспечение с открытым исходным кодом, разработанное в свободное время.
|
||||
Если оно оказалось вам полезным, вы можете поддержать дальнейшую разработку.
|
||||
|
||||
Принимаемые криптовалюты (BTC, ETH, USDT, 350+ и другие):
|
||||
|
||||
<p align="center">
|
||||
<a href="https://nowpayments.io/donation?api_key=2bf1afd2-abc2-49f9-a012-f1e715b37223" target="_blank" rel="noreferrer noopener">
|
||||
<img src="https://nowpayments.io/images/embeds/donation-button-white.svg" alt="Cryptocurrency & Bitcoin donation button by NOWPayments" height="80">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
Monero (XMR) напрямую:
|
||||
|
||||
```
|
||||
8Bk4tZEYPQWSypeD2hrUXG2rKbAKF16GqEN942ZdAP5cFdSqW6h4DwkP5cJMAdszzuPeHeHZPTyjWWFwzeFdjuci3ktfMoB
|
||||
```
|
||||
|
||||
Все пожертвования пойдут на инфраструктуру, разработку и исследования.
|
||||
|
||||

|
||||
|
||||
13
config.toml
13
config.toml
@@ -32,13 +32,13 @@ show = "*"
|
||||
port = 443
|
||||
# proxy_protocol = false # Enable if behind HAProxy/nginx with PROXY protocol
|
||||
# metrics_port = 9090
|
||||
# metrics_listen = "0.0.0.0:9090" # Listen address for metrics (overrides metrics_port)
|
||||
# metrics_whitelist = ["127.0.0.1", "::1", "0.0.0.0/0"]
|
||||
# metrics_listen = "127.0.0.1:9090" # Listen address for metrics (overrides metrics_port)
|
||||
# metrics_whitelist = ["127.0.0.1/32", "::1/128"]
|
||||
|
||||
[server.api]
|
||||
enabled = true
|
||||
listen = "0.0.0.0:9091"
|
||||
whitelist = ["127.0.0.0/8"]
|
||||
listen = "127.0.0.1:9091"
|
||||
whitelist = ["127.0.0.1/32", "::1/128"]
|
||||
minimal_runtime_enabled = false
|
||||
minimal_runtime_cache_ttl_ms = 1000
|
||||
|
||||
@@ -48,9 +48,12 @@ ip = "0.0.0.0"
|
||||
|
||||
# === Anti-Censorship & Masking ===
|
||||
[censorship]
|
||||
# Fake-TLS / SNI masking domain used in generated ee-links.
|
||||
# Changing tls_domain invalidates previously generated TLS links.
|
||||
tls_domain = "petrovich.ru"
|
||||
|
||||
mask = true
|
||||
tls_emulation = true # Fetch real cert lengths and emulate TLS records
|
||||
tls_emulation = true # Fetch real cert lengths and emulate TLS records
|
||||
tls_front_dir = "tlsfront" # Cache directory for TLS emulation
|
||||
|
||||
[access.users]
|
||||
|
||||
@@ -9,12 +9,12 @@ API runtime is configured in `[server.api]`.
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
| --- | --- | --- | --- |
|
||||
| `enabled` | `bool` | `false` | Enables REST API listener. |
|
||||
| `listen` | `string` (`IP:PORT`) | `127.0.0.1:9091` | API bind address. |
|
||||
| `whitelist` | `CIDR[]` | `127.0.0.1/32, ::1/128` | Source IP allowlist. Empty list means allow all. |
|
||||
| `enabled` | `bool` | `true` | Enables REST API listener. |
|
||||
| `listen` | `string` (`IP:PORT`) | `0.0.0.0:9091` | API bind address. |
|
||||
| `whitelist` | `CIDR[]` | `127.0.0.0/8` | Source IP allowlist. Empty list means allow all. |
|
||||
| `auth_header` | `string` | `""` | Exact value for `Authorization` header. Empty disables header auth. |
|
||||
| `request_body_limit_bytes` | `usize` | `65536` | Maximum request body size. Must be `> 0`. |
|
||||
| `minimal_runtime_enabled` | `bool` | `false` | Enables runtime snapshot endpoints requiring ME pool read-lock aggregation. |
|
||||
| `minimal_runtime_enabled` | `bool` | `true` | Enables runtime snapshot endpoints requiring ME pool read-lock aggregation. |
|
||||
| `minimal_runtime_cache_ttl_ms` | `u64` | `1000` | Cache TTL for minimal snapshots. `0` disables cache; valid range is `[0, 60000]`. |
|
||||
| `runtime_edge_enabled` | `bool` | `false` | Enables runtime edge endpoints with cached aggregation payloads. |
|
||||
| `runtime_edge_cache_ttl_ms` | `u64` | `1000` | Cache TTL for runtime edge summary payloads. `0` disables cache. |
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -27,7 +27,8 @@ cargo build --release
|
||||
./target/release/telemt --version
|
||||
```
|
||||
|
||||
For low-RAM systems, this repository already uses `lto = "thin"` in release profile.
|
||||
For low-RAM systems, note that this repository currently uses `lto = "fat"` in release profile.
|
||||
On constrained builders, a local override to `lto = "thin"` may be more practical.
|
||||
|
||||
## 3. Install binary and config
|
||||
|
||||
|
||||
@@ -1,9 +1,36 @@
|
||||
# Installation Options
|
||||
There are three options for installing Telemt:
|
||||
- [Automated installation using a script](#very-quick-start).
|
||||
- [Manual installation of Telemt as a service](#telemt-via-systemd).
|
||||
- [Installation using Docker Compose](#telemt-via-docker-compose).
|
||||
|
||||
# Very quick start
|
||||
|
||||
### One-command installation / update on re-run
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/telemt/telemt/main/install.sh | sh
|
||||
```
|
||||
|
||||
After starting, the script will prompt for:
|
||||
- Your language (1 - English, 2 - Russian);
|
||||
- Your TLS domain (press Enter for petrovich.ru).
|
||||
|
||||
The script checks if the port (default **443**) is free. If the port is already in use, installation will fail. You need to free up the port or use the **-p** flag with a different port to retry the installation.
|
||||
|
||||
To modify the script’s startup parameters, you can use the following flags:
|
||||
- **-d, --domain** - TLS domain;
|
||||
- **-p, --port** - server port (1–65535);
|
||||
- **-s, --secret** - 32 hex secret;
|
||||
- **-a, --ad-tag** - ad_tag;
|
||||
- **-l, --lan**g - language (1/en or 2/ru);
|
||||
|
||||
Providing all options skips interactive prompts.
|
||||
|
||||
After completion, the script will provide a link for client connections:
|
||||
```bash
|
||||
tg://proxy?server=IP&port=PORT&secret=SECRET
|
||||
```
|
||||
|
||||
### Installing a specific version
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/telemt/telemt/main/install.sh | sh -s -- 3.3.39
|
||||
@@ -110,15 +137,15 @@ show = "*"
|
||||
# === Server Binding ===
|
||||
[server]
|
||||
port = 443
|
||||
# proxy_protocol = false # Enable if behind HAProxy/nginx with PROXY protocol
|
||||
# proxy_protocol = false # Enable if behind HAProxy/nginx with PROXY protocol
|
||||
# metrics_port = 9090
|
||||
# metrics_listen = "0.0.0.0:9090" # Listen address for metrics (overrides metrics_port)
|
||||
# metrics_whitelist = ["127.0.0.1", "::1", "0.0.0.0/0"]
|
||||
# metrics_listen = "127.0.0.1:9090" # Listen address for metrics (overrides metrics_port)
|
||||
# metrics_whitelist = ["127.0.0.1/32", "::1/128"]
|
||||
|
||||
[server.api]
|
||||
enabled = true
|
||||
listen = "0.0.0.0:9091"
|
||||
whitelist = ["127.0.0.0/8"]
|
||||
listen = "127.0.0.1:9091"
|
||||
whitelist = ["127.0.0.1/32", "::1/128"]
|
||||
minimal_runtime_enabled = false
|
||||
minimal_runtime_cache_ttl_ms = 1000
|
||||
|
||||
@@ -128,9 +155,9 @@ ip = "0.0.0.0"
|
||||
|
||||
# === Anti-Censorship & Masking ===
|
||||
[censorship]
|
||||
tls_domain = "petrovich.ru"
|
||||
tls_domain = "petrovich.ru" # Fake-TLS / SNI masking domain used in generated ee-links
|
||||
mask = true
|
||||
tls_emulation = true # Fetch real cert lengths and emulate TLS records
|
||||
tls_emulation = true # Fetch real cert lengths and emulate TLS records
|
||||
tls_front_dir = "tlsfront" # Cache directory for TLS emulation
|
||||
|
||||
[access.users]
|
||||
@@ -141,9 +168,9 @@ hello = "00000000000000000000000000000000"
|
||||
then Ctrl+S -> Ctrl+X to save
|
||||
|
||||
> [!WARNING]
|
||||
> Replace the value of the hello parameter with the value you obtained in step 0.
|
||||
> Additionally, change the value of the tls_domain parameter to a different website.
|
||||
> Changing the tls_domain parameter will break all links that use the old domain!
|
||||
> Replace the value of the `hello` parameter with the value you obtained in step 0.
|
||||
> Additionally, change the value of the `tls_domain` parameter to a different website.
|
||||
> Changing the `tls_domain` parameter will break all links that use the old domain!
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -1,9 +1,35 @@
|
||||
# Варианты установки
|
||||
Имеется три варианта установки Telemt:
|
||||
- [Автоматизированная установка с помощью скрипта](#очень-быстрый-старт).
|
||||
- [Ручная установка Telemt в качестве службы](#telemt-через-systemd-вручную).
|
||||
- [Установка через Docker Compose](#telemt-через-docker-compose).
|
||||
|
||||
# Очень быстрый старт
|
||||
|
||||
### Установка одной командой / обновление при повторном запуске
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/telemt/telemt/main/install.sh | sh
|
||||
```
|
||||
После запуска скрипт запросит:
|
||||
- ваш язык (1 - English, 2 - Русский);
|
||||
- ваш TLS-домен (нажмите Enter для petrovich.ru).
|
||||
|
||||
Во время установки скрипт проверяет, свободен ли порт (по умолчанию **443**). Если порт занят другим процессом - установка завершится с ошибкой. Для повторной установки необходимо освободить порт или указать другой через флаг **-p**.
|
||||
|
||||
Для изменения параметров запуска скрипта можно использовать следующие флаги:
|
||||
- **-d, --domain** - TLS-домен;
|
||||
- **-p, --port** - порт (1–65535);
|
||||
- **-s, --secret** - секрет (32 hex символа);
|
||||
- **-a, --ad-tag** - ad_tag;
|
||||
- **-l, --lang** - язык (1/en или 2/ru).
|
||||
|
||||
Если заданы флаги для языка и домена, интерактивных вопросов не будет.
|
||||
|
||||
После завершения установки скрипт выдаст ссылку для подключения клиентов:
|
||||
```bash
|
||||
tg://proxy?server=IP&port=PORT&secret=SECRET
|
||||
```
|
||||
|
||||
### Установка нужной версии
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/telemt/telemt/main/install.sh | sh -s -- 3.3.39
|
||||
@@ -103,22 +129,22 @@ tls = true
|
||||
[general.links]
|
||||
show = "*"
|
||||
# show = ["alice", "bob"] # Показывать ссылки только для alice и bob
|
||||
# show = "*" # Показывать ссылки для всех пользователей
|
||||
# public_host = "proxy.example.com" # Хост (IP-адрес или домен) для ссылок tg://
|
||||
# public_port = 443 # Порт для ссылок tg:// (по умолчанию: server.port)
|
||||
# show = "*" # Показывать ссылки для всех пользователей
|
||||
# public_host = "proxy.example.com" # Хост (IP-адрес или домен) для ссылок tg://
|
||||
# public_port = 443 # Порт для ссылок tg:// (по умолчанию: server.port)
|
||||
|
||||
# === Привязка сервера ===
|
||||
[server]
|
||||
port = 443
|
||||
# proxy_protocol = false # Включите, если сервер находится за HAProxy/nginx с протоколом PROXY
|
||||
# proxy_protocol = false # Включите, если сервер находится за HAProxy/nginx с протоколом PROXY
|
||||
# metrics_port = 9090
|
||||
# metrics_listen = "0.0.0.0:9090" # Адрес прослушивания для метрик (переопределяет metrics_port)
|
||||
# metrics_whitelist = ["127.0.0.1", "::1", "0.0.0.0/0"]
|
||||
# metrics_listen = "127.0.0.1:9090" # Адрес прослушивания для метрик (переопределяет metrics_port)
|
||||
# metrics_whitelist = ["127.0.0.1/32", "::1/128"]
|
||||
|
||||
[server.api]
|
||||
enabled = true
|
||||
listen = "0.0.0.0:9091"
|
||||
whitelist = ["127.0.0.0/8"]
|
||||
listen = "127.0.0.1:9091"
|
||||
whitelist = ["127.0.0.1/32", "::1/128"]
|
||||
minimal_runtime_enabled = false
|
||||
minimal_runtime_cache_ttl_ms = 1000
|
||||
|
||||
@@ -128,9 +154,9 @@ ip = "0.0.0.0"
|
||||
|
||||
# === Обход блокировок и маскировка ===
|
||||
[censorship]
|
||||
tls_domain = "petrovich.ru"
|
||||
tls_domain = "petrovich.ru" # Домен Fake-TLS / SNI, который будет использоваться в сгенерированных ee-ссылках
|
||||
mask = true
|
||||
tls_emulation = true # Получить реальную длину сертификата и эмулировать запись TLS
|
||||
tls_emulation = true # Получить реальную длину сертификата и эмулировать запись TLS
|
||||
tls_front_dir = "tlsfront" # Директория кэша для эмуляции TLS
|
||||
|
||||
[access.users]
|
||||
@@ -141,9 +167,9 @@ hello = "00000000000000000000000000000000"
|
||||
Затем нажмите Ctrl+S -> Ctrl+X, чтобы сохранить
|
||||
|
||||
> [!WARNING]
|
||||
> Замените значение параметра hello на значение, которое вы получили в пункте 0.
|
||||
> Так же замените значение параметра tls_domain на другой сайт.
|
||||
> Изменение параметра tls_domain сделает нерабочими все ссылки, использующие старый домен!
|
||||
> Замените значение параметра `hello` на значение, которое вы получили в пункте 0.
|
||||
> Так же замените значение параметра `tls_domain` на другой сайт.
|
||||
> Изменение параметра `tls_domain` сделает нерабочими все ссылки, использующие старый домен!
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -615,26 +615,6 @@ pub(crate) fn default_mask_relay_max_bytes() -> usize {
|
||||
32 * 1024
|
||||
}
|
||||
|
||||
#[cfg(not(test))]
|
||||
pub(crate) fn default_mask_relay_timeout_ms() -> u64 {
|
||||
60_000
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn default_mask_relay_timeout_ms() -> u64 {
|
||||
200
|
||||
}
|
||||
|
||||
#[cfg(not(test))]
|
||||
pub(crate) fn default_mask_relay_idle_timeout_ms() -> u64 {
|
||||
5_000
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn default_mask_relay_idle_timeout_ms() -> u64 {
|
||||
100
|
||||
}
|
||||
|
||||
pub(crate) fn default_mask_classifier_prefetch_timeout_ms() -> u64 {
|
||||
5
|
||||
}
|
||||
|
||||
@@ -611,8 +611,6 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
|
||||
|| old.censorship.mask_shape_above_cap_blur_max_bytes
|
||||
!= new.censorship.mask_shape_above_cap_blur_max_bytes
|
||||
|| old.censorship.mask_relay_max_bytes != new.censorship.mask_relay_max_bytes
|
||||
|| old.censorship.mask_relay_timeout_ms != new.censorship.mask_relay_timeout_ms
|
||||
|| old.censorship.mask_relay_idle_timeout_ms != new.censorship.mask_relay_idle_timeout_ms
|
||||
|| old.censorship.mask_classifier_prefetch_timeout_ms
|
||||
!= new.censorship.mask_classifier_prefetch_timeout_ms
|
||||
|| old.censorship.mask_timing_normalization_enabled
|
||||
|
||||
@@ -1710,19 +1710,6 @@ pub struct AntiCensorshipConfig {
|
||||
#[serde(default = "default_mask_relay_max_bytes")]
|
||||
pub mask_relay_max_bytes: usize,
|
||||
|
||||
/// Wall-clock cap for the full masking relay on non-MTProto fallback paths.
|
||||
/// Raise when the mask target is a long-lived service (e.g. WebSocket).
|
||||
/// Default: 60 000 ms (60 s).
|
||||
#[serde(default = "default_mask_relay_timeout_ms")]
|
||||
pub mask_relay_timeout_ms: u64,
|
||||
|
||||
/// Per-read idle timeout on masking relay and drain paths.
|
||||
/// Limits resource consumption by slow-loris attacks and port scanners.
|
||||
/// A read call stalling beyond this is treated as an abandoned connection.
|
||||
/// Default: 5 000 ms (5 s).
|
||||
#[serde(default = "default_mask_relay_idle_timeout_ms")]
|
||||
pub mask_relay_idle_timeout_ms: u64,
|
||||
|
||||
/// Prefetch timeout (ms) for extending fragmented masking classifier window.
|
||||
#[serde(default = "default_mask_classifier_prefetch_timeout_ms")]
|
||||
pub mask_classifier_prefetch_timeout_ms: u64,
|
||||
@@ -1768,8 +1755,6 @@ impl Default for AntiCensorshipConfig {
|
||||
mask_shape_above_cap_blur: default_mask_shape_above_cap_blur(),
|
||||
mask_shape_above_cap_blur_max_bytes: default_mask_shape_above_cap_blur_max_bytes(),
|
||||
mask_relay_max_bytes: default_mask_relay_max_bytes(),
|
||||
mask_relay_timeout_ms: default_mask_relay_timeout_ms(),
|
||||
mask_relay_idle_timeout_ms: default_mask_relay_idle_timeout_ms(),
|
||||
mask_classifier_prefetch_timeout_ms: default_mask_classifier_prefetch_timeout_ms(),
|
||||
mask_timing_normalization_enabled: default_mask_timing_normalization_enabled(),
|
||||
mask_timing_normalization_floor_ms: default_mask_timing_normalization_floor_ms(),
|
||||
|
||||
@@ -28,10 +28,14 @@ use tracing::debug;
|
||||
const MASK_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
#[cfg(test)]
|
||||
const MASK_TIMEOUT: Duration = Duration::from_millis(50);
|
||||
/// Maximum duration for the entire masking relay under test (replaced by config at runtime).
|
||||
/// Maximum duration for the entire masking relay.
|
||||
/// Limits resource consumption from slow-loris attacks and port scanners.
|
||||
#[cfg(not(test))]
|
||||
const MASK_RELAY_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
#[cfg(test)]
|
||||
const MASK_RELAY_TIMEOUT: Duration = Duration::from_millis(200);
|
||||
/// Per-read idle timeout for masking relay and drain paths under test (replaced by config at runtime).
|
||||
#[cfg(not(test))]
|
||||
const MASK_RELAY_IDLE_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
#[cfg(test)]
|
||||
const MASK_RELAY_IDLE_TIMEOUT: Duration = Duration::from_millis(100);
|
||||
const MASK_BUFFER_SIZE: usize = 8192;
|
||||
@@ -51,7 +55,6 @@ async fn copy_with_idle_timeout<R, W>(
|
||||
writer: &mut W,
|
||||
byte_cap: usize,
|
||||
shutdown_on_eof: bool,
|
||||
idle_timeout: Duration,
|
||||
) -> CopyOutcome
|
||||
where
|
||||
R: AsyncRead + Unpin,
|
||||
@@ -75,7 +78,7 @@ where
|
||||
}
|
||||
|
||||
let read_len = remaining_budget.min(MASK_BUFFER_SIZE);
|
||||
let read_res = timeout(idle_timeout, reader.read(&mut buf[..read_len])).await;
|
||||
let read_res = timeout(MASK_RELAY_IDLE_TIMEOUT, reader.read(&mut buf[..read_len])).await;
|
||||
let n = match read_res {
|
||||
Ok(Ok(n)) => n,
|
||||
Ok(Err(_)) | Err(_) => break,
|
||||
@@ -83,13 +86,13 @@ where
|
||||
if n == 0 {
|
||||
ended_by_eof = true;
|
||||
if shutdown_on_eof {
|
||||
let _ = timeout(idle_timeout, writer.shutdown()).await;
|
||||
let _ = timeout(MASK_RELAY_IDLE_TIMEOUT, writer.shutdown()).await;
|
||||
}
|
||||
break;
|
||||
}
|
||||
total = total.saturating_add(n);
|
||||
|
||||
let write_res = timeout(idle_timeout, writer.write_all(&buf[..n])).await;
|
||||
let write_res = timeout(MASK_RELAY_IDLE_TIMEOUT, writer.write_all(&buf[..n])).await;
|
||||
match write_res {
|
||||
Ok(Ok(())) => {}
|
||||
Ok(Err(_)) | Err(_) => break,
|
||||
@@ -227,20 +230,13 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
async fn consume_client_data_with_timeout_and_cap<R>(
|
||||
reader: R,
|
||||
byte_cap: usize,
|
||||
relay_timeout: Duration,
|
||||
idle_timeout: Duration,
|
||||
) where
|
||||
async fn consume_client_data_with_timeout_and_cap<R>(reader: R, byte_cap: usize)
|
||||
where
|
||||
R: AsyncRead + Unpin,
|
||||
{
|
||||
if timeout(
|
||||
relay_timeout,
|
||||
consume_client_data(reader, byte_cap, idle_timeout),
|
||||
)
|
||||
.await
|
||||
.is_err()
|
||||
if timeout(MASK_RELAY_TIMEOUT, consume_client_data(reader, byte_cap))
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
debug!("Timed out while consuming client data on masking fallback path");
|
||||
}
|
||||
@@ -643,18 +639,10 @@ pub async fn handle_bad_client<R, W>(
|
||||
beobachten.record(client_type, peer.ip(), ttl);
|
||||
}
|
||||
|
||||
let relay_timeout = Duration::from_millis(config.censorship.mask_relay_timeout_ms);
|
||||
let idle_timeout = Duration::from_millis(config.censorship.mask_relay_idle_timeout_ms);
|
||||
|
||||
if !config.censorship.mask {
|
||||
// Masking disabled, just consume data
|
||||
consume_client_data_with_timeout_and_cap(
|
||||
reader,
|
||||
config.censorship.mask_relay_max_bytes,
|
||||
relay_timeout,
|
||||
idle_timeout,
|
||||
)
|
||||
.await;
|
||||
consume_client_data_with_timeout_and_cap(reader, config.censorship.mask_relay_max_bytes)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -686,7 +674,7 @@ pub async fn handle_bad_client<R, W>(
|
||||
return;
|
||||
}
|
||||
if timeout(
|
||||
relay_timeout,
|
||||
MASK_RELAY_TIMEOUT,
|
||||
relay_to_mask(
|
||||
reader,
|
||||
writer,
|
||||
@@ -700,7 +688,6 @@ pub async fn handle_bad_client<R, W>(
|
||||
config.censorship.mask_shape_above_cap_blur_max_bytes,
|
||||
config.censorship.mask_shape_hardening_aggressive_mode,
|
||||
config.censorship.mask_relay_max_bytes,
|
||||
idle_timeout,
|
||||
),
|
||||
)
|
||||
.await
|
||||
@@ -716,8 +703,6 @@ pub async fn handle_bad_client<R, W>(
|
||||
consume_client_data_with_timeout_and_cap(
|
||||
reader,
|
||||
config.censorship.mask_relay_max_bytes,
|
||||
relay_timeout,
|
||||
idle_timeout,
|
||||
)
|
||||
.await;
|
||||
wait_mask_outcome_budget(outcome_started, config).await;
|
||||
@@ -727,8 +712,6 @@ pub async fn handle_bad_client<R, W>(
|
||||
consume_client_data_with_timeout_and_cap(
|
||||
reader,
|
||||
config.censorship.mask_relay_max_bytes,
|
||||
relay_timeout,
|
||||
idle_timeout,
|
||||
)
|
||||
.await;
|
||||
wait_mask_outcome_budget(outcome_started, config).await;
|
||||
@@ -759,13 +742,8 @@ pub async fn handle_bad_client<R, W>(
|
||||
local = %local_addr,
|
||||
"Mask target resolves to local listener; refusing self-referential masking fallback"
|
||||
);
|
||||
consume_client_data_with_timeout_and_cap(
|
||||
reader,
|
||||
config.censorship.mask_relay_max_bytes,
|
||||
relay_timeout,
|
||||
idle_timeout,
|
||||
)
|
||||
.await;
|
||||
consume_client_data_with_timeout_and_cap(reader, config.censorship.mask_relay_max_bytes)
|
||||
.await;
|
||||
wait_mask_outcome_budget(outcome_started, config).await;
|
||||
return;
|
||||
}
|
||||
@@ -799,7 +777,7 @@ pub async fn handle_bad_client<R, W>(
|
||||
return;
|
||||
}
|
||||
if timeout(
|
||||
relay_timeout,
|
||||
MASK_RELAY_TIMEOUT,
|
||||
relay_to_mask(
|
||||
reader,
|
||||
writer,
|
||||
@@ -813,7 +791,6 @@ pub async fn handle_bad_client<R, W>(
|
||||
config.censorship.mask_shape_above_cap_blur_max_bytes,
|
||||
config.censorship.mask_shape_hardening_aggressive_mode,
|
||||
config.censorship.mask_relay_max_bytes,
|
||||
idle_timeout,
|
||||
),
|
||||
)
|
||||
.await
|
||||
@@ -829,8 +806,6 @@ pub async fn handle_bad_client<R, W>(
|
||||
consume_client_data_with_timeout_and_cap(
|
||||
reader,
|
||||
config.censorship.mask_relay_max_bytes,
|
||||
relay_timeout,
|
||||
idle_timeout,
|
||||
)
|
||||
.await;
|
||||
wait_mask_outcome_budget(outcome_started, config).await;
|
||||
@@ -840,8 +815,6 @@ pub async fn handle_bad_client<R, W>(
|
||||
consume_client_data_with_timeout_and_cap(
|
||||
reader,
|
||||
config.censorship.mask_relay_max_bytes,
|
||||
relay_timeout,
|
||||
idle_timeout,
|
||||
)
|
||||
.await;
|
||||
wait_mask_outcome_budget(outcome_started, config).await;
|
||||
@@ -863,7 +836,6 @@ async fn relay_to_mask<R, W, MR, MW>(
|
||||
shape_above_cap_blur_max_bytes: usize,
|
||||
shape_hardening_aggressive_mode: bool,
|
||||
mask_relay_max_bytes: usize,
|
||||
idle_timeout: Duration,
|
||||
) where
|
||||
R: AsyncRead + Unpin + Send + 'static,
|
||||
W: AsyncWrite + Unpin + Send + 'static,
|
||||
@@ -885,19 +857,11 @@ async fn relay_to_mask<R, W, MR, MW>(
|
||||
&mut mask_write,
|
||||
mask_relay_max_bytes,
|
||||
!shape_hardening_enabled,
|
||||
idle_timeout,
|
||||
)
|
||||
.await
|
||||
},
|
||||
async {
|
||||
copy_with_idle_timeout(
|
||||
&mut mask_read,
|
||||
&mut writer,
|
||||
mask_relay_max_bytes,
|
||||
true,
|
||||
idle_timeout,
|
||||
)
|
||||
.await
|
||||
copy_with_idle_timeout(&mut mask_read, &mut writer, mask_relay_max_bytes, true).await
|
||||
}
|
||||
);
|
||||
|
||||
@@ -925,11 +889,7 @@ async fn relay_to_mask<R, W, MR, MW>(
|
||||
}
|
||||
|
||||
/// Just consume all data from client without responding.
|
||||
async fn consume_client_data<R: AsyncRead + Unpin>(
|
||||
mut reader: R,
|
||||
byte_cap: usize,
|
||||
idle_timeout: Duration,
|
||||
) {
|
||||
async fn consume_client_data<R: AsyncRead + Unpin>(mut reader: R, byte_cap: usize) {
|
||||
if byte_cap == 0 {
|
||||
return;
|
||||
}
|
||||
@@ -945,7 +905,7 @@ async fn consume_client_data<R: AsyncRead + Unpin>(
|
||||
}
|
||||
|
||||
let read_len = remaining_budget.min(MASK_BUFFER_SIZE);
|
||||
let n = match timeout(idle_timeout, reader.read(&mut buf[..read_len])).await {
|
||||
let n = match timeout(MASK_RELAY_IDLE_TIMEOUT, reader.read(&mut buf[..read_len])).await {
|
||||
Ok(Ok(n)) => n,
|
||||
Ok(Err(_)) | Err(_) => break,
|
||||
};
|
||||
|
||||
@@ -47,7 +47,7 @@ async fn consume_client_data_stops_after_byte_cap_without_eof() {
|
||||
};
|
||||
let cap = 10_000usize;
|
||||
|
||||
consume_client_data(reader, cap, MASK_RELAY_IDLE_TIMEOUT).await;
|
||||
consume_client_data(reader, cap).await;
|
||||
|
||||
let total = produced.load(Ordering::Relaxed);
|
||||
assert!(
|
||||
|
||||
@@ -31,7 +31,7 @@ async fn stalling_client_terminates_at_idle_not_relay_timeout() {
|
||||
|
||||
let result = tokio::time::timeout(
|
||||
MASK_RELAY_TIMEOUT,
|
||||
consume_client_data(reader, MASK_BUFFER_SIZE * 4, MASK_RELAY_IDLE_TIMEOUT),
|
||||
consume_client_data(reader, MASK_BUFFER_SIZE * 4),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -57,12 +57,9 @@ async fn fast_reader_drains_to_eof() {
|
||||
let data = vec![0xAAu8; 32 * 1024];
|
||||
let reader = std::io::Cursor::new(data);
|
||||
|
||||
tokio::time::timeout(
|
||||
MASK_RELAY_TIMEOUT,
|
||||
consume_client_data(reader, usize::MAX, MASK_RELAY_IDLE_TIMEOUT),
|
||||
)
|
||||
.await
|
||||
.expect("consume_client_data did not complete for fast EOF reader");
|
||||
tokio::time::timeout(MASK_RELAY_TIMEOUT, consume_client_data(reader, usize::MAX))
|
||||
.await
|
||||
.expect("consume_client_data did not complete for fast EOF reader");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -84,7 +81,7 @@ async fn io_error_terminates_cleanly() {
|
||||
|
||||
tokio::time::timeout(
|
||||
MASK_RELAY_TIMEOUT,
|
||||
consume_client_data(ErrReader, usize::MAX, MASK_RELAY_IDLE_TIMEOUT),
|
||||
consume_client_data(ErrReader, usize::MAX),
|
||||
)
|
||||
.await
|
||||
.expect("consume_client_data did not return on I/O error");
|
||||
|
||||
@@ -34,11 +34,7 @@ async fn consume_stall_stress_finishes_within_idle_budget() {
|
||||
set.spawn(async {
|
||||
tokio::time::timeout(
|
||||
MASK_RELAY_TIMEOUT,
|
||||
consume_client_data(
|
||||
OneByteThenStall { sent: false },
|
||||
usize::MAX,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
),
|
||||
consume_client_data(OneByteThenStall { sent: false }, usize::MAX),
|
||||
)
|
||||
.await
|
||||
.expect("consume_client_data exceeded relay timeout under stall load");
|
||||
@@ -60,7 +56,7 @@ async fn consume_stall_stress_finishes_within_idle_budget() {
|
||||
#[tokio::test]
|
||||
async fn consume_zero_cap_returns_immediately() {
|
||||
let started = Instant::now();
|
||||
consume_client_data(tokio::io::empty(), 0, MASK_RELAY_IDLE_TIMEOUT).await;
|
||||
consume_client_data(tokio::io::empty(), 0).await;
|
||||
assert!(
|
||||
started.elapsed() < MASK_RELAY_IDLE_TIMEOUT,
|
||||
"zero byte cap must return immediately"
|
||||
|
||||
@@ -127,14 +127,7 @@ async fn positive_copy_with_production_cap_stops_exactly_at_budget() {
|
||||
let mut reader = FinitePatternReader::new(PROD_CAP_BYTES + (256 * 1024), 4096, read_calls);
|
||||
let mut writer = CountingWriter::default();
|
||||
|
||||
let outcome = copy_with_idle_timeout(
|
||||
&mut reader,
|
||||
&mut writer,
|
||||
PROD_CAP_BYTES,
|
||||
true,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
let outcome = copy_with_idle_timeout(&mut reader, &mut writer, PROD_CAP_BYTES, true).await;
|
||||
|
||||
assert_eq!(
|
||||
outcome.total, PROD_CAP_BYTES,
|
||||
@@ -152,13 +145,7 @@ async fn negative_consume_with_zero_cap_performs_no_reads() {
|
||||
let read_calls = Arc::new(AtomicUsize::new(0));
|
||||
let reader = FinitePatternReader::new(1024, 64, Arc::clone(&read_calls));
|
||||
|
||||
consume_client_data_with_timeout_and_cap(
|
||||
reader,
|
||||
0,
|
||||
MASK_RELAY_TIMEOUT,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
consume_client_data_with_timeout_and_cap(reader, 0).await;
|
||||
|
||||
assert_eq!(
|
||||
read_calls.load(Ordering::Relaxed),
|
||||
@@ -174,14 +161,7 @@ async fn edge_copy_below_cap_reports_eof_without_overread() {
|
||||
let mut reader = FinitePatternReader::new(payload, 3072, read_calls);
|
||||
let mut writer = CountingWriter::default();
|
||||
|
||||
let outcome = copy_with_idle_timeout(
|
||||
&mut reader,
|
||||
&mut writer,
|
||||
PROD_CAP_BYTES,
|
||||
true,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
let outcome = copy_with_idle_timeout(&mut reader, &mut writer, PROD_CAP_BYTES, true).await;
|
||||
|
||||
assert_eq!(outcome.total, payload);
|
||||
assert_eq!(writer.written, payload);
|
||||
@@ -195,13 +175,7 @@ async fn edge_copy_below_cap_reports_eof_without_overread() {
|
||||
async fn adversarial_blackhat_never_ready_reader_is_bounded_by_timeout_guards() {
|
||||
let started = Instant::now();
|
||||
|
||||
consume_client_data_with_timeout_and_cap(
|
||||
NeverReadyReader,
|
||||
PROD_CAP_BYTES,
|
||||
MASK_RELAY_TIMEOUT,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
consume_client_data_with_timeout_and_cap(NeverReadyReader, PROD_CAP_BYTES).await;
|
||||
|
||||
assert!(
|
||||
started.elapsed() < Duration::from_millis(350),
|
||||
@@ -216,12 +190,7 @@ async fn integration_consume_path_honors_production_cap_for_large_payload() {
|
||||
|
||||
let bounded = timeout(
|
||||
Duration::from_millis(350),
|
||||
consume_client_data_with_timeout_and_cap(
|
||||
reader,
|
||||
PROD_CAP_BYTES,
|
||||
MASK_RELAY_TIMEOUT,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
),
|
||||
consume_client_data_with_timeout_and_cap(reader, PROD_CAP_BYTES),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -237,13 +206,7 @@ async fn adversarial_consume_path_never_reads_beyond_declared_byte_cap() {
|
||||
let total_read = Arc::new(AtomicUsize::new(0));
|
||||
let reader = BudgetProbeReader::new(256 * 1024, Arc::clone(&total_read));
|
||||
|
||||
consume_client_data_with_timeout_and_cap(
|
||||
reader,
|
||||
byte_cap,
|
||||
MASK_RELAY_TIMEOUT,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
consume_client_data_with_timeout_and_cap(reader, byte_cap).await;
|
||||
|
||||
assert!(
|
||||
total_read.load(Ordering::Relaxed) <= byte_cap,
|
||||
@@ -268,9 +231,7 @@ async fn light_fuzz_cap_and_payload_matrix_preserves_min_budget_invariant() {
|
||||
let mut reader = FinitePatternReader::new(payload, chunk, read_calls);
|
||||
let mut writer = CountingWriter::default();
|
||||
|
||||
let outcome =
|
||||
copy_with_idle_timeout(&mut reader, &mut writer, cap, true, MASK_RELAY_IDLE_TIMEOUT)
|
||||
.await;
|
||||
let outcome = copy_with_idle_timeout(&mut reader, &mut writer, cap, true).await;
|
||||
let expected = payload.min(cap);
|
||||
|
||||
assert_eq!(
|
||||
@@ -300,14 +261,7 @@ async fn stress_parallel_copy_tasks_with_production_cap_complete_without_leaks()
|
||||
read_calls,
|
||||
);
|
||||
let mut writer = CountingWriter::default();
|
||||
copy_with_idle_timeout(
|
||||
&mut reader,
|
||||
&mut writer,
|
||||
PROD_CAP_BYTES,
|
||||
true,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
)
|
||||
.await
|
||||
copy_with_idle_timeout(&mut reader, &mut writer, PROD_CAP_BYTES, true).await
|
||||
}));
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,6 @@ async fn relay_to_mask_enforces_masking_session_byte_cap() {
|
||||
0,
|
||||
false,
|
||||
32 * 1024,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
@@ -82,7 +81,6 @@ async fn relay_to_mask_propagates_client_half_close_without_waiting_for_other_di
|
||||
0,
|
||||
false,
|
||||
32 * 1024,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
|
||||
@@ -1377,7 +1377,6 @@ async fn relay_to_mask_keeps_backend_to_client_flow_when_client_to_backend_stall
|
||||
0,
|
||||
false,
|
||||
5 * 1024 * 1024,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
@@ -1509,7 +1508,6 @@ async fn relay_to_mask_timeout_cancels_and_drops_all_io_endpoints() {
|
||||
0,
|
||||
false,
|
||||
5 * 1024 * 1024,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -228,7 +228,6 @@ async fn relay_path_idle_timeout_eviction_remains_effective() {
|
||||
0,
|
||||
false,
|
||||
5 * 1024 * 1024,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
|
||||
@@ -44,7 +44,6 @@ async fn run_relay_case(
|
||||
above_cap_blur_max_bytes,
|
||||
false,
|
||||
5 * 1024 * 1024,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
|
||||
@@ -89,7 +89,6 @@ async fn relay_to_mask_applies_cap_clamped_padding_for_non_power_of_two_cap() {
|
||||
0,
|
||||
false,
|
||||
5 * 1024 * 1024,
|
||||
MASK_RELAY_IDLE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
|
||||
@@ -67,8 +67,10 @@ struct FamilyReconnectOutcome {
|
||||
key: (i32, IpFamily),
|
||||
dc: i32,
|
||||
family: IpFamily,
|
||||
alive: usize,
|
||||
required: usize,
|
||||
endpoint_count: usize,
|
||||
restored: usize,
|
||||
}
|
||||
|
||||
pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_connections: usize) {
|
||||
@@ -80,6 +82,8 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
|
||||
let mut single_endpoint_outage: HashSet<(i32, IpFamily)> = HashSet::new();
|
||||
let mut shadow_rotate_deadline: HashMap<(i32, IpFamily), Instant> = HashMap::new();
|
||||
let mut idle_refresh_next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new();
|
||||
let mut adaptive_idle_since: HashMap<(i32, IpFamily), Instant> = HashMap::new();
|
||||
let mut adaptive_recover_until: HashMap<(i32, IpFamily), Instant> = HashMap::new();
|
||||
let mut floor_warn_next_allowed: HashMap<(i32, IpFamily), Instant> = HashMap::new();
|
||||
let mut drain_warn_next_allowed: HashMap<u64, Instant> = HashMap::new();
|
||||
let mut degraded_interval = true;
|
||||
@@ -105,6 +109,8 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
|
||||
&mut single_endpoint_outage,
|
||||
&mut shadow_rotate_deadline,
|
||||
&mut idle_refresh_next_attempt,
|
||||
&mut adaptive_idle_since,
|
||||
&mut adaptive_recover_until,
|
||||
&mut floor_warn_next_allowed,
|
||||
)
|
||||
.await;
|
||||
@@ -120,6 +126,8 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
|
||||
&mut single_endpoint_outage,
|
||||
&mut shadow_rotate_deadline,
|
||||
&mut idle_refresh_next_attempt,
|
||||
&mut adaptive_idle_since,
|
||||
&mut adaptive_recover_until,
|
||||
&mut floor_warn_next_allowed,
|
||||
)
|
||||
.await;
|
||||
@@ -352,6 +360,8 @@ async fn check_family(
|
||||
single_endpoint_outage: &mut HashSet<(i32, IpFamily)>,
|
||||
shadow_rotate_deadline: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
idle_refresh_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
floor_warn_next_allowed: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
) -> bool {
|
||||
let enabled = match family {
|
||||
@@ -383,7 +393,10 @@ async fn check_family(
|
||||
let reconnect_budget = health_reconnect_budget(pool, dc_endpoints.len());
|
||||
let reconnect_sem = Arc::new(Semaphore::new(reconnect_budget));
|
||||
|
||||
if pool.floor_mode() == MeFloorMode::Static {}
|
||||
if pool.floor_mode() == MeFloorMode::Static {
|
||||
adaptive_idle_since.clear();
|
||||
adaptive_recover_until.clear();
|
||||
}
|
||||
|
||||
let mut live_addr_counts = HashMap::<(i32, SocketAddr), usize>::new();
|
||||
let mut live_writer_ids_by_addr = HashMap::<(i32, SocketAddr), Vec<u64>>::new();
|
||||
@@ -422,6 +435,8 @@ async fn check_family(
|
||||
&live_addr_counts,
|
||||
&live_writer_ids_by_addr,
|
||||
&bound_clients_by_writer,
|
||||
adaptive_idle_since,
|
||||
adaptive_recover_until,
|
||||
)
|
||||
.await;
|
||||
pool.set_adaptive_floor_runtime_caps(
|
||||
@@ -488,6 +503,8 @@ async fn check_family(
|
||||
outage_next_attempt.remove(&key);
|
||||
shadow_rotate_deadline.remove(&key);
|
||||
idle_refresh_next_attempt.remove(&key);
|
||||
adaptive_idle_since.remove(&key);
|
||||
adaptive_recover_until.remove(&key);
|
||||
info!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
@@ -615,28 +632,22 @@ async fn check_family(
|
||||
restored += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let base_req = pool_for_reconnect
|
||||
.required_writers_for_dc_with_floor_mode(endpoints_for_dc.len(), false);
|
||||
if alive + restored >= base_req {
|
||||
pool_for_reconnect
|
||||
.stats
|
||||
.increment_me_floor_cap_block_total();
|
||||
pool_for_reconnect
|
||||
.stats
|
||||
.increment_me_floor_swap_idle_failed_total();
|
||||
debug!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
alive,
|
||||
required,
|
||||
active_cap_effective_total,
|
||||
"Adaptive floor cap reached, reconnect attempt blocked"
|
||||
);
|
||||
break;
|
||||
}
|
||||
pool_for_reconnect
|
||||
.stats
|
||||
.increment_me_floor_cap_block_total();
|
||||
pool_for_reconnect
|
||||
.stats
|
||||
.increment_me_floor_swap_idle_failed_total();
|
||||
debug!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
alive,
|
||||
required,
|
||||
active_cap_effective_total,
|
||||
"Adaptive floor cap reached, reconnect attempt blocked"
|
||||
);
|
||||
break;
|
||||
}
|
||||
pool_for_reconnect.stats.increment_me_reconnect_attempt();
|
||||
let res = tokio::time::timeout(
|
||||
pool_for_reconnect.reconnect_runtime.me_one_timeout,
|
||||
pool_for_reconnect.connect_endpoints_round_robin(
|
||||
@@ -652,9 +663,11 @@ async fn check_family(
|
||||
pool_for_reconnect.stats.increment_me_reconnect_success();
|
||||
}
|
||||
Ok(false) => {
|
||||
pool_for_reconnect.stats.increment_me_reconnect_attempt();
|
||||
debug!(dc = %dc, ?family, "ME round-robin reconnect failed")
|
||||
}
|
||||
Err(_) => {
|
||||
pool_for_reconnect.stats.increment_me_reconnect_attempt();
|
||||
debug!(dc = %dc, ?family, "ME reconnect timed out");
|
||||
}
|
||||
}
|
||||
@@ -665,8 +678,10 @@ async fn check_family(
|
||||
key,
|
||||
dc,
|
||||
family,
|
||||
alive,
|
||||
required,
|
||||
endpoint_count: endpoints_for_dc.len(),
|
||||
restored,
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -680,7 +695,7 @@ async fn check_family(
|
||||
}
|
||||
};
|
||||
let now = Instant::now();
|
||||
let now_alive = live_active_writers_for_dc_family(pool, outcome.dc, outcome.family).await;
|
||||
let now_alive = outcome.alive + outcome.restored;
|
||||
if now_alive >= outcome.required {
|
||||
info!(
|
||||
dc = %outcome.dc,
|
||||
@@ -836,33 +851,6 @@ fn should_emit_rate_limited_warn(
|
||||
false
|
||||
}
|
||||
|
||||
async fn live_active_writers_for_dc_family(pool: &Arc<MePool>, dc: i32, family: IpFamily) -> usize {
|
||||
let writers = pool.writers.read().await;
|
||||
writers
|
||||
.iter()
|
||||
.filter(|writer| {
|
||||
if writer.draining.load(std::sync::atomic::Ordering::Relaxed) {
|
||||
return false;
|
||||
}
|
||||
if writer.writer_dc != dc {
|
||||
return false;
|
||||
}
|
||||
if !matches!(
|
||||
super::pool::WriterContour::from_u8(
|
||||
writer.contour.load(std::sync::atomic::Ordering::Relaxed),
|
||||
),
|
||||
super::pool::WriterContour::Active
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
match family {
|
||||
IpFamily::V4 => writer.addr.is_ipv4(),
|
||||
IpFamily::V6 => writer.addr.is_ipv6(),
|
||||
}
|
||||
})
|
||||
.count()
|
||||
}
|
||||
|
||||
fn adaptive_floor_class_min(
|
||||
pool: &Arc<MePool>,
|
||||
endpoint_count: usize,
|
||||
@@ -916,6 +904,8 @@ async fn build_family_floor_plan(
|
||||
live_addr_counts: &HashMap<(i32, SocketAddr), usize>,
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
) -> FamilyFloorPlan {
|
||||
let mut entries = Vec::<DcFloorPlanEntry>::new();
|
||||
let mut by_dc = HashMap::<i32, DcFloorPlanEntry>::new();
|
||||
@@ -931,7 +921,18 @@ async fn build_family_floor_plan(
|
||||
if endpoints.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let _key = (*dc, family);
|
||||
let key = (*dc, family);
|
||||
let reduce_for_idle = should_reduce_floor_for_idle(
|
||||
pool,
|
||||
key,
|
||||
*dc,
|
||||
endpoints,
|
||||
live_writer_ids_by_addr,
|
||||
bound_clients_by_writer,
|
||||
adaptive_idle_since,
|
||||
adaptive_recover_until,
|
||||
)
|
||||
.await;
|
||||
let base_required = pool.required_writers_for_dc(endpoints.len()).max(1);
|
||||
let min_required = if is_adaptive {
|
||||
adaptive_floor_class_min(pool, endpoints.len(), base_required)
|
||||
@@ -946,11 +947,11 @@ async fn build_family_floor_plan(
|
||||
if max_required < min_required {
|
||||
max_required = min_required;
|
||||
}
|
||||
// We initialize target_required at base_required to prevent 0-writer blackouts
|
||||
// caused by proactively dropping an idle DC to a single fragile connection.
|
||||
// The Adaptive Floor constraint loop below will gracefully compress idle DCs
|
||||
// (prioritized via has_bound_clients = false) to min_required only when global capacity is reached.
|
||||
let desired_raw = base_required;
|
||||
let desired_raw = if is_adaptive && reduce_for_idle {
|
||||
min_required
|
||||
} else {
|
||||
base_required
|
||||
};
|
||||
let target_required = desired_raw.clamp(min_required, max_required);
|
||||
let alive = endpoints
|
||||
.iter()
|
||||
@@ -1277,6 +1278,43 @@ async fn maybe_refresh_idle_writer_for_dc(
|
||||
);
|
||||
}
|
||||
|
||||
async fn should_reduce_floor_for_idle(
|
||||
pool: &Arc<MePool>,
|
||||
key: (i32, IpFamily),
|
||||
dc: i32,
|
||||
endpoints: &[SocketAddr],
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
) -> bool {
|
||||
if pool.floor_mode() != MeFloorMode::Adaptive {
|
||||
adaptive_idle_since.remove(&key);
|
||||
adaptive_recover_until.remove(&key);
|
||||
return false;
|
||||
}
|
||||
|
||||
let now = Instant::now();
|
||||
let writer_ids = list_writer_ids_for_endpoints(dc, endpoints, live_writer_ids_by_addr);
|
||||
let has_bound_clients = has_bound_clients_on_endpoint(&writer_ids, bound_clients_by_writer);
|
||||
if has_bound_clients {
|
||||
adaptive_idle_since.remove(&key);
|
||||
adaptive_recover_until.insert(key, now + pool.adaptive_floor_recover_grace_duration());
|
||||
return false;
|
||||
}
|
||||
|
||||
if let Some(recover_until) = adaptive_recover_until.get(&key)
|
||||
&& now < *recover_until
|
||||
{
|
||||
adaptive_idle_since.remove(&key);
|
||||
return false;
|
||||
}
|
||||
adaptive_recover_until.remove(&key);
|
||||
|
||||
let idle_since = adaptive_idle_since.entry(key).or_insert(now);
|
||||
now.saturating_duration_since(*idle_since) >= pool.adaptive_floor_idle_duration()
|
||||
}
|
||||
|
||||
fn has_bound_clients_on_endpoint(
|
||||
writer_ids: &[u64],
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
@@ -1326,7 +1364,6 @@ async fn recover_single_endpoint_outage(
|
||||
);
|
||||
return;
|
||||
};
|
||||
pool.stats.increment_me_reconnect_attempt();
|
||||
pool.stats
|
||||
.increment_me_single_endpoint_outage_reconnect_attempt_total();
|
||||
|
||||
@@ -1402,6 +1439,7 @@ async fn recover_single_endpoint_outage(
|
||||
return;
|
||||
}
|
||||
|
||||
pool.stats.increment_me_reconnect_attempt();
|
||||
let current_ms = *outage_backoff.get(&key).unwrap_or(&min_backoff_ms);
|
||||
let next_ms = current_ms.saturating_mul(2).min(max_backoff_ms);
|
||||
outage_backoff.insert(key, next_ms);
|
||||
|
||||
@@ -1422,6 +1422,22 @@ impl MePool {
|
||||
MeFloorMode::from_u8(self.floor_runtime.me_floor_mode.load(Ordering::Relaxed))
|
||||
}
|
||||
|
||||
pub(super) fn adaptive_floor_idle_duration(&self) -> Duration {
|
||||
Duration::from_secs(
|
||||
self.floor_runtime
|
||||
.me_adaptive_floor_idle_secs
|
||||
.load(Ordering::Relaxed),
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn adaptive_floor_recover_grace_duration(&self) -> Duration {
|
||||
Duration::from_secs(
|
||||
self.floor_runtime
|
||||
.me_adaptive_floor_recover_grace_secs
|
||||
.load(Ordering::Relaxed),
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn adaptive_floor_min_writers_multi_endpoint(&self) -> usize {
|
||||
(self
|
||||
.floor_runtime
|
||||
@@ -1643,7 +1659,6 @@ impl MePool {
|
||||
&self,
|
||||
contour: WriterContour,
|
||||
allow_coverage_override: bool,
|
||||
writer_dc: i32,
|
||||
) -> bool {
|
||||
let (active_writers, warm_writers, _) = self.non_draining_writer_counts_by_contour().await;
|
||||
match contour {
|
||||
@@ -1655,43 +1670,6 @@ impl MePool {
|
||||
if !allow_coverage_override {
|
||||
return false;
|
||||
}
|
||||
|
||||
let mut endpoints_len = 0;
|
||||
let now_epoch = Self::now_epoch_secs();
|
||||
if self.family_enabled_for_drain_coverage(IpFamily::V4, now_epoch) {
|
||||
if let Some(addrs) = self.proxy_map_v4.read().await.get(&writer_dc) {
|
||||
endpoints_len += addrs.len();
|
||||
}
|
||||
}
|
||||
if self.family_enabled_for_drain_coverage(IpFamily::V6, now_epoch) {
|
||||
if let Some(addrs) = self.proxy_map_v6.read().await.get(&writer_dc) {
|
||||
endpoints_len += addrs.len();
|
||||
}
|
||||
}
|
||||
|
||||
if endpoints_len > 0 {
|
||||
let base_req =
|
||||
self.required_writers_for_dc_with_floor_mode(endpoints_len, false);
|
||||
let active_for_dc = {
|
||||
let ws = self.writers.read().await;
|
||||
ws.iter()
|
||||
.filter(|w| {
|
||||
!w.draining.load(std::sync::atomic::Ordering::Relaxed)
|
||||
&& w.writer_dc == writer_dc
|
||||
&& matches!(
|
||||
WriterContour::from_u8(
|
||||
w.contour.load(std::sync::atomic::Ordering::Relaxed),
|
||||
),
|
||||
WriterContour::Active
|
||||
)
|
||||
})
|
||||
.count()
|
||||
};
|
||||
if active_for_dc < base_req {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
let coverage_required = self.active_coverage_required_total().await;
|
||||
active_writers < coverage_required
|
||||
}
|
||||
|
||||
@@ -77,12 +77,6 @@ impl MePool {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
if endpoints.len() == 1 && self.single_endpoint_outage_disable_quarantine() {
|
||||
let mut guard = self.endpoint_quarantine.lock().await;
|
||||
guard.retain(|_, expiry| *expiry > Instant::now());
|
||||
return endpoints.to_vec();
|
||||
}
|
||||
|
||||
let mut guard = self.endpoint_quarantine.lock().await;
|
||||
let now = Instant::now();
|
||||
guard.retain(|_, expiry| *expiry > now);
|
||||
@@ -242,18 +236,8 @@ impl MePool {
|
||||
let fast_retries = self.reconnect_runtime.me_reconnect_fast_retry_count.max(1);
|
||||
let mut total_attempts = 0u32;
|
||||
let same_endpoint_quarantined = self.is_endpoint_quarantined(addr).await;
|
||||
let dc_endpoints = self.endpoints_for_dc(writer_dc).await;
|
||||
let single_endpoint_dc = dc_endpoints.len() == 1 && dc_endpoints[0] == addr;
|
||||
let bypass_quarantine_for_single_endpoint =
|
||||
single_endpoint_dc && self.single_endpoint_outage_disable_quarantine();
|
||||
|
||||
if !same_endpoint_quarantined || bypass_quarantine_for_single_endpoint {
|
||||
if same_endpoint_quarantined && bypass_quarantine_for_single_endpoint {
|
||||
debug!(
|
||||
%addr,
|
||||
"Bypassing quarantine for immediate reconnect on single-endpoint DC"
|
||||
);
|
||||
}
|
||||
if !same_endpoint_quarantined {
|
||||
for attempt in 0..fast_retries {
|
||||
if total_attempts >= ME_REFILL_TOTAL_ATTEMPT_CAP {
|
||||
break;
|
||||
@@ -292,6 +276,7 @@ impl MePool {
|
||||
);
|
||||
}
|
||||
|
||||
let dc_endpoints = self.endpoints_for_dc(writer_dc).await;
|
||||
if dc_endpoints.is_empty() {
|
||||
self.stats.increment_me_refill_failed_total();
|
||||
return false;
|
||||
|
||||
@@ -342,7 +342,7 @@ impl MePool {
|
||||
allow_coverage_override: bool,
|
||||
) -> Result<()> {
|
||||
if !self
|
||||
.can_open_writer_for_contour(contour, allow_coverage_override, writer_dc)
|
||||
.can_open_writer_for_contour(contour, allow_coverage_override)
|
||||
.await
|
||||
{
|
||||
return Err(ProxyError::Proxy(format!(
|
||||
|
||||
@@ -109,16 +109,18 @@ async fn connectable_endpoints_waits_until_quarantine_expires() {
|
||||
|
||||
{
|
||||
let mut guard = pool.endpoint_quarantine.lock().await;
|
||||
guard.insert(addr, Instant::now() + Duration::from_millis(500));
|
||||
guard.insert(addr, Instant::now() + Duration::from_millis(80));
|
||||
}
|
||||
|
||||
let endpoints = tokio::time::timeout(
|
||||
Duration::from_millis(120),
|
||||
pool.connectable_endpoints_for_test(&[addr]),
|
||||
)
|
||||
.await
|
||||
.expect("single-endpoint outage mode should bypass quarantine delay");
|
||||
let started = Instant::now();
|
||||
let endpoints = pool.connectable_endpoints_for_test(&[addr]).await;
|
||||
let elapsed = started.elapsed();
|
||||
|
||||
assert_eq!(endpoints, vec![addr]);
|
||||
assert!(
|
||||
elapsed >= Duration::from_millis(50),
|
||||
"single-endpoint DC should honor quarantine before retry"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
Reference in New Issue
Block a user