Compare commits

..

16 Commits
flow ... main

Author SHA1 Message Date
Alexey
286662fc51 Merge pull request #697 from TWRoman/main
[docs] Updated QUICK START GUIDEs and READMEs
2026-04-13 19:39:05 +03:00
TWRoman
c5390baaf1 Merge branch 'main' of github.com:TWRoman/telemt_docs 2026-04-13 11:15:49 +03:00
TWRoman
1cd1e96079 Fixed server.listeners and upstreams description 2026-04-13 11:14:02 +03:00
Roman
2b995c31b0 Update README.md
Fixed the link for README.ru
2026-04-13 10:20:25 +03:00
Roman
442320302d Update QUICK_START_GUIDE.ru.md 2026-04-13 10:14:39 +03:00
Roman
ac0dde567b Update README.ru.md 2026-04-13 10:07:50 +03:00
TWRoman
b2fe9b78d8 [docs] Updated READMEs 2026-04-13 10:05:55 +03:00
TWRoman
f039ce1827 [docs] Updated QUICK START GUIDES 2026-04-13 09:56:44 +03:00
Alexey
5f5a3e3fa0 Merge pull request #673 from Artymediys/main
docs: align LTO notes, API docs, and Fake-TLS guidance
2026-04-12 19:15:45 +03:00
Alexey
f9e54ee739 Merge pull request #688 from TWRoman/main
[docs]Update CONFIG-PARAMS
2026-04-12 15:48:43 +03:00
Roman
d477d6ee29 Update CONFIG_PARAMS.ru.md
Corrected override_dc and default_dc descriptions.
2026-04-12 13:54:22 +03:00
TWRoman
1383dfcbb1 [docs]Update CONFIG-PARAMS 2026-04-12 12:37:38 +03:00
Artymediys
107a7cc758 Merge branch 'main' into main 2026-04-12 12:11:07 +03:00
Artymediys
4f3193fdaa Merge branch 'main' into main 2026-04-12 12:11:07 +03:00
Artymediys
d6be691c67 Merge branch 'main' into main 2026-04-12 12:10:26 +03:00
Artymediys
0b0be07a9c docs: align LTO notes, API docs, and Fake-TLS guidance 2026-04-12 12:02:14 +03:00
27 changed files with 3888 additions and 4488 deletions

View File

@@ -2,6 +2,8 @@
![Latest Release](https://img.shields.io/github/v/release/telemt/telemt?color=neon) ![Stars](https://img.shields.io/github/stars/telemt/telemt?style=social) ![Forks](https://img.shields.io/github/forks/telemt/telemt?style=social) [![Telegram](https://img.shields.io/badge/Telegram-Chat-24a1de?logo=telegram&logoColor=24a1de)](https://t.me/telemtrs)
[🇷🇺 README на русском](https://github.com/telemt/telemt/blob/main/README.ru.md)
***Löst Probleme, bevor andere überhaupt wissen, dass sie existieren*** / ***It solves problems before others even realize they exist***
> [!NOTE]
@@ -25,6 +27,7 @@ curl -fsSL https://raw.githubusercontent.com/telemt/telemt/main/install.sh | sh
- [Quick Start Guide](docs/Quick_start/QUICK_START_GUIDE.en.md)
- [Инструкция по быстрому запуску](docs/Quick_start/QUICK_START_GUIDE.ru.md)
## Features
Our implementation of **TLS-fronting** is one of the most deeply debugged, focused, advanced and *almost* **"behaviorally consistent to real"**: we are confident we have it right - [see evidence on our validation and traces](docs/FAQ.en.md#recognizability-for-dpi-and-crawler)
Our ***Middle-End Pool*** is fastest by design in standard scenarios, compared to other implementations of connecting to the Middle-End Proxy: not dramatically, but consistently

View File

@@ -54,7 +54,6 @@ curl -fsSL https://raw.githubusercontent.com/telemt/telemt/main/install.sh | sh
- [FAQ EN](docs/FAQ.en.md)
## Сборка
```bash
# Клонируйте репозиторий
git clone https://github.com/telemt/telemt
@@ -63,7 +62,6 @@ cd telemt
# Начните процесс сборки
cargo build --release
# Устройства с небольшим объёмом оперативной памяти (1 ГБ, например NanoPi Neo3 / Raspberry Pi Zero 2):
# В текущем release-профиле используется lto = "fat" для максимальной оптимизации (см. Cargo.toml).
# На системах с малым объёмом RAM (~1 ГБ) можно переопределить это значение на "thin".
@@ -87,4 +85,25 @@ telemt config.toml
- Безопасность памяти;
- Асинхронная архитектура Tokio.
## Поддержать Telemt
Telemt — это бесплатное программное обеспечение с открытым исходным кодом, разработанное в свободное время.
Если оно оказалось вам полезным, вы можете поддержать дальнейшую разработку.
Принимаемые криптовалюты (BTC, ETH, USDT, 350+ и другие):
<p align="center">
<a href="https://nowpayments.io/donation?api_key=2bf1afd2-abc2-49f9-a012-f1e715b37223" target="_blank" rel="noreferrer noopener">
<img src="https://nowpayments.io/images/embeds/donation-button-white.svg" alt="Cryptocurrency & Bitcoin donation button by NOWPayments" height="80">
</a>
</p>
Monero (XMR) напрямую:
```
8Bk4tZEYPQWSypeD2hrUXG2rKbAKF16GqEN942ZdAP5cFdSqW6h4DwkP5cJMAdszzuPeHeHZPTyjWWFwzeFdjuci3ktfMoB
```
Все пожертвования пойдут на инфраструктуру, разработку и исследования.
![telemt_scheme](docs/assets/telemt.png)

View File

@@ -32,13 +32,13 @@ show = "*"
port = 443
# proxy_protocol = false # Enable if behind HAProxy/nginx with PROXY protocol
# metrics_port = 9090
# metrics_listen = "0.0.0.0:9090" # Listen address for metrics (overrides metrics_port)
# metrics_whitelist = ["127.0.0.1", "::1", "0.0.0.0/0"]
# metrics_listen = "127.0.0.1:9090" # Listen address for metrics (overrides metrics_port)
# metrics_whitelist = ["127.0.0.1/32", "::1/128"]
[server.api]
enabled = true
listen = "0.0.0.0:9091"
whitelist = ["127.0.0.0/8"]
listen = "127.0.0.1:9091"
whitelist = ["127.0.0.1/32", "::1/128"]
minimal_runtime_enabled = false
minimal_runtime_cache_ttl_ms = 1000
@@ -48,9 +48,12 @@ ip = "0.0.0.0"
# === Anti-Censorship & Masking ===
[censorship]
# Fake-TLS / SNI masking domain used in generated ee-links.
# Changing tls_domain invalidates previously generated TLS links.
tls_domain = "petrovich.ru"
mask = true
tls_emulation = true # Fetch real cert lengths and emulate TLS records
tls_emulation = true # Fetch real cert lengths and emulate TLS records
tls_front_dir = "tlsfront" # Cache directory for TLS emulation
[access.users]

View File

@@ -9,12 +9,12 @@ API runtime is configured in `[server.api]`.
| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `enabled` | `bool` | `false` | Enables REST API listener. |
| `listen` | `string` (`IP:PORT`) | `127.0.0.1:9091` | API bind address. |
| `whitelist` | `CIDR[]` | `127.0.0.1/32, ::1/128` | Source IP allowlist. Empty list means allow all. |
| `enabled` | `bool` | `true` | Enables REST API listener. |
| `listen` | `string` (`IP:PORT`) | `0.0.0.0:9091` | API bind address. |
| `whitelist` | `CIDR[]` | `127.0.0.0/8` | Source IP allowlist. Empty list means allow all. |
| `auth_header` | `string` | `""` | Exact value for `Authorization` header. Empty disables header auth. |
| `request_body_limit_bytes` | `usize` | `65536` | Maximum request body size. Must be `> 0`. |
| `minimal_runtime_enabled` | `bool` | `false` | Enables runtime snapshot endpoints requiring ME pool read-lock aggregation. |
| `minimal_runtime_enabled` | `bool` | `true` | Enables runtime snapshot endpoints requiring ME pool read-lock aggregation. |
| `minimal_runtime_cache_ttl_ms` | `u64` | `1000` | Cache TTL for minimal snapshots. `0` disables cache; valid range is `[0, 60000]`. |
| `runtime_edge_enabled` | `bool` | `false` | Enables runtime edge endpoints with cached aggregation payloads. |
| `runtime_edge_cache_ttl_ms` | `u64` | `1000` | Cache TTL for runtime edge summary payloads. `0` disables cache. |

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -27,7 +27,8 @@ cargo build --release
./target/release/telemt --version
```
For low-RAM systems, this repository already uses `lto = "thin"` in release profile.
For low-RAM systems, note that this repository currently uses `lto = "fat"` in release profile.
On constrained builders, a local override to `lto = "thin"` may be more practical.
## 3. Install binary and config

View File

@@ -1,9 +1,36 @@
# Installation Options
There are three options for installing Telemt:
- [Automated installation using a script](#very-quick-start).
- [Manual installation of Telemt as a service](#telemt-via-systemd).
- [Installation using Docker Compose](#telemt-via-docker-compose).
# Very quick start
### One-command installation / update on re-run
```bash
curl -fsSL https://raw.githubusercontent.com/telemt/telemt/main/install.sh | sh
```
After starting, the script will prompt for:
- Your language (1 - English, 2 - Russian);
- Your TLS domain (press Enter for petrovich.ru).
The script checks if the port (default **443**) is free. If the port is already in use, installation will fail. You need to free up the port or use the **-p** flag with a different port to retry the installation.
To modify the script's startup parameters, you can use the following flags:
- **-d, --domain** - TLS domain;
- **-p, --port** - server port (1–65535);
- **-s, --secret** - 32 hex secret;
- **-a, --ad-tag** - ad_tag;
- **-l, --lang** - language (1/en or 2/ru).
Providing all options skips interactive prompts.
After completion, the script will provide a link for client connections:
```bash
tg://proxy?server=IP&port=PORT&secret=SECRET
```
### Installing a specific version
```bash
curl -fsSL https://raw.githubusercontent.com/telemt/telemt/main/install.sh | sh -s -- 3.3.39
@@ -110,15 +137,15 @@ show = "*"
# === Server Binding ===
[server]
port = 443
# proxy_protocol = false # Enable if behind HAProxy/nginx with PROXY protocol
# proxy_protocol = false # Enable if behind HAProxy/nginx with PROXY protocol
# metrics_port = 9090
# metrics_listen = "0.0.0.0:9090" # Listen address for metrics (overrides metrics_port)
# metrics_whitelist = ["127.0.0.1", "::1", "0.0.0.0/0"]
# metrics_listen = "127.0.0.1:9090" # Listen address for metrics (overrides metrics_port)
# metrics_whitelist = ["127.0.0.1/32", "::1/128"]
[server.api]
enabled = true
listen = "0.0.0.0:9091"
whitelist = ["127.0.0.0/8"]
listen = "127.0.0.1:9091"
whitelist = ["127.0.0.1/32", "::1/128"]
minimal_runtime_enabled = false
minimal_runtime_cache_ttl_ms = 1000
@@ -128,9 +155,9 @@ ip = "0.0.0.0"
# === Anti-Censorship & Masking ===
[censorship]
tls_domain = "petrovich.ru"
tls_domain = "petrovich.ru" # Fake-TLS / SNI masking domain used in generated ee-links
mask = true
tls_emulation = true # Fetch real cert lengths and emulate TLS records
tls_emulation = true # Fetch real cert lengths and emulate TLS records
tls_front_dir = "tlsfront" # Cache directory for TLS emulation
[access.users]
@@ -141,9 +168,9 @@ hello = "00000000000000000000000000000000"
then Ctrl+S -> Ctrl+X to save
> [!WARNING]
> Replace the value of the hello parameter with the value you obtained in step 0.
> Additionally, change the value of the tls_domain parameter to a different website.
> Changing the tls_domain parameter will break all links that use the old domain!
> Replace the value of the `hello` parameter with the value you obtained in step 0.
> Additionally, change the value of the `tls_domain` parameter to a different website.
> Changing the `tls_domain` parameter will break all links that use the old domain!
---

View File

@@ -1,9 +1,35 @@
# Варианты установки
Имеется три варианта установки Telemt:
- [Автоматизированная установка с помощью скрипта](#очень-быстрый-старт).
- [Ручная установка Telemt в качестве службы](#telemt-через-systemd-вручную).
- [Установка через Docker Compose](#telemt-через-docker-compose).
# Очень быстрый старт
### Установка одной командой / обновление при повторном запуске
```bash
curl -fsSL https://raw.githubusercontent.com/telemt/telemt/main/install.sh | sh
```
После запуска скрипт запросит:
- ваш язык (1 - English, 2 - Русский);
- ваш TLS-домен (нажмите Enter для petrovich.ru).
Во время установки скрипт проверяет, свободен ли порт (по умолчанию **443**). Если порт занят другим процессом - установка завершится с ошибкой. Для повторной установки необходимо освободить порт или указать другой через флаг **-p**.
Для изменения параметров запуска скрипта можно использовать следующие флаги:
- **-d, --domain** - TLS-домен;
- **-p, --port** - порт (1–65535);
- **-s, --secret** - секрет (32 hex символа);
- **-a, --ad-tag** - ad_tag;
- **-l, --lang** - язык (1/en или 2/ru).
Если заданы флаги для языка и домена, интерактивных вопросов не будет.
После завершения установки скрипт выдаст ссылку для подключения клиентов:
```bash
tg://proxy?server=IP&port=PORT&secret=SECRET
```
### Установка нужной версии
```bash
curl -fsSL https://raw.githubusercontent.com/telemt/telemt/main/install.sh | sh -s -- 3.3.39
@@ -103,22 +129,22 @@ tls = true
[general.links]
show = "*"
# show = ["alice", "bob"] # Показывать ссылки только для alice и bob
# show = "*"              # Показывать ссылки для всех пользователей
# public_host = "proxy.example.com"  # Хост (IP-адрес или домен) для ссылок tg://
# public_port = 443                  # Порт для ссылок tg:// (по умолчанию: server.port)
# show = "*" # Показывать ссылки для всех пользователей
# public_host = "proxy.example.com" # Хост (IP-адрес или домен) для ссылок tg://
# public_port = 443 # Порт для ссылок tg:// (по умолчанию: server.port)
# === Привязка сервера ===
[server]
port = 443
# proxy_protocol = false           # Включите, если сервер находится за HAProxy/nginx с протоколом PROXY
# proxy_protocol = false # Включите, если сервер находится за HAProxy/nginx с протоколом PROXY
# metrics_port = 9090
# metrics_listen = "0.0.0.0:9090"  # Адрес прослушивания для метрик (переопределяет metrics_port)
# metrics_whitelist = ["127.0.0.1", "::1", "0.0.0.0/0"]
# metrics_listen = "127.0.0.1:9090" # Адрес прослушивания для метрик (переопределяет metrics_port)
# metrics_whitelist = ["127.0.0.1/32", "::1/128"]
[server.api]
enabled = true
listen = "0.0.0.0:9091"
whitelist = ["127.0.0.0/8"]
listen = "127.0.0.1:9091"
whitelist = ["127.0.0.1/32", "::1/128"]
minimal_runtime_enabled = false
minimal_runtime_cache_ttl_ms = 1000
@@ -128,9 +154,9 @@ ip = "0.0.0.0"
# === Обход блокировок и маскировка ===
[censorship]
tls_domain = "petrovich.ru"
tls_domain = "petrovich.ru" # Домен Fake-TLS / SNI, который будет использоваться в сгенерированных ee-ссылках
mask = true
tls_emulation = true # Получить реальную длину сертификата и эмулировать запись TLS
tls_emulation = true # Получить реальную длину сертификата и эмулировать запись TLS
tls_front_dir = "tlsfront" # Директория кэша для эмуляции TLS
[access.users]
@@ -141,9 +167,9 @@ hello = "00000000000000000000000000000000"
Затем нажмите Ctrl+S -> Ctrl+X, чтобы сохранить
> [!WARNING]
> Замените значение параметра hello на значение, которое вы получили в пункте 0.
> Так же замените значение параметра tls_domain на другой сайт.
> Изменение параметра tls_domain сделает нерабочими все ссылки, использующие старый домен!
> Замените значение параметра `hello` на значение, которое вы получили в пункте 0.
> Также замените значение параметра `tls_domain` на другой сайт.
> Изменение параметра `tls_domain` сделает нерабочими все ссылки, использующие старый домен!
---

View File

@@ -615,26 +615,6 @@ pub(crate) fn default_mask_relay_max_bytes() -> usize {
32 * 1024
}
#[cfg(not(test))]
pub(crate) fn default_mask_relay_timeout_ms() -> u64 {
60_000
}
#[cfg(test)]
pub(crate) fn default_mask_relay_timeout_ms() -> u64 {
200
}
#[cfg(not(test))]
pub(crate) fn default_mask_relay_idle_timeout_ms() -> u64 {
5_000
}
#[cfg(test)]
pub(crate) fn default_mask_relay_idle_timeout_ms() -> u64 {
100
}
pub(crate) fn default_mask_classifier_prefetch_timeout_ms() -> u64 {
5
}

View File

@@ -611,8 +611,6 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
|| old.censorship.mask_shape_above_cap_blur_max_bytes
!= new.censorship.mask_shape_above_cap_blur_max_bytes
|| old.censorship.mask_relay_max_bytes != new.censorship.mask_relay_max_bytes
|| old.censorship.mask_relay_timeout_ms != new.censorship.mask_relay_timeout_ms
|| old.censorship.mask_relay_idle_timeout_ms != new.censorship.mask_relay_idle_timeout_ms
|| old.censorship.mask_classifier_prefetch_timeout_ms
!= new.censorship.mask_classifier_prefetch_timeout_ms
|| old.censorship.mask_timing_normalization_enabled

View File

@@ -1710,19 +1710,6 @@ pub struct AntiCensorshipConfig {
#[serde(default = "default_mask_relay_max_bytes")]
pub mask_relay_max_bytes: usize,
/// Wall-clock cap for the full masking relay on non-MTProto fallback paths.
/// Raise when the mask target is a long-lived service (e.g. WebSocket).
/// Default: 60 000 ms (60 s).
#[serde(default = "default_mask_relay_timeout_ms")]
pub mask_relay_timeout_ms: u64,
/// Per-read idle timeout on masking relay and drain paths.
/// Limits resource consumption by slow-loris attacks and port scanners.
/// A read call stalling beyond this is treated as an abandoned connection.
/// Default: 5 000 ms (5 s).
#[serde(default = "default_mask_relay_idle_timeout_ms")]
pub mask_relay_idle_timeout_ms: u64,
/// Prefetch timeout (ms) for extending fragmented masking classifier window.
#[serde(default = "default_mask_classifier_prefetch_timeout_ms")]
pub mask_classifier_prefetch_timeout_ms: u64,
@@ -1768,8 +1755,6 @@ impl Default for AntiCensorshipConfig {
mask_shape_above_cap_blur: default_mask_shape_above_cap_blur(),
mask_shape_above_cap_blur_max_bytes: default_mask_shape_above_cap_blur_max_bytes(),
mask_relay_max_bytes: default_mask_relay_max_bytes(),
mask_relay_timeout_ms: default_mask_relay_timeout_ms(),
mask_relay_idle_timeout_ms: default_mask_relay_idle_timeout_ms(),
mask_classifier_prefetch_timeout_ms: default_mask_classifier_prefetch_timeout_ms(),
mask_timing_normalization_enabled: default_mask_timing_normalization_enabled(),
mask_timing_normalization_floor_ms: default_mask_timing_normalization_floor_ms(),

View File

@@ -28,10 +28,14 @@ use tracing::debug;
const MASK_TIMEOUT: Duration = Duration::from_secs(5);
#[cfg(test)]
const MASK_TIMEOUT: Duration = Duration::from_millis(50);
/// Maximum duration for the entire masking relay under test (replaced by config at runtime).
/// Maximum duration for the entire masking relay.
/// Limits resource consumption from slow-loris attacks and port scanners.
#[cfg(not(test))]
const MASK_RELAY_TIMEOUT: Duration = Duration::from_secs(60);
#[cfg(test)]
const MASK_RELAY_TIMEOUT: Duration = Duration::from_millis(200);
/// Per-read idle timeout for masking relay and drain paths under test (replaced by config at runtime).
#[cfg(not(test))]
const MASK_RELAY_IDLE_TIMEOUT: Duration = Duration::from_secs(5);
#[cfg(test)]
const MASK_RELAY_IDLE_TIMEOUT: Duration = Duration::from_millis(100);
const MASK_BUFFER_SIZE: usize = 8192;
@@ -51,7 +55,6 @@ async fn copy_with_idle_timeout<R, W>(
writer: &mut W,
byte_cap: usize,
shutdown_on_eof: bool,
idle_timeout: Duration,
) -> CopyOutcome
where
R: AsyncRead + Unpin,
@@ -75,7 +78,7 @@ where
}
let read_len = remaining_budget.min(MASK_BUFFER_SIZE);
let read_res = timeout(idle_timeout, reader.read(&mut buf[..read_len])).await;
let read_res = timeout(MASK_RELAY_IDLE_TIMEOUT, reader.read(&mut buf[..read_len])).await;
let n = match read_res {
Ok(Ok(n)) => n,
Ok(Err(_)) | Err(_) => break,
@@ -83,13 +86,13 @@ where
if n == 0 {
ended_by_eof = true;
if shutdown_on_eof {
let _ = timeout(idle_timeout, writer.shutdown()).await;
let _ = timeout(MASK_RELAY_IDLE_TIMEOUT, writer.shutdown()).await;
}
break;
}
total = total.saturating_add(n);
let write_res = timeout(idle_timeout, writer.write_all(&buf[..n])).await;
let write_res = timeout(MASK_RELAY_IDLE_TIMEOUT, writer.write_all(&buf[..n])).await;
match write_res {
Ok(Ok(())) => {}
Ok(Err(_)) | Err(_) => break,
@@ -227,20 +230,13 @@ where
}
}
async fn consume_client_data_with_timeout_and_cap<R>(
reader: R,
byte_cap: usize,
relay_timeout: Duration,
idle_timeout: Duration,
) where
async fn consume_client_data_with_timeout_and_cap<R>(reader: R, byte_cap: usize)
where
R: AsyncRead + Unpin,
{
if timeout(
relay_timeout,
consume_client_data(reader, byte_cap, idle_timeout),
)
.await
.is_err()
if timeout(MASK_RELAY_TIMEOUT, consume_client_data(reader, byte_cap))
.await
.is_err()
{
debug!("Timed out while consuming client data on masking fallback path");
}
@@ -643,18 +639,10 @@ pub async fn handle_bad_client<R, W>(
beobachten.record(client_type, peer.ip(), ttl);
}
let relay_timeout = Duration::from_millis(config.censorship.mask_relay_timeout_ms);
let idle_timeout = Duration::from_millis(config.censorship.mask_relay_idle_timeout_ms);
if !config.censorship.mask {
// Masking disabled, just consume data
consume_client_data_with_timeout_and_cap(
reader,
config.censorship.mask_relay_max_bytes,
relay_timeout,
idle_timeout,
)
.await;
consume_client_data_with_timeout_and_cap(reader, config.censorship.mask_relay_max_bytes)
.await;
return;
}
@@ -686,7 +674,7 @@ pub async fn handle_bad_client<R, W>(
return;
}
if timeout(
relay_timeout,
MASK_RELAY_TIMEOUT,
relay_to_mask(
reader,
writer,
@@ -700,7 +688,6 @@ pub async fn handle_bad_client<R, W>(
config.censorship.mask_shape_above_cap_blur_max_bytes,
config.censorship.mask_shape_hardening_aggressive_mode,
config.censorship.mask_relay_max_bytes,
idle_timeout,
),
)
.await
@@ -716,8 +703,6 @@ pub async fn handle_bad_client<R, W>(
consume_client_data_with_timeout_and_cap(
reader,
config.censorship.mask_relay_max_bytes,
relay_timeout,
idle_timeout,
)
.await;
wait_mask_outcome_budget(outcome_started, config).await;
@@ -727,8 +712,6 @@ pub async fn handle_bad_client<R, W>(
consume_client_data_with_timeout_and_cap(
reader,
config.censorship.mask_relay_max_bytes,
relay_timeout,
idle_timeout,
)
.await;
wait_mask_outcome_budget(outcome_started, config).await;
@@ -759,13 +742,8 @@ pub async fn handle_bad_client<R, W>(
local = %local_addr,
"Mask target resolves to local listener; refusing self-referential masking fallback"
);
consume_client_data_with_timeout_and_cap(
reader,
config.censorship.mask_relay_max_bytes,
relay_timeout,
idle_timeout,
)
.await;
consume_client_data_with_timeout_and_cap(reader, config.censorship.mask_relay_max_bytes)
.await;
wait_mask_outcome_budget(outcome_started, config).await;
return;
}
@@ -799,7 +777,7 @@ pub async fn handle_bad_client<R, W>(
return;
}
if timeout(
relay_timeout,
MASK_RELAY_TIMEOUT,
relay_to_mask(
reader,
writer,
@@ -813,7 +791,6 @@ pub async fn handle_bad_client<R, W>(
config.censorship.mask_shape_above_cap_blur_max_bytes,
config.censorship.mask_shape_hardening_aggressive_mode,
config.censorship.mask_relay_max_bytes,
idle_timeout,
),
)
.await
@@ -829,8 +806,6 @@ pub async fn handle_bad_client<R, W>(
consume_client_data_with_timeout_and_cap(
reader,
config.censorship.mask_relay_max_bytes,
relay_timeout,
idle_timeout,
)
.await;
wait_mask_outcome_budget(outcome_started, config).await;
@@ -840,8 +815,6 @@ pub async fn handle_bad_client<R, W>(
consume_client_data_with_timeout_and_cap(
reader,
config.censorship.mask_relay_max_bytes,
relay_timeout,
idle_timeout,
)
.await;
wait_mask_outcome_budget(outcome_started, config).await;
@@ -863,7 +836,6 @@ async fn relay_to_mask<R, W, MR, MW>(
shape_above_cap_blur_max_bytes: usize,
shape_hardening_aggressive_mode: bool,
mask_relay_max_bytes: usize,
idle_timeout: Duration,
) where
R: AsyncRead + Unpin + Send + 'static,
W: AsyncWrite + Unpin + Send + 'static,
@@ -885,19 +857,11 @@ async fn relay_to_mask<R, W, MR, MW>(
&mut mask_write,
mask_relay_max_bytes,
!shape_hardening_enabled,
idle_timeout,
)
.await
},
async {
copy_with_idle_timeout(
&mut mask_read,
&mut writer,
mask_relay_max_bytes,
true,
idle_timeout,
)
.await
copy_with_idle_timeout(&mut mask_read, &mut writer, mask_relay_max_bytes, true).await
}
);
@@ -925,11 +889,7 @@ async fn relay_to_mask<R, W, MR, MW>(
}
/// Just consume all data from client without responding.
async fn consume_client_data<R: AsyncRead + Unpin>(
mut reader: R,
byte_cap: usize,
idle_timeout: Duration,
) {
async fn consume_client_data<R: AsyncRead + Unpin>(mut reader: R, byte_cap: usize) {
if byte_cap == 0 {
return;
}
@@ -945,7 +905,7 @@ async fn consume_client_data<R: AsyncRead + Unpin>(
}
let read_len = remaining_budget.min(MASK_BUFFER_SIZE);
let n = match timeout(idle_timeout, reader.read(&mut buf[..read_len])).await {
let n = match timeout(MASK_RELAY_IDLE_TIMEOUT, reader.read(&mut buf[..read_len])).await {
Ok(Ok(n)) => n,
Ok(Err(_)) | Err(_) => break,
};

View File

@@ -47,7 +47,7 @@ async fn consume_client_data_stops_after_byte_cap_without_eof() {
};
let cap = 10_000usize;
consume_client_data(reader, cap, MASK_RELAY_IDLE_TIMEOUT).await;
consume_client_data(reader, cap).await;
let total = produced.load(Ordering::Relaxed);
assert!(

View File

@@ -31,7 +31,7 @@ async fn stalling_client_terminates_at_idle_not_relay_timeout() {
let result = tokio::time::timeout(
MASK_RELAY_TIMEOUT,
consume_client_data(reader, MASK_BUFFER_SIZE * 4, MASK_RELAY_IDLE_TIMEOUT),
consume_client_data(reader, MASK_BUFFER_SIZE * 4),
)
.await;
@@ -57,12 +57,9 @@ async fn fast_reader_drains_to_eof() {
let data = vec![0xAAu8; 32 * 1024];
let reader = std::io::Cursor::new(data);
tokio::time::timeout(
MASK_RELAY_TIMEOUT,
consume_client_data(reader, usize::MAX, MASK_RELAY_IDLE_TIMEOUT),
)
.await
.expect("consume_client_data did not complete for fast EOF reader");
tokio::time::timeout(MASK_RELAY_TIMEOUT, consume_client_data(reader, usize::MAX))
.await
.expect("consume_client_data did not complete for fast EOF reader");
}
#[tokio::test]
@@ -84,7 +81,7 @@ async fn io_error_terminates_cleanly() {
tokio::time::timeout(
MASK_RELAY_TIMEOUT,
consume_client_data(ErrReader, usize::MAX, MASK_RELAY_IDLE_TIMEOUT),
consume_client_data(ErrReader, usize::MAX),
)
.await
.expect("consume_client_data did not return on I/O error");

View File

@@ -34,11 +34,7 @@ async fn consume_stall_stress_finishes_within_idle_budget() {
set.spawn(async {
tokio::time::timeout(
MASK_RELAY_TIMEOUT,
consume_client_data(
OneByteThenStall { sent: false },
usize::MAX,
MASK_RELAY_IDLE_TIMEOUT,
),
consume_client_data(OneByteThenStall { sent: false }, usize::MAX),
)
.await
.expect("consume_client_data exceeded relay timeout under stall load");
@@ -60,7 +56,7 @@ async fn consume_stall_stress_finishes_within_idle_budget() {
#[tokio::test]
async fn consume_zero_cap_returns_immediately() {
let started = Instant::now();
consume_client_data(tokio::io::empty(), 0, MASK_RELAY_IDLE_TIMEOUT).await;
consume_client_data(tokio::io::empty(), 0).await;
assert!(
started.elapsed() < MASK_RELAY_IDLE_TIMEOUT,
"zero byte cap must return immediately"

View File

@@ -127,14 +127,7 @@ async fn positive_copy_with_production_cap_stops_exactly_at_budget() {
let mut reader = FinitePatternReader::new(PROD_CAP_BYTES + (256 * 1024), 4096, read_calls);
let mut writer = CountingWriter::default();
let outcome = copy_with_idle_timeout(
&mut reader,
&mut writer,
PROD_CAP_BYTES,
true,
MASK_RELAY_IDLE_TIMEOUT,
)
.await;
let outcome = copy_with_idle_timeout(&mut reader, &mut writer, PROD_CAP_BYTES, true).await;
assert_eq!(
outcome.total, PROD_CAP_BYTES,
@@ -152,13 +145,7 @@ async fn negative_consume_with_zero_cap_performs_no_reads() {
let read_calls = Arc::new(AtomicUsize::new(0));
let reader = FinitePatternReader::new(1024, 64, Arc::clone(&read_calls));
consume_client_data_with_timeout_and_cap(
reader,
0,
MASK_RELAY_TIMEOUT,
MASK_RELAY_IDLE_TIMEOUT,
)
.await;
consume_client_data_with_timeout_and_cap(reader, 0).await;
assert_eq!(
read_calls.load(Ordering::Relaxed),
@@ -174,14 +161,7 @@ async fn edge_copy_below_cap_reports_eof_without_overread() {
let mut reader = FinitePatternReader::new(payload, 3072, read_calls);
let mut writer = CountingWriter::default();
let outcome = copy_with_idle_timeout(
&mut reader,
&mut writer,
PROD_CAP_BYTES,
true,
MASK_RELAY_IDLE_TIMEOUT,
)
.await;
let outcome = copy_with_idle_timeout(&mut reader, &mut writer, PROD_CAP_BYTES, true).await;
assert_eq!(outcome.total, payload);
assert_eq!(writer.written, payload);
@@ -195,13 +175,7 @@ async fn edge_copy_below_cap_reports_eof_without_overread() {
async fn adversarial_blackhat_never_ready_reader_is_bounded_by_timeout_guards() {
let started = Instant::now();
consume_client_data_with_timeout_and_cap(
NeverReadyReader,
PROD_CAP_BYTES,
MASK_RELAY_TIMEOUT,
MASK_RELAY_IDLE_TIMEOUT,
)
.await;
consume_client_data_with_timeout_and_cap(NeverReadyReader, PROD_CAP_BYTES).await;
assert!(
started.elapsed() < Duration::from_millis(350),
@@ -216,12 +190,7 @@ async fn integration_consume_path_honors_production_cap_for_large_payload() {
let bounded = timeout(
Duration::from_millis(350),
consume_client_data_with_timeout_and_cap(
reader,
PROD_CAP_BYTES,
MASK_RELAY_TIMEOUT,
MASK_RELAY_IDLE_TIMEOUT,
),
consume_client_data_with_timeout_and_cap(reader, PROD_CAP_BYTES),
)
.await;
@@ -237,13 +206,7 @@ async fn adversarial_consume_path_never_reads_beyond_declared_byte_cap() {
let total_read = Arc::new(AtomicUsize::new(0));
let reader = BudgetProbeReader::new(256 * 1024, Arc::clone(&total_read));
consume_client_data_with_timeout_and_cap(
reader,
byte_cap,
MASK_RELAY_TIMEOUT,
MASK_RELAY_IDLE_TIMEOUT,
)
.await;
consume_client_data_with_timeout_and_cap(reader, byte_cap).await;
assert!(
total_read.load(Ordering::Relaxed) <= byte_cap,
@@ -268,9 +231,7 @@ async fn light_fuzz_cap_and_payload_matrix_preserves_min_budget_invariant() {
let mut reader = FinitePatternReader::new(payload, chunk, read_calls);
let mut writer = CountingWriter::default();
let outcome =
copy_with_idle_timeout(&mut reader, &mut writer, cap, true, MASK_RELAY_IDLE_TIMEOUT)
.await;
let outcome = copy_with_idle_timeout(&mut reader, &mut writer, cap, true).await;
let expected = payload.min(cap);
assert_eq!(
@@ -300,14 +261,7 @@ async fn stress_parallel_copy_tasks_with_production_cap_complete_without_leaks()
read_calls,
);
let mut writer = CountingWriter::default();
copy_with_idle_timeout(
&mut reader,
&mut writer,
PROD_CAP_BYTES,
true,
MASK_RELAY_IDLE_TIMEOUT,
)
.await
copy_with_idle_timeout(&mut reader, &mut writer, PROD_CAP_BYTES, true).await
}));
}

View File

@@ -26,7 +26,6 @@ async fn relay_to_mask_enforces_masking_session_byte_cap() {
0,
false,
32 * 1024,
MASK_RELAY_IDLE_TIMEOUT,
)
.await;
});
@@ -82,7 +81,6 @@ async fn relay_to_mask_propagates_client_half_close_without_waiting_for_other_di
0,
false,
32 * 1024,
MASK_RELAY_IDLE_TIMEOUT,
)
.await;
});

View File

@@ -1377,7 +1377,6 @@ async fn relay_to_mask_keeps_backend_to_client_flow_when_client_to_backend_stall
0,
false,
5 * 1024 * 1024,
MASK_RELAY_IDLE_TIMEOUT,
)
.await;
});
@@ -1509,7 +1508,6 @@ async fn relay_to_mask_timeout_cancels_and_drops_all_io_endpoints() {
0,
false,
5 * 1024 * 1024,
MASK_RELAY_IDLE_TIMEOUT,
),
)
.await;

View File

@@ -228,7 +228,6 @@ async fn relay_path_idle_timeout_eviction_remains_effective() {
0,
false,
5 * 1024 * 1024,
MASK_RELAY_IDLE_TIMEOUT,
)
.await;

View File

@@ -44,7 +44,6 @@ async fn run_relay_case(
above_cap_blur_max_bytes,
false,
5 * 1024 * 1024,
MASK_RELAY_IDLE_TIMEOUT,
)
.await;
});

View File

@@ -89,7 +89,6 @@ async fn relay_to_mask_applies_cap_clamped_padding_for_non_power_of_two_cap() {
0,
false,
5 * 1024 * 1024,
MASK_RELAY_IDLE_TIMEOUT,
)
.await;
});

View File

@@ -67,8 +67,10 @@ struct FamilyReconnectOutcome {
key: (i32, IpFamily),
dc: i32,
family: IpFamily,
alive: usize,
required: usize,
endpoint_count: usize,
restored: usize,
}
pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_connections: usize) {
@@ -80,6 +82,8 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
let mut single_endpoint_outage: HashSet<(i32, IpFamily)> = HashSet::new();
let mut shadow_rotate_deadline: HashMap<(i32, IpFamily), Instant> = HashMap::new();
let mut idle_refresh_next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new();
let mut adaptive_idle_since: HashMap<(i32, IpFamily), Instant> = HashMap::new();
let mut adaptive_recover_until: HashMap<(i32, IpFamily), Instant> = HashMap::new();
let mut floor_warn_next_allowed: HashMap<(i32, IpFamily), Instant> = HashMap::new();
let mut drain_warn_next_allowed: HashMap<u64, Instant> = HashMap::new();
let mut degraded_interval = true;
@@ -105,6 +109,8 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
&mut single_endpoint_outage,
&mut shadow_rotate_deadline,
&mut idle_refresh_next_attempt,
&mut adaptive_idle_since,
&mut adaptive_recover_until,
&mut floor_warn_next_allowed,
)
.await;
@@ -120,6 +126,8 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
&mut single_endpoint_outage,
&mut shadow_rotate_deadline,
&mut idle_refresh_next_attempt,
&mut adaptive_idle_since,
&mut adaptive_recover_until,
&mut floor_warn_next_allowed,
)
.await;
@@ -352,6 +360,8 @@ async fn check_family(
single_endpoint_outage: &mut HashSet<(i32, IpFamily)>,
shadow_rotate_deadline: &mut HashMap<(i32, IpFamily), Instant>,
idle_refresh_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
floor_warn_next_allowed: &mut HashMap<(i32, IpFamily), Instant>,
) -> bool {
let enabled = match family {
@@ -383,7 +393,10 @@ async fn check_family(
let reconnect_budget = health_reconnect_budget(pool, dc_endpoints.len());
let reconnect_sem = Arc::new(Semaphore::new(reconnect_budget));
if pool.floor_mode() == MeFloorMode::Static {}
if pool.floor_mode() == MeFloorMode::Static {
adaptive_idle_since.clear();
adaptive_recover_until.clear();
}
let mut live_addr_counts = HashMap::<(i32, SocketAddr), usize>::new();
let mut live_writer_ids_by_addr = HashMap::<(i32, SocketAddr), Vec<u64>>::new();
@@ -422,6 +435,8 @@ async fn check_family(
&live_addr_counts,
&live_writer_ids_by_addr,
&bound_clients_by_writer,
adaptive_idle_since,
adaptive_recover_until,
)
.await;
pool.set_adaptive_floor_runtime_caps(
@@ -488,6 +503,8 @@ async fn check_family(
outage_next_attempt.remove(&key);
shadow_rotate_deadline.remove(&key);
idle_refresh_next_attempt.remove(&key);
adaptive_idle_since.remove(&key);
adaptive_recover_until.remove(&key);
info!(
dc = %dc,
?family,
@@ -615,28 +632,22 @@ async fn check_family(
restored += 1;
continue;
}
let base_req = pool_for_reconnect
.required_writers_for_dc_with_floor_mode(endpoints_for_dc.len(), false);
if alive + restored >= base_req {
pool_for_reconnect
.stats
.increment_me_floor_cap_block_total();
pool_for_reconnect
.stats
.increment_me_floor_swap_idle_failed_total();
debug!(
dc = %dc,
?family,
alive,
required,
active_cap_effective_total,
"Adaptive floor cap reached, reconnect attempt blocked"
);
break;
}
pool_for_reconnect
.stats
.increment_me_floor_cap_block_total();
pool_for_reconnect
.stats
.increment_me_floor_swap_idle_failed_total();
debug!(
dc = %dc,
?family,
alive,
required,
active_cap_effective_total,
"Adaptive floor cap reached, reconnect attempt blocked"
);
break;
}
pool_for_reconnect.stats.increment_me_reconnect_attempt();
let res = tokio::time::timeout(
pool_for_reconnect.reconnect_runtime.me_one_timeout,
pool_for_reconnect.connect_endpoints_round_robin(
@@ -652,9 +663,11 @@ async fn check_family(
pool_for_reconnect.stats.increment_me_reconnect_success();
}
Ok(false) => {
pool_for_reconnect.stats.increment_me_reconnect_attempt();
debug!(dc = %dc, ?family, "ME round-robin reconnect failed")
}
Err(_) => {
pool_for_reconnect.stats.increment_me_reconnect_attempt();
debug!(dc = %dc, ?family, "ME reconnect timed out");
}
}
@@ -665,8 +678,10 @@ async fn check_family(
key,
dc,
family,
alive,
required,
endpoint_count: endpoints_for_dc.len(),
restored,
}
});
}
@@ -680,7 +695,7 @@ async fn check_family(
}
};
let now = Instant::now();
let now_alive = live_active_writers_for_dc_family(pool, outcome.dc, outcome.family).await;
let now_alive = outcome.alive + outcome.restored;
if now_alive >= outcome.required {
info!(
dc = %outcome.dc,
@@ -836,33 +851,6 @@ fn should_emit_rate_limited_warn(
false
}
/// Counts the live "active" writers currently serving `dc` on the given
/// IP `family`.
///
/// A writer is counted only when all of the following hold:
/// 1. it is not draining,
/// 2. it belongs to the requested DC,
/// 3. its contour flag decodes to `WriterContour::Active`,
/// 4. its remote address matches the requested IP family.
async fn live_active_writers_for_dc_family(pool: &Arc<MePool>, dc: i32, family: IpFamily) -> usize {
    let guard = pool.writers.read().await;
    let mut count = 0usize;
    for w in guard.iter() {
        // Draining writers and writers for other DCs never count.
        if w.draining.load(std::sync::atomic::Ordering::Relaxed) || w.writer_dc != dc {
            continue;
        }
        let contour = super::pool::WriterContour::from_u8(
            w.contour.load(std::sync::atomic::Ordering::Relaxed),
        );
        if !matches!(contour, super::pool::WriterContour::Active) {
            continue;
        }
        // Family check is on the writer's socket address, not config.
        let family_matches = match family {
            IpFamily::V4 => w.addr.is_ipv4(),
            IpFamily::V6 => w.addr.is_ipv6(),
        };
        if family_matches {
            count += 1;
        }
    }
    count
}
fn adaptive_floor_class_min(
pool: &Arc<MePool>,
endpoint_count: usize,
@@ -916,6 +904,8 @@ async fn build_family_floor_plan(
live_addr_counts: &HashMap<(i32, SocketAddr), usize>,
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
bound_clients_by_writer: &HashMap<u64, usize>,
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
) -> FamilyFloorPlan {
let mut entries = Vec::<DcFloorPlanEntry>::new();
let mut by_dc = HashMap::<i32, DcFloorPlanEntry>::new();
@@ -931,7 +921,18 @@ async fn build_family_floor_plan(
if endpoints.is_empty() {
continue;
}
let _key = (*dc, family);
let key = (*dc, family);
let reduce_for_idle = should_reduce_floor_for_idle(
pool,
key,
*dc,
endpoints,
live_writer_ids_by_addr,
bound_clients_by_writer,
adaptive_idle_since,
adaptive_recover_until,
)
.await;
let base_required = pool.required_writers_for_dc(endpoints.len()).max(1);
let min_required = if is_adaptive {
adaptive_floor_class_min(pool, endpoints.len(), base_required)
@@ -946,11 +947,11 @@ async fn build_family_floor_plan(
if max_required < min_required {
max_required = min_required;
}
// We initialize target_required at base_required to prevent 0-writer blackouts
// caused by proactively dropping an idle DC to a single fragile connection.
// The Adaptive Floor constraint loop below will gracefully compress idle DCs
// (prioritized via has_bound_clients = false) to min_required only when global capacity is reached.
let desired_raw = base_required;
let desired_raw = if is_adaptive && reduce_for_idle {
min_required
} else {
base_required
};
let target_required = desired_raw.clamp(min_required, max_required);
let alive = endpoints
.iter()
@@ -1277,6 +1278,43 @@ async fn maybe_refresh_idle_writer_for_dc(
);
}
/// Decides whether the adaptive floor for `(dc, family)` may be lowered to
/// its minimum because the DC has had no bound clients for long enough.
///
/// State kept across ticks in the two caller-owned maps:
/// - `adaptive_idle_since`: when the DC was first observed with no bound
///   clients (cleared whenever clients appear or a grace window is active).
/// - `adaptive_recover_until`: deadline of a recovery grace window, re-armed
///   every tick while clients are bound, so the window only starts counting
///   down after the last client unbinds.
///
/// Returns `true` only in Adaptive floor mode, outside the grace window,
/// once the idle timer has run for the configured idle duration.
async fn should_reduce_floor_for_idle(
    pool: &Arc<MePool>,
    key: (i32, IpFamily),
    dc: i32,
    endpoints: &[SocketAddr],
    live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
    bound_clients_by_writer: &HashMap<u64, usize>,
    adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
    adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
) -> bool {
    // Static (non-adaptive) mode never reduces; drop stale adaptive state.
    if pool.floor_mode() != MeFloorMode::Adaptive {
        adaptive_idle_since.remove(&key);
        adaptive_recover_until.remove(&key);
        return false;
    }
    let now = Instant::now();
    let ids = list_writer_ids_for_endpoints(dc, endpoints, live_writer_ids_by_addr);
    if has_bound_clients_on_endpoint(&ids, bound_clients_by_writer) {
        // Clients are active: reset idle tracking and push the grace
        // deadline forward so it measures time since the last unbind.
        adaptive_idle_since.remove(&key);
        adaptive_recover_until.insert(key, now + pool.adaptive_floor_recover_grace_duration());
        return false;
    }
    let in_grace = adaptive_recover_until
        .get(&key)
        .is_some_and(|deadline| now < *deadline);
    if in_grace {
        // Keep the idle timer from starting while the grace window runs.
        adaptive_idle_since.remove(&key);
        return false;
    }
    adaptive_recover_until.remove(&key);
    // Start the idle timer on first idle observation, keep it otherwise.
    let idle_started = *adaptive_idle_since.entry(key).or_insert(now);
    now.saturating_duration_since(idle_started) >= pool.adaptive_floor_idle_duration()
}
fn has_bound_clients_on_endpoint(
writer_ids: &[u64],
bound_clients_by_writer: &HashMap<u64, usize>,
@@ -1326,7 +1364,6 @@ async fn recover_single_endpoint_outage(
);
return;
};
pool.stats.increment_me_reconnect_attempt();
pool.stats
.increment_me_single_endpoint_outage_reconnect_attempt_total();
@@ -1402,6 +1439,7 @@ async fn recover_single_endpoint_outage(
return;
}
pool.stats.increment_me_reconnect_attempt();
let current_ms = *outage_backoff.get(&key).unwrap_or(&min_backoff_ms);
let next_ms = current_ms.saturating_mul(2).min(max_backoff_ms);
outage_backoff.insert(key, next_ms);

View File

@@ -1422,6 +1422,22 @@ impl MePool {
MeFloorMode::from_u8(self.floor_runtime.me_floor_mode.load(Ordering::Relaxed))
}
/// Idle window after which an adaptive floor may be reduced for a DC/family
/// with no bound clients; read atomically from the runtime floor config.
pub(super) fn adaptive_floor_idle_duration(&self) -> Duration {
    let secs = self
        .floor_runtime
        .me_adaptive_floor_idle_secs
        .load(Ordering::Relaxed);
    Duration::from_secs(secs)
}
/// Grace period that must elapse after the last bound client disappears
/// before idle-based floor reduction may begin; read atomically from the
/// runtime floor config.
pub(super) fn adaptive_floor_recover_grace_duration(&self) -> Duration {
    let secs = self
        .floor_runtime
        .me_adaptive_floor_recover_grace_secs
        .load(Ordering::Relaxed);
    Duration::from_secs(secs)
}
pub(super) fn adaptive_floor_min_writers_multi_endpoint(&self) -> usize {
(self
.floor_runtime
@@ -1643,7 +1659,6 @@ impl MePool {
&self,
contour: WriterContour,
allow_coverage_override: bool,
writer_dc: i32,
) -> bool {
let (active_writers, warm_writers, _) = self.non_draining_writer_counts_by_contour().await;
match contour {
@@ -1655,43 +1670,6 @@ impl MePool {
if !allow_coverage_override {
return false;
}
let mut endpoints_len = 0;
let now_epoch = Self::now_epoch_secs();
if self.family_enabled_for_drain_coverage(IpFamily::V4, now_epoch) {
if let Some(addrs) = self.proxy_map_v4.read().await.get(&writer_dc) {
endpoints_len += addrs.len();
}
}
if self.family_enabled_for_drain_coverage(IpFamily::V6, now_epoch) {
if let Some(addrs) = self.proxy_map_v6.read().await.get(&writer_dc) {
endpoints_len += addrs.len();
}
}
if endpoints_len > 0 {
let base_req =
self.required_writers_for_dc_with_floor_mode(endpoints_len, false);
let active_for_dc = {
let ws = self.writers.read().await;
ws.iter()
.filter(|w| {
!w.draining.load(std::sync::atomic::Ordering::Relaxed)
&& w.writer_dc == writer_dc
&& matches!(
WriterContour::from_u8(
w.contour.load(std::sync::atomic::Ordering::Relaxed),
),
WriterContour::Active
)
})
.count()
};
if active_for_dc < base_req {
return true;
}
}
let coverage_required = self.active_coverage_required_total().await;
active_writers < coverage_required
}

View File

@@ -77,12 +77,6 @@ impl MePool {
return Vec::new();
}
if endpoints.len() == 1 && self.single_endpoint_outage_disable_quarantine() {
let mut guard = self.endpoint_quarantine.lock().await;
guard.retain(|_, expiry| *expiry > Instant::now());
return endpoints.to_vec();
}
let mut guard = self.endpoint_quarantine.lock().await;
let now = Instant::now();
guard.retain(|_, expiry| *expiry > now);
@@ -242,18 +236,8 @@ impl MePool {
let fast_retries = self.reconnect_runtime.me_reconnect_fast_retry_count.max(1);
let mut total_attempts = 0u32;
let same_endpoint_quarantined = self.is_endpoint_quarantined(addr).await;
let dc_endpoints = self.endpoints_for_dc(writer_dc).await;
let single_endpoint_dc = dc_endpoints.len() == 1 && dc_endpoints[0] == addr;
let bypass_quarantine_for_single_endpoint =
single_endpoint_dc && self.single_endpoint_outage_disable_quarantine();
if !same_endpoint_quarantined || bypass_quarantine_for_single_endpoint {
if same_endpoint_quarantined && bypass_quarantine_for_single_endpoint {
debug!(
%addr,
"Bypassing quarantine for immediate reconnect on single-endpoint DC"
);
}
if !same_endpoint_quarantined {
for attempt in 0..fast_retries {
if total_attempts >= ME_REFILL_TOTAL_ATTEMPT_CAP {
break;
@@ -292,6 +276,7 @@ impl MePool {
);
}
let dc_endpoints = self.endpoints_for_dc(writer_dc).await;
if dc_endpoints.is_empty() {
self.stats.increment_me_refill_failed_total();
return false;

View File

@@ -342,7 +342,7 @@ impl MePool {
allow_coverage_override: bool,
) -> Result<()> {
if !self
.can_open_writer_for_contour(contour, allow_coverage_override, writer_dc)
.can_open_writer_for_contour(contour, allow_coverage_override)
.await
{
return Err(ProxyError::Proxy(format!(

View File

@@ -109,16 +109,18 @@ async fn connectable_endpoints_waits_until_quarantine_expires() {
{
let mut guard = pool.endpoint_quarantine.lock().await;
guard.insert(addr, Instant::now() + Duration::from_millis(500));
guard.insert(addr, Instant::now() + Duration::from_millis(80));
}
let endpoints = tokio::time::timeout(
Duration::from_millis(120),
pool.connectable_endpoints_for_test(&[addr]),
)
.await
.expect("single-endpoint outage mode should bypass quarantine delay");
let started = Instant::now();
let endpoints = pool.connectable_endpoints_for_test(&[addr]).await;
let elapsed = started.elapsed();
assert_eq!(endpoints, vec![addr]);
assert!(
elapsed >= Duration::from_millis(50),
"single-endpoint DC should honor quarantine before retry"
);
}
#[tokio::test]