Compare commits

..

73 Commits

Author SHA1 Message Date
Alexey a383efcb21 Bounded Hybrid Loop + Watch + Family ArcSwap Snapshots + Health in Parallel + ArcSwap Writers + Registry Split + Endpoint on ArcSwap + New Backpressure Model + ME Decomposition: merge pull request #586 from telemt/flow
Bounded Hybrid Loop + Watch + Family ArcSwap Snapshots + Health in Parallel + ArcSwap Writers + Registry Split + Endpoint on ArcSwap + New Backpressure Model + ME Decomposition
2026-03-26 02:31:18 +03:00
Alexey cb5753f77c Update admission.rs
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-26 02:19:35 +03:00
Alexey 7a075b2ffe Middle Relay fixes
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-26 02:18:39 +03:00
Alexey 7de822dd15 RPC Proxy-req fixes
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 22:51:00 +03:00
Alexey 1bbf4584a6 Merge branch 'main' into flow 2026-03-25 22:25:58 +03:00
Alexey 70479c4094 Unexpected-only Quarantine
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 22:25:39 +03:00
Alexey b94746a6e0 Dashmap-driven Routing + Health Parallel + Family Runtime State
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 21:26:20 +03:00
Alexey ceae1564af Floor Runtime + Writer Selection Policy + Reconnect/Warmup + TransportPolicy + NAT Runtime Cores
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 20:55:20 +03:00
Alexey 7ce5fc66db ME Reinit Core advancing + Binding Policy Core
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 20:35:57 +03:00
Alexey 41493462a1 Drain + Single-Endpoint Runtime Cores
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 20:29:22 +03:00
Alexey 6ee4d4648c ME Health Core
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 20:01:44 +03:00
Alexey 97f6649584 ME Route Runtime Core
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 19:56:25 +03:00
Alexey dc6b6d3f9d ME Writer Lifecycle Core
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 19:47:41 +03:00
Alexey 1c3e0d4e46 ME Reinit Core
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 19:43:02 +03:00
Alexey 0b78583cf5 ME Routing Core
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 18:18:06 +03:00
Alexey 28d318d724 ME Writer Task Consolidation
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 17:59:54 +03:00
Alexey 70c2f0f045 RoutingTable + BindingState
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 17:50:44 +03:00
Alexey b9b1271f14 Merge pull request #584 from Dimasssss/patch-3
Update CONFIG_PARAMS, QUICK_START_GUIDE and FAQ
2026-03-25 17:44:59 +03:00
Dimasssss 3c734bd811 Update FAQ.en.md 2026-03-25 17:42:16 +03:00
Dimasssss 6391df0583 Update FAQ.ru.md 2026-03-25 17:42:07 +03:00
Dimasssss 6a781c8bc3 Update QUICK_START_GUIDE.en.md 2026-03-25 17:40:45 +03:00
Dimasssss 138652af8e Update QUICK_START_GUIDE.ru.md 2026-03-25 17:40:16 +03:00
Dimasssss 59157d31a6 Update CONFIG_PARAMS.en.md 2026-03-25 17:37:01 +03:00
Alexey 8bab3f70e1 WritersState on ArcSwap + Preferred Endpoint on ArcSwap + Two-map Rotation for Desync Dedup
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 17:25:35 +03:00
Alexey 41d786cc11 Safety Gates Invariants + HybridAsyncPersistent + Watch + Runtime Snapshots + ME Writer Ping Tracker + Parallel Recovery + Backpressure Guardrails
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 16:29:35 +03:00
Alexey c43de1bd2a Update release.yml 2026-03-24 22:36:25 +03:00
Alexey 101efe45b7 Update Dockerfile 2026-03-24 22:36:20 +03:00
Alexey 11df61c6ac Update release.yml 2026-03-24 22:18:34 +03:00
Alexey 08684bcbd2 Update Cargo.toml 2026-03-24 22:03:12 +03:00
Alexey 744fb4425f TLS Validator: Unknown SNI as WARN in Log: merge pull request #579 from telemt/flow
TLS Validator: Unknown SNI as WARN in Log
2026-03-24 22:01:09 +03:00
Alexey 80cb1bc221 Merge branch 'main' into flow 2026-03-24 22:00:51 +03:00
Alexey 8461556b02 Update release.yml 2026-03-24 22:00:32 +03:00
Alexey cfd516edf3 Update Cargo.toml 2026-03-24 21:41:14 +03:00
Alexey 803c2c0492 Update release.yml 2026-03-24 21:40:53 +03:00
Alexey b762bd029f Merge branch 'main' into flow 2026-03-24 21:18:54 +03:00
Alexey 761679d306 Update test.yml 2026-03-24 21:18:13 +03:00
Alexey 41668b153d Update test.yml 2026-03-24 21:14:12 +03:00
Alexey 1d2f88ad29 Merge branch 'main' into flow 2026-03-24 21:11:11 +03:00
Alexey 80917f5abc Update test.yml 2026-03-24 21:10:56 +03:00
Alexey dc61d300ab Bump 2026-03-24 21:02:43 +03:00
Alexey ae16080de5 TLS Validator: Unknown SNI as WARN in Log 2026-03-24 21:01:41 +03:00
Alexey b8ca1fc166 Update Dockerfile 2026-03-24 20:55:32 +03:00
Alexey f9986944df Update release.yml 2026-03-24 20:53:56 +03:00
Alexey cb877c2bc3 Update release profile settings for better optimization: merge pull request #574 from vladon/main
Update release profile settings for better optimization
2026-03-24 14:10:04 +03:00
Vladislav Yaroslavlev 4426082c17 Update release profile settings for better optimization 2026-03-24 14:01:49 +03:00
Alexey 22097f8c7c Update Dockerfile 2026-03-24 11:46:49 +03:00
Alexey 1450af60a0 Update Dockerfile 2026-03-24 11:41:53 +03:00
Alexey f1cc8d65f2 Update release.yml 2026-03-24 11:12:03 +03:00
Alexey ec7e808daf Update release.yml 2026-03-24 11:05:50 +03:00
Alexey e4b7e23e76 New TLS-Fetcher + TLS SNI Validator + Upstream-driver getProxySecret/Config + Workflow Tunings + Redesign Quotas on Atomics + Tests Swap: merge pull request #569 from telemt/flow
New TLS-Fetcher + TLS SNI Validator + Upstream-driver getProxySecret/Config + Workflow Tunings + Redesign Quotas on Atomics + Tests Swap
2026-03-24 10:56:15 +03:00
Alexey 8b92b80b4a Rustls CryptoProvider fixes + Rustfmt 2026-03-24 10:33:06 +03:00
Alexey f7868aa00f Advanced TLS Fetcher
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-24 09:58:24 +03:00
Alexey 655a08fa5c TLS Fetcher fixes 2026-03-23 23:12:50 +03:00
Alexey 8bc432db49 Rustfmt 2026-03-23 23:00:46 +03:00
Alexey a40d6929e5 Upstream-driver getProxySecret and getProxyConfig
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-23 22:41:17 +03:00
Alexey 8db566dbe9 TLS Validator
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-23 21:58:39 +03:00
Alexey bb71de0230 Missing proxy_protocol_trusted_cidrs as trust-
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-23 20:54:58 +03:00
Alexey 62a258f8e3 Update test.yml 2026-03-23 20:49:17 +03:00
Alexey c868eaae74 Update test.yml 2026-03-23 20:36:25 +03:00
Alexey 8e1860f912 Update test.yml 2026-03-23 20:34:59 +03:00
Alexey 814bef9d99 Rustfmt 2026-03-23 20:32:55 +03:00
Alexey 3ceda15073 Update relay_quota_model_adversarial_tests.rs
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-23 20:18:18 +03:00
Alexey a3a6ea2880 Update relay_quota_overflow_regression_tests.rs
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-23 20:06:11 +03:00
Alexey 24156b5067 Workflow for Docker and correct binary naming
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-23 17:42:18 +03:00
Alexey a1dfa5b11d Merge branch 'flow' of https://github.com/telemt/telemt into flow 2026-03-23 17:05:26 +03:00
Alexey 800356c751 Rewiring tests
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-23 17:04:47 +03:00
Alexey 1546b012a6 Merge pull request #568 from avbor/main
DOCS: Update VPS_DOUBLE_HOP.*.md - AmneziaWG 2.0
2026-03-23 16:49:57 +03:00
Alexey e6b77af931 Workflows Swap
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-23 16:49:23 +03:00
Alexey 8cfaab9320 Fixes in tests
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-23 16:39:49 +03:00
Alexey 2d69b9d0ae New wave of tests
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-23 16:39:23 +03:00
Alexander 41c2b4de65 Update VPS_DOUBLE_HOP.en.md
Added S3-S4 parameters for AWG and update AWG generator.
2026-03-23 16:30:37 +03:00
Alexander 0a5e8a09fd Update VPS_DOUBLE_HOP.ru.md
Added S3-S4 parameters for AWG and update AWG generator.
2026-03-23 16:29:08 +03:00
Alexander e46d2cfc52 Update VPS_DOUBLE_HOP.ru.md
Fix typo
2026-03-22 21:59:20 +03:00
111 changed files with 5843 additions and 2145 deletions
+39
View File
@@ -0,0 +1,39 @@
name: Build
on:
push:
branches: [ "*" ]
pull_request:
branches: [ "*" ]
env:
CARGO_TERM_COLOR: always
jobs:
build:
name: Build
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install latest stable Rust toolchain
uses: dtolnay/rust-toolchain@stable
- name: Cache cargo registry & build artifacts
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-
- name: Build Release
run: cargo build --release --verbose
+230 -110
View File
@@ -5,35 +5,87 @@ on:
tags:
- '[0-9]+.[0-9]+.[0-9]+'
workflow_dispatch:
inputs:
tag:
description: 'Release tag (example: 3.3.15)'
required: true
type: string
concurrency:
group: release-${{ github.ref }}
group: release-${{ github.ref_name }}-${{ github.event.inputs.tag || 'auto' }}
cancel-in-progress: true
permissions:
contents: read
packages: write
env:
CARGO_TERM_COLOR: always
BINARY_NAME: telemt
jobs:
# ==========================
# GNU / glibc
# ==========================
build-gnu:
name: GNU ${{ matrix.target }}
prepare:
name: Prepare
runs-on: ubuntu-latest
outputs:
version: ${{ steps.vars.outputs.version }}
prerelease: ${{ steps.vars.outputs.prerelease }}
steps:
- name: Resolve version
id: vars
shell: bash
run: |
set -euo pipefail
if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
VERSION="${{ github.event.inputs.tag }}"
else
VERSION="${GITHUB_REF#refs/tags/}"
fi
VERSION="${VERSION#refs/tags/}"
if [ -z "${VERSION}" ]; then
echo "Release version is empty" >&2
exit 1
fi
if [[ "${VERSION}" == *-* ]]; then
PRERELEASE=true
else
PRERELEASE=false
fi
echo "version=${VERSION}" >> "${GITHUB_OUTPUT}"
echo "prerelease=${PRERELEASE}" >> "${GITHUB_OUTPUT}"
# ==========================
# GNU / glibc
# ==========================
build-gnu:
name: GNU ${{ matrix.asset }}
runs-on: ubuntu-latest
needs: prepare
container:
image: rust:slim-bookworm
strategy:
fail-fast: false
matrix:
include:
- target: x86_64-unknown-linux-gnu
asset: telemt-x86_64-linux-gnu
cpu: baseline
- target: x86_64-unknown-linux-gnu
asset: telemt-x86_64-v3-linux-gnu
cpu: v3
- target: aarch64-unknown-linux-gnu
asset: telemt-aarch64-linux-gnu
cpu: generic
steps:
- uses: actions/checkout@v4
@@ -47,8 +99,8 @@ jobs:
- name: Install deps
run: |
sudo apt-get update
sudo apt-get install -y \
apt-get update
apt-get install -y \
build-essential \
clang \
lld \
@@ -59,53 +111,65 @@ jobs:
- uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
/usr/local/cargo/registry
/usr/local/cargo/git
target
key: gnu-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }}
key: gnu-${{ matrix.asset }}-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
gnu-${{ matrix.asset }}-
gnu-
- name: Build
shell: bash
run: |
set -euo pipefail
if [ "${{ matrix.target }}" = "aarch64-unknown-linux-gnu" ]; then
export CC=aarch64-linux-gnu-gcc
export CXX=aarch64-linux-gnu-g++
export CC_aarch64_unknown_linux_gnu=aarch64-linux-gnu-gcc
export CXX_aarch64_unknown_linux_gnu=aarch64-linux-gnu-g++
export RUSTFLAGS="-C linker=aarch64-linux-gnu-gcc"
export RUSTFLAGS="-C linker=aarch64-linux-gnu-gcc -C lto=fat -C panic=abort"
else
export CC=clang
export CXX=clang++
export CC_x86_64_unknown_linux_gnu=clang
export CXX_x86_64_unknown_linux_gnu=clang++
export RUSTFLAGS="-C linker=clang -C link-arg=-fuse-ld=lld"
if [ "${{ matrix.cpu }}" = "v3" ]; then
CPU_FLAGS="-C target-cpu=x86-64-v3"
else
CPU_FLAGS="-C target-cpu=x86-64"
fi
export RUSTFLAGS="-C linker=clang -C link-arg=-fuse-ld=lld -C lto=fat -C panic=abort ${CPU_FLAGS}"
fi
cargo build --release --target ${{ matrix.target }}
cargo build --release --target ${{ matrix.target }} -j "$(nproc)"
- name: Package
shell: bash
run: |
mkdir -p dist
BIN=target/${{ matrix.target }}/release/${{ env.BINARY_NAME }}
set -euo pipefail
cp "$BIN" dist/${{ env.BINARY_NAME }}-${{ matrix.target }}
mkdir -p dist
cp "target/${{ matrix.target }}/release/${{ env.BINARY_NAME }}" dist/telemt
cd dist
tar -czf ${{ matrix.asset }}.tar.gz ${{ env.BINARY_NAME }}-${{ matrix.target }}
sha256sum ${{ matrix.asset }}.tar.gz > ${{ matrix.asset }}.sha256
tar -czf "${{ matrix.asset }}.tar.gz" \
--owner=0 --group=0 --numeric-owner \
telemt
sha256sum "${{ matrix.asset }}.tar.gz" > "${{ matrix.asset }}.tar.gz.sha256"
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.asset }}
path: |
dist/${{ matrix.asset }}.tar.gz
dist/${{ matrix.asset }}.sha256
path: dist/*
# ==========================
# MUSL
# ==========================
# ==========================
# MUSL
# ==========================
build-musl:
name: MUSL ${{ matrix.target }}
name: MUSL ${{ matrix.asset }}
runs-on: ubuntu-latest
needs: prepare
container:
image: rust:slim-bookworm
@@ -116,8 +180,15 @@ jobs:
include:
- target: x86_64-unknown-linux-musl
asset: telemt-x86_64-linux-musl
cpu: baseline
- target: x86_64-unknown-linux-musl
asset: telemt-x86_64-v3-linux-musl
cpu: v3
- target: aarch64-unknown-linux-musl
asset: telemt-aarch64-linux-musl
cpu: generic
steps:
- uses: actions/checkout@v4
@@ -138,30 +209,29 @@ jobs:
- name: Install aarch64 musl toolchain
if: matrix.target == 'aarch64-unknown-linux-musl'
shell: bash
run: |
set -e
set -euo pipefail
TOOLCHAIN_DIR="$HOME/.musl-aarch64"
ARCHIVE="aarch64-linux-musl-cross.tgz"
URL="https://github.com/telemt/telemt/releases/download/toolchains/$ARCHIVE"
URL="https://github.com/telemt/telemt/releases/download/toolchains/${ARCHIVE}"
if [ -x "$TOOLCHAIN_DIR/bin/aarch64-linux-musl-gcc" ]; then
echo "MUSL toolchain already installed"
if [ -x "${TOOLCHAIN_DIR}/bin/aarch64-linux-musl-gcc" ]; then
echo "MUSL toolchain cached"
else
echo "⬇️ Downloading musl toolchain from Telemt GitHub Releases..."
curl -fL \
--retry 5 \
--retry-delay 3 \
--connect-timeout 10 \
--max-time 120 \
-o "$ARCHIVE" "$URL"
-o "${ARCHIVE}" "${URL}"
mkdir -p "$TOOLCHAIN_DIR"
tar -xzf "$ARCHIVE" --strip-components=1 -C "$TOOLCHAIN_DIR"
mkdir -p "${TOOLCHAIN_DIR}"
tar -xzf "${ARCHIVE}" --strip-components=1 -C "${TOOLCHAIN_DIR}"
fi
echo "$TOOLCHAIN_DIR/bin" >> $GITHUB_PATH
echo "${TOOLCHAIN_DIR}/bin" >> "${GITHUB_PATH}"
- name: Add rust target
run: rustup target add ${{ matrix.target }}
@@ -172,96 +242,62 @@ jobs:
/usr/local/cargo/registry
/usr/local/cargo/git
target
key: musl-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }}
key: musl-${{ matrix.asset }}-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
musl-${{ matrix.asset }}-
musl-
- name: Build
shell: bash
run: |
set -euo pipefail
if [ "${{ matrix.target }}" = "aarch64-unknown-linux-musl" ]; then
export CC=aarch64-linux-musl-gcc
export CC_aarch64_unknown_linux_musl=aarch64-linux-musl-gcc
export RUSTFLAGS="-C target-feature=+crt-static -C linker=aarch64-linux-musl-gcc"
export RUSTFLAGS="-C target-feature=+crt-static -C linker=aarch64-linux-musl-gcc -C lto=fat -C panic=abort"
else
export CC=musl-gcc
export CC_x86_64_unknown_linux_musl=musl-gcc
export RUSTFLAGS="-C target-feature=+crt-static"
if [ "${{ matrix.cpu }}" = "v3" ]; then
CPU_FLAGS="-C target-cpu=x86-64-v3"
else
CPU_FLAGS="-C target-cpu=x86-64"
fi
export RUSTFLAGS="-C target-feature=+crt-static -C lto=fat -C panic=abort ${CPU_FLAGS}"
fi
cargo build --release --target ${{ matrix.target }}
cargo build --release --target ${{ matrix.target }} -j "$(nproc)"
- name: Package
shell: bash
run: |
mkdir -p dist
BIN=target/${{ matrix.target }}/release/${{ env.BINARY_NAME }}
set -euo pipefail
cp "$BIN" dist/${{ env.BINARY_NAME }}-${{ matrix.target }}
mkdir -p dist
cp "target/${{ matrix.target }}/release/${{ env.BINARY_NAME }}" dist/telemt
cd dist
tar -czf ${{ matrix.asset }}.tar.gz ${{ env.BINARY_NAME }}-${{ matrix.target }}
sha256sum ${{ matrix.asset }}.tar.gz > ${{ matrix.asset }}.sha256
tar -czf "${{ matrix.asset }}.tar.gz" \
--owner=0 --group=0 --numeric-owner \
telemt
sha256sum "${{ matrix.asset }}.tar.gz" > "${{ matrix.asset }}.tar.gz.sha256"
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.asset }}
path: |
dist/${{ matrix.asset }}.tar.gz
dist/${{ matrix.asset }}.sha256
path: dist/*
# ==========================
# Docker
# ==========================
docker:
name: Docker
runs-on: ubuntu-latest
needs: [build-gnu, build-musl]
continue-on-error: true
steps:
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
with:
path: artifacts
- name: Extract binaries
run: |
mkdir dist
find artifacts -name "*.tar.gz" -exec tar -xzf {} -C dist \;
cp dist/telemt-x86_64-unknown-linux-musl dist/telemt || true
- uses: docker/setup-qemu-action@v3
- uses: docker/setup-buildx-action@v3
- name: Login to GHCR
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract version
id: vars
run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
- name: Build & Push
uses: docker/build-push-action@v6
with:
context: .
push: true
platforms: linux/amd64,linux/arm64
tags: |
ghcr.io/${{ github.repository }}:${{ steps.vars.outputs.VERSION }}
ghcr.io/${{ github.repository }}:latest
build-args: |
BINARY=dist/telemt
# ==========================
# Release
# ==========================
# ==========================
# Release
# ==========================
release:
name: Release
runs-on: ubuntu-latest
needs: [build-gnu, build-musl]
needs: [prepare, build-gnu, build-musl]
permissions:
contents: write
@@ -272,14 +308,98 @@ jobs:
path: artifacts
- name: Flatten artifacts
shell: bash
run: |
mkdir dist
set -euo pipefail
mkdir -p dist
find artifacts -type f -exec cp {} dist/ \;
- name: Create Release
- name: Create GitHub Release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ needs.prepare.outputs.version }}
target_commitish: ${{ github.sha }}
files: dist/*
generate_release_notes: true
draft: false
prerelease: ${{ contains(github.ref, '-rc') || contains(github.ref, '-beta') || contains(github.ref, '-alpha') }}
prerelease: ${{ needs.prepare.outputs.prerelease == 'true' }}
overwrite_files: true
# ==========================
# Docker
# ==========================
docker:
name: Docker
runs-on: ubuntu-latest
needs: [prepare, release]
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v4
- uses: docker/setup-qemu-action@v3
- uses: docker/setup-buildx-action@v3
- uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Probe release assets
shell: bash
env:
VERSION: ${{ needs.prepare.outputs.version }}
run: |
set -euo pipefail
for asset in \
telemt-x86_64-linux-musl.tar.gz \
telemt-x86_64-linux-musl.tar.gz.sha256 \
telemt-aarch64-linux-musl.tar.gz \
telemt-aarch64-linux-musl.tar.gz.sha256
do
curl -fsIL \
--retry 10 \
--retry-delay 3 \
"https://github.com/${GITHUB_REPOSITORY}/releases/download/${VERSION}/${asset}" \
> /dev/null
done
- name: Compute image tags
id: meta
shell: bash
env:
VERSION: ${{ needs.prepare.outputs.version }}
run: |
set -euo pipefail
IMAGE="$(echo "ghcr.io/${GITHUB_REPOSITORY}" | tr '[:upper:]' '[:lower:]')"
TAGS="${IMAGE}:${VERSION}"
if [[ "${VERSION}" != *-* ]]; then
TAGS="${TAGS}"$'\n'"${IMAGE}:latest"
fi
{
echo "tags<<EOF"
printf '%s\n' "${TAGS}"
echo "EOF"
} >> "${GITHUB_OUTPUT}"
- name: Build & Push
uses: docker/build-push-action@v6
with:
context: .
push: true
pull: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
build-args: |
TELEMT_REPOSITORY=${{ github.repository }}
TELEMT_VERSION=${{ needs.prepare.outputs.version }}
cache-from: type=gha
cache-to: type=gha,mode=max
-54
View File
@@ -1,54 +0,0 @@
name: Rust
on:
push:
branches: [ "*" ]
pull_request:
branches: [ "*" ]
env:
CARGO_TERM_COLOR: always
jobs:
build:
name: Compile, Test, Lint
runs-on: ubuntu-latest
permissions:
contents: read
actions: write
checks: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install latest stable Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt, clippy
- name: Cache cargo registry & build artifacts
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-
- name: Compile (no tests)
run: cargo check --workspace --all-features --lib --bins --verbose
- name: Run tests (single pass)
run: cargo test --workspace --all-features --verbose
# Clippy does not fail on warnings: telemt is under active development
# and still produces many warnings.
- name: Run clippy
run: cargo clippy -- --cap-lints warn
- name: Check for unused dependencies
run: cargo udeps || true
-57
View File
@@ -1,57 +0,0 @@
name: Stress Tests
on:
workflow_dispatch:
schedule:
- cron: '0 2 * * *'
pull_request:
branches: ["*"]
paths:
- src/proxy/**
- src/transport/**
- src/stream/**
- src/protocol/**
- src/tls_front/**
- Cargo.toml
- Cargo.lock
env:
CARGO_TERM_COLOR: always
jobs:
quota-lock-stress:
name: Quota-lock stress loop
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install latest stable Rust toolchain
uses: dtolnay/rust-toolchain@stable
- name: Cache cargo registry and build artifacts
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-stress-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-stress-
${{ runner.os }}-cargo-
- name: Run quota-lock stress suites
env:
RUST_TEST_THREADS: 16
run: |
set -euo pipefail
for i in $(seq 1 12); do
echo "[quota-lock-stress] iteration ${i}/12"
cargo test quota_lock_ --bin telemt -- --nocapture --test-threads 16
cargo test relay_quota_wake --bin telemt -- --nocapture --test-threads 16
done
+139
View File
@@ -0,0 +1,139 @@
name: Check
on:
push:
branches: [ "*" ]
pull_request:
branches: [ "*" ]
env:
CARGO_TERM_COLOR: always
concurrency:
group: test-${{ github.ref }}
cancel-in-progress: true
jobs:
# ==========================
# Formatting
# ==========================
fmt:
name: Fmt
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt
- run: cargo fmt -- --check
# ==========================
# Tests
# ==========================
test:
name: Test
runs-on: ubuntu-latest
permissions:
contents: read
actions: write
checks: write
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- name: Cache cargo
uses: actions/cache@v4
with:
path: |
~/.cargo/bin
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-nextest-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-nextest-
${{ runner.os }}-cargo-
- name: Install cargo-nextest
run: cargo install --locked cargo-nextest || true
- name: Run tests with nextest
run: cargo nextest run -j "$(nproc)"
# ==========================
# Clippy
# ==========================
clippy:
name: Clippy
runs-on: ubuntu-latest
permissions:
contents: read
checks: write
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
components: clippy
- name: Cache cargo
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-clippy-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-clippy-
${{ runner.os }}-cargo-
- name: Run clippy
run: cargo clippy -j "$(nproc)" -- --cap-lints warn
# ==========================
# Udeps
# ==========================
udeps:
name: Udeps
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
components: rust-src
- name: Cache cargo
uses: actions/cache@v4
with:
path: |
~/.cargo/bin
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-udeps-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-udeps-
${{ runner.os }}-cargo-
- name: Install cargo-udeps
run: cargo install --locked cargo-udeps || true
- name: Run udeps
run: cargo udeps -j "$(nproc)" || true
Generated
+1 -1
View File
@@ -2793,7 +2793,7 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
[[package]]
name = "telemt"
version = "3.3.30"
version = "3.3.32"
dependencies = [
"aes",
"anyhow",
+4 -2
View File
@@ -1,6 +1,6 @@
[package]
name = "telemt"
version = "3.3.30"
version = "3.3.32"
edition = "2024"
[features]
@@ -83,4 +83,6 @@ name = "crypto_bench"
harness = false
[profile.release]
lto = "thin"
lto = "fat"
codegen-units = 1
+60 -73
View File
@@ -1,111 +1,98 @@
# syntax=docker/dockerfile:1
# ==========================
# Stage 1: Build
# ==========================
FROM rust:1.88-slim-bookworm AS builder
RUN apt-get update && apt-get install -y --no-install-recommends \
pkg-config \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /build
# Depcache
COPY Cargo.toml Cargo.lock* ./
RUN mkdir src && echo 'fn main() {}' > src/main.rs && \
cargo build --release 2>/dev/null || true && \
rm -rf src
# Build
COPY . .
RUN cargo build --release && strip target/release/telemt
ARG TELEMT_REPOSITORY=telemt/telemt
ARG TELEMT_VERSION=latest
# ==========================
# Stage 2: Compress (strip + UPX)
# Minimal Image
# ==========================
FROM debian:12-slim AS minimal
RUN apt-get update && apt-get install -y --no-install-recommends \
binutils \
curl \
ca-certificates \
&& rm -rf /var/lib/apt/lists/* \
\
# install UPX from Telemt releases
&& curl -fL \
ARG TARGETARCH
ARG TELEMT_REPOSITORY
ARG TELEMT_VERSION
RUN set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends \
binutils \
ca-certificates \
curl \
tar; \
rm -rf /var/lib/apt/lists/*
RUN set -eux; \
case "${TARGETARCH}" in \
amd64) ASSET="telemt-x86_64-linux-musl.tar.gz" ;; \
arm64) ASSET="telemt-aarch64-linux-musl.tar.gz" ;; \
*) echo "Unsupported TARGETARCH: ${TARGETARCH}" >&2; exit 1 ;; \
esac; \
VERSION="${TELEMT_VERSION#refs/tags/}"; \
if [ -z "${VERSION}" ] || [ "${VERSION}" = "latest" ]; then \
BASE_URL="https://github.com/${TELEMT_REPOSITORY}/releases/latest/download"; \
else \
BASE_URL="https://github.com/${TELEMT_REPOSITORY}/releases/download/${VERSION}"; \
fi; \
curl -fL \
--retry 5 \
--retry-delay 3 \
--connect-timeout 10 \
--max-time 120 \
-o /tmp/upx.tar.xz \
https://github.com/telemt/telemt/releases/download/toolchains/upx-amd64_linux.tar.xz \
&& tar -xf /tmp/upx.tar.xz -C /tmp \
&& mv /tmp/upx*/upx /usr/local/bin/upx \
&& chmod +x /usr/local/bin/upx \
&& rm -rf /tmp/upx*
COPY --from=builder /build/target/release/telemt /telemt
RUN strip /telemt || true
RUN upx --best --lzma /telemt || true
-o "/tmp/${ASSET}" \
"${BASE_URL}/${ASSET}"; \
curl -fL \
--retry 5 \
--retry-delay 3 \
--connect-timeout 10 \
--max-time 120 \
-o "/tmp/${ASSET}.sha256" \
"${BASE_URL}/${ASSET}.sha256"; \
cd /tmp; \
sha256sum -c "${ASSET}.sha256"; \
tar -xzf "${ASSET}" -C /tmp; \
test -f /tmp/telemt; \
install -m 0755 /tmp/telemt /telemt; \
strip --strip-unneeded /telemt || true; \
rm -f "/tmp/${ASSET}" "/tmp/${ASSET}.sha256" /tmp/telemt
# ==========================
# Stage 3: Debug base
# Debug Image
# ==========================
FROM debian:12-slim AS debug-base
FROM debian:12-slim AS debug
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
tzdata \
curl \
iproute2 \
busybox \
&& rm -rf /var/lib/apt/lists/*
# ==========================
# Stage 4: Debug image
# ==========================
FROM debug-base AS debug
RUN set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends \
ca-certificates \
tzdata \
curl \
iproute2 \
busybox; \
rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY --from=minimal /telemt /app/telemt
COPY config.toml /app/config.toml
USER root
EXPOSE 443
EXPOSE 9090
EXPOSE 9091
EXPOSE 443 9090 9091
ENTRYPOINT ["/app/telemt"]
CMD ["config.toml"]
# ==========================
# Stage 5: Production (distroless)
# Production Distroless on MUSL
# ==========================
FROM gcr.io/distroless/base-debian12 AS prod
FROM gcr.io/distroless/static-debian12 AS prod
WORKDIR /app
COPY --from=minimal /telemt /app/telemt
COPY config.toml /app/config.toml
# TLS + timezone + shell
COPY --from=debug-base /etc/ssl/certs /etc/ssl/certs
COPY --from=debug-base /usr/share/zoneinfo /usr/share/zoneinfo
COPY --from=debug-base /bin/busybox /bin/busybox
RUN ["/bin/busybox", "--install", "-s", "/bin"]
# distroless user
USER nonroot:nonroot
EXPOSE 443
EXPOSE 9090
EXPOSE 9091
EXPOSE 443 9090 9091
ENTRYPOINT ["/app/telemt"]
CMD ["config.toml"]
+21
View File
@@ -50,6 +50,8 @@ This document lists all configuration keys accepted by `config.toml`.
| me_d2c_flush_batch_max_bytes | `usize` | `131072` | `4096..=2_097_152`. | Max ME->client payload bytes coalesced before flush. |
| me_d2c_flush_batch_max_delay_us | `u64` | `500` | `0..=5000`. | Max microsecond wait for coalescing more ME->client frames (`0` disables timed coalescing). |
| me_d2c_ack_flush_immediate | `bool` | `true` | — | Flushes client writer immediately after quick-ack write. |
| me_quota_soft_overshoot_bytes | `u64` | `65536` | `0..=16_777_216`. | Extra per-route quota allowance (bytes) tolerated before writer-side quota enforcement drops route data. |
| me_d2c_frame_buf_shrink_threshold_bytes | `usize` | `262144` | `4096..=16_777_216`. | Threshold for shrinking oversized ME->client frame-aggregation buffers after flush. |
| direct_relay_copy_buf_c2s_bytes | `usize` | `65536` | `4096..=1_048_576`. | Copy buffer size for client->DC direction in direct relay. |
| direct_relay_copy_buf_s2c_bytes | `usize` | `262144` | `8192..=2_097_152`. | Copy buffer size for DC->client direction in direct relay. |
| crypto_pending_buffer | `usize` | `262144` | — | Max pending ciphertext buffer per client writer (bytes). |
@@ -243,6 +245,10 @@ Note: When `server.proxy_protocol` is enabled, incoming PROXY protocol headers a
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| client_handshake | `u64` | `30` | — | Client handshake timeout. |
| relay_idle_policy_v2_enabled | `bool` | `true` | — | Enables soft/hard middle-relay client idle policy. |
| relay_client_idle_soft_secs | `u64` | `120` | Must be `> 0`; must be `<= relay_client_idle_hard_secs`. | Soft idle threshold for middle-relay client uplink inactivity (seconds). |
| relay_client_idle_hard_secs | `u64` | `360` | Must be `> 0`; must be `>= relay_client_idle_soft_secs`. | Hard idle threshold for middle-relay client uplink inactivity (seconds). |
| relay_idle_grace_after_downstream_activity_secs | `u64` | `30` | Must be `<= relay_client_idle_hard_secs`. | Extra hard-idle grace after recent downstream activity (seconds). |
| tg_connect | `u64` | `10` | — | Upstream Telegram connect timeout. |
| client_keepalive | `u64` | `15` | — | Client keepalive timeout. |
| client_ack | `u64` | `90` | — | Client ACK timeout. |
@@ -255,6 +261,9 @@ Note: When `server.proxy_protocol` is enabled, incoming PROXY protocol headers a
|---|---|---|---|---|
| tls_domain | `String` | `"petrovich.ru"` | — | Primary TLS domain used in fake TLS handshake profile. |
| tls_domains | `String[]` | `[]` | — | Additional TLS domains for generating multiple links. |
| unknown_sni_action | `"drop" \| "mask"` | `"drop"` | — | Action for TLS ClientHello with unknown/non-configured SNI. |
| tls_fetch_scope | `String` | `""` | Value is trimmed during load; empty keeps default upstream routing behavior. | Upstream scope tag used for TLS-front metadata fetches. |
| tls_fetch | `Table` | built-in defaults | See `[censorship.tls_fetch]` section below. | TLS-front metadata fetch strategy settings. |
| mask | `bool` | `true` | — | Enables masking/fronting relay mode. |
| mask_host | `String \| null` | `null` | — | Upstream mask host for TLS fronting relay. |
| mask_port | `u16` | `443` | — | Upstream mask port for TLS fronting relay. |
@@ -280,6 +289,18 @@ Note: When `server.proxy_protocol` is enabled, incoming PROXY protocol headers a
| mask_timing_normalization_floor_ms | `u64` | `0` | Must be `> 0` when timing normalization is enabled; must be `<= ceiling`. | Lower bound (ms) for masking outcome normalization target. |
| mask_timing_normalization_ceiling_ms | `u64` | `0` | Must be `>= floor`; must be `<= 60000`. | Upper bound (ms) for masking outcome normalization target. |
## [censorship.tls_fetch]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| profiles | `("modern_chrome_like" \| "modern_firefox_like" \| "compat_tls12" \| "legacy_minimal")[]` | `["modern_chrome_like", "modern_firefox_like", "compat_tls12", "legacy_minimal"]` | Empty list falls back to defaults; values are deduplicated preserving order. | Ordered ClientHello profile fallback chain for TLS-front metadata fetch. |
| strict_route | `bool` | `true` | — | Fails closed on upstream-route connect errors instead of falling back to direct TCP when route is configured. |
| attempt_timeout_ms | `u64` | `5000` | Must be `> 0`. | Timeout budget per one TLS-fetch profile attempt (ms). |
| total_budget_ms | `u64` | `15000` | Must be `> 0`. | Total wall-clock budget across all TLS-fetch attempts (ms). |
| grease_enabled | `bool` | `false` | — | Enables GREASE-style random values in selected ClientHello extensions for fetch traffic. |
| deterministic | `bool` | `false` | — | Enables deterministic ClientHello randomness for debugging/tests. |
| profile_cache_ttl_secs | `u64` | `600` | `0` disables cache. | TTL for winner-profile cache entries used by TLS fetch path. |
### Shape-channel hardening notes (`[censorship]`)
These parameters are designed to reduce one specific fingerprint source during masking: the exact number of bytes sent from proxy to `mask_host` for invalid or probing traffic.
+4 -1
View File
@@ -63,9 +63,12 @@ user3 = "00000000000000000000000000000003"
curl -s http://127.0.0.1:9091/v1/users | jq
```
## "Unknown TLS SNI" Error
You probably updated tls_domain, but users are still connecting via old links with the previous domain.
## How to view metrics
1. Open the config `nano /etc/telemt.toml`
1. Open the config `nano /etc/telemt/telemt.toml`
2. Add the following parameters
```toml
[server]
+4 -1
View File
@@ -64,9 +64,12 @@ user3 = "00000000000000000000000000000003"
curl -s http://127.0.0.1:9091/v1/users | jq
```
## Ошибка "Unknown TLS SNI"
Возможно, вы обновили tls_domain, но пользователи всё ещё пытаются подключаться по старым ссылкам с прежним доменом.
## Как посмотреть метрики
1. Открыть конфиг `nano /etc/telemt.toml`
1. Открыть конфиг `nano /etc/telemt/telemt.toml`
2. Добавить следующие параметры
```toml
[server]
+13 -9
View File
@@ -27,12 +27,12 @@ chmod +x /bin/telemt
**0. Check port and generate secrets**
The port you have selected for use should be MISSING from the list, when:
The port you have selected for use should not be in the list:
```bash
netstat -lnp
```
Generate 16 bytes/32 characters HEX with OpenSSL or another way:
Generate 16 bytes/32 characters in HEX format with OpenSSL or another way:
```bash
openssl rand -hex 16
```
@@ -50,7 +50,7 @@ Save the obtained result somewhere. You will need it later!
**1. Place your config to /etc/telemt/telemt.toml**
Create config directory:
Create the config directory:
```bash
mkdir /etc/telemt
```
@@ -59,7 +59,7 @@ Open nano
```bash
nano /etc/telemt/telemt.toml
```
paste your config
Insert your configuration:
```toml
# === General Settings ===
@@ -93,8 +93,9 @@ hello = "00000000000000000000000000000000"
then Ctrl+S -> Ctrl+X to save
> [!WARNING]
> Replace the value of the hello parameter with the value you obtained in step 0.
> Replace the value of the tls_domain parameter with another website.
> Replace the value of the hello parameter with the value you obtained in step 0.
> Additionally, change the value of the tls_domain parameter to a different website.
> Changing the tls_domain parameter will break all links that use the old domain!
---
@@ -105,14 +106,14 @@ useradd -d /opt/telemt -m -r -U telemt
chown -R telemt:telemt /etc/telemt
```
**3. Create service on /etc/systemd/system/telemt.service**
**3. Create service in /etc/systemd/system/telemt.service**
Open nano
```bash
nano /etc/systemd/system/telemt.service
```
paste this Systemd Module
Insert this Systemd module:
```bash
[Unit]
Description=Telemt
@@ -147,13 +148,16 @@ systemctl daemon-reload
**6.** For automatic startup at system boot, enter `systemctl enable telemt`
**7.** To get the link(s), enter
**7.** To get the link(s), enter:
```bash
curl -s http://127.0.0.1:9091/v1/users | jq
```
> Any number of people can use one link.
> [!WARNING]
> Only the command from step 7 can provide a working link. Do not try to create it yourself or copy it from anywhere if you are not sure what you are doing!
---
# Telemt via Docker Compose
+1
View File
@@ -95,6 +95,7 @@ hello = "00000000000000000000000000000000"
> [!WARNING]
> Замените значение параметра hello на значение, которое вы получили в пункте 0.
> Также замените значение параметра tls_domain на другой сайт.
> Изменение параметра tls_domain сделает нерабочими все ссылки, использующие старый домен!
---
+5 -1
View File
@@ -63,7 +63,7 @@ recommended range from 5 to 2147483647 inclusive
> [!IMPORTANT]
> It is recommended to use your own, unique values.\
> You can use the [generator](https://htmlpreview.github.io/?https://gist.githubusercontent.com/avbor/955782b5c37b06240b243aa375baeac5/raw/e8b269ff0089a27effd88f8d925179b78e5666c4/awg-gen.html) to select parameters.
> You can use the [generator](https://htmlpreview.github.io/?https://gist.githubusercontent.com/avbor/955782b5c37b06240b243aa375baeac5/raw/13f5517ca473b47c412b9a99407066de973732bd/awg-gen.html) to select parameters.
#### Server B Configuration (Netherlands):
@@ -84,6 +84,8 @@ Jmin = 8
Jmax = 80
S1 = 29
S2 = 15
S3 = 18
S4 = 0
H1 = 2087563914
H2 = 188817757
H3 = 101784570
@@ -121,6 +123,8 @@ Jmin = 8
Jmax = 80
S1 = 29
S2 = 15
S3 = 18
S4 = 0
H1 = 2087563914
H2 = 188817757
H3 = 101784570
+7 -3
View File
@@ -44,7 +44,7 @@ awg genkey | tee private.key | awg pubkey > public.key
Параметры обфускации `S1`, `S2`, `H1`, `H2`, `H3`, `H4` должны быть строго идентичными на обоих серверах.\
Параметры `Jc`, `Jmin` и `Jmax` могут отличаться.\
Параметры `I1-I5` [(Custom Protocol Signature)](https://docs.amnezia.org/documentation/amnezia-wg/) нужно указывать на стороне _клиента_ (Сервер **А**).
Параметры `I1-I5` ([Custom Protocol Signature](https://docs.amnezia.org/documentation/amnezia-wg/)) нужно указывать на стороне _клиента_ (Сервер **А**).
Рекомендации по выбору значений:
```text
@@ -62,7 +62,7 @@ H1/H2/H3/H4 — должны быть уникальны и отличаться
```
> [!IMPORTANT]
> Рекомендуется использовать собственные, уникальные значения.\
> Для выбора параметров можете воспользоваться [генератором](https://htmlpreview.github.io/?https://gist.githubusercontent.com/avbor/955782b5c37b06240b243aa375baeac5/raw/e8b269ff0089a27effd88f8d925179b78e5666c4/awg-gen.html).
> Для выбора параметров можете воспользоваться [генератором](https://htmlpreview.github.io/?https://gist.githubusercontent.com/avbor/955782b5c37b06240b243aa375baeac5/raw/13f5517ca473b47c412b9a99407066de973732bd/awg-gen.html).
#### Конфигурация Сервера B (_Нидерланды_):
@@ -83,6 +83,8 @@ Jmin = 8
Jmax = 80
S1 = 29
S2 = 15
S3 = 18
S4 = 0
H1 = 2087563914
H2 = 188817757
H3 = 101784570
@@ -121,6 +123,8 @@ Jmin = 8
Jmax = 80
S1 = 29
S2 = 15
S3 = 18
S4 = 0
H1 = 2087563914
H2 = 188817757
H3 = 101784570
@@ -272,7 +276,7 @@ backend telemt_nodes
```
>[!WARNING]
>**Файл должен заканчиваться пустой строкой, иначе HAProxy не запуститься!**
>**Файл должен заканчиваться пустой строкой, иначе HAProxy не запустится!**
#### Разрешаем порт 443\tcp в фаерволе (если включен)
```bash
+18
View File
@@ -35,11 +35,14 @@ pub(super) struct RuntimeGatesData {
pub(super) conditional_cast_enabled: bool,
pub(super) me_runtime_ready: bool,
pub(super) me2dc_fallback_enabled: bool,
pub(super) me2dc_fast_enabled: bool,
pub(super) use_middle_proxy: bool,
pub(super) route_mode: &'static str,
pub(super) reroute_active: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) reroute_to_direct_at_epoch_secs: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) reroute_reason: Option<&'static str>,
pub(super) startup_status: &'static str,
pub(super) startup_stage: String,
pub(super) startup_progress_pct: f64,
@@ -86,6 +89,7 @@ pub(super) struct EffectiveMiddleProxyLimits {
pub(super) writer_pick_mode: &'static str,
pub(super) writer_pick_sample_size: u8,
pub(super) me2dc_fallback: bool,
pub(super) me2dc_fast: bool,
}
#[derive(Serialize)]
@@ -169,6 +173,8 @@ pub(super) async fn build_runtime_gates_data(
let startup_summary = build_runtime_startup_summary(shared).await;
let route_state = shared.route_runtime.snapshot();
let route_mode = route_state.mode.as_str();
let fast_fallback_enabled =
cfg.general.use_middle_proxy && cfg.general.me2dc_fallback && cfg.general.me2dc_fast;
let reroute_active = cfg.general.use_middle_proxy
&& cfg.general.me2dc_fallback
&& matches!(route_state.mode, RelayRouteMode::Direct);
@@ -177,6 +183,15 @@ pub(super) async fn build_runtime_gates_data(
} else {
None
};
let reroute_reason = if reroute_active {
if fast_fallback_enabled {
Some("fast_not_ready_fallback")
} else {
Some("strict_grace_fallback")
}
} else {
None
};
let me_runtime_ready = if !cfg.general.use_middle_proxy {
true
} else {
@@ -194,10 +209,12 @@ pub(super) async fn build_runtime_gates_data(
conditional_cast_enabled: cfg.general.use_middle_proxy,
me_runtime_ready,
me2dc_fallback_enabled: cfg.general.me2dc_fallback,
me2dc_fast_enabled: fast_fallback_enabled,
use_middle_proxy: cfg.general.use_middle_proxy,
route_mode,
reroute_active,
reroute_to_direct_at_epoch_secs,
reroute_reason,
startup_status: startup_summary.status,
startup_stage: startup_summary.stage,
startup_progress_pct: startup_summary.progress_pct,
@@ -263,6 +280,7 @@ pub(super) fn build_limits_effective_data(cfg: &ProxyConfig) -> EffectiveLimitsD
writer_pick_mode: me_writer_pick_mode_label(cfg.general.me_writer_pick_mode),
writer_pick_sample_size: cfg.general.me_writer_pick_sample_size,
me2dc_fallback: cfg.general.me2dc_fallback,
me2dc_fast: cfg.general.me2dc_fast,
},
user_ip_policy: EffectiveUserIpPolicyLimits {
global_each: cfg.access.user_max_unique_ips_global_each,
+24
View File
@@ -71,6 +71,22 @@ pub(crate) fn default_tls_fetch_scope() -> String {
String::new()
}
/// Default for `censorship.tls_fetch.attempt_timeout_ms`: the timeout budget
/// for a single TLS-fetch profile attempt, in milliseconds.
pub(crate) fn default_tls_fetch_attempt_timeout_ms() -> u64 {
    const ATTEMPT_TIMEOUT_MS: u64 = 5_000;
    ATTEMPT_TIMEOUT_MS
}
/// Default for `censorship.tls_fetch.total_budget_ms`: the total wall-clock
/// budget across all TLS-fetch profile attempts, in milliseconds.
pub(crate) fn default_tls_fetch_total_budget_ms() -> u64 {
    const TOTAL_BUDGET_MS: u64 = 15_000;
    TOTAL_BUDGET_MS
}
/// Default for `censorship.tls_fetch.strict_route`: strict routing is on, so
/// TLS fetch fails closed instead of falling back to direct TCP.
pub(crate) fn default_tls_fetch_strict_route() -> bool {
    const STRICT_ROUTE: bool = true;
    STRICT_ROUTE
}
/// Default for `censorship.tls_fetch.profile_cache_ttl_secs`: TTL of
/// winner-profile cache entries, in seconds.
pub(crate) fn default_tls_fetch_profile_cache_ttl_secs() -> u64 {
    const PROFILE_CACHE_TTL_SECS: u64 = 600;
    PROFILE_CACHE_TTL_SECS
}
/// Default for `censorship.mask_port`: the upstream mask port used by the
/// TLS fronting relay.
pub(crate) fn default_mask_port() -> u16 {
    const MASK_PORT: u16 = 443;
    MASK_PORT
}
@@ -185,6 +201,10 @@ pub(crate) fn default_proxy_protocol_header_timeout_ms() -> u64 {
500
}
pub(crate) fn default_proxy_protocol_trusted_cidrs() -> Vec<IpNetwork> {
vec!["0.0.0.0/0".parse().unwrap(), "::/0".parse().unwrap()]
}
/// Default value for the server max-connections setting.
///
/// NOTE(review): the config field that consumes this default is not visible
/// in this chunk — confirm against `ServerConfig`.
pub(crate) fn default_server_max_connections() -> u32 {
    const MAX_CONNECTIONS: u32 = 10_000;
    MAX_CONNECTIONS
}
@@ -253,6 +273,10 @@ pub(crate) fn default_me2dc_fallback() -> bool {
true
}
/// Default for `general.me2dc_fast`: fast ME->Direct fallback stays disabled
/// unless it is switched on explicitly in the config.
pub(crate) fn default_me2dc_fast() -> bool {
    const ME2DC_FAST: bool = false;
    ME2DC_FAST
}
/// Default ME keepalive interval in seconds (used to initialise
/// `me_keepalive_interval_secs`).
pub(crate) fn default_keepalive_interval() -> u64 {
    const KEEPALIVE_INTERVAL_SECS: u64 = 8;
    KEEPALIVE_INTERVAL_SECS
}
+10 -3
View File
@@ -228,7 +228,9 @@ impl HotFields {
me_d2c_flush_batch_max_delay_us: cfg.general.me_d2c_flush_batch_max_delay_us,
me_d2c_ack_flush_immediate: cfg.general.me_d2c_ack_flush_immediate,
me_quota_soft_overshoot_bytes: cfg.general.me_quota_soft_overshoot_bytes,
me_d2c_frame_buf_shrink_threshold_bytes: cfg.general.me_d2c_frame_buf_shrink_threshold_bytes,
me_d2c_frame_buf_shrink_threshold_bytes: cfg
.general
.me_d2c_frame_buf_shrink_threshold_bytes,
direct_relay_copy_buf_c2s_bytes: cfg.general.direct_relay_copy_buf_c2s_bytes,
direct_relay_copy_buf_s2c_bytes: cfg.general.direct_relay_copy_buf_s2c_bytes,
me_health_interval_ms_unhealthy: cfg.general.me_health_interval_ms_unhealthy,
@@ -649,6 +651,9 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
}
if old.general.me_route_no_writer_mode != new.general.me_route_no_writer_mode
|| old.general.me_route_no_writer_wait_ms != new.general.me_route_no_writer_wait_ms
|| old.general.me_route_hybrid_max_wait_ms != new.general.me_route_hybrid_max_wait_ms
|| old.general.me_route_blocking_send_timeout_ms
!= new.general.me_route_blocking_send_timeout_ms
|| old.general.me_route_inline_recovery_attempts
!= new.general.me_route_inline_recovery_attempts
|| old.general.me_route_inline_recovery_wait_ms
@@ -667,9 +672,11 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
warned = true;
warn!("config reload: general.me_init_retry_attempts changed; restart required");
}
if old.general.me2dc_fallback != new.general.me2dc_fallback {
if old.general.me2dc_fallback != new.general.me2dc_fallback
|| old.general.me2dc_fast != new.general.me2dc_fast
{
warned = true;
warn!("config reload: general.me2dc_fallback changed; restart required");
warn!("config reload: general.me2dc_fallback/me2dc_fast changed; restart required");
}
if old.general.proxy_config_v4_cache_path != new.general.proxy_config_v4_cache_path
|| old.general.proxy_config_v6_cache_path != new.general.proxy_config_v6_cache_path
+199 -4
View File
@@ -1,6 +1,6 @@
#![allow(deprecated)]
use std::collections::{BTreeSet, HashMap};
use std::collections::{BTreeSet, HashMap, HashSet};
use std::hash::{DefaultHasher, Hash, Hasher};
use std::net::{IpAddr, SocketAddr};
use std::path::{Path, PathBuf};
@@ -444,8 +444,7 @@ impl ProxyConfig {
if !(5..=50).contains(&config.censorship.mask_classifier_prefetch_timeout_ms) {
return Err(ProxyError::Config(
"censorship.mask_classifier_prefetch_timeout_ms must be within [5, 50]"
.to_string(),
"censorship.mask_classifier_prefetch_timeout_ms must be within [5, 50]".to_string(),
));
}
@@ -558,7 +557,9 @@ impl ProxyConfig {
));
}
if !(4096..=16 * 1024 * 1024).contains(&config.general.me_d2c_frame_buf_shrink_threshold_bytes) {
if !(4096..=16 * 1024 * 1024)
.contains(&config.general.me_d2c_frame_buf_shrink_threshold_bytes)
{
return Err(ProxyError::Config(
"general.me_d2c_frame_buf_shrink_threshold_bytes must be within [4096, 16777216]"
.to_string(),
@@ -976,6 +977,28 @@ impl ProxyConfig {
// Normalize optional TLS fetch scope: whitespace-only values disable scoped routing.
config.censorship.tls_fetch_scope = config.censorship.tls_fetch_scope.trim().to_string();
if config.censorship.tls_fetch.profiles.is_empty() {
config.censorship.tls_fetch.profiles = TlsFetchConfig::default().profiles;
} else {
let mut seen = HashSet::new();
config
.censorship
.tls_fetch
.profiles
.retain(|profile| seen.insert(*profile));
}
if config.censorship.tls_fetch.attempt_timeout_ms == 0 {
return Err(ProxyError::Config(
"censorship.tls_fetch.attempt_timeout_ms must be > 0".to_string(),
));
}
if config.censorship.tls_fetch.total_budget_ms == 0 {
return Err(ProxyError::Config(
"censorship.tls_fetch.total_budget_ms must be > 0".to_string(),
));
}
// Merge primary + extra TLS domains, deduplicate (primary always first).
if !config.censorship.tls_domains.is_empty() {
let mut all = Vec::with_capacity(1 + config.censorship.tls_domains.len());
@@ -1194,6 +1217,7 @@ mod tests {
default_me_init_retry_attempts()
);
assert_eq!(cfg.general.me2dc_fallback, default_me2dc_fallback());
assert_eq!(cfg.general.me2dc_fast, default_me2dc_fast());
assert_eq!(
cfg.general.proxy_config_v4_cache_path,
default_proxy_config_v4_cache_path()
@@ -1262,6 +1286,11 @@ mod tests {
assert_eq!(cfg.general.update_every, default_update_every());
assert_eq!(cfg.server.listen_addr_ipv4, default_listen_addr_ipv4());
assert_eq!(cfg.server.listen_addr_ipv6, default_listen_addr_ipv6_opt());
assert_eq!(
cfg.server.proxy_protocol_trusted_cidrs,
default_proxy_protocol_trusted_cidrs()
);
assert_eq!(cfg.censorship.unknown_sni_action, UnknownSniAction::Drop);
assert_eq!(cfg.server.api.listen, default_api_listen());
assert_eq!(cfg.server.api.whitelist, default_api_whitelist());
assert_eq!(
@@ -1328,6 +1357,7 @@ mod tests {
default_me_init_retry_attempts()
);
assert_eq!(general.me2dc_fallback, default_me2dc_fallback());
assert_eq!(general.me2dc_fast, default_me2dc_fast());
assert_eq!(
general.proxy_config_v4_cache_path,
default_proxy_config_v4_cache_path()
@@ -1394,6 +1424,14 @@ mod tests {
let server = ServerConfig::default();
assert_eq!(server.listen_addr_ipv6, Some(default_listen_addr_ipv6()));
assert_eq!(
server.proxy_protocol_trusted_cidrs,
default_proxy_protocol_trusted_cidrs()
);
assert_eq!(
AntiCensorshipConfig::default().unknown_sni_action,
UnknownSniAction::Drop
);
assert_eq!(server.api.listen, default_api_listen());
assert_eq!(server.api.whitelist, default_api_whitelist());
assert_eq!(
@@ -1429,6 +1467,75 @@ mod tests {
assert_eq!(access.users, default_access_users());
}
/// An omitted `proxy_protocol_trusted_cidrs` key deserializes to the
/// trust-all default, while an explicit empty list is preserved as empty.
#[test]
fn proxy_protocol_trusted_cidrs_missing_uses_trust_all_but_explicit_empty_stays_empty() {
    // Case 1: the key is absent, so the serde default function applies.
    let toml_without_key = r#"
[server]
[general]
[network]
[access]
"#;
    let cfg_missing: ProxyConfig = toml::from_str(toml_without_key).unwrap();
    assert_eq!(
        cfg_missing.server.proxy_protocol_trusted_cidrs,
        default_proxy_protocol_trusted_cidrs()
    );

    // Case 2: the key is present but empty; the default must not override it.
    let toml_with_empty_list = r#"
[server]
proxy_protocol_trusted_cidrs = []
[general]
[network]
[access]
"#;
    let cfg_explicit_empty: ProxyConfig = toml::from_str(toml_with_empty_list).unwrap();
    let trusted = &cfg_explicit_empty.server.proxy_protocol_trusted_cidrs;
    assert!(trusted.is_empty());
}
/// `censorship.unknown_sni_action` defaults to `Drop` when omitted and
/// parses the lowercase string `"mask"` into `UnknownSniAction::Mask`.
#[test]
fn unknown_sni_action_parses_and_defaults_to_drop() {
    // Empty `[censorship]` table: the enum default must be used.
    let toml_default = r#"
[server]
[general]
[network]
[access]
[censorship]
"#;
    let cfg_default: ProxyConfig = toml::from_str(toml_default).unwrap();
    assert_eq!(
        cfg_default.censorship.unknown_sni_action,
        UnknownSniAction::Drop
    );

    // Explicit lowercase value selects the masking policy.
    let toml_mask = r#"
[server]
[general]
[network]
[access]
[censorship]
unknown_sni_action = "mask"
"#;
    let cfg_mask: ProxyConfig = toml::from_str(toml_mask).unwrap();
    assert_eq!(
        cfg_mask.censorship.unknown_sni_action,
        UnknownSniAction::Mask
    );
}
#[test]
fn dc_overrides_allow_string_and_array() {
let toml = r#"
@@ -2376,6 +2483,94 @@ mod tests {
let _ = std::fs::remove_file(path);
}
/// Loading a config without a `[censorship.tls_fetch]` table must yield the
/// built-in TLS-fetch defaults.
#[test]
fn tls_fetch_defaults_are_applied() {
    let toml = r#"
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
    let dir = std::env::temp_dir();
    let path = dir.join("telemt_tls_fetch_defaults_test.toml");
    std::fs::write(&path, toml).unwrap();

    // Remove the fixture right after loading so that a failed load or a
    // failed assertion below cannot leave the file behind in the temp dir.
    let loaded = ProxyConfig::load(&path);
    let _ = std::fs::remove_file(&path);
    let cfg = loaded.unwrap();

    assert_eq!(
        cfg.censorship.tls_fetch.profiles,
        TlsFetchConfig::default().profiles
    );
    assert!(cfg.censorship.tls_fetch.strict_route);
    assert_eq!(cfg.censorship.tls_fetch.attempt_timeout_ms, 5_000);
    assert_eq!(cfg.censorship.tls_fetch.total_budget_ms, 15_000);
    assert_eq!(cfg.censorship.tls_fetch.profile_cache_ttl_secs, 600);
}
/// Duplicate entries in `censorship.tls_fetch.profiles` are dropped during
/// load while the first-seen order of the remaining profiles is kept.
#[test]
fn tls_fetch_profiles_are_deduplicated_preserving_order() {
    let toml = r#"
[censorship]
tls_domain = "example.com"
[censorship.tls_fetch]
profiles = ["compat_tls12", "modern_chrome_like", "compat_tls12", "legacy_minimal"]
[access.users]
user = "00000000000000000000000000000000"
"#;
    let dir = std::env::temp_dir();
    let path = dir.join("telemt_tls_fetch_profiles_dedup_test.toml");
    std::fs::write(&path, toml).unwrap();

    // Remove the fixture right after loading so that a failed load or a
    // failed assertion below cannot leave the file behind in the temp dir.
    let loaded = ProxyConfig::load(&path);
    let _ = std::fs::remove_file(&path);
    let cfg = loaded.unwrap();

    assert_eq!(
        cfg.censorship.tls_fetch.profiles,
        vec![
            TlsFetchProfile::CompatTls12,
            TlsFetchProfile::ModernChromeLike,
            TlsFetchProfile::LegacyMinimal
        ]
    );
}
/// A zero `censorship.tls_fetch.attempt_timeout_ms` must be rejected by
/// `ProxyConfig::load` with the dedicated validation message.
#[test]
fn tls_fetch_attempt_timeout_zero_is_rejected() {
    let toml = r#"
[censorship]
tls_domain = "example.com"
[censorship.tls_fetch]
attempt_timeout_ms = 0
[access.users]
user = "00000000000000000000000000000000"
"#;
    let dir = std::env::temp_dir();
    let path = dir.join("telemt_tls_fetch_attempt_timeout_zero_test.toml");
    std::fs::write(&path, toml).unwrap();

    // Remove the fixture before asserting: if the load unexpectedly succeeds
    // (or the message check fails) the temp file must not be left behind.
    let loaded = ProxyConfig::load(&path);
    let _ = std::fs::remove_file(&path);

    let err = loaded.unwrap_err().to_string();
    assert!(err.contains("censorship.tls_fetch.attempt_timeout_ms must be > 0"));
}
/// A zero `censorship.tls_fetch.total_budget_ms` must be rejected by
/// `ProxyConfig::load` with the dedicated validation message.
#[test]
fn tls_fetch_total_budget_zero_is_rejected() {
    let toml = r#"
[censorship]
tls_domain = "example.com"
[censorship.tls_fetch]
total_budget_ms = 0
[access.users]
user = "00000000000000000000000000000000"
"#;
    let dir = std::env::temp_dir();
    let path = dir.join("telemt_tls_fetch_total_budget_zero_test.toml");
    std::fs::write(&path, toml).unwrap();

    // Remove the fixture before asserting: if the load unexpectedly succeeds
    // (or the message check fails) the temp file must not be left behind.
    let loaded = ProxyConfig::load(&path);
    let _ = std::fs::remove_file(&path);

    let err = loaded.unwrap_err().to_string();
    assert!(err.contains("censorship.tls_fetch.total_budget_ms must be > 0"));
}
#[test]
fn invalid_ad_tag_is_disabled_during_load() {
let toml = r#"
@@ -8,8 +8,9 @@ fn write_temp_config(contents: &str) -> PathBuf {
.duration_since(UNIX_EPOCH)
.expect("system time must be after unix epoch")
.as_nanos();
let path = std::env::temp_dir()
.join(format!("telemt-load-mask-prefetch-timeout-security-{nonce}.toml"));
let path = std::env::temp_dir().join(format!(
"telemt-load-mask-prefetch-timeout-security-{nonce}.toml"
));
fs::write(&path, contents).expect("temp config write must succeed");
path
}
@@ -67,8 +68,8 @@ mask_classifier_prefetch_timeout_ms = 20
"#,
);
let cfg = ProxyConfig::load(&path)
.expect("prefetch timeout within security bounds must be accepted");
let cfg =
ProxyConfig::load(&path).expect("prefetch timeout within security bounds must be accepted");
assert_eq!(cfg.censorship.mask_classifier_prefetch_timeout_ms, 20);
remove_temp_config(&path);
@@ -265,8 +265,8 @@ mask_relay_max_bytes = 67108865
"#,
);
let err = ProxyConfig::load(&path)
.expect_err("mask_relay_max_bytes above hard cap must be rejected");
let err =
ProxyConfig::load(&path).expect_err("mask_relay_max_bytes above hard cap must be rejected");
let msg = err.to_string();
assert!(
msg.contains("censorship.mask_relay_max_bytes must be <= 67108864"),
+107 -5
View File
@@ -429,6 +429,11 @@ pub struct GeneralConfig {
#[serde(default = "default_me2dc_fallback")]
pub me2dc_fallback: bool,
/// Fast ME->Direct fallback mode for new sessions.
/// Active only when both `use_middle_proxy=true` and `me2dc_fallback=true`.
#[serde(default = "default_me2dc_fast")]
pub me2dc_fast: bool,
/// Enable ME keepalive padding frames.
#[serde(default = "default_true")]
pub me_keepalive_enabled: bool,
@@ -939,6 +944,7 @@ impl Default for GeneralConfig {
middle_proxy_warm_standby: default_middle_proxy_warm_standby(),
me_init_retry_attempts: default_me_init_retry_attempts(),
me2dc_fallback: default_me2dc_fallback(),
me2dc_fast: default_me2dc_fast(),
me_keepalive_enabled: default_true(),
me_keepalive_interval_secs: default_keepalive_interval(),
me_keepalive_jitter_secs: default_keepalive_jitter(),
@@ -954,7 +960,8 @@ impl Default for GeneralConfig {
me_d2c_flush_batch_max_delay_us: default_me_d2c_flush_batch_max_delay_us(),
me_d2c_ack_flush_immediate: default_me_d2c_ack_flush_immediate(),
me_quota_soft_overshoot_bytes: default_me_quota_soft_overshoot_bytes(),
me_d2c_frame_buf_shrink_threshold_bytes: default_me_d2c_frame_buf_shrink_threshold_bytes(),
me_d2c_frame_buf_shrink_threshold_bytes:
default_me_d2c_frame_buf_shrink_threshold_bytes(),
direct_relay_copy_buf_c2s_bytes: default_direct_relay_copy_buf_c2s_bytes(),
direct_relay_copy_buf_s2c_bytes: default_direct_relay_copy_buf_s2c_bytes(),
me_warmup_stagger_enabled: default_true(),
@@ -1239,9 +1246,10 @@ pub struct ServerConfig {
/// Trusted source CIDRs allowed to send incoming PROXY protocol headers.
///
/// When non-empty, connections from addresses outside this allowlist are
/// rejected before `src_addr` is applied.
#[serde(default)]
/// If this field is omitted in config, it defaults to trust-all CIDRs
/// (`0.0.0.0/0` and `::/0`). If it is explicitly set to an empty list,
/// all PROXY protocol headers are rejected.
#[serde(default = "default_proxy_protocol_trusted_cidrs")]
pub proxy_protocol_trusted_cidrs: Vec<IpNetwork>,
/// Port for the Prometheus-compatible metrics endpoint.
@@ -1286,7 +1294,7 @@ impl Default for ServerConfig {
listen_tcp: None,
proxy_protocol: false,
proxy_protocol_header_timeout_ms: default_proxy_protocol_header_timeout_ms(),
proxy_protocol_trusted_cidrs: Vec::new(),
proxy_protocol_trusted_cidrs: default_proxy_protocol_trusted_cidrs(),
metrics_port: None,
metrics_listen: None,
metrics_whitelist: default_metrics_whitelist(),
@@ -1357,6 +1365,90 @@ impl Default for TimeoutsConfig {
}
}
/// Policy for a TLS ClientHello whose SNI is unknown (not configured).
///
/// Serialized in lowercase, i.e. `"drop"` or `"mask"` in the config file.
/// NOTE(review): the code that acts on each variant is outside this chunk —
/// confirm the exact runtime behavior there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum UnknownSniAction {
/// Default policy, selected when the config key is omitted (`"drop"`).
#[default]
Drop,
/// Alternative policy, selected with `"mask"`.
Mask,
}
/// ClientHello profile that can be listed in `censorship.tls_fetch.profiles`.
///
/// Serialized in snake_case (for example `modern_chrome_like`). The type is
/// `Copy + Eq + Hash`, which the config loader relies on to deduplicate the
/// profile list through a `HashSet`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TlsFetchProfile {
/// Config value `modern_chrome_like`.
ModernChromeLike,
/// Config value `modern_firefox_like`.
ModernFirefoxLike,
/// Config value `compat_tls12`.
CompatTls12,
/// Config value `legacy_minimal`.
LegacyMinimal,
}
impl TlsFetchProfile {
pub fn as_str(self) -> &'static str {
match self {
TlsFetchProfile::ModernChromeLike => "modern_chrome_like",
TlsFetchProfile::ModernFirefoxLike => "modern_firefox_like",
TlsFetchProfile::CompatTls12 => "compat_tls12",
TlsFetchProfile::LegacyMinimal => "legacy_minimal",
}
}
}
fn default_tls_fetch_profiles() -> Vec<TlsFetchProfile> {
vec![
TlsFetchProfile::ModernChromeLike,
TlsFetchProfile::ModernFirefoxLike,
TlsFetchProfile::CompatTls12,
TlsFetchProfile::LegacyMinimal,
]
}
/// Settings of the `[censorship.tls_fetch]` table: the strategy used for
/// TLS-front metadata fetches.
///
/// Every field has a serde default, so the whole table may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TlsFetchConfig {
/// Ordered list of ClientHello profiles used for adaptive fallback.
/// During config load an empty list is replaced by the built-in defaults
/// and duplicates are removed while preserving order.
#[serde(default = "default_tls_fetch_profiles")]
pub profiles: Vec<TlsFetchProfile>,
/// When true and upstream route is configured, TLS fetch fails closed on
/// upstream connect errors and does not fallback to direct TCP.
#[serde(default = "default_tls_fetch_strict_route")]
pub strict_route: bool,
/// Timeout per one profile attempt in milliseconds.
/// Must be `> 0`; config load rejects zero.
#[serde(default = "default_tls_fetch_attempt_timeout_ms")]
pub attempt_timeout_ms: u64,
/// Total wall-clock budget in milliseconds across all profile attempts.
/// Must be `> 0`; config load rejects zero.
#[serde(default = "default_tls_fetch_total_budget_ms")]
pub total_budget_ms: u64,
/// Adds GREASE-style values into selected ClientHello extensions.
#[serde(default)]
pub grease_enabled: bool,
/// Produces deterministic ClientHello randomness for debugging/tests.
#[serde(default)]
pub deterministic: bool,
/// TTL for winner-profile cache entries in seconds.
/// Set to 0 to disable profile cache.
#[serde(default = "default_tls_fetch_profile_cache_ttl_secs")]
pub profile_cache_ttl_secs: u64,
}
impl Default for TlsFetchConfig {
    /// Builds the configuration from the same default functions that serde
    /// uses for omitted keys, so `Default` and an empty table agree.
    fn default() -> Self {
        let profiles = default_tls_fetch_profiles();
        let strict_route = default_tls_fetch_strict_route();
        let attempt_timeout_ms = default_tls_fetch_attempt_timeout_ms();
        let total_budget_ms = default_tls_fetch_total_budget_ms();
        let profile_cache_ttl_secs = default_tls_fetch_profile_cache_ttl_secs();

        Self {
            profiles,
            strict_route,
            attempt_timeout_ms,
            total_budget_ms,
            // GREASE and deterministic randomness are opt-in.
            grease_enabled: false,
            deterministic: false,
            profile_cache_ttl_secs,
        }
    }
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AntiCensorshipConfig {
#[serde(default = "default_tls_domain")]
@@ -1366,11 +1458,19 @@ pub struct AntiCensorshipConfig {
#[serde(default)]
pub tls_domains: Vec<String>,
/// Policy for TLS ClientHello with unknown (non-configured) SNI.
#[serde(default)]
pub unknown_sni_action: UnknownSniAction,
/// Upstream scope used for TLS front metadata fetches.
/// Empty value keeps default upstream routing behavior.
#[serde(default = "default_tls_fetch_scope")]
pub tls_fetch_scope: String,
/// Fetch strategy for TLS front metadata bootstrap and periodic refresh.
#[serde(default)]
pub tls_fetch: TlsFetchConfig,
#[serde(default = "default_true")]
pub mask: bool,
@@ -1476,7 +1576,9 @@ impl Default for AntiCensorshipConfig {
Self {
tls_domain: default_tls_domain(),
tls_domains: Vec::new(),
unknown_sni_action: UnknownSniAction::Drop,
tls_fetch_scope: default_tls_fetch_scope(),
tls_fetch: TlsFetchConfig::default(),
mask: default_true(),
mask_host: None,
mask_port: default_mask_port(),
+3
View File
@@ -216,6 +216,9 @@ pub enum ProxyError {
#[error("Invalid proxy protocol header")]
InvalidProxyProtocol,
#[error("Unknown TLS SNI")]
UnknownTlsSni,
#[error("Proxy error: {0}")]
Proxy(String),
+60 -20
View File
@@ -21,10 +21,29 @@ pub(crate) async fn configure_admission_gate(
if config.general.use_middle_proxy {
if let Some(pool) = me_pool.as_ref() {
let initial_ready = pool.admission_ready_conditional_cast().await;
admission_tx.send_replace(initial_ready);
let _ = route_runtime.set_mode(RelayRouteMode::Middle);
let mut fallback_enabled = config.general.me2dc_fallback;
let mut fast_fallback_enabled = fallback_enabled && config.general.me2dc_fast;
let (initial_gate_open, initial_route_mode, initial_fallback_reason) = if initial_ready
{
(true, RelayRouteMode::Middle, None)
} else if fast_fallback_enabled {
(
true,
RelayRouteMode::Direct,
Some("fast_not_ready_fallback"),
)
} else {
(false, RelayRouteMode::Middle, None)
};
admission_tx.send_replace(initial_gate_open);
let _ = route_runtime.set_mode(initial_route_mode);
if initial_ready {
info!("Conditional-admission gate: open / ME pool READY");
} else if let Some(reason) = initial_fallback_reason {
warn!(
fallback_reason = reason,
"Conditional-admission gate opened in ME fast fallback mode"
);
} else {
warn!("Conditional-admission gate: closed / ME pool is NOT ready)");
}
@@ -34,10 +53,9 @@ pub(crate) async fn configure_admission_gate(
let route_runtime_gate = route_runtime.clone();
let mut config_rx_gate = config_rx.clone();
let mut admission_poll_ms = config.general.me_admission_poll_ms.max(1);
let mut fallback_enabled = config.general.me2dc_fallback;
tokio::spawn(async move {
let mut gate_open = initial_ready;
let mut route_mode = RelayRouteMode::Middle;
let mut gate_open = initial_gate_open;
let mut route_mode = initial_route_mode;
let mut ready_observed = initial_ready;
let mut not_ready_since = if initial_ready {
None
@@ -53,16 +71,23 @@ pub(crate) async fn configure_admission_gate(
let cfg = config_rx_gate.borrow_and_update().clone();
admission_poll_ms = cfg.general.me_admission_poll_ms.max(1);
fallback_enabled = cfg.general.me2dc_fallback;
fast_fallback_enabled = cfg.general.me2dc_fallback && cfg.general.me2dc_fast;
continue;
}
_ = tokio::time::sleep(Duration::from_millis(admission_poll_ms)) => {}
}
let ready = pool_for_gate.admission_ready_conditional_cast().await;
let now = Instant::now();
let (next_gate_open, next_route_mode, next_fallback_active) = if ready {
let (next_gate_open, next_route_mode, next_fallback_reason) = if ready {
ready_observed = true;
not_ready_since = None;
(true, RelayRouteMode::Middle, false)
(true, RelayRouteMode::Middle, None)
} else if fast_fallback_enabled {
(
true,
RelayRouteMode::Direct,
Some("fast_not_ready_fallback"),
)
} else {
let not_ready_started_at = *not_ready_since.get_or_insert(now);
let not_ready_for = now.saturating_duration_since(not_ready_started_at);
@@ -72,11 +97,12 @@ pub(crate) async fn configure_admission_gate(
STARTUP_FALLBACK_AFTER
};
if fallback_enabled && not_ready_for > fallback_after {
(true, RelayRouteMode::Direct, true)
(true, RelayRouteMode::Direct, Some("strict_grace_fallback"))
} else {
(false, RelayRouteMode::Middle, false)
(false, RelayRouteMode::Middle, None)
}
};
let next_fallback_active = next_fallback_reason.is_some();
if next_route_mode != route_mode {
route_mode = next_route_mode;
@@ -88,17 +114,28 @@ pub(crate) async fn configure_admission_gate(
"Middle-End routing restored for new sessions"
);
} else {
let fallback_after = if ready_observed {
RUNTIME_FALLBACK_AFTER
let fallback_reason = next_fallback_reason.unwrap_or("unknown");
if fallback_reason == "strict_grace_fallback" {
let fallback_after = if ready_observed {
RUNTIME_FALLBACK_AFTER
} else {
STARTUP_FALLBACK_AFTER
};
warn!(
target_mode = route_mode.as_str(),
cutover_generation = snapshot.generation,
grace_secs = fallback_after.as_secs(),
fallback_reason,
"ME pool stayed not-ready beyond grace; routing new sessions via Direct-DC"
);
} else {
STARTUP_FALLBACK_AFTER
};
warn!(
target_mode = route_mode.as_str(),
cutover_generation = snapshot.generation,
grace_secs = fallback_after.as_secs(),
"ME pool stayed not-ready beyond grace; routing new sessions via Direct-DC"
);
warn!(
target_mode = route_mode.as_str(),
cutover_generation = snapshot.generation,
fallback_reason,
"ME pool not-ready; routing new sessions via Direct-DC (fast mode)"
);
}
}
}
}
@@ -108,7 +145,10 @@ pub(crate) async fn configure_admission_gate(
admission_tx_gate.send_replace(gate_open);
if gate_open {
if next_fallback_active {
warn!("Conditional-admission gate opened in ME fallback mode");
warn!(
fallback_reason = next_fallback_reason.unwrap_or("unknown"),
"Conditional-admission gate opened in ME fallback mode"
);
} else {
info!("Conditional-admission gate opened / ME pool READY");
}
+5 -2
View File
@@ -8,8 +8,10 @@ use tracing::{debug, error, info, warn};
use crate::cli;
use crate::config::ProxyConfig;
use crate::transport::UpstreamManager;
use crate::transport::middle_proxy::{
ProxyConfigData, fetch_proxy_config_with_raw, load_proxy_config_cache, save_proxy_config_cache,
ProxyConfigData, fetch_proxy_config_with_raw_via_upstream, load_proxy_config_cache,
save_proxy_config_cache,
};
pub(crate) fn resolve_runtime_config_path(
@@ -288,9 +290,10 @@ pub(crate) async fn load_startup_proxy_config_snapshot(
cache_path: Option<&str>,
me2dc_fallback: bool,
label: &'static str,
upstream: Option<std::sync::Arc<UpstreamManager>>,
) -> Option<ProxyConfigData> {
loop {
match fetch_proxy_config_with_raw(url).await {
match fetch_proxy_config_with_raw_via_upstream(url, upstream.clone()).await {
Ok((cfg, raw)) => {
if !cfg.map.is_empty() {
if let Some(path) = cache_path
+6 -1
View File
@@ -63,9 +63,10 @@ pub(crate) async fn initialize_me_pool(
let proxy_secret_path = config.general.proxy_secret_path.as_deref();
let pool_size = config.general.middle_proxy_pool_size.max(1);
let proxy_secret = loop {
match crate::transport::middle_proxy::fetch_proxy_secret(
match crate::transport::middle_proxy::fetch_proxy_secret_with_upstream(
proxy_secret_path,
config.general.proxy_secret_len_max,
Some(upstream_manager.clone()),
)
.await
{
@@ -129,6 +130,7 @@ pub(crate) async fn initialize_me_pool(
config.general.proxy_config_v4_cache_path.as_deref(),
me2dc_fallback,
"getProxyConfig",
Some(upstream_manager.clone()),
)
.await;
if cfg_v4.is_some() {
@@ -160,6 +162,7 @@ pub(crate) async fn initialize_me_pool(
config.general.proxy_config_v6_cache_path.as_deref(),
me2dc_fallback,
"getProxyConfigV6",
Some(upstream_manager.clone()),
)
.await;
if cfg_v6.is_some() {
@@ -274,6 +277,8 @@ pub(crate) async fn initialize_me_pool(
config.general.me_warn_rate_limit_ms,
config.general.me_route_no_writer_mode,
config.general.me_route_no_writer_wait_ms,
config.general.me_route_hybrid_max_wait_ms,
config.general.me_route_blocking_send_timeout_ms,
config.general.me_route_inline_recovery_attempts,
config.general.me_route_inline_recovery_wait_ms,
);
+7 -9
View File
@@ -115,15 +115,13 @@ pub async fn run() -> std::result::Result<(), Box<dyn std::error::Error>> {
);
std::process::exit(1);
}
} else {
if let Err(e) = std::fs::create_dir_all(data_path) {
eprintln!(
"[telemt] Can't create data_path {}: {}",
data_path.display(),
e
);
std::process::exit(1);
}
} else if let Err(e) = std::fs::create_dir_all(data_path) {
eprintln!(
"[telemt] Can't create data_path {}: {}",
data_path.display(),
e
);
std::process::exit(1);
}
if let Err(e) = std::env::set_current_dir(data_path) {
+20 -5
View File
@@ -7,6 +7,7 @@ use tracing::warn;
use crate::config::ProxyConfig;
use crate::startup::{COMPONENT_TLS_FRONT_BOOTSTRAP, StartupTracker};
use crate::tls_front::TlsFrontCache;
use crate::tls_front::fetcher::TlsFetchStrategy;
use crate::transport::UpstreamManager;
pub(crate) async fn bootstrap_tls_front(
@@ -40,7 +41,17 @@ pub(crate) async fn bootstrap_tls_front(
let mask_unix_sock = config.censorship.mask_unix_sock.clone();
let tls_fetch_scope = (!config.censorship.tls_fetch_scope.is_empty())
.then(|| config.censorship.tls_fetch_scope.clone());
let fetch_timeout = Duration::from_secs(5);
let tls_fetch = config.censorship.tls_fetch.clone();
let fetch_strategy = TlsFetchStrategy {
profiles: tls_fetch.profiles,
strict_route: tls_fetch.strict_route,
attempt_timeout: Duration::from_millis(tls_fetch.attempt_timeout_ms.max(1)),
total_budget: Duration::from_millis(tls_fetch.total_budget_ms.max(1)),
grease_enabled: tls_fetch.grease_enabled,
deterministic: tls_fetch.deterministic,
profile_cache_ttl: Duration::from_secs(tls_fetch.profile_cache_ttl_secs),
};
let fetch_timeout = fetch_strategy.total_budget;
let cache_initial = cache.clone();
let domains_initial = tls_domains.to_vec();
@@ -48,6 +59,7 @@ pub(crate) async fn bootstrap_tls_front(
let unix_sock_initial = mask_unix_sock.clone();
let scope_initial = tls_fetch_scope.clone();
let upstream_initial = upstream_manager.clone();
let strategy_initial = fetch_strategy.clone();
tokio::spawn(async move {
let mut join = tokio::task::JoinSet::new();
for domain in domains_initial {
@@ -56,12 +68,13 @@ pub(crate) async fn bootstrap_tls_front(
let unix_sock_domain = unix_sock_initial.clone();
let scope_domain = scope_initial.clone();
let upstream_domain = upstream_initial.clone();
let strategy_domain = strategy_initial.clone();
join.spawn(async move {
match crate::tls_front::fetcher::fetch_real_tls(
match crate::tls_front::fetcher::fetch_real_tls_with_strategy(
&host_domain,
port,
&domain,
fetch_timeout,
&strategy_domain,
Some(upstream_domain),
scope_domain.as_deref(),
proxy_protocol,
@@ -107,6 +120,7 @@ pub(crate) async fn bootstrap_tls_front(
let unix_sock_refresh = mask_unix_sock.clone();
let scope_refresh = tls_fetch_scope.clone();
let upstream_refresh = upstream_manager.clone();
let strategy_refresh = fetch_strategy.clone();
tokio::spawn(async move {
loop {
let base_secs = rand::rng().random_range(4 * 3600..=6 * 3600);
@@ -120,12 +134,13 @@ pub(crate) async fn bootstrap_tls_front(
let unix_sock_domain = unix_sock_refresh.clone();
let scope_domain = scope_refresh.clone();
let upstream_domain = upstream_refresh.clone();
let strategy_domain = strategy_refresh.clone();
join.spawn(async move {
match crate::tls_front::fetcher::fetch_real_tls(
match crate::tls_front::fetcher::fetch_real_tls_with_strategy(
&host_domain,
port,
&domain,
fetch_timeout,
&strategy_domain,
Some(upstream_domain),
scope_domain.as_deref(),
proxy_protocol,
+4 -3
View File
@@ -7,12 +7,12 @@ mod crypto;
mod error;
mod ip_tracker;
#[cfg(test)]
#[path = "tests/ip_tracker_hotpath_adversarial_tests.rs"]
mod ip_tracker_hotpath_adversarial_tests;
#[cfg(test)]
#[path = "tests/ip_tracker_encapsulation_adversarial_tests.rs"]
mod ip_tracker_encapsulation_adversarial_tests;
#[cfg(test)]
#[path = "tests/ip_tracker_hotpath_adversarial_tests.rs"]
mod ip_tracker_hotpath_adversarial_tests;
#[cfg(test)]
#[path = "tests/ip_tracker_regression_tests.rs"]
mod ip_tracker_regression_tests;
mod maestro;
@@ -29,5 +29,6 @@ mod util;
/// Process entry point: installs the rustls crypto provider, then runs the
/// `maestro` runtime and returns whatever result it produces.
#[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
// Register the ring-backed rustls crypto provider as the process-wide default
// before anything else runs. The returned value is deliberately discarded.
// NOTE(review): presumably a failure here only means a provider was already
// installed — confirm against the rustls documentation.
let _ = rustls::crypto::ring::default_provider().install_default();
maestro::run().await
}
+61 -4
View File
@@ -1233,10 +1233,7 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
out,
"# HELP telemt_me_d2c_batch_bytes_bucket_total DC->Client batch byte size buckets"
);
let _ = writeln!(
out,
"# TYPE telemt_me_d2c_batch_bytes_bucket_total counter"
);
let _ = writeln!(out, "# TYPE telemt_me_d2c_batch_bytes_bucket_total counter");
let _ = writeln!(
out,
"telemt_me_d2c_batch_bytes_bucket_total{{bucket=\"0_1k\"}} {}",
@@ -1561,6 +1558,40 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_endpoint_quarantine_unexpected_total ME endpoint quarantines caused by unexpected writer removals"
);
let _ = writeln!(
out,
"# TYPE telemt_me_endpoint_quarantine_unexpected_total counter"
);
let _ = writeln!(
out,
"telemt_me_endpoint_quarantine_unexpected_total {}",
if me_allows_normal {
stats.get_me_endpoint_quarantine_unexpected_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_endpoint_quarantine_draining_suppressed_total Draining writer removals that skipped endpoint quarantine"
);
let _ = writeln!(
out,
"# TYPE telemt_me_endpoint_quarantine_draining_suppressed_total counter"
);
let _ = writeln!(
out,
"telemt_me_endpoint_quarantine_draining_suppressed_total {}",
if me_allows_normal {
stats.get_me_endpoint_quarantine_draining_suppressed_total()
} else {
0
}
);
let _ = writeln!(
out,
@@ -2321,6 +2352,20 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_hybrid_timeout_total ME hybrid route timeouts after bounded retry window"
);
let _ = writeln!(out, "# TYPE telemt_me_hybrid_timeout_total counter");
let _ = writeln!(
out,
"telemt_me_hybrid_timeout_total {}",
if me_allows_normal {
stats.get_me_hybrid_timeout_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_async_recovery_trigger_total Async ME recovery trigger attempts from route path"
@@ -2611,6 +2656,9 @@ mod tests {
stats.increment_me_d2c_write_mode(crate::stats::MeD2cWriteMode::Coalesced);
stats.increment_me_d2c_quota_reject_total(crate::stats::MeD2cQuotaRejectStage::PostWrite);
stats.observe_me_d2c_frame_buf_shrink(4096);
stats.increment_me_endpoint_quarantine_total();
stats.increment_me_endpoint_quarantine_unexpected_total();
stats.increment_me_endpoint_quarantine_draining_suppressed_total();
stats.increment_user_connects("alice");
stats.increment_user_curr_connects("alice");
stats.add_user_octets_from("alice", 1024);
@@ -2661,6 +2709,9 @@ mod tests {
assert!(output.contains("telemt_me_d2c_quota_reject_total{stage=\"post_write\"} 1"));
assert!(output.contains("telemt_me_d2c_frame_buf_shrink_total 1"));
assert!(output.contains("telemt_me_d2c_frame_buf_shrink_bytes_total 4096"));
assert!(output.contains("telemt_me_endpoint_quarantine_total 1"));
assert!(output.contains("telemt_me_endpoint_quarantine_unexpected_total 1"));
assert!(output.contains("telemt_me_endpoint_quarantine_draining_suppressed_total 1"));
assert!(output.contains("telemt_user_connections_total{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_connections_current{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_octets_from_client{user=\"alice\"} 1024"));
@@ -2727,6 +2778,12 @@ mod tests {
assert!(output.contains("# TYPE telemt_me_d2c_write_mode_total counter"));
assert!(output.contains("# TYPE telemt_me_d2c_batch_frames_bucket_total counter"));
assert!(output.contains("# TYPE telemt_me_d2c_flush_duration_us_bucket_total counter"));
assert!(output.contains("# TYPE telemt_me_endpoint_quarantine_total counter"));
assert!(output.contains("# TYPE telemt_me_endpoint_quarantine_unexpected_total counter"));
assert!(
output
.contains("# TYPE telemt_me_endpoint_quarantine_draining_suppressed_total counter")
);
assert!(output.contains("# TYPE telemt_me_writer_removed_total counter"));
assert!(
output
+26 -8
View File
@@ -210,7 +210,9 @@ fn should_prefetch_mask_classifier_window(initial_data: &[u8]) -> bool {
return false;
}
initial_data.iter().all(|b| b.is_ascii_alphabetic() || *b == b' ')
initial_data
.iter()
.all(|b| b.is_ascii_alphabetic() || *b == b' ')
}
#[cfg(test)]
@@ -218,16 +220,19 @@ async fn extend_masking_initial_window<R>(reader: &mut R, initial_data: &mut Vec
where
R: AsyncRead + Unpin,
{
extend_masking_initial_window_with_timeout(reader, initial_data, MASK_CLASSIFIER_PREFETCH_TIMEOUT)
.await;
extend_masking_initial_window_with_timeout(
reader,
initial_data,
MASK_CLASSIFIER_PREFETCH_TIMEOUT,
)
.await;
}
async fn extend_masking_initial_window_with_timeout<R>(
reader: &mut R,
initial_data: &mut Vec<u8>,
prefetch_timeout: Duration,
)
where
) where
R: AsyncRead + Unpin,
{
if !should_prefetch_mask_classifier_window(initial_data) {
@@ -312,13 +317,20 @@ fn record_handshake_failure_class(
record_beobachten_class(beobachten, config, peer_ip, class);
}
#[inline]
fn increment_bad_on_unknown_tls_sni(stats: &Stats, error: &ProxyError) {
if matches!(error, ProxyError::UnknownTlsSni) {
stats.increment_connects_bad();
}
}
fn is_trusted_proxy_source(peer_ip: IpAddr, trusted: &[IpNetwork]) -> bool {
if trusted.is_empty() {
static EMPTY_PROXY_TRUST_WARNED: OnceLock<AtomicBool> = OnceLock::new();
let warned = EMPTY_PROXY_TRUST_WARNED.get_or_init(|| AtomicBool::new(false));
if !warned.swap(true, Ordering::Relaxed) {
warn!(
"PROXY protocol enabled but server.proxy_protocol_trusted_cidrs is empty; rejecting all PROXY headers by default"
"PROXY protocol enabled but server.proxy_protocol_trusted_cidrs is empty; rejecting all PROXY headers"
);
}
return false;
@@ -503,7 +515,10 @@ where
beobachten.clone(),
));
}
HandshakeResult::Error(e) => return Err(e),
HandshakeResult::Error(e) => {
increment_bad_on_unknown_tls_sni(stats.as_ref(), &e);
return Err(e);
}
};
debug!(peer = %peer, "Reading MTProto handshake through TLS");
@@ -954,7 +969,10 @@ impl RunningClientHandler {
self.beobachten.clone(),
));
}
HandshakeResult::Error(e) => return Err(e),
HandshakeResult::Error(e) => {
increment_bad_on_unknown_tls_sni(stats.as_ref(), &e);
return Err(e);
}
};
debug!(peer = %peer, "Reading MTProto handshake through TLS");
+158 -57
View File
@@ -13,10 +13,10 @@ use std::sync::Arc;
use std::sync::{Mutex, OnceLock};
use std::time::{Duration, Instant};
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
use tracing::{debug, trace, warn};
use tracing::{debug, info, trace, warn};
use zeroize::{Zeroize, Zeroizing};
use crate::config::ProxyConfig;
use crate::config::{ProxyConfig, UnknownSniAction};
use crate::crypto::{AesCtr, SecureRandom, sha256};
use crate::error::{HandshakeResult, ProxyError};
use crate::protocol::constants::*;
@@ -28,6 +28,8 @@ use rand::RngExt;
const ACCESS_SECRET_BYTES: usize = 16;
static INVALID_SECRET_WARNED: OnceLock<Mutex<HashSet<(String, String)>>> = OnceLock::new();
/// Cooldown, in seconds, added to the current instant each time an unknown-SNI
/// rejection is allowed to log at `warn` level.
const UNKNOWN_SNI_WARN_COOLDOWN_SECS: u64 = 5;
/// Lazily initialised, process-wide slot holding the earliest instant at which
/// the next unknown-SNI `warn` may be emitted (`None` until the first one).
static UNKNOWN_SNI_WARN_NEXT_ALLOWED: OnceLock<Mutex<Option<Instant>>> = OnceLock::new();
#[cfg(test)]
const WARNED_SECRET_MAX_ENTRIES: usize = 64;
#[cfg(not(test))]
@@ -86,6 +88,24 @@ fn auth_probe_saturation_state_lock()
.unwrap_or_else(|poisoned| poisoned.into_inner())
}
/// Locks the process-wide unknown-SNI warning slot, creating it (as `None`)
/// on first use.
///
/// A poisoned mutex is not treated as fatal: the guard is recovered from the
/// poison error and returned as-is.
fn unknown_sni_warn_state_lock() -> std::sync::MutexGuard<'static, Option<Instant>> {
    let slot = UNKNOWN_SNI_WARN_NEXT_ALLOWED.get_or_init(|| Mutex::new(None));
    match slot.lock() {
        Ok(guard) => guard,
        Err(poisoned) => poisoned.into_inner(),
    }
}
fn should_emit_unknown_sni_warn(now: Instant) -> bool {
let mut guard = unknown_sni_warn_state_lock();
if let Some(next_allowed) = *guard
&& now < next_allowed
{
return false;
}
*guard = Some(now + Duration::from_secs(UNKNOWN_SNI_WARN_COOLDOWN_SECS));
true
}
fn normalize_auth_probe_ip(peer_ip: IpAddr) -> IpAddr {
match peer_ip {
IpAddr::V4(ip) => IpAddr::V4(ip),
@@ -282,30 +302,9 @@ fn auth_probe_record_failure_with_state(
let mut eviction_candidate: Option<(IpAddr, u32, Instant)> = None;
let state_len = state.len();
let scan_limit = state_len.min(AUTH_PROBE_PRUNE_SCAN_LIMIT);
let start_offset = auth_probe_scan_start_offset(peer_ip, now, state_len, scan_limit);
let mut scanned = 0usize;
for entry in state.iter().skip(start_offset) {
let key = *entry.key();
let fail_streak = entry.value().fail_streak;
let last_seen = entry.value().last_seen;
match eviction_candidate {
Some((_, current_fail, current_seen))
if fail_streak > current_fail
|| (fail_streak == current_fail && last_seen >= current_seen) => {}
_ => eviction_candidate = Some((key, fail_streak, last_seen)),
}
if auth_probe_state_expired(entry.value(), now) {
stale_keys.push(key);
}
scanned += 1;
if scanned >= scan_limit {
break;
}
}
if scanned < scan_limit {
for entry in state.iter().take(scan_limit - scanned) {
if state_len <= AUTH_PROBE_PRUNE_SCAN_LIMIT {
for entry in state.iter() {
let key = *entry.key();
let fail_streak = entry.value().fail_streak;
let last_seen = entry.value().last_seen;
@@ -319,6 +318,46 @@ fn auth_probe_record_failure_with_state(
stale_keys.push(key);
}
}
} else {
let start_offset =
auth_probe_scan_start_offset(peer_ip, now, state_len, scan_limit);
let mut scanned = 0usize;
for entry in state.iter().skip(start_offset) {
let key = *entry.key();
let fail_streak = entry.value().fail_streak;
let last_seen = entry.value().last_seen;
match eviction_candidate {
Some((_, current_fail, current_seen))
if fail_streak > current_fail
|| (fail_streak == current_fail && last_seen >= current_seen) => {}
_ => eviction_candidate = Some((key, fail_streak, last_seen)),
}
if auth_probe_state_expired(entry.value(), now) {
stale_keys.push(key);
}
scanned += 1;
if scanned >= scan_limit {
break;
}
}
if scanned < scan_limit {
for entry in state.iter().take(scan_limit - scanned) {
let key = *entry.key();
let fail_streak = entry.value().fail_streak;
let last_seen = entry.value().last_seen;
match eviction_candidate {
Some((_, current_fail, current_seen))
if fail_streak > current_fail
|| (fail_streak == current_fail
&& last_seen >= current_seen) => {}
_ => eviction_candidate = Some((key, fail_streak, last_seen)),
}
if auth_probe_state_expired(entry.value(), now) {
stale_keys.push(key);
}
}
}
}
for stale_key in stale_keys {
@@ -393,6 +432,25 @@ fn auth_probe_test_lock() -> &'static Mutex<()> {
TEST_LOCK.get_or_init(|| Mutex::new(()))
}
/// Returns the single, lazily created mutex that tests can hold while they
/// touch the shared unknown-SNI warning state.
#[cfg(test)]
fn unknown_sni_warn_test_lock() -> &'static Mutex<()> {
    static SERIALIZE_TESTS: OnceLock<Mutex<()>> = OnceLock::new();
    SERIALIZE_TESTS.get_or_init(Mutex::default)
}
/// Test helper: resets the unknown-SNI warning threshold back to `None`.
///
/// Does nothing if the shared state was never initialised, so calling it does
/// not force the static into existence.
#[cfg(test)]
fn clear_unknown_sni_warn_state_for_testing() {
    if UNKNOWN_SNI_WARN_NEXT_ALLOWED.get().is_none() {
        return;
    }
    *unknown_sni_warn_state_lock() = None;
}
/// Test-only entry point that forwards `now` unchanged to
/// `should_emit_unknown_sni_warn` and returns its result.
#[cfg(test)]
fn should_emit_unknown_sni_warn_for_testing(now: Instant) -> bool {
should_emit_unknown_sni_warn(now)
}
#[cfg(test)]
fn clear_warned_secrets_for_testing() {
if let Some(warned) = INVALID_SECRET_WARNED.get()
@@ -510,6 +568,21 @@ fn decode_user_secrets(
secrets
}
/// Looks up the configured TLS domain that matches `sni`, ignoring ASCII case.
///
/// `censorship.tls_domain` is checked first, then each entry of
/// `censorship.tls_domains` in iteration order. The returned slice borrows
/// from `config` (the configured spelling, not the client's), or `None` is
/// returned when nothing matches.
#[inline]
fn find_matching_tls_domain<'a>(config: &'a ProxyConfig, sni: &str) -> Option<&'a str> {
    let primary = config.censorship.tls_domain.as_str();
    if primary.eq_ignore_ascii_case(sni) {
        return Some(primary);
    }

    config
        .censorship
        .tls_domains
        .iter()
        .map(|candidate| candidate.as_str())
        .find(|candidate| candidate.eq_ignore_ascii_case(sni))
}
async fn maybe_apply_server_hello_delay(config: &ProxyConfig) {
if config.censorship.server_hello_delay_max_ms == 0 {
return;
@@ -593,7 +666,63 @@ where
}
let client_sni = tls::extract_sni_from_client_hello(handshake);
let secrets = decode_user_secrets(config, client_sni.as_deref());
let preferred_user_hint = client_sni
.as_deref()
.filter(|sni| config.access.users.contains_key(*sni));
let matched_tls_domain = client_sni
.as_deref()
.and_then(|sni| find_matching_tls_domain(config, sni));
let alpn_list = if config.censorship.alpn_enforce {
tls::extract_alpn_from_client_hello(handshake)
} else {
Vec::new()
};
let selected_alpn = if config.censorship.alpn_enforce {
if alpn_list.iter().any(|p| p == b"h2") {
Some(b"h2".to_vec())
} else if alpn_list.iter().any(|p| p == b"http/1.1") {
Some(b"http/1.1".to_vec())
} else if !alpn_list.is_empty() {
maybe_apply_server_hello_delay(config).await;
debug!(peer = %peer, "Client ALPN list has no supported protocol; using masking fallback");
return HandshakeResult::BadClient { reader, writer };
} else {
None
}
} else {
None
};
if client_sni.is_some() && matched_tls_domain.is_none() && preferred_user_hint.is_none() {
auth_probe_record_failure(peer.ip(), Instant::now());
maybe_apply_server_hello_delay(config).await;
let sni = client_sni.as_deref().unwrap_or_default();
let log_now = Instant::now();
if should_emit_unknown_sni_warn(log_now) {
warn!(
peer = %peer,
sni = %sni,
unknown_sni = true,
unknown_sni_action = ?config.censorship.unknown_sni_action,
"TLS handshake rejected by unknown SNI policy"
);
} else {
info!(
peer = %peer,
sni = %sni,
unknown_sni = true,
unknown_sni_action = ?config.censorship.unknown_sni_action,
"TLS handshake rejected by unknown SNI policy"
);
}
return match config.censorship.unknown_sni_action {
UnknownSniAction::Drop => HandshakeResult::Error(ProxyError::UnknownTlsSni),
UnknownSniAction::Mask => HandshakeResult::BadClient { reader, writer },
};
}
let secrets = decode_user_secrets(config, preferred_user_hint);
let validation = match tls::validate_tls_handshake_with_replay_window(
handshake,
@@ -633,16 +762,9 @@ where
let cached = if config.censorship.tls_emulation {
if let Some(cache) = tls_cache.as_ref() {
let selected_domain = if let Some(sni) = client_sni.as_ref() {
if cache.contains_domain(sni).await {
sni.clone()
} else {
config.censorship.tls_domain.clone()
}
} else {
config.censorship.tls_domain.clone()
};
let cached_entry = cache.get(&selected_domain).await;
let selected_domain =
matched_tls_domain.unwrap_or(config.censorship.tls_domain.as_str());
let cached_entry = cache.get(selected_domain).await;
let use_full_cert_payload = cache
.take_full_cert_budget_for_ip(
peer.ip(),
@@ -657,27 +779,6 @@ where
None
};
let alpn_list = if config.censorship.alpn_enforce {
tls::extract_alpn_from_client_hello(handshake)
} else {
Vec::new()
};
let selected_alpn = if config.censorship.alpn_enforce {
if alpn_list.iter().any(|p| p == b"h2") {
Some(b"h2".to_vec())
} else if alpn_list.iter().any(|p| p == b"http/1.1") {
Some(b"http/1.1".to_vec())
} else if !alpn_list.is_empty() {
maybe_apply_server_hello_delay(config).await;
debug!(peer = %peer, "Client ALPN list has no supported protocol; using masking fallback");
return HandshakeResult::BadClient { reader, writer };
} else {
None
}
} else {
None
};
// Add replay digest only for policy-valid handshakes.
replay_checker.add_tls_digest(digest_half);
+29 -17
View File
@@ -10,10 +10,10 @@ use rand::rngs::StdRng;
use rand::{Rng, RngExt, SeedableRng};
use std::net::{IpAddr, SocketAddr};
use std::str;
#[cfg(unix)]
use std::sync::{Mutex, OnceLock};
#[cfg(test)]
use std::sync::atomic::{AtomicUsize, Ordering};
#[cfg(unix)]
use std::sync::{Mutex, OnceLock};
use std::time::{Duration, Instant as StdInstant};
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
use tokio::net::TcpStream;
@@ -107,15 +107,7 @@ where
fn is_http_probe(data: &[u8]) -> bool {
// RFC 7540 section 3.5: HTTP/2 client preface starts with "PRI ".
const HTTP_METHODS: [&[u8]; 10] = [
b"GET ",
b"POST",
b"HEAD",
b"PUT ",
b"DELETE",
b"OPTIONS",
b"CONNECT",
b"TRACE",
b"PATCH",
b"GET ", b"POST", b"HEAD", b"PUT ", b"DELETE", b"OPTIONS", b"CONNECT", b"TRACE", b"PATCH",
b"PRI ",
];
@@ -328,7 +320,10 @@ fn parse_mask_host_ip_literal(host: &str) -> Option<IpAddr> {
fn canonical_ip(ip: IpAddr) -> IpAddr {
match ip {
IpAddr::V6(v6) => v6.to_ipv4_mapped().map(IpAddr::V4).unwrap_or(IpAddr::V6(v6)),
IpAddr::V6(v6) => v6
.to_ipv4_mapped()
.map(IpAddr::V4)
.unwrap_or(IpAddr::V6(v6)),
IpAddr::V4(v4) => IpAddr::V4(v4),
}
}
@@ -664,12 +659,20 @@ pub async fn handle_bad_client<R, W>(
Ok(Err(e)) => {
wait_mask_connect_budget_if_needed(connect_started, config).await;
debug!(error = %e, "Failed to connect to mask unix socket");
consume_client_data_with_timeout_and_cap(reader, config.censorship.mask_relay_max_bytes).await;
consume_client_data_with_timeout_and_cap(
reader,
config.censorship.mask_relay_max_bytes,
)
.await;
wait_mask_outcome_budget(outcome_started, config).await;
}
Err(_) => {
debug!("Timeout connecting to mask unix socket");
consume_client_data_with_timeout_and_cap(reader, config.censorship.mask_relay_max_bytes).await;
consume_client_data_with_timeout_and_cap(
reader,
config.censorship.mask_relay_max_bytes,
)
.await;
wait_mask_outcome_budget(outcome_started, config).await;
}
}
@@ -698,7 +701,8 @@ pub async fn handle_bad_client<R, W>(
local = %local_addr,
"Mask target resolves to local listener; refusing self-referential masking fallback"
);
consume_client_data_with_timeout_and_cap(reader, config.censorship.mask_relay_max_bytes).await;
consume_client_data_with_timeout_and_cap(reader, config.censorship.mask_relay_max_bytes)
.await;
wait_mask_outcome_budget(outcome_started, config).await;
return;
}
@@ -758,12 +762,20 @@ pub async fn handle_bad_client<R, W>(
Ok(Err(e)) => {
wait_mask_connect_budget_if_needed(connect_started, config).await;
debug!(error = %e, "Failed to connect to mask host");
consume_client_data_with_timeout_and_cap(reader, config.censorship.mask_relay_max_bytes).await;
consume_client_data_with_timeout_and_cap(
reader,
config.censorship.mask_relay_max_bytes,
)
.await;
wait_mask_outcome_budget(outcome_started, config).await;
}
Err(_) => {
debug!("Timeout connecting to mask host");
consume_client_data_with_timeout_and_cap(reader, config.censorship.mask_relay_max_bytes).await;
consume_client_data_with_timeout_and_cap(
reader,
config.censorship.mask_relay_max_bytes,
)
.await;
wait_mask_outcome_budget(outcome_started, config).await;
}
}
+149 -79
View File
@@ -4,7 +4,7 @@ use std::collections::{BTreeSet, HashMap};
use std::future::Future;
use std::hash::{BuildHasher, Hash};
use std::net::{IpAddr, SocketAddr};
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex, OnceLock};
use std::time::{Duration, Instant};
@@ -23,7 +23,9 @@ use crate::proxy::route_mode::{
ROUTE_SWITCH_ERROR_MSG, RelayRouteMode, RouteCutoverState, affected_cutover_state,
cutover_stagger_delay,
};
use crate::stats::{MeD2cFlushReason, MeD2cQuotaRejectStage, MeD2cWriteMode, QuotaReserveError, Stats, UserStats};
use crate::stats::{
MeD2cFlushReason, MeD2cQuotaRejectStage, MeD2cWriteMode, QuotaReserveError, Stats, UserStats,
};
use crate::stream::{BufferPool, CryptoReader, CryptoWriter, PooledBuffer};
use crate::transport::middle_proxy::{MePool, MeResponse, proto_flags_for_tag};
@@ -34,7 +36,6 @@ enum C2MeCommand {
const DESYNC_DEDUP_WINDOW: Duration = Duration::from_secs(60);
const DESYNC_DEDUP_MAX_ENTRIES: usize = 65_536;
const DESYNC_DEDUP_PRUNE_SCAN_LIMIT: usize = 1024;
const DESYNC_FULL_CACHE_EMIT_MIN_INTERVAL: Duration = Duration::from_millis(1000);
const DESYNC_ERROR_CLASS: &str = "frame_too_large_crypto_desync";
const C2ME_CHANNEL_CAPACITY_FALLBACK: usize = 128;
@@ -44,10 +45,6 @@ const RELAY_IDLE_IO_POLL_MAX: Duration = Duration::from_secs(1);
const TINY_FRAME_DEBT_PER_TINY: u32 = 8;
const TINY_FRAME_DEBT_LIMIT: u32 = 512;
#[cfg(test)]
const C2ME_SEND_TIMEOUT: Duration = Duration::from_millis(50);
#[cfg(not(test))]
const C2ME_SEND_TIMEOUT: Duration = Duration::from_secs(5);
#[cfg(test)]
const RELAY_TEST_STEP_TIMEOUT: Duration = Duration::from_secs(1);
const ME_D2C_FLUSH_BATCH_MAX_FRAMES_MIN: usize = 1;
const ME_D2C_FLUSH_BATCH_MAX_BYTES_MIN: usize = 4096;
@@ -55,12 +52,21 @@ const ME_D2C_FRAME_BUF_SHRINK_HYSTERESIS_FACTOR: usize = 2;
const ME_D2C_SINGLE_WRITE_COALESCE_MAX_BYTES: usize = 128 * 1024;
const QUOTA_RESERVE_SPIN_RETRIES: usize = 32;
static DESYNC_DEDUP: OnceLock<DashMap<u64, Instant>> = OnceLock::new();
static DESYNC_DEDUP_PREVIOUS: OnceLock<DashMap<u64, Instant>> = OnceLock::new();
static DESYNC_HASHER: OnceLock<RandomState> = OnceLock::new();
static DESYNC_FULL_CACHE_LAST_EMIT_AT: OnceLock<Mutex<Option<Instant>>> = OnceLock::new();
static DESYNC_DEDUP_EVER_SATURATED: OnceLock<AtomicBool> = OnceLock::new();
static DESYNC_DEDUP_ROTATION_STATE: OnceLock<Mutex<DesyncDedupRotationState>> = OnceLock::new();
// Invariant for async callers:
// this std::sync::Mutex is allowed only because critical sections are short,
// synchronous, and MUST never cross an `.await`.
static RELAY_IDLE_CANDIDATE_REGISTRY: OnceLock<Mutex<RelayIdleCandidateRegistry>> = OnceLock::new();
static RELAY_IDLE_MARK_SEQ: AtomicU64 = AtomicU64::new(0);
/// Bookkeeping for rotating the desync-dedup buckets; kept behind
/// `DESYNC_DEDUP_ROTATION_STATE`.
#[derive(Default)]
struct DesyncDedupRotationState {
// Instant at which the current bucket was started. `should_emit_full_desync`
// sets it to `now` on every rotation and treats `None` as "rotate now".
current_started_at: Option<Instant>,
}
struct RelayForensicsState {
trace_id: u64,
conn_id: u64,
@@ -91,7 +97,9 @@ fn relay_idle_candidate_registry() -> &'static Mutex<RelayIdleCandidateRegistry>
RELAY_IDLE_CANDIDATE_REGISTRY.get_or_init(|| Mutex::new(RelayIdleCandidateRegistry::default()))
}
fn relay_idle_candidate_registry_lock() -> std::sync::MutexGuard<'static, RelayIdleCandidateRegistry> {
fn relay_idle_candidate_registry_lock() -> std::sync::MutexGuard<'static, RelayIdleCandidateRegistry>
{
// Keep lock scope narrow and synchronous: callers must drop guard before any `.await`.
let registry = relay_idle_candidate_registry();
match registry.lock() {
Ok(guard) => guard,
@@ -309,64 +317,76 @@ fn should_emit_full_desync(key: u64, all_full: bool, now: Instant) -> bool {
return true;
}
let dedup = DESYNC_DEDUP.get_or_init(DashMap::new);
let saturated_before = dedup.len() >= DESYNC_DEDUP_MAX_ENTRIES;
let ever_saturated = DESYNC_DEDUP_EVER_SATURATED.get_or_init(|| AtomicBool::new(false));
if saturated_before {
ever_saturated.store(true, Ordering::Relaxed);
}
let dedup_current = DESYNC_DEDUP.get_or_init(DashMap::new);
let dedup_previous = DESYNC_DEDUP_PREVIOUS.get_or_init(DashMap::new);
let rotation_state =
DESYNC_DEDUP_ROTATION_STATE.get_or_init(|| Mutex::new(DesyncDedupRotationState::default()));
if let Some(mut seen_at) = dedup.get_mut(&key) {
if now.duration_since(*seen_at) >= DESYNC_DEDUP_WINDOW {
*seen_at = now;
return true;
let mut state = match rotation_state.lock() {
Ok(guard) => guard,
Err(poisoned) => {
let mut guard = poisoned.into_inner();
*guard = DesyncDedupRotationState::default();
rotation_state.clear_poison();
guard
}
return false;
}
if dedup.len() >= DESYNC_DEDUP_MAX_ENTRIES {
let mut stale_keys = Vec::new();
let mut oldest_candidate: Option<(u64, Instant)> = None;
for entry in dedup.iter().take(DESYNC_DEDUP_PRUNE_SCAN_LIMIT) {
let key = *entry.key();
let seen_at = *entry.value();
match oldest_candidate {
Some((_, oldest_seen)) if seen_at >= oldest_seen => {}
_ => oldest_candidate = Some((key, seen_at)),
}
if now.duration_since(seen_at) >= DESYNC_DEDUP_WINDOW {
stale_keys.push(*entry.key());
}
}
for stale_key in stale_keys {
dedup.remove(&stale_key);
}
if dedup.len() >= DESYNC_DEDUP_MAX_ENTRIES {
let Some((evict_key, _)) = oldest_candidate else {
return false;
};
dedup.remove(&evict_key);
dedup.insert(key, now);
return should_emit_full_desync_full_cache(now);
}
}
dedup.insert(key, now);
let saturated_after = dedup.len() >= DESYNC_DEDUP_MAX_ENTRIES;
// Preserve the first sequential insert that reaches capacity as a normal
// emit, while still gating concurrent newcomer churn after the cache has
// ever been observed at saturation.
let was_ever_saturated = if saturated_after {
ever_saturated.swap(true, Ordering::Relaxed)
} else {
ever_saturated.load(Ordering::Relaxed)
};
if saturated_before || (saturated_after && was_ever_saturated) {
let rotate_now = match state.current_started_at {
Some(current_started_at) => match now.checked_duration_since(current_started_at) {
Some(elapsed) => elapsed >= DESYNC_DEDUP_WINDOW,
None => true,
},
None => true,
};
if rotate_now {
dedup_previous.clear();
for entry in dedup_current.iter() {
dedup_previous.insert(*entry.key(), *entry.value());
}
dedup_current.clear();
state.current_started_at = Some(now);
}
if let Some(seen_at) = dedup_current.get(&key).map(|entry| *entry.value()) {
let within_window = match now.checked_duration_since(seen_at) {
Some(elapsed) => elapsed < DESYNC_DEDUP_WINDOW,
None => true,
};
if within_window {
return false;
}
dedup_current.insert(key, now);
return true;
}
if let Some(seen_at) = dedup_previous.get(&key).map(|entry| *entry.value()) {
let within_window = match now.checked_duration_since(seen_at) {
Some(elapsed) => elapsed < DESYNC_DEDUP_WINDOW,
None => true,
};
if within_window {
// Keep the original timestamp when promoting from previous bucket,
// so dedup expiry remains tied to first-seen time.
dedup_current.insert(key, seen_at);
return false;
}
dedup_previous.remove(&key);
}
if dedup_current.len() >= DESYNC_DEDUP_MAX_ENTRIES {
// Bounded eviction path: rotate buckets instead of scanning/evicting
// arbitrary entries from a saturated single map.
dedup_previous.clear();
for entry in dedup_current.iter() {
dedup_previous.insert(*entry.key(), *entry.value());
}
dedup_current.clear();
state.current_started_at = Some(now);
dedup_current.insert(key, now);
should_emit_full_desync_full_cache(now)
} else {
dedup_current.insert(key, now);
true
}
}
@@ -402,8 +422,20 @@ fn clear_desync_dedup_for_testing() {
if let Some(dedup) = DESYNC_DEDUP.get() {
dedup.clear();
}
if let Some(ever_saturated) = DESYNC_DEDUP_EVER_SATURATED.get() {
ever_saturated.store(false, Ordering::Relaxed);
if let Some(dedup_previous) = DESYNC_DEDUP_PREVIOUS.get() {
dedup_previous.clear();
}
if let Some(rotation_state) = DESYNC_DEDUP_ROTATION_STATE.get() {
match rotation_state.lock() {
Ok(mut guard) => {
*guard = DesyncDedupRotationState::default();
}
Err(poisoned) => {
let mut guard = poisoned.into_inner();
*guard = DesyncDedupRotationState::default();
rotation_state.clear_poison();
}
}
}
if let Some(last_emit_at) = DESYNC_FULL_CACHE_LAST_EMIT_AT.get() {
match last_emit_at.lock() {
@@ -612,6 +644,7 @@ pub(crate) fn relay_idle_pressure_test_scope() -> std::sync::MutexGuard<'static,
async fn enqueue_c2me_command(
tx: &mpsc::Sender<C2MeCommand>,
cmd: C2MeCommand,
send_timeout: Option<Duration>,
) -> std::result::Result<(), mpsc::error::SendError<C2MeCommand>> {
match tx.try_send(cmd) {
Ok(()) => Ok(()),
@@ -622,12 +655,18 @@ async fn enqueue_c2me_command(
if tx.capacity() <= C2ME_SOFT_PRESSURE_MIN_FREE_SLOTS {
tokio::task::yield_now().await;
}
match timeout(C2ME_SEND_TIMEOUT, tx.reserve()).await {
Ok(Ok(permit)) => {
let reserve_result = match send_timeout {
Some(send_timeout) => match timeout(send_timeout, tx.reserve()).await {
Ok(result) => result,
Err(_) => return Err(mpsc::error::SendError(cmd)),
},
None => tx.reserve().await,
};
match reserve_result {
Ok(permit) => {
permit.send(cmd);
Ok(())
}
Ok(Err(_)) => Err(mpsc::error::SendError(cmd)),
Err(_) => Err(mpsc::error::SendError(cmd)),
}
}
@@ -753,6 +792,10 @@ where
.general
.me_c2me_channel_capacity
.max(C2ME_CHANNEL_CAPACITY_FALLBACK);
let c2me_send_timeout = match config.general.me_c2me_send_timeout_ms {
0 => None,
timeout_ms => Some(Duration::from_millis(timeout_ms)),
};
let (c2me_tx, mut c2me_rx) = mpsc::channel::<C2MeCommand>(c2me_channel_capacity);
let me_pool_c2me = me_pool.clone();
let c2me_sender = tokio::spawn(async move {
@@ -1129,7 +1172,7 @@ where
user = %user,
"Middle-relay pressure eviction for idle-candidate session"
);
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close).await;
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close, c2me_send_timeout).await;
main_result = Err(ProxyError::Proxy(
"middle-relay session evicted under pressure (idle-candidate)".to_string(),
));
@@ -1148,7 +1191,7 @@ where
"Cutover affected middle session, closing client connection"
);
tokio::time::sleep(delay).await;
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close).await;
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close, c2me_send_timeout).await;
main_result = Err(ProxyError::Proxy(ROUTE_SWITCH_ERROR_MSG.to_string()));
break;
}
@@ -1206,8 +1249,12 @@ where
flags |= RPC_FLAG_NOT_ENCRYPTED;
}
// Keep client read loop lightweight: route heavy ME send path via a dedicated task.
if enqueue_c2me_command(&c2me_tx, C2MeCommand::Data { payload, flags })
.await
if enqueue_c2me_command(
&c2me_tx,
C2MeCommand::Data { payload, flags },
c2me_send_timeout,
)
.await
.is_err()
{
main_result = Err(ProxyError::Proxy("ME sender channel closed".into()));
@@ -1217,7 +1264,9 @@ where
Ok(None) => {
debug!(conn_id, "Client EOF");
client_closed = true;
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close).await;
let _ =
enqueue_c2me_command(&c2me_tx, C2MeCommand::Close, c2me_send_timeout)
.await;
break;
}
Err(e) => {
@@ -1520,8 +1569,7 @@ where
}
if !idle_policy.enabled {
consecutive_zero_len_frames =
consecutive_zero_len_frames.saturating_add(1);
consecutive_zero_len_frames = consecutive_zero_len_frames.saturating_add(1);
if consecutive_zero_len_frames > LEGACY_MAX_CONSECUTIVE_ZERO_LEN_FRAMES {
stats.increment_relay_protocol_desync_close_total();
return Err(ProxyError::Proxy(
@@ -1835,8 +1883,14 @@ where
MeD2cWriteMode::Coalesced
} else {
let header = [first];
client_writer.write_all(&header).await.map_err(ProxyError::Io)?;
client_writer.write_all(data).await.map_err(ProxyError::Io)?;
client_writer
.write_all(&header)
.await
.map_err(ProxyError::Io)?;
client_writer
.write_all(data)
.await
.map_err(ProxyError::Io)?;
MeD2cWriteMode::Split
}
} else if len_words < (1 << 24) {
@@ -1858,8 +1912,14 @@ where
MeD2cWriteMode::Coalesced
} else {
let header = [first, lw[0], lw[1], lw[2]];
client_writer.write_all(&header).await.map_err(ProxyError::Io)?;
client_writer.write_all(data).await.map_err(ProxyError::Io)?;
client_writer
.write_all(&header)
.await
.map_err(ProxyError::Io)?;
client_writer
.write_all(data)
.await
.map_err(ProxyError::Io)?;
MeD2cWriteMode::Split
}
} else {
@@ -1901,8 +1961,14 @@ where
MeD2cWriteMode::Coalesced
} else {
let header = len_val.to_le_bytes();
client_writer.write_all(&header).await.map_err(ProxyError::Io)?;
client_writer.write_all(data).await.map_err(ProxyError::Io)?;
client_writer
.write_all(&header)
.await
.map_err(ProxyError::Io)?;
client_writer
.write_all(data)
.await
.map_err(ProxyError::Io)?;
if padding_len > 0 {
frame_buf.clear();
if frame_buf.capacity() < padding_len {
@@ -1977,3 +2043,7 @@ mod middle_relay_tiny_frame_debt_concurrency_security_tests;
#[cfg(test)]
#[path = "tests/middle_relay_tiny_frame_debt_proto_chunking_security_tests.rs"]
mod middle_relay_tiny_frame_debt_proto_chunking_security_tests;
#[cfg(test)]
#[path = "tests/middle_relay_atomic_quota_invariant_tests.rs"]
mod middle_relay_atomic_quota_invariant_tests;
+50 -50
View File
@@ -4,58 +4,58 @@
#![cfg_attr(test, allow(warnings))]
#![cfg_attr(not(test), forbid(clippy::undocumented_unsafe_blocks))]
#![cfg_attr(
not(test),
deny(
clippy::unwrap_used,
clippy::expect_used,
clippy::panic,
clippy::todo,
clippy::unimplemented,
clippy::correctness,
clippy::option_if_let_else,
clippy::or_fun_call,
clippy::branches_sharing_code,
clippy::single_option_map,
clippy::useless_let_if_seq,
clippy::redundant_locals,
clippy::cloned_ref_to_slice_refs,
unsafe_code,
clippy::await_holding_lock,
clippy::await_holding_refcell_ref,
clippy::debug_assert_with_mut_call,
clippy::macro_use_imports,
clippy::cast_ptr_alignment,
clippy::cast_lossless,
clippy::ptr_as_ptr,
clippy::large_stack_arrays,
clippy::same_functions_in_if_condition,
trivial_casts,
trivial_numeric_casts,
unused_extern_crates,
unused_import_braces,
rust_2018_idioms
)
not(test),
deny(
clippy::unwrap_used,
clippy::expect_used,
clippy::panic,
clippy::todo,
clippy::unimplemented,
clippy::correctness,
clippy::option_if_let_else,
clippy::or_fun_call,
clippy::branches_sharing_code,
clippy::single_option_map,
clippy::useless_let_if_seq,
clippy::redundant_locals,
clippy::cloned_ref_to_slice_refs,
unsafe_code,
clippy::await_holding_lock,
clippy::await_holding_refcell_ref,
clippy::debug_assert_with_mut_call,
clippy::macro_use_imports,
clippy::cast_ptr_alignment,
clippy::cast_lossless,
clippy::ptr_as_ptr,
clippy::large_stack_arrays,
clippy::same_functions_in_if_condition,
trivial_casts,
trivial_numeric_casts,
unused_extern_crates,
unused_import_braces,
rust_2018_idioms
)
)]
#![cfg_attr(
not(test),
allow(
clippy::use_self,
clippy::redundant_closure,
clippy::too_many_arguments,
clippy::doc_markdown,
clippy::missing_const_for_fn,
clippy::unnecessary_operation,
clippy::redundant_pub_crate,
clippy::derive_partial_eq_without_eq,
clippy::type_complexity,
clippy::new_ret_no_self,
clippy::cast_possible_truncation,
clippy::cast_possible_wrap,
clippy::significant_drop_tightening,
clippy::significant_drop_in_scrutinee,
clippy::float_cmp,
clippy::nursery
)
not(test),
allow(
clippy::use_self,
clippy::redundant_closure,
clippy::too_many_arguments,
clippy::doc_markdown,
clippy::missing_const_for_fn,
clippy::unnecessary_operation,
clippy::redundant_pub_crate,
clippy::derive_partial_eq_without_eq,
clippy::type_complexity,
clippy::new_ret_no_self,
clippy::cast_possible_truncation,
clippy::cast_possible_wrap,
clippy::significant_drop_tightening,
clippy::significant_drop_in_scrutinee,
clippy::float_cmp,
clippy::nursery
)
)]
pub mod adaptive_buffers;
+9 -7
View File
@@ -56,8 +56,8 @@ use crate::stats::{Stats, UserStats};
use crate::stream::BufferPool;
use std::io;
use std::pin::Pin;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::task::{Context, Poll};
use std::time::Duration;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, ReadBuf, copy_bidirectional_with_sizes};
@@ -272,12 +272,10 @@ const QUOTA_ADAPTIVE_INTERVAL_MAX_BYTES: u64 = 64 * 1024;
/// Returns the adaptive quota interval, in bytes, for a given remaining budget.
///
/// The interval is half of `remaining_before`, clamped to the inclusive range
/// `[QUOTA_ADAPTIVE_INTERVAL_MIN_BYTES, QUOTA_ADAPTIVE_INTERVAL_MAX_BYTES]`.
/// `clamp` panics if the minimum exceeds the maximum, so the two constants
/// must stay ordered.
// NOTE(review): this view shows the same expression twice (a multi-line chain
// followed by a compact form); they look like the before/after formatting of a
// single expression — confirm only one of them exists in the real file.
#[inline]
fn quota_adaptive_interval_bytes(remaining_before: u64) -> u64 {
remaining_before
.saturating_div(2)
.clamp(
QUOTA_ADAPTIVE_INTERVAL_MIN_BYTES,
QUOTA_ADAPTIVE_INTERVAL_MAX_BYTES,
)
remaining_before.saturating_div(2).clamp(
QUOTA_ADAPTIVE_INTERVAL_MIN_BYTES,
QUOTA_ADAPTIVE_INTERVAL_MAX_BYTES,
)
}
#[inline]
@@ -669,3 +667,7 @@ mod relay_quota_extended_attack_surface_security_tests;
#[cfg(test)]
#[path = "tests/relay_watchdog_delta_security_tests.rs"]
mod relay_watchdog_delta_security_tests;
#[cfg(test)]
#[path = "tests/relay_atomic_quota_invariant_tests.rs"]
mod relay_atomic_quota_invariant_tests;
+75 -17
View File
@@ -1,5 +1,5 @@
use super::*;
use crate::config::{UpstreamConfig, UpstreamType, ProxyConfig};
use crate::config::{ProxyConfig, UpstreamConfig, UpstreamType};
use crate::protocol::constants::{MAX_TLS_PLAINTEXT_SIZE, MIN_TLS_CLIENT_HELLO_SIZE};
use crate::stats::Stats;
use crate::transport::UpstreamManager;
@@ -41,7 +41,9 @@ fn edge_handshake_timeout_with_mask_grace_saturating_add_prevents_overflow() {
// Boundary test for `tls_clienthello_len_in_bounds`: both limits are accepted,
// and one byte outside either limit is rejected.
#[test]
fn edge_tls_clienthello_len_in_bounds_exact_boundaries() {
// Lower limit itself is accepted.
assert!(tls_clienthello_len_in_bounds(MIN_TLS_CLIENT_HELLO_SIZE));
// One below the lower limit is rejected.
// NOTE(review): the assertion below appears in two formattings (single-line
// and wrapped); presumably before/after formatting of one statement — confirm.
assert!(!tls_clienthello_len_in_bounds(MIN_TLS_CLIENT_HELLO_SIZE - 1));
assert!(!tls_clienthello_len_in_bounds(
MIN_TLS_CLIENT_HELLO_SIZE - 1
));
// Upper limit itself is accepted; one above it is rejected.
assert!(tls_clienthello_len_in_bounds(MAX_TLS_PLAINTEXT_SIZE));
assert!(!tls_clienthello_len_in_bounds(MAX_TLS_PLAINTEXT_SIZE + 1));
}
@@ -87,7 +89,15 @@ async fn adversarial_tls_handshake_timeout_during_masking_delay() {
"198.51.100.1:55000".parse().unwrap(),
config,
stats.clone(),
Arc::new(UpstreamManager::new(vec![], 1, 1, 1, 1, false, stats.clone())),
Arc::new(UpstreamManager::new(
vec![],
1,
1,
1,
1,
false,
stats.clone(),
)),
Arc::new(ReplayChecker::new(128, Duration::from_secs(60))),
Arc::new(BufferPool::new()),
Arc::new(SecureRandom::new()),
@@ -99,7 +109,10 @@ async fn adversarial_tls_handshake_timeout_during_masking_delay() {
false,
));
client_side.write_all(&[0x16, 0x03, 0x01, 0xFF, 0xFF]).await.unwrap();
client_side
.write_all(&[0x16, 0x03, 0x01, 0xFF, 0xFF])
.await
.unwrap();
let result = tokio::time::timeout(Duration::from_secs(4), handle)
.await
@@ -123,7 +136,15 @@ async fn blackhat_proxy_protocol_slowloris_timeout() {
"198.51.100.2:55000".parse().unwrap(),
config,
stats.clone(),
Arc::new(UpstreamManager::new(vec![], 1, 1, 1, 1, false, stats.clone())),
Arc::new(UpstreamManager::new(
vec![],
1,
1,
1,
1,
false,
stats.clone(),
)),
Arc::new(ReplayChecker::new(128, Duration::from_secs(60))),
Arc::new(BufferPool::new()),
Arc::new(SecureRandom::new()),
@@ -167,7 +188,15 @@ async fn negative_proxy_protocol_enabled_but_client_sends_tls_hello() {
"198.51.100.3:55000".parse().unwrap(),
config,
stats.clone(),
Arc::new(UpstreamManager::new(vec![], 1, 1, 1, 1, false, stats.clone())),
Arc::new(UpstreamManager::new(
vec![],
1,
1,
1,
1,
false,
stats.clone(),
)),
Arc::new(ReplayChecker::new(128, Duration::from_secs(60))),
Arc::new(BufferPool::new()),
Arc::new(SecureRandom::new()),
@@ -179,7 +208,10 @@ async fn negative_proxy_protocol_enabled_but_client_sends_tls_hello() {
true,
));
client_side.write_all(&[0x16, 0x03, 0x01, 0x02, 0x00]).await.unwrap();
client_side
.write_all(&[0x16, 0x03, 0x01, 0x02, 0x00])
.await
.unwrap();
let result = tokio::time::timeout(Duration::from_secs(2), handle)
.await
@@ -202,7 +234,15 @@ async fn edge_client_stream_exactly_4_bytes_eof() {
"198.51.100.4:55000".parse().unwrap(),
config,
stats.clone(),
Arc::new(UpstreamManager::new(vec![], 1, 1, 1, 1, false, stats.clone())),
Arc::new(UpstreamManager::new(
vec![],
1,
1,
1,
1,
false,
stats.clone(),
)),
Arc::new(ReplayChecker::new(128, Duration::from_secs(60))),
Arc::new(BufferPool::new()),
Arc::new(SecureRandom::new()),
@@ -214,7 +254,10 @@ async fn edge_client_stream_exactly_4_bytes_eof() {
false,
));
client_side.write_all(&[0x16, 0x03, 0x01, 0x00]).await.unwrap();
client_side
.write_all(&[0x16, 0x03, 0x01, 0x00])
.await
.unwrap();
client_side.shutdown().await.unwrap();
let _ = tokio::time::timeout(Duration::from_secs(2), handle).await;
@@ -234,7 +277,15 @@ async fn edge_client_stream_tls_header_valid_but_body_1_byte_short_eof() {
"198.51.100.5:55000".parse().unwrap(),
config,
stats.clone(),
Arc::new(UpstreamManager::new(vec![], 1, 1, 1, 1, false, stats.clone())),
Arc::new(UpstreamManager::new(
vec![],
1,
1,
1,
1,
false,
stats.clone(),
)),
Arc::new(ReplayChecker::new(128, Duration::from_secs(60))),
Arc::new(BufferPool::new()),
Arc::new(SecureRandom::new()),
@@ -246,7 +297,10 @@ async fn edge_client_stream_tls_header_valid_but_body_1_byte_short_eof() {
false,
));
client_side.write_all(&[0x16, 0x03, 0x01, 0x00, 100]).await.unwrap();
client_side
.write_all(&[0x16, 0x03, 0x01, 0x00, 100])
.await
.unwrap();
client_side.write_all(&vec![0x41; 99]).await.unwrap();
client_side.shutdown().await.unwrap();
@@ -269,7 +323,15 @@ async fn integration_non_tls_modes_disabled_immediately_masks() {
"198.51.100.6:55000".parse().unwrap(),
config,
stats.clone(),
Arc::new(UpstreamManager::new(vec![], 1, 1, 1, 1, false, stats.clone())),
Arc::new(UpstreamManager::new(
vec![],
1,
1,
1,
1,
false,
stats.clone(),
)),
Arc::new(ReplayChecker::new(128, Duration::from_secs(60))),
Arc::new(BufferPool::new()),
Arc::new(SecureRandom::new()),
@@ -372,11 +434,7 @@ async fn stress_user_connection_reservation_concurrent_same_ip_exhaustion() {
let ip_tracker = ip_tracker.clone();
tasks.spawn(async move {
RunningClientHandler::acquire_user_connection_reservation_static(
user,
&config,
stats,
peer,
ip_tracker,
user, &config, stats, peer, ip_tracker,
)
.await
});
@@ -7,6 +7,11 @@ use std::sync::Arc;
use std::time::Duration;
use tokio::io::{AsyncWriteExt, duplex};
/// Test helper: charges `bytes` to `user` before the scenario under test runs.
///
/// Looks up (or creates) the user's stats handle through
/// `get_or_create_user_stats_handle` and records the bytes with
/// `quota_charge_post_write`.
fn preload_user_quota(stats: &Stats, user: &str, bytes: u64) {
    stats.quota_charge_post_write(
        stats.get_or_create_user_stats_handle(user).as_ref(),
        bytes,
    );
}
#[test]
fn invariant_wrap_tls_application_record_exact_multiples() {
let chunk_size = u16::MAX as usize;
@@ -37,7 +42,15 @@ async fn invariant_tls_clienthello_truncation_exact_boundary_triggers_masking()
"198.51.100.20:55000".parse().unwrap(),
config,
stats.clone(),
Arc::new(UpstreamManager::new(vec![], 1, 1, 1, 1, false, stats.clone())),
Arc::new(UpstreamManager::new(
vec![],
1,
1,
1,
1,
false,
stats.clone(),
)),
Arc::new(ReplayChecker::new(128, Duration::from_secs(60))),
Arc::new(BufferPool::new()),
Arc::new(SecureRandom::new()),
@@ -60,7 +73,9 @@ async fn invariant_tls_clienthello_truncation_exact_boundary_triggers_masking()
.unwrap();
client_side.shutdown().await.unwrap();
let _ = tokio::time::timeout(Duration::from_secs(2), handler).await.unwrap();
let _ = tokio::time::timeout(Duration::from_secs(2), handler)
.await
.unwrap();
assert_eq!(stats.get_connects_bad(), 1);
}
@@ -68,7 +83,10 @@ async fn invariant_tls_clienthello_truncation_exact_boundary_triggers_masking()
async fn invariant_acquire_reservation_ip_limit_rollback() {
let user = "rollback-test-user";
let mut config = ProxyConfig::default();
config.access.user_max_tcp_conns.insert(user.to_string(), 10);
config
.access
.user_max_tcp_conns
.insert(user.to_string(), 10);
let stats = Arc::new(Stats::new());
let ip_tracker = Arc::new(UserIpTracker::new());
@@ -114,7 +132,7 @@ async fn invariant_quota_exact_boundary_inclusive() {
let ip_tracker = Arc::new(UserIpTracker::new());
let peer = "198.51.100.23:55000".parse().unwrap();
stats.add_user_octets_from(user, 999);
preload_user_quota(stats.as_ref(), user, 999);
let res1 = RunningClientHandler::acquire_user_connection_reservation_static(
user,
&config,
@@ -126,7 +144,7 @@ async fn invariant_quota_exact_boundary_inclusive() {
assert!(res1.is_ok());
res1.unwrap().release().await;
stats.add_user_octets_from(user, 1);
preload_user_quota(stats.as_ref(), user, 1);
let res2 = RunningClientHandler::acquire_user_connection_reservation_static(
user,
&config,
@@ -154,7 +172,15 @@ async fn invariant_direct_mode_partial_header_eof_is_error_not_bad_connect() {
"198.51.100.25:55000".parse().unwrap(),
config,
stats.clone(),
Arc::new(UpstreamManager::new(vec![], 1, 1, 1, 1, false, stats.clone())),
Arc::new(UpstreamManager::new(
vec![],
1,
1,
1,
1,
false,
stats.clone(),
)),
Arc::new(ReplayChecker::new(128, Duration::from_secs(60))),
Arc::new(BufferPool::new()),
Arc::new(SecureRandom::new()),
@@ -100,14 +100,7 @@ async fn run_http2_fragment_case(split_at: usize, delay_ms: u64, peer: SocketAdd
#[tokio::test]
async fn http2_preface_fragmentation_matrix_is_classified_and_forwarded() {
let cases = [
(2usize, 0u64),
(3, 0),
(4, 0),
(2, 7),
(3, 7),
(8, 1),
];
let cases = [(2usize, 0u64), (3, 0), (4, 0), (2, 7), (3, 7), (8, 1)];
for (i, (split_at, delay_ms)) in cases.into_iter().enumerate() {
let peer: SocketAddr = format!("198.51.100.{}:58{}", 140 + i, 100 + i)
@@ -29,7 +29,10 @@ async fn configured_prefetch_budget_20ms_recovers_tail_delayed_15ms() {
.write_all(b"ONNECT example.org:443 HTTP/1.1\r\n")
.await
.expect("tail bytes must be writable");
writer.shutdown().await.expect("writer shutdown must succeed");
writer
.shutdown()
.await
.expect("writer shutdown must succeed");
});
let mut initial_data = b"C".to_vec();
@@ -60,7 +63,10 @@ async fn configured_prefetch_budget_5ms_misses_tail_delayed_15ms() {
.write_all(b"ONNECT example.org:443 HTTP/1.1\r\n")
.await
.expect("tail bytes must be writable");
writer.shutdown().await.expect("writer shutdown must succeed");
writer
.shutdown()
.await
.expect("writer shutdown must succeed");
});
let mut initial_data = b"C".to_vec();
@@ -245,7 +245,10 @@ async fn blackhat_integration_empty_initial_data_path_is_byte_exact_and_eof_clea
assert_eq!(head[0], 0x16);
read_and_discard_tls_record_body(&mut client_side, head).await;
client_side.write_all(&invalid_mtproto_record).await.unwrap();
client_side
.write_all(&invalid_mtproto_record)
.await
.unwrap();
client_side.write_all(&trailing_record).await.unwrap();
client_side.shutdown().await.unwrap();
@@ -7,7 +7,9 @@ async fn run_strict_prefetch_case(prefetch_ms: u64, tail_delay_ms: u64) -> Vec<u
let writer_task = tokio::spawn(async move {
sleep(Duration::from_millis(tail_delay_ms)).await;
let _ = writer.write_all(b"ONNECT example.org:443 HTTP/1.1\r\n").await;
let _ = writer
.write_all(b"ONNECT example.org:443 HTTP/1.1\r\n")
.await;
let _ = writer.shutdown().await;
});
@@ -35,7 +35,10 @@ async fn run_prefetch_budget_case(prefetch_budget_ms: u64, delayed_tail_ms: u64)
.write_all(b"ONNECT example.org:443 HTTP/1.1\r\n")
.await
.expect("tail bytes must be writable");
writer.shutdown().await.expect("writer shutdown must succeed");
writer
.shutdown()
.await
.expect("writer shutdown must succeed");
});
let mut initial_data = b"C".to_vec();
@@ -67,9 +67,10 @@ async fn run_replay_candidate_session(
cfg.censorship.mask_port = 1;
cfg.censorship.mask_timing_normalization_enabled = false;
cfg.access.ignore_time_skew = true;
cfg.access
.users
.insert("user".to_string(), "abababababababababababababababab".to_string());
cfg.access.users.insert(
"user".to_string(),
"abababababababababababababababab".to_string(),
);
let config = Arc::new(cfg);
let stats = Arc::new(Stats::new());
@@ -99,7 +100,10 @@ async fn run_replay_candidate_session(
if drive_mtproto_fail {
let mut server_hello_head = [0u8; 5];
client_side.read_exact(&mut server_hello_head).await.unwrap();
client_side
.read_exact(&mut server_hello_head)
.await
.unwrap();
assert_eq!(server_hello_head[0], 0x16);
let body_len = u16::from_be_bytes([server_hello_head[3], server_hello_head[4]]) as usize;
let mut body = vec![0u8; body_len];
@@ -110,7 +114,10 @@ async fn run_replay_candidate_session(
invalid_mtproto_record.extend_from_slice(&TLS_VERSION);
invalid_mtproto_record.extend_from_slice(&(HANDSHAKE_LEN as u16).to_be_bytes());
invalid_mtproto_record.extend_from_slice(&vec![0u8; HANDSHAKE_LEN]);
client_side.write_all(&invalid_mtproto_record).await.unwrap();
client_side
.write_all(&invalid_mtproto_record)
.await
.unwrap();
client_side
.write_all(b"GET /replay-fallback HTTP/1.1\r\nHost: x\r\n\r\n")
.await
@@ -154,8 +161,7 @@ async fn replay_reject_still_honors_masking_timing_budget() {
.await;
assert!(
replay_elapsed >= Duration::from_millis(40)
&& replay_elapsed < Duration::from_millis(250),
replay_elapsed >= Duration::from_millis(40) && replay_elapsed < Duration::from_millis(250),
"replay rejection path must still satisfy masking timing budget without unbounded DB/CPU delay"
);
}
+49 -18
View File
@@ -6,6 +6,11 @@ use std::sync::Arc;
use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncWriteExt, duplex};
/// Test helper: charges `bytes` to `user` before the scenario under test runs.
///
/// Looks up (or creates) the user's stats handle through
/// `get_or_create_user_stats_handle` and records the bytes with
/// `quota_charge_post_write`.
fn preload_user_quota(stats: &Stats, user: &str, bytes: u64) {
    stats.quota_charge_post_write(
        stats.get_or_create_user_stats_handle(user).as_ref(),
        bytes,
    );
}
#[tokio::test]
async fn edge_mask_delay_bypassed_if_max_is_zero() {
let mut config = ProxyConfig::default();
@@ -42,17 +47,13 @@ async fn boundary_user_data_quota_exact_match_rejects() {
config.access.user_data_quota.insert(user.to_string(), 1024);
let stats = Arc::new(Stats::new());
stats.add_user_octets_from(user, 1024);
preload_user_quota(stats.as_ref(), user, 1024);
let ip_tracker = Arc::new(UserIpTracker::new());
let peer = "198.51.100.10:55000".parse().unwrap();
let result = RunningClientHandler::acquire_user_connection_reservation_static(
user,
&config,
stats,
peer,
ip_tracker,
user, &config, stats, peer, ip_tracker,
)
.await;
@@ -74,11 +75,7 @@ async fn boundary_user_expiration_in_past_rejects() {
let peer = "198.51.100.11:55000".parse().unwrap();
let result = RunningClientHandler::acquire_user_connection_reservation_static(
user,
&config,
stats,
peer,
ip_tracker,
user, &config, stats, peer, ip_tracker,
)
.await;
@@ -98,7 +95,15 @@ async fn blackhat_proxy_protocol_massive_garbage_rejected_quickly() {
"198.51.100.12:55000".parse().unwrap(),
config,
stats.clone(),
Arc::new(UpstreamManager::new(vec![], 1, 1, 1, 1, false, stats.clone())),
Arc::new(UpstreamManager::new(
vec![],
1,
1,
1,
1,
false,
stats.clone(),
)),
Arc::new(ReplayChecker::new(128, Duration::from_secs(60))),
Arc::new(BufferPool::new()),
Arc::new(SecureRandom::new()),
@@ -136,7 +141,15 @@ async fn edge_tls_body_immediate_eof_triggers_masking_and_bad_connect() {
"198.51.100.13:55000".parse().unwrap(),
config,
stats.clone(),
Arc::new(UpstreamManager::new(vec![], 1, 1, 1, 1, false, stats.clone())),
Arc::new(UpstreamManager::new(
vec![],
1,
1,
1,
1,
false,
stats.clone(),
)),
Arc::new(ReplayChecker::new(128, Duration::from_secs(60))),
Arc::new(BufferPool::new()),
Arc::new(SecureRandom::new()),
@@ -148,10 +161,15 @@ async fn edge_tls_body_immediate_eof_triggers_masking_and_bad_connect() {
false,
));
client_side.write_all(&[0x16, 0x03, 0x01, 0x00, 100]).await.unwrap();
client_side
.write_all(&[0x16, 0x03, 0x01, 0x00, 100])
.await
.unwrap();
client_side.shutdown().await.unwrap();
let _ = tokio::time::timeout(Duration::from_secs(2), handler).await.unwrap();
let _ = tokio::time::timeout(Duration::from_secs(2), handler)
.await
.unwrap();
assert_eq!(stats.get_connects_bad(), 1);
}
@@ -172,7 +190,15 @@ async fn security_classic_mode_disabled_masks_valid_length_payload() {
"198.51.100.15:55000".parse().unwrap(),
config,
stats.clone(),
Arc::new(UpstreamManager::new(vec![], 1, 1, 1, 1, false, stats.clone())),
Arc::new(UpstreamManager::new(
vec![],
1,
1,
1,
1,
false,
stats.clone(),
)),
Arc::new(ReplayChecker::new(128, Duration::from_secs(60))),
Arc::new(BufferPool::new()),
Arc::new(SecureRandom::new()),
@@ -187,7 +213,9 @@ async fn security_classic_mode_disabled_masks_valid_length_payload() {
client_side.write_all(&vec![0xEF; 64]).await.unwrap();
client_side.shutdown().await.unwrap();
let _ = tokio::time::timeout(Duration::from_secs(2), handler).await.unwrap();
let _ = tokio::time::timeout(Duration::from_secs(2), handler)
.await
.unwrap();
assert_eq!(stats.get_connects_bad(), 1);
}
@@ -195,7 +223,10 @@ async fn security_classic_mode_disabled_masks_valid_length_payload() {
async fn concurrency_ip_tracker_strict_limit_one_rapid_churn() {
let user = "rapid-churn-user";
let mut config = ProxyConfig::default();
config.access.user_max_tcp_conns.insert(user.to_string(), 10);
config
.access
.user_max_tcp_conns
.insert(user.to_string(), 10);
let stats = Arc::new(Stats::new());
let ip_tracker = Arc::new(UserIpTracker::new());
+34 -11
View File
@@ -7,9 +7,9 @@ use crate::protocol::tls;
use crate::proxy::handshake::HandshakeSuccess;
use crate::stream::{CryptoReader, CryptoWriter};
use crate::transport::proxy_protocol::ProxyProtocolV1Builder;
use rand::rngs::StdRng;
use rand::Rng;
use rand::SeedableRng;
use rand::rngs::StdRng;
use std::net::Ipv4Addr;
use tokio::io::{AsyncReadExt, AsyncWriteExt, duplex};
use tokio::net::{TcpListener, TcpStream};
@@ -34,7 +34,10 @@ fn handshake_timeout_with_mask_grace_includes_mask_margin() {
config.timeouts.client_handshake = 2;
config.censorship.mask = false;
assert_eq!(handshake_timeout_with_mask_grace(&config), Duration::from_secs(2));
assert_eq!(
handshake_timeout_with_mask_grace(&config),
Duration::from_secs(2)
);
config.censorship.mask = true;
assert_eq!(
@@ -86,7 +89,10 @@ impl tokio::io::AsyncRead for ErrorReader {
_cx: &mut std::task::Context<'_>,
_buf: &mut tokio::io::ReadBuf<'_>,
) -> std::task::Poll<std::io::Result<()>> {
std::task::Poll::Ready(Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "fake error")))
std::task::Poll::Ready(Err(std::io::Error::new(
std::io::ErrorKind::UnexpectedEof,
"fake error",
)))
}
}
@@ -124,7 +130,10 @@ fn handshake_timeout_without_mask_is_exact_base() {
config.timeouts.client_handshake = 7;
config.censorship.mask = false;
assert_eq!(handshake_timeout_with_mask_grace(&config), Duration::from_secs(7));
assert_eq!(
handshake_timeout_with_mask_grace(&config),
Duration::from_secs(7)
);
}
#[test]
@@ -133,7 +142,10 @@ fn handshake_timeout_mask_enabled_adds_750ms() {
config.timeouts.client_handshake = 3;
config.censorship.mask = true;
assert_eq!(handshake_timeout_with_mask_grace(&config), Duration::from_millis(3750));
assert_eq!(
handshake_timeout_with_mask_grace(&config),
Duration::from_millis(3750)
);
}
#[tokio::test]
@@ -155,10 +167,12 @@ async fn read_with_progress_fragmented_io_works_over_multiple_calls() {
let mut b = vec![0u8; chunk_size];
let n = read_with_progress(&mut cursor, &mut b).await.unwrap();
result.extend_from_slice(&b[..n]);
if n == 0 { break; }
if n == 0 {
break;
}
}
assert_eq!(result, vec![1,2,3,4,5]);
assert_eq!(result, vec![1, 2, 3, 4, 5]);
}
#[tokio::test]
@@ -174,7 +188,9 @@ async fn read_with_progress_stress_randomized_chunk_sizes() {
let mut b = vec![0u8; chunk];
let read = read_with_progress(&mut cursor, &mut b).await.unwrap();
collected.extend_from_slice(&b[..read]);
if read == 0 { break; }
if read == 0 {
break;
}
}
assert_eq!(collected, input);
@@ -215,10 +231,12 @@ fn wrap_tls_application_record_roundtrip_size_check() {
let mut consumed = 0;
while idx + 5 <= wrapped.len() {
assert_eq!(wrapped[idx], 0x17);
let len = u16::from_be_bytes([wrapped[idx+3], wrapped[idx+4]]) as usize;
let len = u16::from_be_bytes([wrapped[idx + 3], wrapped[idx + 4]]) as usize;
consumed += len;
idx += 5 + len;
if idx >= wrapped.len() { break; }
if idx >= wrapped.len() {
break;
}
}
assert_eq!(consumed, payload_len);
@@ -242,6 +260,11 @@ where
CryptoWriter::new(writer, AesCtr::new(&key, iv), 8 * 1024)
}
/// Test helper: charges `bytes` to `user` before the scenario under test runs.
///
/// Looks up (or creates) the user's stats handle through
/// `get_or_create_user_stats_handle` and records the bytes with
/// `quota_charge_post_write`.
fn preload_user_quota(stats: &Stats, user: &str, bytes: u64) {
    stats.quota_charge_post_write(
        stats.get_or_create_user_stats_handle(user).as_ref(),
        bytes,
    );
}
#[tokio::test]
async fn user_connection_reservation_drop_enqueues_cleanup_synchronously() {
let ip_tracker = Arc::new(crate::ip_tracker::UserIpTracker::new());
@@ -3040,7 +3063,7 @@ async fn quota_rejection_does_not_reserve_ip_or_trigger_rollback() {
.insert("user".to_string(), 1024);
let stats = Stats::new();
stats.add_user_octets_from("user", 1024);
preload_user_quota(&stats, "user", 1024);
let ip_tracker = UserIpTracker::new();
let peer_addr: SocketAddr = "203.0.113.211:50001".parse().unwrap();
@@ -25,13 +25,26 @@ fn wrap_tls_application_record_oversized_payload_is_chunked_without_truncation()
let len = u16::from_be_bytes([record[offset + 3], record[offset + 4]]) as usize;
let body_start = offset + 5;
let body_end = body_start + len;
assert!(body_end <= record.len(), "declared TLS record length must be in-bounds");
assert!(
body_end <= record.len(),
"declared TLS record length must be in-bounds"
);
recovered.extend_from_slice(&record[body_start..body_end]);
offset = body_end;
frames += 1;
}
assert_eq!(offset, record.len(), "record parser must consume exact output size");
assert_eq!(frames, 2, "oversized payload should split into exactly two records");
assert_eq!(recovered, payload, "chunked records must preserve full payload");
assert_eq!(
offset,
record.len(),
"record parser must consume exact output size"
);
assert_eq!(
frames, 2,
"oversized payload should split into exactly two records"
);
assert_eq!(
recovered, payload,
"chunked records must preserve full payload"
);
}
+23 -16
View File
@@ -773,8 +773,7 @@ fn anchored_open_nix_path_writes_expected_lines() {
"target/telemt-unknown-dc-anchored-open-ok-{}/unknown-dc.log",
std::process::id()
);
let sanitized =
sanitize_unknown_dc_log_path(&rel_candidate).expect("candidate must sanitize");
let sanitized = sanitize_unknown_dc_log_path(&rel_candidate).expect("candidate must sanitize");
let _ = fs::remove_file(&sanitized.resolved_path);
let mut first = open_unknown_dc_log_append_anchored(&sanitized)
@@ -787,7 +786,10 @@ fn anchored_open_nix_path_writes_expected_lines() {
let content =
fs::read_to_string(&sanitized.resolved_path).expect("anchored log file must be readable");
let lines: Vec<&str> = content.lines().filter(|line| !line.trim().is_empty()).collect();
let lines: Vec<&str> = content
.lines()
.filter(|line| !line.trim().is_empty())
.collect();
assert_eq!(lines.len(), 2, "expected one line per anchored append call");
assert!(
lines.contains(&"dc_idx=31200") && lines.contains(&"dc_idx=31201"),
@@ -811,8 +813,7 @@ fn anchored_open_parallel_appends_preserve_line_integrity() {
"target/telemt-unknown-dc-anchored-open-parallel-{}/unknown-dc.log",
std::process::id()
);
let sanitized =
sanitize_unknown_dc_log_path(&rel_candidate).expect("candidate must sanitize");
let sanitized = sanitize_unknown_dc_log_path(&rel_candidate).expect("candidate must sanitize");
let _ = fs::remove_file(&sanitized.resolved_path);
let mut workers = Vec::new();
@@ -831,8 +832,15 @@ fn anchored_open_parallel_appends_preserve_line_integrity() {
let content =
fs::read_to_string(&sanitized.resolved_path).expect("parallel log file must be readable");
let lines: Vec<&str> = content.lines().filter(|line| !line.trim().is_empty()).collect();
assert_eq!(lines.len(), 64, "expected one complete line per worker append");
let lines: Vec<&str> = content
.lines()
.filter(|line| !line.trim().is_empty())
.collect();
assert_eq!(
lines.len(),
64,
"expected one complete line per worker append"
);
for line in lines {
assert!(
line.starts_with("dc_idx="),
@@ -867,8 +875,7 @@ fn anchored_open_creates_private_0600_file_permissions() {
"target/telemt-unknown-dc-anchored-perms-{}/unknown-dc.log",
std::process::id()
);
let sanitized =
sanitize_unknown_dc_log_path(&rel_candidate).expect("candidate must sanitize");
let sanitized = sanitize_unknown_dc_log_path(&rel_candidate).expect("candidate must sanitize");
let _ = fs::remove_file(&sanitized.resolved_path);
let mut file = open_unknown_dc_log_append_anchored(&sanitized)
@@ -905,8 +912,7 @@ fn anchored_open_rejects_existing_symlink_target() {
"target/telemt-unknown-dc-anchored-symlink-target-{}/unknown-dc.log",
std::process::id()
);
let sanitized =
sanitize_unknown_dc_log_path(&rel_candidate).expect("candidate must sanitize");
let sanitized = sanitize_unknown_dc_log_path(&rel_candidate).expect("candidate must sanitize");
let outside = std::env::temp_dir().join(format!(
"telemt-unknown-dc-anchored-symlink-outside-{}.log",
@@ -943,8 +949,7 @@ fn anchored_open_high_contention_multi_write_preserves_complete_lines() {
"target/telemt-unknown-dc-anchored-contention-{}/unknown-dc.log",
std::process::id()
);
let sanitized =
sanitize_unknown_dc_log_path(&rel_candidate).expect("candidate must sanitize");
let sanitized = sanitize_unknown_dc_log_path(&rel_candidate).expect("candidate must sanitize");
let _ = fs::remove_file(&sanitized.resolved_path);
let workers = 24usize;
@@ -970,7 +975,10 @@ fn anchored_open_high_contention_multi_write_preserves_complete_lines() {
let content = fs::read_to_string(&sanitized.resolved_path)
.expect("contention output file must be readable");
let lines: Vec<&str> = content.lines().filter(|line| !line.trim().is_empty()).collect();
let lines: Vec<&str> = content
.lines()
.filter(|line| !line.trim().is_empty())
.collect();
assert_eq!(
lines.len(),
workers * rounds,
@@ -1014,8 +1022,7 @@ fn append_unknown_dc_line_returns_error_for_read_only_descriptor() {
"target/telemt-unknown-dc-append-ro-{}/unknown-dc.log",
std::process::id()
);
let sanitized =
sanitize_unknown_dc_log_path(&rel_candidate).expect("candidate must sanitize");
let sanitized = sanitize_unknown_dc_log_path(&rel_candidate).expect("candidate must sanitize");
fs::write(&sanitized.resolved_path, "seed\n").expect("seed file must be writable");
let mut readonly = std::fs::OpenOptions::new()
@@ -1,5 +1,5 @@
use super::*;
use crate::crypto::{sha256, sha256_hmac, AesCtr};
use crate::crypto::{AesCtr, sha256, sha256_hmac};
use crate::protocol::constants::{ProtoTag, RESERVED_NONCE_BEGINNINGS, RESERVED_NONCE_FIRST_BYTES};
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
use std::sync::Arc;
@@ -175,7 +175,10 @@ async fn tls_minimum_viable_length_boundary() {
None,
)
.await;
assert!(matches!(res, HandshakeResult::Success(_)), "Exact minimum length TLS handshake must succeed");
assert!(
matches!(res, HandshakeResult::Success(_)),
"Exact minimum length TLS handshake must succeed"
);
let short_handshake = vec![0x42u8; min_len - 1];
let res_short = handle_tls_handshake(
@@ -189,7 +192,10 @@ async fn tls_minimum_viable_length_boundary() {
None,
)
.await;
assert!(matches!(res_short, HandshakeResult::BadClient { .. }), "Handshake 1 byte shorter than minimum must fail closed");
assert!(
matches!(res_short, HandshakeResult::BadClient { .. }),
"Handshake 1 byte shorter than minimum must fail closed"
);
}
#[tokio::test]
@@ -219,9 +225,16 @@ async fn mtproto_extreme_dc_index_serialization() {
match res {
HandshakeResult::Success((_, _, success)) => {
assert_eq!(success.dc_idx, extreme_dc, "Extreme DC index {} must serialize/deserialize perfectly", extreme_dc);
assert_eq!(
success.dc_idx, extreme_dc,
"Extreme DC index {} must serialize/deserialize perfectly",
extreme_dc
);
}
_ => panic!("MTProto handshake with extreme DC index {} failed", extreme_dc),
_ => panic!(
"MTProto handshake with extreme DC index {} failed",
extreme_dc
),
}
}
}
@@ -253,7 +266,11 @@ async fn alpn_strict_case_and_padding_rejection() {
None,
)
.await;
assert!(matches!(res, HandshakeResult::BadClient { .. }), "ALPN strict enforcement must reject {:?}", bad_alpn);
assert!(
matches!(res, HandshakeResult::BadClient { .. }),
"ALPN strict enforcement must reject {:?}",
bad_alpn
);
}
}
@@ -265,8 +282,15 @@ fn ipv4_mapped_ipv6_bucketing_anomaly() {
let norm_1 = normalize_auth_probe_ip(ipv4_mapped_1);
let norm_2 = normalize_auth_probe_ip(ipv4_mapped_2);
assert_eq!(norm_1, norm_2, "IPv4-mapped IPv6 addresses must collapse into the same /64 bucket (::0)");
assert_eq!(norm_1, IpAddr::V6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)), "The bucket must be exactly ::0");
assert_eq!(
norm_1, norm_2,
"IPv4-mapped IPv6 addresses must collapse into the same /64 bucket (::0)"
);
assert_eq!(
norm_1,
IpAddr::V6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)),
"The bucket must be exactly ::0"
);
}
// --- Category 2: Adversarial & Black Hat ---
@@ -309,7 +333,10 @@ async fn mtproto_invalid_ciphertext_does_not_poison_replay_cache() {
None,
)
.await;
assert!(matches!(res_valid, HandshakeResult::Success(_)), "Invalid MTProto ciphertext must not poison the replay cache");
assert!(
matches!(res_valid, HandshakeResult::Success(_)),
"Invalid MTProto ciphertext must not poison the replay cache"
);
}
#[tokio::test]
@@ -352,7 +379,10 @@ async fn tls_invalid_session_does_not_poison_replay_cache() {
None,
)
.await;
assert!(matches!(res_valid, HandshakeResult::Success(_)), "Invalid TLS payload must not poison the replay cache");
assert!(
matches!(res_valid, HandshakeResult::Success(_)),
"Invalid TLS payload must not poison the replay cache"
);
}
#[tokio::test]
@@ -387,7 +417,10 @@ async fn server_hello_delay_timing_neutrality_on_hmac_failure() {
let elapsed = start.elapsed();
assert!(matches!(res, HandshakeResult::BadClient { .. }));
assert!(elapsed >= Duration::from_millis(45), "Invalid HMAC must still incur the configured ServerHello delay to prevent timing side-channels");
assert!(
elapsed >= Duration::from_millis(45),
"Invalid HMAC must still incur the configured ServerHello delay to prevent timing side-channels"
);
}
#[tokio::test]
@@ -421,7 +454,10 @@ async fn server_hello_delay_inversion_resilience() {
let elapsed = start.elapsed();
assert!(matches!(res, HandshakeResult::Success(_)));
assert!(elapsed >= Duration::from_millis(90), "Delay logic must gracefully handle min > max inversions via max.max(min)");
assert!(
elapsed >= Duration::from_millis(90),
"Delay logic must gracefully handle min > max inversions via max.max(min)"
);
}
#[tokio::test]
@@ -436,10 +472,16 @@ async fn mixed_valid_and_invalid_user_secrets_configuration() {
for i in 0..9 {
let bad_secret = if i % 2 == 0 { "badhex!" } else { "1122" };
config.access.users.insert(format!("bad_user_{}", i), bad_secret.to_string());
config
.access
.users
.insert(format!("bad_user_{}", i), bad_secret.to_string());
}
let valid_secret_hex = "99999999999999999999999999999999";
config.access.users.insert("good_user".to_string(), valid_secret_hex.to_string());
config
.access
.users
.insert("good_user".to_string(), valid_secret_hex.to_string());
config.general.modes.secure = true;
config.general.modes.classic = true;
config.general.modes.tls = true;
@@ -463,7 +505,10 @@ async fn mixed_valid_and_invalid_user_secrets_configuration() {
)
.await;
assert!(matches!(res, HandshakeResult::Success(_)), "Proxy must gracefully skip invalid secrets and authenticate the valid one");
assert!(
matches!(res, HandshakeResult::Success(_)),
"Proxy must gracefully skip invalid secrets and authenticate the valid one"
);
}
#[tokio::test]
@@ -494,7 +539,10 @@ async fn tls_emulation_fallback_when_cache_missing() {
)
.await;
assert!(matches!(res, HandshakeResult::Success(_)), "TLS emulation must gracefully fall back to standard ServerHello if cache is missing");
assert!(
matches!(res, HandshakeResult::Success(_)),
"TLS emulation must gracefully fall back to standard ServerHello if cache is missing"
);
}
#[tokio::test]
@@ -524,7 +572,10 @@ async fn classic_mode_over_tls_transport_protocol_confusion() {
)
.await;
assert!(matches!(res, HandshakeResult::Success(_)), "Intermediate tag over TLS must succeed if classic mode is enabled, locking in cross-transport behavior");
assert!(
matches!(res, HandshakeResult::Success(_)),
"Intermediate tag over TLS must succeed if classic mode is enabled, locking in cross-transport behavior"
);
}
#[test]
@@ -543,9 +594,15 @@ fn generate_tg_nonce_never_emits_reserved_bytes() {
false,
);
assert!(!RESERVED_NONCE_FIRST_BYTES.contains(&nonce[0]), "Nonce must never start with reserved bytes");
assert!(
!RESERVED_NONCE_FIRST_BYTES.contains(&nonce[0]),
"Nonce must never start with reserved bytes"
);
let first_four: [u8; 4] = [nonce[0], nonce[1], nonce[2], nonce[3]];
assert!(!RESERVED_NONCE_BEGINNINGS.contains(&first_four), "Nonce must never match reserved 4-byte beginnings");
assert!(
!RESERVED_NONCE_BEGINNINGS.contains(&first_four),
"Nonce must never match reserved 4-byte beginnings"
);
}
}
@@ -568,11 +625,18 @@ async fn dashmap_concurrent_saturation_stress() {
}
for task in tasks {
task.await.expect("Task panicked during concurrent DashMap stress");
task.await
.expect("Task panicked during concurrent DashMap stress");
}
assert!(auth_probe_is_throttled_for_testing(ip_a), "IP A must be throttled after concurrent stress");
assert!(auth_probe_is_throttled_for_testing(ip_b), "IP B must be throttled after concurrent stress");
assert!(
auth_probe_is_throttled_for_testing(ip_a),
"IP A must be throttled after concurrent stress"
);
assert!(
auth_probe_is_throttled_for_testing(ip_b),
"IP B must be throttled after concurrent stress"
);
}
#[test]
@@ -586,7 +650,12 @@ fn prototag_invalid_bytes_fail_closed() {
];
for tag in invalid_tags {
assert_eq!(ProtoTag::from_bytes(tag), None, "Invalid ProtoTag bytes {:?} must fail closed", tag);
assert_eq!(
ProtoTag::from_bytes(tag),
None,
"Invalid ProtoTag bytes {:?} must fail closed",
tag
);
}
}
@@ -603,7 +672,10 @@ fn auth_probe_eviction_hash_collision_stress() {
auth_probe_record_failure_with_state(state, ip, now);
}
assert!(state.len() <= AUTH_PROBE_TRACK_MAX_ENTRIES, "Eviction logic must successfully bound the map size under heavy insertion stress");
assert!(
state.len() <= AUTH_PROBE_TRACK_MAX_ENTRIES,
"Eviction logic must successfully bound the map size under heavy insertion stress"
);
}
#[test]
@@ -88,6 +88,9 @@ fn light_fuzz_offset_always_stays_inside_state_len() {
let now = base + Duration::from_nanos(seed & 0x0fff);
let start = auth_probe_scan_start_offset(ip, now, state_len, scan_limit);
assert!(start < state_len, "scan offset must stay inside state length");
assert!(
start < state_len,
"scan offset must stay inside state length"
);
}
}
}
@@ -96,4 +96,4 @@ fn light_fuzz_scan_offset_budget_never_exceeds_effective_window() {
"scan offset must stay inside state length"
);
}
}
}
@@ -113,4 +113,4 @@ fn light_fuzz_scan_offset_stays_within_window_for_randomized_inputs() {
"scan offset must always remain inside state length"
);
}
}
}
+98 -26
View File
@@ -1,8 +1,8 @@
use super::*;
use crate::crypto::{sha256, sha256_hmac, AesCtr};
use crate::crypto::{AesCtr, sha256, sha256_hmac};
use crate::protocol::constants::{ProtoTag, RESERVED_NONCE_BEGINNINGS, RESERVED_NONCE_FIRST_BYTES};
use rand::{Rng, SeedableRng};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use std::collections::HashSet;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
use std::sync::Arc;
@@ -223,7 +223,10 @@ fn auth_probe_backoff_extreme_fail_streak_clamps_safely() {
assert_eq!(updated.fail_streak, u32::MAX);
let expected_blocked_until = now + Duration::from_millis(AUTH_PROBE_BACKOFF_MAX_MS);
assert_eq!(updated.blocked_until, expected_blocked_until, "Extreme fail streak must clamp cleanly to AUTH_PROBE_BACKOFF_MAX_MS");
assert_eq!(
updated.blocked_until, expected_blocked_until,
"Extreme fail streak must clamp cleanly to AUTH_PROBE_BACKOFF_MAX_MS"
);
}
#[test]
@@ -250,12 +253,19 @@ fn generate_tg_nonce_cryptographic_uniqueness_and_entropy() {
total_set_bits += byte.count_ones() as usize;
}
assert!(nonces.insert(nonce), "generate_tg_nonce emitted a duplicate nonce! RNG is stuck.");
assert!(
nonces.insert(nonce),
"generate_tg_nonce emitted a duplicate nonce! RNG is stuck."
);
}
let total_bits = iterations * HANDSHAKE_LEN * 8;
let ratio = (total_set_bits as f64) / (total_bits as f64);
assert!(ratio > 0.48 && ratio < 0.52, "Nonce entropy is degraded. Set bit ratio: {}", ratio);
assert!(
ratio > 0.48 && ratio < 0.52,
"Nonce entropy is degraded. Set bit ratio: {}",
ratio
);
}
#[tokio::test]
@@ -267,10 +277,19 @@ async fn mtproto_multi_user_decryption_isolation() {
config.general.modes.secure = true;
config.access.ignore_time_skew = true;
config.access.users.insert("user_a".to_string(), "11111111111111111111111111111111".to_string());
config.access.users.insert("user_b".to_string(), "22222222222222222222222222222222".to_string());
config.access.users.insert(
"user_a".to_string(),
"11111111111111111111111111111111".to_string(),
);
config.access.users.insert(
"user_b".to_string(),
"22222222222222222222222222222222".to_string(),
);
let good_secret_hex = "33333333333333333333333333333333";
config.access.users.insert("user_c".to_string(), good_secret_hex.to_string());
config
.access
.users
.insert("user_c".to_string(), good_secret_hex.to_string());
let replay_checker = ReplayChecker::new(128, Duration::from_secs(60));
let peer: SocketAddr = "192.0.2.104:12345".parse().unwrap();
@@ -291,9 +310,14 @@ async fn mtproto_multi_user_decryption_isolation() {
match res {
HandshakeResult::Success((_, _, success)) => {
assert_eq!(success.user, "user_c", "Decryption attempts on previous users must not corrupt the handshake buffer for the valid user");
assert_eq!(
success.user, "user_c",
"Decryption attempts on previous users must not corrupt the handshake buffer for the valid user"
);
}
_ => panic!("Multi-user MTProto handshake failed. Decryption buffer might be mutating in place."),
_ => panic!(
"Multi-user MTProto handshake failed. Decryption buffer might be mutating in place."
),
}
}
@@ -325,7 +349,9 @@ async fn invalid_secret_warning_lock_contention_and_bound() {
}
let warned = INVALID_SECRET_WARNED.get().unwrap();
let guard = warned.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
let guard = warned
.lock()
.unwrap_or_else(|poisoned| poisoned.into_inner());
assert_eq!(
guard.len(),
@@ -342,7 +368,11 @@ async fn mtproto_strict_concurrent_replay_race_condition() {
let secret_hex = "4A4A4A4A4A4A4A4A4A4A4A4A4A4A4A4A";
let config = Arc::new(test_config_with_secret_hex(secret_hex));
let replay_checker = Arc::new(ReplayChecker::new(4096, Duration::from_secs(60)));
let valid_handshake = Arc::new(make_valid_mtproto_handshake(secret_hex, ProtoTag::Secure, 1));
let valid_handshake = Arc::new(make_valid_mtproto_handshake(
secret_hex,
ProtoTag::Secure,
1,
));
let tasks = 100;
let barrier = Arc::new(Barrier::new(tasks));
@@ -355,7 +385,10 @@ async fn mtproto_strict_concurrent_replay_race_condition() {
let hs = valid_handshake.clone();
handles.push(tokio::spawn(async move {
let peer = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(10, 0, 0, (i % 250) as u8)), 10000 + i as u16);
let peer = SocketAddr::new(
IpAddr::V4(Ipv4Addr::new(10, 0, 0, (i % 250) as u8)),
10000 + i as u16,
);
b.wait().await;
handle_mtproto_handshake(
&hs,
@@ -382,8 +415,15 @@ async fn mtproto_strict_concurrent_replay_race_condition() {
}
}
assert_eq!(successes, 1, "Replay cache race condition allowed multiple identical MTProto handshakes to succeed");
assert_eq!(failures, tasks - 1, "Replay cache failed to forcefully reject concurrent duplicates");
assert_eq!(
successes, 1,
"Replay cache race condition allowed multiple identical MTProto handshakes to succeed"
);
assert_eq!(
failures,
tasks - 1,
"Replay cache failed to forcefully reject concurrent duplicates"
);
}
#[tokio::test]
@@ -398,7 +438,8 @@ async fn tls_alpn_zero_length_protocol_handled_safely() {
let rng = SecureRandom::new();
let peer: SocketAddr = "192.0.2.107:12345".parse().unwrap();
let handshake = make_valid_tls_client_hello_with_sni_and_alpn(&secret, 0, "example.com", &[b""]);
let handshake =
make_valid_tls_client_hello_with_sni_and_alpn(&secret, 0, "example.com", &[b""]);
let res = handle_tls_handshake(
&handshake,
@@ -412,7 +453,10 @@ async fn tls_alpn_zero_length_protocol_handled_safely() {
)
.await;
assert!(matches!(res, HandshakeResult::BadClient { .. }), "0-length ALPN must be safely rejected without panicking");
assert!(
matches!(res, HandshakeResult::BadClient { .. }),
"0-length ALPN must be safely rejected without panicking"
);
}
#[tokio::test]
@@ -427,7 +471,8 @@ async fn tls_sni_massive_hostname_does_not_panic() {
let peer: SocketAddr = "192.0.2.108:12345".parse().unwrap();
let massive_hostname = String::from_utf8(vec![b'a'; 65000]).unwrap();
let handshake = make_valid_tls_client_hello_with_sni_and_alpn(&secret, 0, &massive_hostname, &[]);
let handshake =
make_valid_tls_client_hello_with_sni_and_alpn(&secret, 0, &massive_hostname, &[]);
let res = handle_tls_handshake(
&handshake,
@@ -441,7 +486,13 @@ async fn tls_sni_massive_hostname_does_not_panic() {
)
.await;
assert!(matches!(res, HandshakeResult::Success(_) | HandshakeResult::BadClient { .. }), "Massive SNI hostname must be processed or ignored without stack overflow or panic");
assert!(
matches!(
res,
HandshakeResult::Success(_) | HandshakeResult::BadClient { .. }
),
"Massive SNI hostname must be processed or ignored without stack overflow or panic"
);
}
#[tokio::test]
@@ -455,7 +506,8 @@ async fn tls_progressive_truncation_fuzzing_no_panics() {
let rng = SecureRandom::new();
let peer: SocketAddr = "192.0.2.109:12345".parse().unwrap();
let valid_handshake = make_valid_tls_client_hello_with_sni_and_alpn(&secret, 0, "example.com", &[b"h2"]);
let valid_handshake =
make_valid_tls_client_hello_with_sni_and_alpn(&secret, 0, "example.com", &[b"h2"]);
let full_len = valid_handshake.len();
// Truncated corpus only: full_len is a valid baseline and should not be
@@ -473,7 +525,11 @@ async fn tls_progressive_truncation_fuzzing_no_panics() {
None,
)
.await;
assert!(matches!(res, HandshakeResult::BadClient { .. }), "Truncated TLS handshake at len {} must fail safely without panicking", i);
assert!(
matches!(res, HandshakeResult::BadClient { .. }),
"Truncated TLS handshake at len {} must fail safely without panicking",
i
);
}
}
@@ -504,7 +560,10 @@ async fn mtproto_pure_entropy_fuzzing_no_panics() {
)
.await;
assert!(matches!(res, HandshakeResult::BadClient { .. }), "Pure entropy MTProto payload must fail closed and never panic");
assert!(
matches!(res, HandshakeResult::BadClient { .. }),
"Pure entropy MTProto payload must fail closed and never panic"
);
}
}
@@ -517,10 +576,16 @@ fn decode_user_secret_odd_length_hex_rejection() {
let mut config = ProxyConfig::default();
config.access.users.clear();
config.access.users.insert("odd_user".to_string(), "1234567890123456789012345678901".to_string());
config.access.users.insert(
"odd_user".to_string(),
"1234567890123456789012345678901".to_string(),
);
let decoded = decode_user_secrets(&config, None);
assert!(decoded.is_empty(), "Odd-length hex string must be gracefully rejected by hex::decode without unwrapping");
assert!(
decoded.is_empty(),
"Odd-length hex string must be gracefully rejected by hex::decode without unwrapping"
);
}
#[test]
@@ -552,7 +617,10 @@ fn saturation_grace_pre_existing_high_fail_streak_immediate_throttle() {
}
let is_throttled = auth_probe_should_apply_preauth_throttle(peer_ip, now);
assert!(is_throttled, "A peer with a pre-existing high fail streak must be immediately throttled when saturation begins, receiving no unearned grace period");
assert!(
is_throttled,
"A peer with a pre-existing high fail streak must be immediately throttled when saturation begins, receiving no unearned grace period"
);
}
#[test]
@@ -586,7 +654,11 @@ fn mtproto_classic_tags_rejected_when_only_secure_mode_enabled() {
config.general.modes.tls = false;
assert!(!mode_enabled_for_proto(&config, ProtoTag::Abridged, false));
assert!(!mode_enabled_for_proto(&config, ProtoTag::Intermediate, false));
assert!(!mode_enabled_for_proto(
&config,
ProtoTag::Intermediate,
false
));
}
#[test]
@@ -1,5 +1,5 @@
use super::*;
use crate::crypto::{sha256, sha256_hmac, AesCtr, SecureRandom};
use crate::crypto::{AesCtr, SecureRandom, sha256, sha256_hmac};
use crate::protocol::constants::{ProtoTag, TLS_RECORD_HANDSHAKE, TLS_VERSION};
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::sync::Arc;
@@ -80,8 +80,7 @@ fn make_valid_tls_client_hello_with_alpn(
digest[28 + i] ^= ts[i];
}
record[tls::TLS_DIGEST_POS..tls::TLS_DIGEST_POS + tls::TLS_DIGEST_LEN]
.copy_from_slice(&digest);
record[tls::TLS_DIGEST_POS..tls::TLS_DIGEST_POS + tls::TLS_DIGEST_LEN].copy_from_slice(&digest);
record
}
@@ -331,7 +330,11 @@ async fn saturation_grace_exhaustion_under_concurrency_keeps_peer_throttled() {
let final_state = state.get(&peer_ip).expect("state must exist");
assert!(
final_state.fail_streak >= AUTH_PROBE_BACKOFF_START_FAILS + AUTH_PROBE_SATURATION_GRACE_FAILS
final_state.fail_streak
>= AUTH_PROBE_BACKOFF_START_FAILS + AUTH_PROBE_SATURATION_GRACE_FAILS
);
assert!(auth_probe_should_apply_preauth_throttle(peer_ip, Instant::now()));
assert!(auth_probe_should_apply_preauth_throttle(
peer_ip,
Instant::now()
));
}
+109
View File
@@ -956,6 +956,89 @@ async fn stress_tls_sni_preferred_user_hint_scales_to_large_user_set() {
}
}
#[tokio::test]
async fn tls_unknown_sni_drop_policy_returns_hard_error() {
let secret = [0x48u8; 16];
let mut config = test_config_with_secret_hex("48484848484848484848484848484848");
config.censorship.unknown_sni_action = UnknownSniAction::Drop;
let replay_checker = ReplayChecker::new(128, Duration::from_secs(60));
let rng = SecureRandom::new();
let peer: SocketAddr = "198.51.100.190:44326".parse().unwrap();
let handshake =
make_valid_tls_client_hello_with_sni_and_alpn(&secret, 0, "unknown.example", &[b"h2"]);
let result = handle_tls_handshake(
&handshake,
tokio::io::empty(),
tokio::io::sink(),
peer,
&config,
&replay_checker,
&rng,
None,
)
.await;
assert!(matches!(
result,
HandshakeResult::Error(ProxyError::UnknownTlsSni)
));
}
// Verifies the `UnknownSniAction::Mask` policy: a ClientHello built for the configured
// secret but carrying the SNI "unknown.example" must make `handle_tls_handshake` return
// `HandshakeResult::BadClient { .. }`.
// NOTE(review): assumes "unknown.example" is not a hostname known to the config produced
// by `test_config_with_secret_hex` — confirm against that helper.
#[tokio::test]
async fn tls_unknown_sni_mask_policy_falls_back_to_bad_client() {
    let secret = [0x49u8; 16];
    let mut config = test_config_with_secret_hex("49494949494949494949494949494949");
    config.censorship.unknown_sni_action = UnknownSniAction::Mask;

    let replay_checker = ReplayChecker::new(128, Duration::from_secs(60));
    let rng = SecureRandom::new();
    let peer: SocketAddr = "198.51.100.191:44326".parse().unwrap();
    let handshake =
        make_valid_tls_client_hello_with_sni_and_alpn(&secret, 0, "unknown.example", &[b"h2"]);

    let result = handle_tls_handshake(
        &handshake,
        tokio::io::empty(),
        tokio::io::sink(),
        peer,
        &config,
        &replay_checker,
        &rng,
        None,
    )
    .await;

    // Carry a failure message so a regression is identifiable from the test log alone.
    assert!(
        matches!(result, HandshakeResult::BadClient { .. }),
        "unknown SNI under the Mask policy must fall back to the BadClient path"
    );
}
// Verifies that the `UnknownSniAction::Drop` policy does not affect a handshake produced
// by `make_valid_tls_handshake`: the result must still be `HandshakeResult::Success`.
// NOTE(review): per the test name, `make_valid_tls_handshake` presumably emits a
// ClientHello without an SNI extension — confirm against that helper.
#[tokio::test]
async fn tls_missing_sni_keeps_legacy_auth_path() {
    let secret = [0x4Au8; 16];
    let mut config = test_config_with_secret_hex("4a4a4a4a4a4a4a4a4a4a4a4a4a4a4a4a");
    config.censorship.unknown_sni_action = UnknownSniAction::Drop;

    let replay_checker = ReplayChecker::new(128, Duration::from_secs(60));
    let rng = SecureRandom::new();
    let peer: SocketAddr = "198.51.100.192:44326".parse().unwrap();
    let handshake = make_valid_tls_handshake(&secret, 0);

    let result = handle_tls_handshake(
        &handshake,
        tokio::io::empty(),
        tokio::io::sink(),
        peer,
        &config,
        &replay_checker,
        &rng,
        None,
    )
    .await;

    // Carry a failure message so a regression is identifiable from the test log alone.
    assert!(
        matches!(result, HandshakeResult::Success(_)),
        "handshake without SNI must keep authenticating even when unknown SNI is dropped"
    );
}
#[tokio::test]
async fn alpn_enforce_rejects_unsupported_client_alpn() {
let secret = [0x33u8; 16];
@@ -1560,6 +1643,32 @@ fn auth_probe_capacity_fresh_full_map_still_tracks_newcomer_with_bounded_evictio
);
}
// Probes the unknown-SNI WARN gate at three instants relative to a fresh state:
// the first event, one second later, and exactly `UNKNOWN_SNI_WARN_COOLDOWN_SECS` later.
// The gate must answer true, false, true in that order.
#[test]
fn unknown_sni_warn_cooldown_first_event_is_warn_and_repeated_events_are_info_until_window_expires()
{
    // Hold the shared test lock for the whole test; a poisoned lock is recovered
    // instead of aborting so one failed test does not cascade into this one.
    let _guard = match unknown_sni_warn_test_lock().lock() {
        Ok(held) => held,
        Err(poisoned) => poisoned.into_inner(),
    };
    clear_unknown_sni_warn_state_for_testing();

    let first_event = Instant::now();
    let first_is_warn = should_emit_unknown_sni_warn_for_testing(first_event);
    assert!(
        first_is_warn,
        "first unknown SNI event must be eligible for WARN emission"
    );

    let inside_cooldown = first_event + Duration::from_secs(1);
    let repeated_is_warn = should_emit_unknown_sni_warn_for_testing(inside_cooldown);
    assert!(
        !repeated_is_warn,
        "events inside cooldown window must be demoted from WARN to INFO"
    );

    let cooldown_elapsed = first_event + Duration::from_secs(UNKNOWN_SNI_WARN_COOLDOWN_SECS);
    let expired_is_warn = should_emit_unknown_sni_warn_for_testing(cooldown_elapsed);
    assert!(
        expired_is_warn,
        "once cooldown expires, next unknown SNI event must be WARN-eligible again"
    );
}
#[test]
fn stress_auth_probe_full_map_churn_keeps_bound_and_tracks_newcomers() {
let _guard = auth_probe_test_lock()
@@ -1,5 +1,5 @@
use super::*;
use crate::crypto::{sha256, sha256_hmac, AesCtr, SecureRandom};
use crate::crypto::{AesCtr, SecureRandom, sha256, sha256_hmac};
use crate::protocol::constants::{ProtoTag, TLS_RECORD_HANDSHAKE, TLS_VERSION};
use std::net::SocketAddr;
use std::time::{Duration, Instant};
@@ -169,10 +169,10 @@ async fn mtproto_user_scan_timing_manual_benchmark() {
);
}
config.access.users.insert(
preferred_user.to_string(),
target_secret_hex.to_string(),
);
config
.access
.users
.insert(preferred_user.to_string(), target_secret_hex.to_string());
let replay_checker_preferred = ReplayChecker::new(65_536, Duration::from_secs(60));
let replay_checker_full_scan = ReplayChecker::new(65_536, Duration::from_secs(60));
@@ -544,7 +544,6 @@ async fn timing_classifier_light_fuzz_pairwise_bucketed_accuracy_stays_bounded_u
if hardened_acc + 0.05 <= baseline_acc {
meaningful_improvement_seen = true;
}
}
assert!(
@@ -78,7 +78,11 @@ fn timing_normalization_zero_floor_safety_net_defaults_to_mask_timeout() {
config.censorship.mask_timing_normalization_ceiling_ms = 0;
let budget = mask_outcome_target_budget(&config);
assert_eq!(budget, MASK_TIMEOUT);
assert_eq!(
budget,
Duration::from_millis(0),
"zero floor/ceiling must produce zero extra normalization budget"
);
}
#[tokio::test]
@@ -85,7 +85,10 @@ async fn aggressive_mode_shapes_backend_silent_non_eof_path() {
let legacy = capture_forwarded_len_with_mode(body_sent, false, false, false, 0).await;
let aggressive = capture_forwarded_len_with_mode(body_sent, false, true, false, 0).await;
assert!(legacy < floor, "legacy mode should keep timeout path unshaped");
assert!(
legacy < floor,
"legacy mode should keep timeout path unshaped"
);
assert!(
aggressive >= floor,
"aggressive mode must shape backend-silent non-EOF paths (aggressive={aggressive}, floor={floor})"
@@ -52,7 +52,10 @@ async fn run_connect_failure_case(
.await
.unwrap()
.unwrap();
assert_eq!(n, 0, "connect-failure path must close client-visible writer");
assert_eq!(
n, 0,
"connect-failure path must close client-visible writer"
);
started.elapsed()
}
@@ -67,13 +70,9 @@ async fn connect_failure_refusal_close_behavior_matrix() {
let peer: SocketAddr = format!("203.0.113.210:{}", 54100 + idx as u16)
.parse()
.unwrap();
let elapsed = run_connect_failure_case(
"127.0.0.1",
unused_port,
timing_normalization_enabled,
peer,
)
.await;
let elapsed =
run_connect_failure_case("127.0.0.1", unused_port, timing_normalization_enabled, peer)
.await;
if timing_normalization_enabled {
assert!(
@@ -79,7 +79,10 @@ async fn io_error_terminates_cleanly() {
}
}
tokio::time::timeout(MASK_RELAY_TIMEOUT, consume_client_data(ErrReader, usize::MAX))
.await
.expect("consume_client_data did not return on I/O error");
tokio::time::timeout(
MASK_RELAY_TIMEOUT,
consume_client_data(ErrReader, usize::MAX),
)
.await
.expect("consume_client_data did not return on I/O error");
}
@@ -32,8 +32,16 @@ async fn run_self_target_refusal(
let (mut client, server) = duplex(1024);
let started = Instant::now();
let task = tokio::spawn(async move {
handle_bad_client(server, tokio::io::sink(), initial, peer, local_addr, &config, &beobachten)
.await;
handle_bad_client(
server,
tokio::io::sink(),
initial,
peer,
local_addr,
&config,
&beobachten,
)
.await;
});
client
@@ -214,4 +222,4 @@ async fn stress_high_fanout_self_target_refusal_no_deadlock_or_timeout() {
})
.await
.expect("high-fanout refusal workload must complete without deadlock");
}
}
@@ -2,7 +2,13 @@ use super::*;
#[test]
fn exact_four_byte_http_tokens_are_classified() {
for token in [b"GET ".as_ref(), b"POST".as_ref(), b"HEAD".as_ref(), b"PUT ".as_ref(), b"PRI ".as_ref()] {
for token in [
b"GET ".as_ref(),
b"POST".as_ref(),
b"HEAD".as_ref(),
b"PUT ".as_ref(),
b"PRI ".as_ref(),
] {
assert!(
is_http_probe(token),
"exact 4-byte token must be classified as HTTP probe: {:?}",
@@ -76,4 +82,4 @@ fn light_fuzz_four_byte_ascii_noise_not_misclassified() {
token
);
}
}
}
@@ -38,4 +38,4 @@ async fn adversarial_parallel_cold_miss_performs_single_interface_refresh() {
1,
"parallel cold misses must coalesce into a single interface enumeration"
);
}
}
@@ -37,7 +37,10 @@ async fn tdd_non_local_port_short_circuit_does_not_enumerate_interfaces() {
let local_addr: SocketAddr = "0.0.0.0:443".parse().expect("valid local addr");
let is_local = is_mask_target_local_listener_async("127.0.0.1", 8443, local_addr, None).await;
assert!(!is_local, "different port must not be treated as local listener");
assert!(
!is_local,
"different port must not be treated as local listener"
);
assert_eq!(
local_interface_enumerations_for_tests(),
0,
@@ -63,17 +63,11 @@ impl AsyncWrite for CountingWriter {
Poll::Ready(Ok(buf.len()))
}
fn poll_flush(
self: Pin<&mut Self>,
_cx: &mut Context<'_>,
) -> Poll<std::io::Result<()>> {
fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
Poll::Ready(Ok(()))
}
fn poll_shutdown(
self: Pin<&mut Self>,
_cx: &mut Context<'_>,
) -> Poll<std::io::Result<()>> {
fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
Poll::Ready(Ok(()))
}
}
@@ -1,6 +1,6 @@
use super::*;
use std::net::TcpListener as StdTcpListener;
use std::net::SocketAddr;
use std::net::TcpListener as StdTcpListener;
use tokio::io::{AsyncReadExt, AsyncWriteExt, duplex};
use tokio::net::TcpListener;
use tokio::time::{Duration, Instant, timeout};
@@ -15,74 +15,38 @@ fn closed_local_port() -> u16 {
#[tokio::test]
async fn self_target_detection_matches_literal_ipv4_listener() {
let local: SocketAddr = "198.51.100.40:443".parse().unwrap();
assert!(is_mask_target_local_listener_async(
"198.51.100.40",
443,
local,
None,
)
.await);
assert!(is_mask_target_local_listener_async("198.51.100.40", 443, local, None,).await);
}
#[tokio::test]
async fn self_target_detection_matches_bracketed_ipv6_listener() {
let local: SocketAddr = "[2001:db8::44]:8443".parse().unwrap();
assert!(is_mask_target_local_listener_async(
"[2001:db8::44]",
8443,
local,
None,
)
.await);
assert!(is_mask_target_local_listener_async("[2001:db8::44]", 8443, local, None,).await);
}
#[tokio::test]
async fn self_target_detection_keeps_same_ip_different_port_forwardable() {
let local: SocketAddr = "203.0.113.44:443".parse().unwrap();
assert!(!is_mask_target_local_listener_async(
"203.0.113.44",
8443,
local,
None,
)
.await);
assert!(!is_mask_target_local_listener_async("203.0.113.44", 8443, local, None,).await);
}
#[tokio::test]
async fn self_target_detection_normalizes_ipv4_mapped_ipv6_literal() {
let local: SocketAddr = "127.0.0.1:443".parse().unwrap();
assert!(is_mask_target_local_listener_async(
"::ffff:127.0.0.1",
443,
local,
None,
)
.await);
assert!(is_mask_target_local_listener_async("::ffff:127.0.0.1", 443, local, None,).await);
}
#[tokio::test]
async fn self_target_detection_unspecified_bind_blocks_loopback_target() {
let local: SocketAddr = "0.0.0.0:443".parse().unwrap();
assert!(is_mask_target_local_listener_async(
"127.0.0.1",
443,
local,
None,
)
.await);
assert!(is_mask_target_local_listener_async("127.0.0.1", 443, local, None,).await);
}
#[tokio::test]
async fn self_target_detection_unspecified_bind_keeps_remote_target_forwardable() {
let local: SocketAddr = "0.0.0.0:443".parse().unwrap();
let remote: SocketAddr = "198.51.100.44:443".parse().unwrap();
assert!(!is_mask_target_local_listener_async(
"mask.example",
443,
local,
Some(remote),
)
.await);
assert!(!is_mask_target_local_listener_async("mask.example", 443, local, Some(remote),).await);
}
#[tokio::test]
@@ -306,7 +270,10 @@ async fn offline_mask_target_refusal_respects_timing_normalization_budget() {
});
client.shutdown().await.unwrap();
timeout(Duration::from_secs(2), task).await.unwrap().unwrap();
timeout(Duration::from_secs(2), task)
.await
.unwrap()
.unwrap();
let elapsed = started.elapsed();
assert!(
@@ -350,7 +317,10 @@ async fn offline_mask_target_refusal_with_idle_client_is_bounded_by_consume_time
.await
.expect("connection should still be open before consume timeout expires");
timeout(Duration::from_secs(2), task).await.unwrap().unwrap();
timeout(Duration::from_secs(2), task)
.await
.unwrap()
.unwrap();
let elapsed = started.elapsed();
assert!(
@@ -40,7 +40,10 @@ async fn adversarial_delayed_interface_lookup_does_not_consume_outcome_floor_bud
tokio::time::sleep(Duration::from_millis(80)).await;
drop(held_refresh_guard);
client.shutdown().await.expect("client shutdown must succeed");
client
.shutdown()
.await
.expect("client shutdown must succeed");
timeout(Duration::from_secs(2), task)
.await
@@ -52,4 +55,4 @@ async fn adversarial_delayed_interface_lookup_does_not_consume_outcome_floor_bud
elapsed >= Duration::from_millis(180) && elapsed < Duration::from_millis(350),
"timing normalization floor must start after pre-outcome self-target checks"
);
}
}
@@ -0,0 +1,189 @@
use super::*;
use crate::crypto::AesCtr;
use bytes::Bytes;
use std::io;
use std::pin::Pin;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::task::{Context, Poll};
use tokio::io::AsyncWrite;
/// Test double for an async sink.
///
/// It carries a shared call counter and a switch that selects between an
/// always-succeeding and an always-failing write path.
struct CountedWriter {
    /// Counter shared with the test body so it can observe write attempts.
    write_calls: Arc<AtomicUsize>,
    /// Selects the failing write path when `true`.
    fail_writes: bool,
}

impl CountedWriter {
    /// Creates a writer that reports into `write_calls` and fails writes when
    /// `fail_writes` is set.
    fn new(write_calls: Arc<AtomicUsize>, fail_writes: bool) -> Self {
        CountedWriter {
            write_calls,
            fail_writes,
        }
    }
}
impl AsyncWrite for CountedWriter {
    /// Records the attempt, then completes immediately.
    ///
    /// Returns `Ok(buf.len())` (the whole buffer is "accepted") unless
    /// `fail_writes` is set, in which case a `BrokenPipe` error is returned.
    fn poll_write(
        self: Pin<&mut Self>,
        _cx: &mut Context<'_>,
        buf: &[u8],
    ) -> Poll<io::Result<usize>> {
        // The counter is bumped before the outcome is chosen, so failed writes count too.
        self.write_calls.fetch_add(1, Ordering::Relaxed);
        if !self.fail_writes {
            return Poll::Ready(Ok(buf.len()));
        }
        let forced = io::Error::new(io::ErrorKind::BrokenPipe, "forced write failure");
        Poll::Ready(Err(forced))
    }

    /// Flushing is a no-op that always succeeds.
    fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
        Poll::Ready(Ok(()))
    }

    /// Shutdown is a no-op that always succeeds.
    fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
        Poll::Ready(Ok(()))
    }
}
/// Wraps `inner` in a `CryptoWriter` driven by an AES-CTR cipher built from a
/// fixed all-zero 32-byte key and a zero IV.
fn make_crypto_writer(inner: CountedWriter) -> CryptoWriter<CountedWriter> {
    let cipher = AesCtr::new(&[0u8; 32], 0u128);
    // NOTE(review): the last argument (8 KiB) is presumably a buffering bound in
    // bytes — confirm against `CryptoWriter::new`.
    CryptoWriter::new(inner, cipher, 8 * 1024)
}
/// Drives `process_me_writer_response` with a 5-byte data frame against a writer
/// that fails every write, then checks that:
/// the error surfaces as `ProxyError::Io`, the writer was actually attempted,
/// the user's quota usage equals the payload size (no rollback), the write-fail
/// metrics record one event of payload size, and neither the user's total octets
/// nor the ME->C byte counter advanced.
#[tokio::test]
async fn me_writer_write_fail_keeps_reserved_quota_and_tracks_fail_metrics() {
    let user = "middle-me-writer-no-rollback-user";
    let payload = Bytes::from_static(&[0x11, 0x22, 0x33, 0x44, 0x55]);
    let payload_len = payload.len() as u64;

    let stats = Stats::new();
    let user_stats = stats.get_or_create_user_stats_handle(user);
    let write_calls = Arc::new(AtomicUsize::new(0));
    // `true` selects the always-failing write path of the test writer.
    let mut writer = make_crypto_writer(CountedWriter::new(Arc::clone(&write_calls), true));
    let mut frame_buf = Vec::new();
    let bytes_me2c = AtomicU64::new(0);

    let response = MeResponse::Data {
        flags: 0,
        data: payload,
    };
    let rng = SecureRandom::new();
    let result = process_me_writer_response(
        response,
        &mut writer,
        ProtoTag::Intermediate,
        &rng,
        &mut frame_buf,
        &stats,
        user,
        Some(user_stats.as_ref()),
        // Limit well above the payload size, so the failure comes from the writer.
        Some(64),
        0,
        &bytes_me2c,
        11,
        true,
        false,
    )
    .await;

    assert!(
        matches!(result, Err(ProxyError::Io(_))),
        "write failure must propagate as I/O error"
    );
    assert!(
        write_calls.load(Ordering::Relaxed) > 0,
        "writer must be attempted after successful quota reservation"
    );
    assert_eq!(
        stats.get_user_quota_used(user),
        payload_len,
        "reserved quota must not roll back on write failure"
    );
    assert_eq!(
        stats.get_quota_write_fail_bytes_total(),
        payload_len,
        "write-fail byte metric must include failed payload size"
    );
    assert_eq!(
        stats.get_quota_write_fail_events_total(),
        1,
        "write-fail events metric must increment once"
    );
    assert_eq!(
        stats.get_user_total_octets(user),
        0,
        "telemetry octets_to should not advance when write fails"
    );
    assert_eq!(
        bytes_me2c.load(Ordering::Relaxed),
        0,
        "ME->C committed byte counter must not advance on write failure"
    );
}
/// Pre-charges the user's quota up to the limit, then sends a 3-byte data frame
/// through `process_me_writer_response` and checks that the call is rejected with
/// `ProxyError::DataQuotaExceeded` without the writer ever being polled, that the
/// pre-write reject metric is 1, and that quota usage, the write-fail metrics and
/// the ME->C byte counter are unchanged.
#[tokio::test]
async fn me_writer_pre_write_quota_reject_happens_before_writer_poll() {
    let user = "middle-me-writer-precheck-user";
    let limit = 8u64;

    let stats = Stats::new();
    let user_stats = stats.get_or_create_user_stats_handle(user);
    // Consume the whole budget up front so no bytes remain for the frame below.
    stats.quota_charge_post_write(user_stats.as_ref(), limit);

    let write_calls = Arc::new(AtomicUsize::new(0));
    // `false`: the writer itself would succeed, so any error must come from the quota gate.
    let mut writer = make_crypto_writer(CountedWriter::new(Arc::clone(&write_calls), false));
    let mut frame_buf = Vec::new();
    let bytes_me2c = AtomicU64::new(0);

    let response = MeResponse::Data {
        flags: 0,
        data: Bytes::from_static(&[0xAA, 0xBB, 0xCC]),
    };
    let rng = SecureRandom::new();
    let result = process_me_writer_response(
        response,
        &mut writer,
        ProtoTag::Intermediate,
        &rng,
        &mut frame_buf,
        &stats,
        user,
        Some(user_stats.as_ref()),
        Some(limit),
        0,
        &bytes_me2c,
        12,
        true,
        false,
    )
    .await;

    assert!(
        matches!(result, Err(ProxyError::DataQuotaExceeded { .. })),
        "pre-write quota rejection must return typed quota error"
    );
    assert_eq!(
        write_calls.load(Ordering::Relaxed),
        0,
        "writer must not be polled when pre-write quota reservation fails"
    );
    assert_eq!(
        stats.get_me_d2c_quota_reject_pre_write_total(),
        1,
        "pre-write quota reject metric must increment"
    );
    assert_eq!(
        stats.get_user_quota_used(user),
        limit,
        "failed pre-write reservation must keep previous quota usage unchanged"
    );
    assert_eq!(
        stats.get_quota_write_fail_bytes_total(),
        0,
        "write-fail bytes metric must stay unchanged on pre-write reject"
    );
    assert_eq!(
        stats.get_quota_write_fail_events_total(),
        0,
        "write-fail events metric must stay unchanged on pre-write reject"
    );
    assert_eq!(bytes_me2c.load(Ordering::Relaxed), 0);
}
@@ -2,8 +2,8 @@ use super::*;
use crate::crypto::AesCtr;
use crate::stats::Stats;
use crate::stream::{BufferPool, CryptoReader};
use std::sync::atomic::AtomicU64;
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
use tokio::io::AsyncWriteExt;
use tokio::io::duplex;
use tokio::time::{Duration as TokioDuration, Instant as TokioInstant, timeout};
@@ -29,7 +29,10 @@ fn blackhat_registry_poison_recovers_with_fail_closed_reset_and_pressure_account
let before = relay_pressure_event_seq();
note_relay_pressure_event();
let after = relay_pressure_event_seq();
assert!(after > before, "pressure accounting must still advance after poison");
assert!(
after > before,
"pressure accounting must still advance after poison"
);
clear_relay_idle_pressure_state_for_testing();
}
@@ -126,6 +126,7 @@ async fn c2me_channel_full_path_yields_then_sends() {
payload: make_pooled_payload(&[0xBB, 0xCC]),
flags: 2,
},
None,
)
.await
});
@@ -217,7 +217,9 @@ async fn adversarial_lockstep_alternating_attack_under_jitter_closes() {
}
}
writer_task.await.expect("writer jitter task must not panic");
writer_task
.await
.expect("writer jitter task must not panic");
assert!(closed, "alternating attack must close before EOF");
});
}
@@ -247,7 +249,10 @@ async fn integration_mixed_population_attackers_close_benign_survive() {
plaintext.push(0x01);
plaintext.extend_from_slice(&[n, n, n, n]);
}
writer.write_all(&encrypt_for_reader(&plaintext)).await.unwrap();
writer
.write_all(&encrypt_for_reader(&plaintext))
.await
.unwrap();
drop(writer);
let mut closed = false;
@@ -279,7 +284,10 @@ async fn integration_mixed_population_attackers_close_benign_survive() {
}
plaintext.push(0x01);
plaintext.extend_from_slice(&payload);
writer.write_all(&encrypt_for_reader(&plaintext)).await.unwrap();
writer
.write_all(&encrypt_for_reader(&plaintext))
.await
.unwrap();
let got = read_once(
&mut crypto_reader,
@@ -329,7 +337,10 @@ async fn light_fuzz_parallel_patterns_no_hang_or_panic() {
}
}
writer.write_all(&encrypt_for_reader(&plaintext)).await.unwrap();
writer
.write_all(&encrypt_for_reader(&plaintext))
.await
.unwrap();
drop(writer);
for _ in 0..320 {
@@ -51,7 +51,9 @@ fn make_enabled_idle_policy() -> RelayClientIdlePolicy {
/// Appends a minimal frame marker for `proto` to `plaintext`: a single `0x00` byte
/// for `Abridged`, or four zero bytes (a little-endian `0u32`) for `Intermediate`
/// and `Secure`.
fn append_tiny_frame(plaintext: &mut Vec<u8>, proto: ProtoTag) {
match proto {
ProtoTag::Abridged => plaintext.push(0x00),
ProtoTag::Intermediate | ProtoTag::Secure => plaintext.extend_from_slice(&0u32.to_le_bytes()),
// NOTE(review): this arm repeats the pattern of the arm above with a block body —
// it looks like both sides of a formatting-only diff were kept; confirm which stays.
ProtoTag::Intermediate | ProtoTag::Secure => {
plaintext.extend_from_slice(&0u32.to_le_bytes())
}
}
}
@@ -206,7 +208,11 @@ async fn intermediate_chunked_alternating_attack_closes_before_eof() {
let mut plaintext = Vec::with_capacity(8 * 200);
for n in 0..180u8 {
append_tiny_frame(&mut plaintext, ProtoTag::Intermediate);
append_real_frame(&mut plaintext, ProtoTag::Intermediate, [n, n ^ 1, n ^ 2, n ^ 3]);
append_real_frame(
&mut plaintext,
ProtoTag::Intermediate,
[n, n ^ 1, n ^ 2, n ^ 3],
);
}
let encrypted = encrypt_for_reader(&plaintext);
@@ -240,7 +246,9 @@ async fn intermediate_chunked_alternating_attack_closes_before_eof() {
}
}
writer_task.await.expect("intermediate writer task must not panic");
writer_task
.await
.expect("intermediate writer task must not panic");
assert!(closed, "intermediate alternating attack must fail closed");
}
@@ -290,7 +298,9 @@ async fn secure_chunked_alternating_attack_closes_before_eof() {
}
}
writer_task.await.expect("secure writer task must not panic");
writer_task
.await
.expect("secure writer task must not panic");
assert!(closed, "secure alternating attack must fail closed");
}
@@ -2,8 +2,8 @@ use super::*;
use crate::crypto::AesCtr;
use crate::stats::Stats;
use crate::stream::{BufferPool, CryptoReader};
use std::sync::atomic::AtomicU64;
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
use std::time::Instant;
use tokio::io::{AsyncRead, AsyncWriteExt, duplex};
@@ -156,7 +156,10 @@ fn alternating_one_to_one_closes_with_bounded_real_frame_count() {
}
let (closed_at, _, reals) = simulate_tiny_debt_pattern(&pattern, pattern.len());
assert!(closed_at.is_some());
assert!(reals <= 80, "expected bounded real frames before close, got {reals}");
assert!(
reals <= 80,
"expected bounded real frames before close, got {reals}"
);
}
#[test]
@@ -183,7 +186,10 @@ fn alternating_one_to_seven_eventually_closes() {
}
}
let (closed_at, _, _) = simulate_tiny_debt_pattern(&pattern, pattern.len());
assert!(closed_at.is_some(), "1:7 tiny-to-real must eventually close");
assert!(
closed_at.is_some(),
"1:7 tiny-to-real must eventually close"
);
}
#[test]
@@ -2,10 +2,10 @@ use super::*;
use crate::crypto::AesCtr;
use crate::stats::Stats;
use crate::stream::{BufferPool, CryptoReader};
use std::sync::atomic::AtomicU64;
use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncWriteExt, duplex};
use std::sync::atomic::AtomicU64;
use std::time::Instant;
use tokio::io::{AsyncRead, AsyncWriteExt, duplex};
fn make_crypto_reader<T>(reader: T) -> CryptoReader<T>
where
+22 -5
View File
@@ -78,7 +78,8 @@ async fn relay_hol_blocking_prevention_regression() {
async fn relay_quota_mid_session_cutoff() {
let stats = Arc::new(Stats::new());
let user = "quota-mid-user";
let quota = 5000;
let quota = 5000u64;
let c2s_buf_size = 1024usize;
let (client_peer, relay_client) = duplex(8192);
let (relay_server, server_peer) = duplex(8192);
@@ -93,7 +94,7 @@ async fn relay_quota_mid_session_cutoff() {
client_writer,
server_reader,
server_writer,
1024,
c2s_buf_size,
1024,
user,
Arc::clone(&stats),
@@ -120,9 +121,25 @@ async fn relay_quota_mid_session_cutoff() {
other => panic!("Expected DataQuotaExceeded error, got: {:?}", other),
}
let mut small_buf = [0u8; 1];
let n = sp_reader.read(&mut small_buf).await.unwrap();
assert_eq!(n, 0, "Server must see EOF after quota reached");
let mut overshoot_bytes = 0usize;
let mut buf = [0u8; 256];
loop {
match timeout(Duration::from_millis(20), sp_reader.read(&mut buf)).await {
Ok(Ok(0)) => break,
Ok(Ok(n)) => overshoot_bytes = overshoot_bytes.saturating_add(n),
Ok(Err(e)) => panic!("server read must not fail after relay cutoff: {e}"),
Err(_) => break,
}
}
assert!(
overshoot_bytes <= c2s_buf_size,
"post-write cutoff may leak at most one C->S chunk after boundary, got {overshoot_bytes}"
);
assert!(
stats.get_user_quota_used(user) <= quota.saturating_add(c2s_buf_size as u64),
"accounted quota must remain bounded by one in-flight chunk overshoot"
);
}
#[tokio::test]
@@ -0,0 +1,243 @@
use super::*;
use std::collections::VecDeque;
use std::io;
use std::pin::Pin;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::task::{Context, Poll};
use tokio::io::{AsyncWrite, AsyncWriteExt};
use tokio::time::Instant;
/// Test double for an async sink driven by a script of per-call byte counts.
struct ScriptedWriter {
    /// Queue of planned byte counts, consumed front-first.
    scripted_writes: Arc<Mutex<VecDeque<usize>>>,
    /// Counter shared with the test body so it can observe write attempts.
    write_calls: Arc<AtomicUsize>,
}

impl ScriptedWriter {
    /// Creates a writer whose queue holds the entries of `script` in order and
    /// which reports into `write_calls`.
    fn new(script: &[usize], write_calls: Arc<AtomicUsize>) -> Self {
        let queue = VecDeque::from(script.to_vec());
        ScriptedWriter {
            scripted_writes: Arc::new(Mutex::new(queue)),
            write_calls,
        }
    }
}
impl AsyncWrite for ScriptedWriter {
    /// Records the attempt and completes immediately with the next scripted byte
    /// count, capped at `buf.len()`. Once the script is exhausted the whole buffer
    /// is reported as written.
    fn poll_write(
        self: Pin<&mut Self>,
        _cx: &mut Context<'_>,
        buf: &[u8],
    ) -> Poll<io::Result<usize>> {
        self.write_calls.fetch_add(1, Ordering::Relaxed);

        // A poisoned lock is recovered rather than propagated, so the script stays
        // usable even if another holder panicked.
        let mut script = match self.scripted_writes.lock() {
            Ok(guard) => guard,
            Err(poisoned) => poisoned.into_inner(),
        };
        let next_planned = script.pop_front();
        drop(script);

        let accepted = match next_planned {
            Some(planned) if planned < buf.len() => planned,
            _ => buf.len(),
        };
        Poll::Ready(Ok(accepted))
    }

    /// Flushing is a no-op that always succeeds.
    fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
        Poll::Ready(Ok(()))
    }

    /// Shutdown is a no-op that always succeeds.
    fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
        Poll::Ready(Ok(()))
    }
}
/// Builds a `StatsIo` over a `ScriptedWriter` for `user` with `quota_limit`.
///
/// When `precharged_quota` is non-zero, that many bytes are charged to the user's
/// quota before the wrapper is created. Returns the wrapper together with the
/// shared `Stats`, the inner writer's call counter and the quota-exceeded flag so
/// tests can inspect all of them.
fn make_stats_io_with_script(
    user: &str,
    quota_limit: u64,
    precharged_quota: u64,
    script: &[usize],
) -> (
    StatsIo<ScriptedWriter>,
    Arc<Stats>,
    Arc<AtomicUsize>,
    Arc<AtomicBool>,
) {
    let stats = Arc::new(Stats::new());
    if precharged_quota != 0 {
        let handle = stats.get_or_create_user_stats_handle(user);
        stats.quota_charge_post_write(handle.as_ref(), precharged_quota);
    }

    let write_calls = Arc::new(AtomicUsize::new(0));
    let quota_exceeded = Arc::new(AtomicBool::new(false));
    let inner = ScriptedWriter::new(script, Arc::clone(&write_calls));

    let io = StatsIo::new(
        inner,
        Arc::new(SharedCounters::new()),
        Arc::clone(&stats),
        user.to_owned(),
        Some(quota_limit),
        Arc::clone(&quota_exceeded),
        Instant::now(),
    );

    (io, stats, write_calls, quota_exceeded)
}
/// Writes a 64 KiB payload three times through a writer scripted to accept
/// 8 KiB, 8 KiB and 48 KiB, and checks that quota usage and total octets both equal
/// the sum of the accepted byte counts, with the exceeded flag still unset.
#[tokio::test]
async fn direct_partial_write_charges_only_committed_bytes_without_double_charge() {
    const KIB: usize = 1024;

    let user = "direct-partial-charge-user";
    let script = [8 * KIB, 8 * KIB, 48 * KIB];
    let (mut io, stats, write_calls, quota_exceeded) =
        make_stats_io_with_script(user, 1_048_576, 0, &script);
    let payload = vec![0xABu8; 64 * KIB];

    let n1 = io
        .write(&payload)
        .await
        .expect("first partial write must succeed");
    let n2 = io
        .write(&payload)
        .await
        .expect("second partial write must succeed");
    let n3 = io.write(&payload).await.expect("tail write must succeed");

    assert_eq!(n1, 8 * KIB);
    assert_eq!(n2, 8 * KIB);
    assert_eq!(n3, 48 * KIB);
    assert_eq!(write_calls.load(Ordering::Relaxed), 3);

    let committed = (n1 + n2 + n3) as u64;
    assert_eq!(
        stats.get_user_quota_used(user),
        committed,
        "quota accounting must follow committed bytes only"
    );
    assert_eq!(
        stats.get_user_total_octets(user),
        committed,
        "telemetry octets should match committed bytes on successful writes"
    );
    assert!(
        !quota_exceeded.load(Ordering::Acquire),
        "quota flag should stay false under large remaining budget"
    );
}
/// Exercises three write scenarios and inspects `quota_bytes_since_check` afterwards:
/// a 4 KiB write with 32 KiB of quota left (expected 0), a 4 KiB write far from
/// the limit (expected 4 KiB), and a 32 KiB write far from the limit (expected 0).
#[tokio::test]
async fn direct_hybrid_branch_selection_matches_contract() {
    const KIB: usize = 1024;
    const FAR_LIMIT: u64 = 1_048_576;

    // Scenario 1: only 32 KiB of a 256 KiB quota remain before the write.
    let near_limit = 256 * 1024u64;
    let near_remaining = 32 * 1024u64;
    let (mut near_io, _near_stats, _near_calls, _near_flag) = make_stats_io_with_script(
        "direct-near-limit-hard-check-user",
        near_limit,
        near_limit - near_remaining,
        &[4 * KIB],
    );
    let near_payload = vec![0x11u8; 4 * KIB];
    let near_written = near_io
        .write(&near_payload)
        .await
        .expect("near-limit write must succeed");
    assert_eq!(near_written, 4 * KIB);
    assert_eq!(
        near_io.quota_bytes_since_check, 0,
        "near-limit branch must go through immediate hard check"
    );

    // Scenario 2: small write with the full 1 MiB quota still available.
    let (mut far_small_io, _far_small_stats, _far_small_calls, _far_small_flag) =
        make_stats_io_with_script("direct-far-small-amortized-user", FAR_LIMIT, 0, &[4 * KIB]);
    let far_small_payload = vec![0x22u8; 4 * KIB];
    let far_small_written = far_small_io
        .write(&far_small_payload)
        .await
        .expect("small far-from-limit write must succeed");
    assert_eq!(far_small_written, 4 * KIB);
    assert_eq!(
        far_small_io.quota_bytes_since_check,
        4 * 1024,
        "small far-from-limit write must go through amortized path"
    );

    // Scenario 3: large write with the full 1 MiB quota still available.
    let (mut far_large_io, _far_large_stats, _far_large_calls, _far_large_flag) =
        make_stats_io_with_script(
            "direct-far-large-hard-check-user",
            FAR_LIMIT,
            0,
            &[32 * KIB],
        );
    let far_large_payload = vec![0x33u8; 32 * KIB];
    let far_large_written = far_large_io
        .write(&far_large_payload)
        .await
        .expect("large write must succeed");
    assert_eq!(far_large_written, 32 * KIB);
    assert_eq!(
        far_large_io.quota_bytes_since_check, 0,
        "large write must force immediate hard check even far from limit"
    );
}
/// With the quota fully pre-charged (usage == limit), a one-byte write must fail
/// with a quota I/O error, never reach the inner writer, raise the exceeded flag,
/// and leave quota usage at the limit.
#[tokio::test]
async fn remaining_before_zero_rejects_without_calling_inner_writer() {
    let user = "direct-zero-remaining-user";
    let limit = 8u64;
    // Pre-charging `limit` bytes leaves zero bytes of remaining budget.
    let (mut io, stats, write_calls, quota_exceeded) =
        make_stats_io_with_script(user, limit, limit, &[1]);

    let rejection = io
        .write(&[0x44])
        .await
        .expect_err("write must fail when remaining quota is zero");

    assert!(
        is_quota_io_error(&rejection),
        "zero-remaining gate must return typed quota I/O error"
    );
    let inner_calls = write_calls.load(Ordering::Relaxed);
    assert_eq!(
        inner_calls, 0,
        "inner poll_write must not be called when remaining quota is zero"
    );
    assert!(
        quota_exceeded.load(Ordering::Acquire),
        "zero-remaining gate must set exceeded flag"
    );
    assert_eq!(stats.get_user_quota_used(user), limit);
}
/// With a one-byte quota, the first one-byte write must succeed and raise the
/// exceeded flag; the second write must then fail with a quota I/O error while the
/// inner writer's call count stays at 1 and quota usage stays at 1.
#[tokio::test]
async fn exceeded_flag_blocks_following_poll_before_inner_write() {
    let user = "direct-exceeded-visibility-user";
    let (mut io, stats, write_calls, quota_exceeded) =
        make_stats_io_with_script(user, 1, 0, &[1, 1]);

    // First write: consumes the entire budget.
    let accepted = io
        .write(&[0x55])
        .await
        .expect("first byte should consume remaining quota");
    assert_eq!(accepted, 1);
    assert!(
        quota_exceeded.load(Ordering::Acquire),
        "hard check should store quota_exceeded after boundary hit"
    );

    // Second write: must be refused before the inner writer is touched.
    let rejection = io
        .write(&[0x66])
        .await
        .expect_err("next write must be rejected by early exceeded gate");
    assert!(
        is_quota_io_error(&rejection),
        "following write must fail with typed quota error"
    );
    let inner_calls = write_calls.load(Ordering::Relaxed);
    assert_eq!(
        inner_calls, 1,
        "second write must be cut before touching inner writer"
    );
    assert_eq!(stats.get_user_quota_used(user), 1);
}
/// Pins sample outputs of `quota_adaptive_interval_bytes` (0 and 2 KiB map to 4 KiB,
/// 32 KiB maps to 16 KiB, 256 KiB maps to 64 KiB) and three decisions of
/// `should_immediate_quota_check`.
#[test]
fn adaptive_interval_clamp_matches_contract() {
    // (input, expected interval) pairs, checked in the listed order.
    let interval_cases = [
        (0, 4 * 1024),
        (2 * 1024, 4 * 1024),
        (32 * 1024, 16 * 1024),
        (256 * 1024, 64 * 1024),
    ];
    for (input, expected) in interval_cases {
        assert_eq!(quota_adaptive_interval_bytes(input), expected);
    }

    assert!(should_immediate_quota_check(32 * 1024, 4 * 1024));
    assert!(should_immediate_quota_check(1_048_576, 32 * 1024));
    assert!(!should_immediate_quota_check(1_048_576, 4 * 1024));
}
@@ -29,6 +29,11 @@ async fn read_available<R: AsyncRead + Unpin>(reader: &mut R, budget: Duration)
total
}
/// Charges `bytes` to `user`'s quota on `stats`, creating the user's stats handle
/// first if it does not exist yet.
fn preload_user_quota(stats: &Stats, user: &str, bytes: u64) {
    let handle = stats.get_or_create_user_stats_handle(user);
    let target = handle.as_ref();
    stats.quota_charge_post_write(target, bytes);
}
#[tokio::test]
async fn integration_full_duplex_exact_budget_then_hard_cutoff() {
let stats = Arc::new(Stats::new());
@@ -102,14 +107,14 @@ async fn integration_full_duplex_exact_budget_then_hard_cutoff() {
relay_result,
Err(ProxyError::DataQuotaExceeded { ref user }) if user == "quota-full-duplex-boundary-user"
));
assert!(stats.get_user_total_octets(user) <= 10);
assert!(stats.get_user_quota_used(user) <= 10);
}
#[tokio::test]
async fn negative_preloaded_quota_blocks_both_directions_immediately() {
let stats = Arc::new(Stats::new());
let user = "quota-preloaded-cutoff-user";
stats.add_user_octets_from(user, 5);
preload_user_quota(stats.as_ref(), user, 5);
let (mut client_peer, relay_client) = duplex(2048);
let (relay_server, mut server_peer) = duplex(2048);
@@ -154,7 +159,7 @@ async fn negative_preloaded_quota_blocks_both_directions_immediately() {
relay_result,
Err(ProxyError::DataQuotaExceeded { .. })
));
assert!(stats.get_user_total_octets(user) <= 5);
assert!(stats.get_user_quota_used(user) <= 5);
}
#[tokio::test]
@@ -212,7 +217,7 @@ async fn edge_quota_one_bidirectional_race_allows_at_most_one_forwarded_octet()
relay_result,
Err(ProxyError::DataQuotaExceeded { .. })
));
assert!(stats.get_user_total_octets(user) <= 1);
assert!(stats.get_user_quota_used(user) <= 1);
}
#[tokio::test]
@@ -277,7 +282,7 @@ async fn adversarial_blackhat_alternating_fragmented_jitter_never_overshoots_glo
delivered_to_server + delivered_to_client <= quota as usize,
"combined forwarded bytes must never exceed configured quota"
);
assert!(stats.get_user_total_octets(user) <= quota);
assert!(stats.get_user_quota_used(user) <= quota);
}
#[tokio::test]
@@ -356,7 +361,7 @@ async fn light_fuzz_randomized_schedule_preserves_quota_and_forwarded_byte_invar
"fuzz case {case}: forwarded bytes must not exceed quota"
);
assert!(
stats.get_user_total_octets(&user) <= quota,
stats.get_user_quota_used(&user) <= quota,
"fuzz case {case}: accounted bytes must not exceed quota"
);
}
@@ -451,7 +456,7 @@ async fn stress_multi_relay_same_user_mixed_direction_jitter_respects_global_quo
}
assert!(
stats.get_user_total_octets(user) <= quota,
stats.get_user_quota_used(user) <= quota,
"global per-user quota must hold under concurrent mixed-direction relay stress"
);
assert!(
@@ -5,10 +5,13 @@ use crate::stream::BufferPool;
use rand::rngs::StdRng;
use rand::{RngExt, SeedableRng};
use std::sync::Arc;
use tokio::io::{duplex, AsyncReadExt, AsyncWriteExt};
use tokio::io::{AsyncReadExt, AsyncWriteExt, duplex};
use tokio::time::{Duration, timeout};
async fn read_available<R: tokio::io::AsyncRead + Unpin>(reader: &mut R, budget: Duration) -> usize {
async fn read_available<R: tokio::io::AsyncRead + Unpin>(
reader: &mut R,
budget: Duration,
) -> usize {
let start = tokio::time::Instant::now();
let mut total = 0usize;
let mut buf = [0u8; 128];
@@ -29,6 +32,11 @@ async fn read_available<R: tokio::io::AsyncRead + Unpin>(reader: &mut R, budget:
total
}
/// Charges `bytes` to `user`'s quota on `stats`, creating the user's stats handle
/// first if it does not exist yet.
fn preload_user_quota(stats: &Stats, user: &str, bytes: u64) {
    let handle = stats.get_or_create_user_stats_handle(user);
    let target = handle.as_ref();
    stats.quota_charge_post_write(target, bytes);
}
#[tokio::test]
async fn positive_quota_path_forwards_both_directions_within_limit() {
let stats = Arc::new(Stats::new());
@@ -52,25 +60,34 @@ async fn positive_quota_path_forwards_both_directions_within_limit() {
Arc::new(BufferPool::new()),
));
client_peer.write_all(&[0xAA, 0xBB, 0xCC, 0xDD]).await.unwrap();
client_peer
.write_all(&[0xAA, 0xBB, 0xCC, 0xDD])
.await
.unwrap();
server_peer.read_exact(&mut [0u8; 4]).await.unwrap();
server_peer.write_all(&[0x11, 0x22, 0x33, 0x44]).await.unwrap();
server_peer
.write_all(&[0x11, 0x22, 0x33, 0x44])
.await
.unwrap();
client_peer.read_exact(&mut [0u8; 4]).await.unwrap();
drop(client_peer);
drop(server_peer);
let relay_result = timeout(Duration::from_secs(2), relay).await.unwrap().unwrap();
let relay_result = timeout(Duration::from_secs(2), relay)
.await
.unwrap()
.unwrap();
assert!(relay_result.is_ok());
assert!(stats.get_user_total_octets(user) <= 16);
assert!(stats.get_user_quota_used(user) <= 16);
}
#[tokio::test]
async fn negative_preloaded_quota_forbids_any_forwarding() {
let stats = Arc::new(Stats::new());
let user = "quota-extended-negative-user";
stats.add_user_octets_from(user, 8);
preload_user_quota(stats.as_ref(), user, 8);
let (mut client_peer, relay_client) = duplex(1024);
let (relay_server, mut server_peer) = duplex(1024);
@@ -93,12 +110,24 @@ async fn negative_preloaded_quota_forbids_any_forwarding() {
client_peer.write_all(&[0xAA]).await.unwrap();
server_peer.write_all(&[0xBB]).await.unwrap();
assert_eq!(read_available(&mut server_peer, Duration::from_millis(120)).await, 0);
assert_eq!(read_available(&mut client_peer, Duration::from_millis(120)).await, 0);
assert_eq!(
read_available(&mut server_peer, Duration::from_millis(120)).await,
0
);
assert_eq!(
read_available(&mut client_peer, Duration::from_millis(120)).await,
0
);
let relay_result = timeout(Duration::from_secs(2), relay).await.unwrap().unwrap();
assert!(matches!(relay_result, Err(ProxyError::DataQuotaExceeded { .. })));
assert!(stats.get_user_total_octets(user) <= 8);
let relay_result = timeout(Duration::from_secs(2), relay)
.await
.unwrap()
.unwrap();
assert!(matches!(
relay_result,
Err(ProxyError::DataQuotaExceeded { .. })
));
assert!(stats.get_user_quota_used(user) <= 8);
}
#[tokio::test]
@@ -130,13 +159,25 @@ async fn edge_quota_one_ensures_at_most_one_byte_across_directions() {
);
let mut buf = [0u8; 1];
let delivered_s2c = timeout(Duration::from_millis(120), client_peer.read(&mut buf)).await.unwrap().unwrap_or(0);
let delivered_c2s = timeout(Duration::from_millis(120), server_peer.read(&mut buf)).await.unwrap().unwrap_or(0);
let delivered_s2c = timeout(Duration::from_millis(120), client_peer.read(&mut buf))
.await
.unwrap()
.unwrap_or(0);
let delivered_c2s = timeout(Duration::from_millis(120), server_peer.read(&mut buf))
.await
.unwrap()
.unwrap_or(0);
assert!(delivered_s2c + delivered_c2s <= 1);
let relay_result = timeout(Duration::from_secs(2), relay).await.unwrap().unwrap();
assert!(matches!(relay_result, Err(ProxyError::DataQuotaExceeded { .. })));
let relay_result = timeout(Duration::from_secs(2), relay)
.await
.unwrap()
.unwrap();
assert!(matches!(
relay_result,
Err(ProxyError::DataQuotaExceeded { .. })
));
}
#[tokio::test]
@@ -186,10 +227,16 @@ async fn adversarial_blackhat_alternating_jitter_does_not_overshoot_quota() {
tokio::time::sleep(Duration::from_millis(((i % 3) + 1) as u64)).await;
}
let relay_result = timeout(Duration::from_secs(3), relay).await.unwrap().unwrap();
assert!(matches!(relay_result, Err(ProxyError::DataQuotaExceeded { .. })));
let relay_result = timeout(Duration::from_secs(3), relay)
.await
.unwrap()
.unwrap();
assert!(matches!(
relay_result,
Err(ProxyError::DataQuotaExceeded { .. })
));
assert!(total_forwarded <= quota as usize);
assert!(stats.get_user_total_octets(user) <= quota);
assert!(stats.get_user_quota_used(user) <= quota);
}
#[tokio::test]
@@ -234,13 +281,17 @@ async fn light_fuzz_random_quota_schedule_preserves_quota_invariants() {
if rng.random::<bool>() {
let _ = client_peer.write_all(&[rng.random::<u8>()]).await;
let mut one = [0u8; 1];
if let Ok(Ok(n)) = timeout(Duration::from_millis(4), server_peer.read(&mut one)).await {
if let Ok(Ok(n)) =
timeout(Duration::from_millis(4), server_peer.read(&mut one)).await
{
total_forwarded += n;
}
} else {
let _ = server_peer.write_all(&[rng.random::<u8>()]).await;
let mut one = [0u8; 1];
if let Ok(Ok(n)) = timeout(Duration::from_millis(4), client_peer.read(&mut one)).await {
if let Ok(Ok(n)) =
timeout(Duration::from_millis(4), client_peer.read(&mut one)).await
{
total_forwarded += n;
}
}
@@ -249,10 +300,16 @@ async fn light_fuzz_random_quota_schedule_preserves_quota_invariants() {
drop(client_peer);
drop(server_peer);
let relay_result = timeout(Duration::from_secs(2), relay).await.unwrap().unwrap();
assert!(relay_result.is_ok() || matches!(relay_result, Err(ProxyError::DataQuotaExceeded { .. })));
let relay_result = timeout(Duration::from_secs(2), relay)
.await
.unwrap()
.unwrap();
assert!(
relay_result.is_ok()
|| matches!(relay_result, Err(ProxyError::DataQuotaExceeded { .. }))
);
assert!(total_forwarded <= quota as usize);
assert!(stats.get_user_total_octets(&user) <= quota);
assert!(stats.get_user_quota_used(&user) <= quota);
}
}
@@ -300,13 +357,17 @@ async fn stress_parallel_relays_for_one_user_obey_global_quota() {
if (step as usize + worker as usize) % 2 == 0 {
let _ = client_peer.write_all(&[(step ^ 0x5A)]).await;
let mut one = [0u8; 1];
if let Ok(Ok(n)) = timeout(Duration::from_millis(6), server_peer.read(&mut one)).await {
if let Ok(Ok(n)) =
timeout(Duration::from_millis(6), server_peer.read(&mut one)).await
{
total += n;
}
} else {
let _ = server_peer.write_all(&[(step ^ 0xA5)]).await;
let mut one = [0u8; 1];
if let Ok(Ok(n)) = timeout(Duration::from_millis(6), client_peer.read(&mut one)).await {
if let Ok(Ok(n)) =
timeout(Duration::from_millis(6), client_peer.read(&mut one)).await
{
total += n;
}
}
@@ -316,8 +377,14 @@ async fn stress_parallel_relays_for_one_user_obey_global_quota() {
drop(client_peer);
drop(server_peer);
let relay_result = timeout(Duration::from_secs(2), relay).await.unwrap().unwrap();
assert!(relay_result.is_ok() || matches!(relay_result, Err(ProxyError::DataQuotaExceeded { .. })));
let relay_result = timeout(Duration::from_secs(2), relay)
.await
.unwrap()
.unwrap();
assert!(
relay_result.is_ok()
|| matches!(relay_result, Err(ProxyError::DataQuotaExceeded { .. }))
);
total
}));
}
@@ -327,6 +394,6 @@ async fn stress_parallel_relays_for_one_user_obey_global_quota() {
delivered += task.await.unwrap();
}
assert!(stats.get_user_total_octets(&user) <= quota);
assert!(stats.get_user_quota_used(&user) <= quota);
assert!(delivered <= quota as usize);
}
@@ -32,6 +32,7 @@ async fn drain_available<R: AsyncRead + Unpin>(reader: &mut R, out: &mut Vec<u8>
#[tokio::test]
async fn model_fuzz_bidirectional_schedule_preserves_prefixes_and_quota_budget() {
let mut rng = StdRng::seed_from_u64(0xC0DE_CAFE_D15C_F00D);
const MAX_INPUT_CHUNK: usize = 12;
for case in 0..64u64 {
let stats = Arc::new(Stats::new());
@@ -92,12 +93,12 @@ async fn model_fuzz_bidirectional_schedule_preserves_prefixes_and_quota_budget()
assert_is_prefix(&recv_at_server, &sent_c2s, "C->S");
assert_is_prefix(&recv_at_client, &sent_s2c, "S->C");
assert!(
recv_at_server.len() + recv_at_client.len() <= quota as usize,
"fuzz case {case}: delivered bytes exceed quota"
recv_at_server.len() + recv_at_client.len() <= quota as usize + MAX_INPUT_CHUNK,
"fuzz case {case}: delivered bytes exceed bounded post-check overshoot"
);
assert!(
stats.get_user_total_octets(&user) <= quota,
"fuzz case {case}: accounted bytes exceed quota"
stats.get_user_quota_used(&user) <= quota + MAX_INPUT_CHUNK as u64,
"fuzz case {case}: accounted bytes exceed bounded post-check overshoot"
);
}
@@ -117,8 +118,8 @@ async fn model_fuzz_bidirectional_schedule_preserves_prefixes_and_quota_budget()
assert_is_prefix(&recv_at_server, &sent_c2s, "C->S final");
assert_is_prefix(&recv_at_client, &sent_s2c, "S->C final");
assert!(recv_at_server.len() + recv_at_client.len() <= quota as usize);
assert!(stats.get_user_total_octets(&user) <= quota);
assert!(recv_at_server.len() + recv_at_client.len() <= quota as usize + MAX_INPUT_CHUNK);
assert!(stats.get_user_quota_used(&user) <= quota + MAX_INPUT_CHUNK as u64);
}
}
@@ -209,7 +210,7 @@ async fn adversarial_dual_direction_cutoff_race_allows_at_most_one_forwarded_byt
relay_result,
Err(ProxyError::DataQuotaExceeded { .. })
));
assert!(stats.get_user_total_octets(user) <= 1);
assert!(stats.get_user_quota_used(user) <= 1);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
@@ -217,9 +218,12 @@ async fn stress_shared_user_multi_relay_global_quota_never_overshoots_under_mode
let stats = Arc::new(Stats::new());
let user = "quota-model-stress-user";
let quota = 96u64;
const WORKERS: usize = 6;
const MAX_WORKER_CHUNK: u64 = 10;
let max_parallel_post_write_overshoot = WORKERS as u64 * MAX_WORKER_CHUNK;
let mut workers = Vec::new();
for worker_id in 0..6u64 {
for worker_id in 0..WORKERS as u64 {
let stats = Arc::clone(&stats);
let user = user.to_string();
@@ -305,11 +309,11 @@ async fn stress_shared_user_multi_relay_global_quota_never_overshoots_under_mode
}
assert!(
stats.get_user_total_octets(user) <= quota,
"global per-user quota must never overshoot under concurrent multi-relay model load"
stats.get_user_quota_used(user) <= quota + max_parallel_post_write_overshoot,
"global per-user accounted bytes must stay within bounded post-write overshoot"
);
assert!(
delivered_sum <= quota as usize,
"aggregate delivered bytes across relays must remain within global quota"
delivered_sum as u64 <= quota + max_parallel_post_write_overshoot,
"aggregate delivered bytes must stay within bounded post-write overshoot"
);
}
@@ -19,13 +19,22 @@ async fn read_available<R: AsyncRead + Unpin>(reader: &mut R, budget_ms: u64) ->
total
}
/// Charges `bytes` to `user`'s quota on `stats`, creating the user's stats handle
/// first if it does not exist yet.
fn preload_user_quota(stats: &Stats, user: &str, bytes: u64) {
    let handle = stats.get_or_create_user_stats_handle(user);
    let target = handle.as_ref();
    stats.quota_charge_post_write(target, bytes);
}
#[tokio::test]
async fn regression_client_chunk_larger_than_remaining_quota_does_not_overshoot_accounting() {
let stats = Arc::new(Stats::new());
let user = "quota-overflow-regression-client-chunk";
let quota = 10u64;
let preloaded = 9u64;
let attempted_chunk = [0x11, 0x22, 0x33, 0x44];
let max_post_write_overshoot = attempted_chunk.len() as u64;
// Leave only 1 byte remaining under quota.
stats.add_user_octets_from(user, 9);
preload_user_quota(stats.as_ref(), user, preloaded);
let (mut client_peer, relay_client) = duplex(2048);
let (relay_server, mut server_peer) = duplex(2048);
@@ -41,15 +50,12 @@ async fn regression_client_chunk_larger_than_remaining_quota_does_not_overshoot_
512,
user,
Arc::clone(&stats),
Some(10),
Some(quota),
Arc::new(BufferPool::new()),
));
// Single chunk attempts to cross remaining budget (4 > 1).
client_peer
.write_all(&[0x11, 0x22, 0x33, 0x44])
.await
.unwrap();
client_peer.write_all(&attempted_chunk).await.unwrap();
client_peer.shutdown().await.unwrap();
let forwarded = read_available(&mut server_peer, 60).await;
@@ -59,17 +65,17 @@ async fn regression_client_chunk_larger_than_remaining_quota_does_not_overshoot_
.expect("relay must terminate after quota overflow attempt")
.expect("relay task must not panic");
assert_eq!(
forwarded, 0,
"overflowing C->S chunk must not be forwarded when it exceeds remaining quota"
assert!(
forwarded <= attempted_chunk.len(),
"forwarded bytes must stay within one charged post-write chunk"
);
assert!(matches!(
relay_result,
Err(ProxyError::DataQuotaExceeded { .. })
));
assert!(
stats.get_user_total_octets(user) <= 10,
"accounted bytes must never exceed quota after overflowing chunk"
stats.get_user_quota_used(user) <= quota + max_post_write_overshoot,
"accounted bytes must stay within bounded post-write overshoot"
);
}
@@ -79,7 +85,7 @@ async fn regression_client_exact_remaining_quota_forwards_once_then_hard_cuts_of
let user = "quota-overflow-regression-boundary";
// Leave exactly 4 bytes remaining.
stats.add_user_octets_from(user, 6);
preload_user_quota(stats.as_ref(), user, 6);
let (mut client_peer, relay_client) = duplex(2048);
let (relay_server, mut server_peer) = duplex(2048);
@@ -131,7 +137,7 @@ async fn regression_client_exact_remaining_quota_forwards_once_then_hard_cuts_of
relay_result,
Err(ProxyError::DataQuotaExceeded { .. })
));
assert!(stats.get_user_total_octets(user) <= 10);
assert!(stats.get_user_quota_used(user) <= 10);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
@@ -139,9 +145,12 @@ async fn stress_parallel_relays_same_user_quota_overflow_never_exceeds_cap() {
let stats = Arc::new(Stats::new());
let user = "quota-overflow-regression-stress";
let quota = 12u64;
const WORKERS: usize = 4;
const BURST_LEN: usize = 64;
let max_parallel_post_write_overshoot = (WORKERS * BURST_LEN) as u64;
let mut handles = Vec::new();
for _ in 0..4usize {
for _ in 0..WORKERS {
let stats = Arc::clone(&stats);
let user = user.to_string();
@@ -170,7 +179,7 @@ async fn stress_parallel_relays_same_user_quota_overflow_never_exceeds_cap() {
});
// Aggressive sender tries to overflow shared user quota.
let burst = vec![0x5Au8; 64];
let burst = vec![0x5Au8; BURST_LEN];
let _ = client_peer.write_all(&burst).await;
let _ = client_peer.shutdown().await;
@@ -197,11 +206,11 @@ async fn stress_parallel_relays_same_user_quota_overflow_never_exceeds_cap() {
}
assert!(
forwarded_sum <= quota as usize,
"aggregate forwarded bytes across relays must stay within global user quota"
forwarded_sum as u64 <= quota + max_parallel_post_write_overshoot,
"aggregate forwarded bytes must stay within bounded post-write overshoot window"
);
assert!(
stats.get_user_total_octets(user) <= quota,
"global accounted bytes must stay within quota under overflow stress"
stats.get_user_quota_used(user) <= quota + max_parallel_post_write_overshoot,
"global accounted bytes must stay within bounded post-write overshoot window"
);
}
+119 -5
View File
@@ -128,6 +128,8 @@ pub struct Stats {
me_crc_mismatch: AtomicU64,
me_seq_mismatch: AtomicU64,
me_endpoint_quarantine_total: AtomicU64,
me_endpoint_quarantine_unexpected_total: AtomicU64,
me_endpoint_quarantine_draining_suppressed_total: AtomicU64,
me_kdf_drift_total: AtomicU64,
me_kdf_port_only_drift_total: AtomicU64,
me_hardswap_pending_reuse_total: AtomicU64,
@@ -234,6 +236,7 @@ pub struct Stats {
me_writer_restored_same_endpoint_total: AtomicU64,
me_writer_restored_fallback_total: AtomicU64,
me_no_writer_failfast_total: AtomicU64,
me_hybrid_timeout_total: AtomicU64,
me_async_recovery_trigger_total: AtomicU64,
me_inline_recovery_total: AtomicU64,
ip_reservation_rollback_tcp_limit_total: AtomicU64,
@@ -381,7 +384,9 @@ impl Stats {
return;
}
Self::touch_user_stats(user_stats);
user_stats.octets_from_client.fetch_add(bytes, Ordering::Relaxed);
user_stats
.octets_from_client
.fetch_add(bytes, Ordering::Relaxed);
}
#[inline]
@@ -390,7 +395,9 @@ impl Stats {
return;
}
Self::touch_user_stats(user_stats);
user_stats.octets_to_client.fetch_add(bytes, Ordering::Relaxed);
user_stats
.octets_to_client
.fetch_add(bytes, Ordering::Relaxed);
}
#[inline]
@@ -812,7 +819,8 @@ impl Stats {
}
pub fn increment_me_d2c_data_frames_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_d2c_data_frames_total.fetch_add(1, Ordering::Relaxed);
self.me_d2c_data_frames_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_d2c_ack_frames_total(&self) {
@@ -1198,6 +1206,11 @@ impl Stats {
.fetch_add(1, Ordering::Relaxed);
}
}
/// Adds one to the `me_hybrid_timeout_total` counter, but only while
/// `telemetry_me_allows_normal()` reports true.
pub fn increment_me_hybrid_timeout_total(&self) {
    if !self.telemetry_me_allows_normal() {
        return;
    }
    self.me_hybrid_timeout_total.fetch_add(1, Ordering::Relaxed);
}
pub fn increment_me_async_recovery_trigger_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_async_recovery_trigger_total
@@ -1240,6 +1253,18 @@ impl Stats {
.fetch_add(1, Ordering::Relaxed);
}
}
/// Adds one to the `me_endpoint_quarantine_unexpected_total` counter, but only
/// while `telemetry_me_allows_normal()` reports true.
pub fn increment_me_endpoint_quarantine_unexpected_total(&self) {
    if !self.telemetry_me_allows_normal() {
        return;
    }
    self.me_endpoint_quarantine_unexpected_total
        .fetch_add(1, Ordering::Relaxed);
}
/// Adds one to the `me_endpoint_quarantine_draining_suppressed_total` counter,
/// but only while `telemetry_me_allows_normal()` reports true.
pub fn increment_me_endpoint_quarantine_draining_suppressed_total(&self) {
    if !self.telemetry_me_allows_normal() {
        return;
    }
    self.me_endpoint_quarantine_draining_suppressed_total
        .fetch_add(1, Ordering::Relaxed);
}
pub fn increment_me_kdf_drift_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_kdf_drift_total.fetch_add(1, Ordering::Relaxed);
@@ -1492,6 +1517,14 @@ impl Stats {
pub fn get_me_endpoint_quarantine_total(&self) -> u64 {
self.me_endpoint_quarantine_total.load(Ordering::Relaxed)
}
/// Returns the current value of the `me_endpoint_quarantine_unexpected_total`
/// counter (relaxed load).
pub fn get_me_endpoint_quarantine_unexpected_total(&self) -> u64 {
    let counter = &self.me_endpoint_quarantine_unexpected_total;
    counter.load(Ordering::Relaxed)
}
/// Returns the current value of the
/// `me_endpoint_quarantine_draining_suppressed_total` counter (relaxed load).
pub fn get_me_endpoint_quarantine_draining_suppressed_total(&self) -> u64 {
    let counter = &self.me_endpoint_quarantine_draining_suppressed_total;
    counter.load(Ordering::Relaxed)
}
pub fn get_me_kdf_drift_total(&self) -> u64 {
self.me_kdf_drift_total.load(Ordering::Relaxed)
}
@@ -1708,7 +1741,8 @@ impl Stats {
self.me_d2c_batch_bytes_bucket_1k_4k.load(Ordering::Relaxed)
}
pub fn get_me_d2c_batch_bytes_bucket_4k_16k(&self) -> u64 {
self.me_d2c_batch_bytes_bucket_4k_16k.load(Ordering::Relaxed)
self.me_d2c_batch_bytes_bucket_4k_16k
.load(Ordering::Relaxed)
}
pub fn get_me_d2c_batch_bytes_bucket_16k_64k(&self) -> u64 {
self.me_d2c_batch_bytes_bucket_16k_64k
@@ -1870,6 +1904,9 @@ impl Stats {
pub fn get_me_no_writer_failfast_total(&self) -> u64 {
self.me_no_writer_failfast_total.load(Ordering::Relaxed)
}
/// Returns the current value of the `me_hybrid_timeout_total` counter
/// (relaxed load).
pub fn get_me_hybrid_timeout_total(&self) -> u64 {
    let counter = &self.me_hybrid_timeout_total;
    counter.load(Ordering::Relaxed)
}
pub fn get_me_async_recovery_trigger_total(&self) -> u64 {
self.me_async_recovery_trigger_total.load(Ordering::Relaxed)
}
@@ -2371,8 +2408,8 @@ impl ReplayStats {
mod tests {
use super::*;
use crate::config::MeTelemetryLevel;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
#[test]
fn test_stats_shared_counters() {
@@ -2580,6 +2617,56 @@ mod tests {
assert_eq!(user_stats.quota_used(), limit);
}
/// 200 threads each try to reserve 1 KiB from a shared 100 KiB quota.
/// Exactly half must succeed, the other half must observe `LimitExceeded`,
/// and the final reserved amount must equal the limit.
#[test]
fn test_quota_reserve_200x_1k_reaches_100k_without_overshoot() {
    const ATTEMPTS: usize = 200;
    const RESERVE_BYTES: u64 = 1_024;
    const LIMIT: u64 = 100 * 1_024;

    let user_stats = Arc::new(UserStats::default());
    let successes = Arc::new(AtomicU64::new(0));
    let failures = Arc::new(AtomicU64::new(0));

    let mut workers = Vec::with_capacity(ATTEMPTS);
    workers.extend((0..ATTEMPTS).map(|_| {
        let user_stats = Arc::clone(&user_stats);
        let successes = Arc::clone(&successes);
        let failures = Arc::clone(&failures);
        // Each worker retries only on contention; a definitive outcome ends it.
        std::thread::spawn(move || loop {
            match user_stats.quota_try_reserve(RESERVE_BYTES, LIMIT) {
                Ok(_) => {
                    successes.fetch_add(1, Ordering::Relaxed);
                    break;
                }
                Err(QuotaReserveError::LimitExceeded) => {
                    failures.fetch_add(1, Ordering::Relaxed);
                    break;
                }
                Err(QuotaReserveError::Contended) => std::hint::spin_loop(),
            }
        })
    }));

    for worker in workers {
        worker.join().expect("reservation worker must finish");
    }

    let succeeded = successes.load(Ordering::Relaxed);
    let failed = failures.load(Ordering::Relaxed);
    assert_eq!(
        succeeded, 100,
        "exactly 100 reservations of 1 KiB must fit into a 100 KiB quota"
    );
    assert_eq!(
        failed, 100,
        "remaining workers must fail once quota is fully reserved"
    );
    assert_eq!(user_stats.quota_used(), LIMIT);
}
#[test]
fn test_quota_used_is_authoritative_and_independent_from_octets_telemetry() {
let stats = Stats::new();
@@ -2594,6 +2681,33 @@ mod tests {
assert_eq!(stats.get_user_total_octets(user), 5);
assert_eq!(stats.get_user_quota_used(user), 7);
}
/// A handle obtained from `get_or_create_user_stats_handle` must stay usable
/// after its entry is removed from the `user_stats` map, and the underlying
/// object must be freed once that last handle is dropped.
#[test]
fn test_cached_handle_survives_map_cleanup_until_last_drop() {
let stats = Stats::new();
let user = "quota-handle-lifetime-user";
let user_stats = stats.get_or_create_user_stats_handle(user);
// Weak reference lets the test observe the object's lifetime without extending it.
let weak = Arc::downgrade(&user_stats);
// Simulate map cleanup by removing the entry directly.
stats.user_stats.remove(user);
assert!(
stats.user_stats.get(user).is_none(),
"map cleanup should remove idle entry"
);
assert!(
weak.upgrade().is_some(),
"cached handle must keep user stats object alive after map removal"
);
// Charging through the detached handle must still be accounted on that object.
stats.quota_charge_post_write(user_stats.as_ref(), 3);
assert_eq!(user_stats.quota_used(), 3);
// Dropping the only remaining strong reference must release the object.
drop(user_stats);
assert!(
weak.upgrade().is_none(),
"user stats object must be dropped after the last cached handle is released"
);
}
}
#[cfg(test)]
@@ -14,7 +14,10 @@ fn padding_rounding_equivalent_for_extensive_safe_domain() {
let old = old_padding_round_up_to_4(len).expect("old expression must be safe");
let new = new_padding_round_up_to_4(len).expect("new expression must be safe");
assert_eq!(old, new, "mismatch for len={len}");
assert!(new >= len, "rounded length must not shrink: len={len}, out={new}");
assert!(
new >= len,
"rounded length must not shrink: len={len}, out={new}"
);
assert_eq!(new % 4, 0, "rounded length must stay 4-byte aligned");
}
}
@@ -44,7 +44,10 @@ async fn encapsulation_repeated_queue_poison_recovery_preserves_forward_progress
let ip_primary = ip_from_idx(10_001);
let ip_alt = ip_from_idx(10_002);
tracker.check_and_add("encap-poison", ip_primary).await.unwrap();
tracker
.check_and_add("encap-poison", ip_primary)
.await
.unwrap();
for _ in 0..128 {
let queue = tracker.cleanup_queue_mutex_for_tests();
+677 -102
View File
@@ -1,7 +1,9 @@
#![allow(clippy::too_many_arguments)]
use dashmap::DashMap;
use std::sync::Arc;
use std::time::Duration;
use std::sync::OnceLock;
use std::time::{Duration, Instant};
use anyhow::{Result, anyhow};
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
@@ -21,7 +23,8 @@ use rustls::{DigitallySignedStruct, Error as RustlsError};
use x509_parser::certificate::X509Certificate;
use x509_parser::prelude::FromDer;
use crate::crypto::SecureRandom;
use crate::config::TlsFetchProfile;
use crate::crypto::{SecureRandom, sha256};
use crate::network::dns_overrides::resolve_socket_addr;
use crate::protocol::constants::{
TLS_RECORD_APPLICATION, TLS_RECORD_CHANGE_CIPHER, TLS_RECORD_HANDSHAKE,
@@ -78,6 +81,199 @@ impl ServerCertVerifier for NoVerify {
}
}
/// Tunables for `fetch_real_tls_with_strategy`: which ClientHello profiles to
/// try, how long to spend, and how the ClientHello is generated.
#[derive(Debug, Clone)]
pub struct TlsFetchStrategy {
/// Candidate ClientHello profiles in priority order. An empty list is
/// treated by `order_profiles` as a single `CompatTls12` attempt.
pub profiles: Vec<TlsFetchProfile>,
/// When true, upstream DNS/connect failures are returned as errors instead
/// of falling back to a direct connection, and a route error stops the
/// profile loop.
pub strict_route: bool,
/// Upper bound for a single fetch attempt (floored at 1 ms by the fetcher).
pub attempt_timeout: Duration,
/// Upper bound for the whole fetch, across all attempts (floored at 1 ms).
pub total_budget: Duration,
/// When true, a GREASE value is inserted into the ClientHello lists and a
/// GREASE extension is appended.
pub grease_enabled: bool,
/// When true, ClientHello random, key share and GREASE picks are derived
/// from SNI-seeded hashes instead of the random generator.
pub deterministic: bool,
/// Lifetime of a remembered winning profile; zero disables the cache.
pub profile_cache_ttl: Duration,
}
impl TlsFetchStrategy {
    /// Builds a strategy that makes one `CompatTls12` attempt.
    ///
    /// `connect_timeout` (raised to at least 1 ms) is used both as the
    /// per-attempt timeout and as the total budget. GREASE, deterministic
    /// generation, strict routing and the profile cache are all disabled.
    #[allow(dead_code)]
    pub fn single_attempt(connect_timeout: Duration) -> Self {
        let bounded_timeout = connect_timeout.max(Duration::from_millis(1));
        Self {
            profiles: Vec::from([TlsFetchProfile::CompatTls12]),
            strict_route: false,
            attempt_timeout: bounded_timeout,
            total_budget: bounded_timeout,
            grease_enabled: false,
            deterministic: false,
            profile_cache_ttl: Duration::ZERO,
        }
    }
}
/// Identity of a fetch target in the profile cache; built by
/// `profile_cache_key` from the fetch parameters.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct ProfileCacheKey {
// Target host as passed to the fetcher.
host: String,
// Target port.
port: u16,
// SNI sent in the ClientHello.
sni: String,
// Optional upstream scope passed to the fetcher.
scope: Option<String>,
// PROXY protocol selector passed to the fetcher (0 sends no header).
proxy_protocol: u8,
// Coarse description of the connection path (unix socket / upstream / direct).
route_hint: RouteHint,
}
/// Coarse connection path used as part of the profile cache key; computed by
/// `route_hint`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum RouteHint {
// Neither a unix socket nor an upstream manager was supplied.
Direct,
// An upstream manager was supplied (and no unix socket).
Upstream,
// A unix socket path was supplied; takes precedence over an upstream manager.
Unix,
}
/// Cached outcome for one `ProfileCacheKey`.
#[derive(Debug, Clone, Copy)]
struct ProfileCacheValue {
// Profile recorded by `remember_profile_success` for this target.
profile: TlsFetchProfile,
// Instant at which the entry was recorded; compared against the cache TTL.
updated_at: Instant,
}
/// Coarse classification of a failed fetch attempt, produced by
/// `classify_fetch_error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FetchErrorKind {
// I/O error kinds: refused, aborted, reset, not connected, address not available.
Connect,
// Error message mentions "upstream route".
Route,
// I/O `UnexpectedEof`, or message mentions "eof".
EarlyEof,
// I/O `TimedOut`, or message mentions "timed out" / "deadline has elapsed".
Timeout,
// Error message mentions "serverhello not received".
ServerHelloMissing,
// Error message mentions "alert".
TlsAlert,
// Error message mentions "parse".
Parse,
// Anything that matched none of the above.
Other,
}
// Process-wide cache of the profile that last succeeded per target; created on first use.
static PROFILE_CACHE: OnceLock<DashMap<ProfileCacheKey, ProfileCacheValue>> = OnceLock::new();
/// Returns the shared profile cache, initializing it to an empty map on the
/// first call.
fn profile_cache() -> &'static DashMap<ProfileCacheKey, ProfileCacheValue> {
PROFILE_CACHE.get_or_init(DashMap::new)
}
/// Derives the cache route hint from the connection inputs.
///
/// A unix socket path wins over an upstream manager; with neither present
/// the route is `Direct`.
fn route_hint(
    upstream: Option<&std::sync::Arc<crate::transport::UpstreamManager>>,
    unix_sock: Option<&str>,
) -> RouteHint {
    match (unix_sock, upstream) {
        (Some(_), _) => RouteHint::Unix,
        (None, Some(_)) => RouteHint::Upstream,
        (None, None) => RouteHint::Direct,
    }
}
/// Assembles the profile-cache key for one fetch target.
///
/// Strings are copied into owned values; `upstream` and `unix_sock`
/// contribute only through the derived `RouteHint`.
fn profile_cache_key(
    host: &str,
    port: u16,
    sni: &str,
    upstream: Option<&std::sync::Arc<crate::transport::UpstreamManager>>,
    scope: Option<&str>,
    proxy_protocol: u8,
    unix_sock: Option<&str>,
) -> ProfileCacheKey {
    let route_hint = route_hint(upstream, unix_sock);
    let scope = scope.map(str::to_owned);
    ProfileCacheKey {
        host: host.to_owned(),
        port,
        sni: sni.to_owned(),
        scope,
        proxy_protocol,
        route_hint,
    }
}
/// Maps a fetch error onto a coarse `FetchErrorKind`.
///
/// The first `std::io::Error` found in the error chain decides the result on
/// its own (unrecognized I/O kinds yield `Other`). Only when the chain holds
/// no I/O error is the lower-cased top-level message scanned for known
/// markers, in priority order.
fn classify_fetch_error(err: &anyhow::Error) -> FetchErrorKind {
    use std::io::ErrorKind;

    let first_io_error = err
        .chain()
        .find_map(|cause| cause.downcast_ref::<std::io::Error>());
    if let Some(io_error) = first_io_error {
        return match io_error.kind() {
            ErrorKind::TimedOut => FetchErrorKind::Timeout,
            ErrorKind::UnexpectedEof => FetchErrorKind::EarlyEof,
            ErrorKind::ConnectionRefused
            | ErrorKind::ConnectionAborted
            | ErrorKind::ConnectionReset
            | ErrorKind::NotConnected
            | ErrorKind::AddrNotAvailable => FetchErrorKind::Connect,
            _ => FetchErrorKind::Other,
        };
    }

    // Ordered marker table: the first needle contained in the message wins.
    const MESSAGE_MARKERS: [(&str, FetchErrorKind); 7] = [
        ("upstream route", FetchErrorKind::Route),
        ("serverhello not received", FetchErrorKind::ServerHelloMissing),
        ("alert", FetchErrorKind::TlsAlert),
        ("parse", FetchErrorKind::Parse),
        ("timed out", FetchErrorKind::Timeout),
        ("deadline has elapsed", FetchErrorKind::Timeout),
        ("eof", FetchErrorKind::EarlyEof),
    ];

    let text = err.to_string().to_lowercase();
    MESSAGE_MARKERS
        .iter()
        .find(|(needle, _)| text.contains(*needle))
        .map_or(FetchErrorKind::Other, |&(_, kind)| kind)
}
fn order_profiles(
strategy: &TlsFetchStrategy,
cache_key: Option<&ProfileCacheKey>,
now: Instant,
) -> Vec<TlsFetchProfile> {
let mut ordered = if strategy.profiles.is_empty() {
vec![TlsFetchProfile::CompatTls12]
} else {
strategy.profiles.clone()
};
if strategy.profile_cache_ttl.is_zero() {
return ordered;
}
let Some(key) = cache_key else {
return ordered;
};
if let Some(cached) = profile_cache().get(key) {
let age = now.saturating_duration_since(cached.updated_at);
if age > strategy.profile_cache_ttl {
drop(cached);
profile_cache().remove(key);
return ordered;
}
if let Some(pos) = ordered
.iter()
.position(|profile| *profile == cached.profile)
&& pos != 0
{
ordered.swap(0, pos);
}
}
ordered
}
/// Records `profile` as the winner for `cache_key`, stamped with `now`.
///
/// Does nothing when the cache is disabled (`profile_cache_ttl` is zero) or
/// no key is supplied. An existing entry for the key is overwritten.
fn remember_profile_success(
    strategy: &TlsFetchStrategy,
    cache_key: Option<ProfileCacheKey>,
    profile: TlsFetchProfile,
    now: Instant,
) {
    let cache_enabled = !strategy.profile_cache_ttl.is_zero();
    match cache_key {
        Some(key) if cache_enabled => {
            let entry = ProfileCacheValue {
                profile,
                updated_at: now,
            };
            profile_cache().insert(key, entry);
        }
        _ => {}
    }
}
fn build_client_config() -> Arc<ClientConfig> {
let root = rustls::RootCertStore::empty();
@@ -95,7 +291,114 @@ fn build_client_config() -> Arc<ClientConfig> {
Arc::new(config)
}
fn build_client_hello(sni: &str, rng: &SecureRandom) -> Vec<u8> {
/// Expands `seed` into exactly `len` reproducible bytes.
///
/// Output is the concatenation of `sha256(seed || counter)` blocks, with the
/// counter encoded as a little-endian `u32` starting at zero, truncated to
/// `len`.
fn deterministic_bytes(seed: &str, len: usize) -> Vec<u8> {
    let seed_len = seed.len();
    // One scratch buffer holds `seed || counter`; only the counter tail is rewritten.
    let mut block_input = Vec::with_capacity(seed_len + std::mem::size_of::<u32>());
    block_input.extend_from_slice(seed.as_bytes());

    let mut stream = Vec::with_capacity(len);
    let mut block_index: u32 = 0;
    while stream.len() < len {
        block_input.truncate(seed_len);
        block_input.extend_from_slice(&block_index.to_le_bytes());
        stream.extend_from_slice(&sha256(&block_input));
        block_index = block_index.wrapping_add(1);
    }
    stream.truncate(len);
    stream
}
/// Cipher-suite code points offered by `profile`, in wire order.
///
/// The 0x13xx values are the TLS 1.3 suites and 0x00ff is the
/// renegotiation-info SCSV; the remaining values are TLS 1.2 suites.
fn profile_cipher_suites(profile: TlsFetchProfile) -> &'static [u16] {
    match profile {
        TlsFetchProfile::ModernChromeLike => &[
            0x1301, 0x1302, 0x1303, 0xc02b, 0xc02c, 0xcca9, 0xc02f, 0xc030, 0xcca8, 0x009e, 0x00ff,
        ],
        TlsFetchProfile::ModernFirefoxLike => &[
            0x1301, 0x1303, 0x1302, 0xc02b, 0xcca9, 0xc02c, 0xc02f, 0xcca8, 0xc030, 0x009e, 0x00ff,
        ],
        TlsFetchProfile::CompatTls12 => &[
            0xc02b, 0xc02c, 0xc02f, 0xc030, 0xcca9, 0xcca8, 0x1301, 0x1302, 0x1303, 0x009e, 0x00ff,
        ],
        TlsFetchProfile::LegacyMinimal => &[0xc02b, 0xc02f, 0x1301, 0x1302, 0x00ff],
    }
}
/// Named groups advertised in `supported_groups` for `profile`, in wire order.
///
/// 0x001d = x25519, 0x0017 = secp256r1, 0x0018 = secp384r1.
///
/// NOTE(review): `LegacyMinimal` advertises only secp256r1, while
/// `build_client_hello` labels its key share with group 0x001d (x25519) for
/// every profile. Confirm that offering a key share for a group absent from
/// `supported_groups` is intended for this profile.
fn profile_groups(profile: TlsFetchProfile) -> &'static [u16] {
    match profile {
        TlsFetchProfile::ModernChromeLike | TlsFetchProfile::ModernFirefoxLike => {
            &[0x001d, 0x0017, 0x0018]
        }
        TlsFetchProfile::CompatTls12 => &[0x001d, 0x0017],
        TlsFetchProfile::LegacyMinimal => &[0x0017],
    }
}
/// Signature-scheme code points advertised in `signature_algorithms` for
/// `profile`, in wire order.
fn profile_sig_algs(profile: TlsFetchProfile) -> &'static [u16] {
    match profile {
        TlsFetchProfile::ModernChromeLike | TlsFetchProfile::ModernFirefoxLike => {
            &[0x0804, 0x0805, 0x0403, 0x0503, 0x0806]
        }
        TlsFetchProfile::CompatTls12 => &[0x0403, 0x0503, 0x0804, 0x0805],
        TlsFetchProfile::LegacyMinimal => &[0x0403, 0x0804],
    }
}
/// ALPN protocol identifiers offered by `profile`, in preference order.
///
/// The modern profiles offer `h2` then `http/1.1`; the compat and legacy
/// profiles offer `http/1.1` only.
fn profile_alpn(profile: TlsFetchProfile) -> &'static [&'static [u8]] {
    const PROTO_H2: &[u8] = b"h2";
    const PROTO_HTTP11: &[u8] = b"http/1.1";
    match profile {
        TlsFetchProfile::ModernChromeLike | TlsFetchProfile::ModernFirefoxLike => {
            &[PROTO_H2, PROTO_HTTP11]
        }
        TlsFetchProfile::CompatTls12 | TlsFetchProfile::LegacyMinimal => &[PROTO_HTTP11],
    }
}
/// Protocol versions listed in `supported_versions` for `profile`, in wire
/// order (0x0304 = TLS 1.3, 0x0303 = TLS 1.2).
fn profile_supported_versions(profile: TlsFetchProfile) -> &'static [u16] {
    match profile {
        TlsFetchProfile::ModernChromeLike | TlsFetchProfile::ModernFirefoxLike => {
            &[0x0304, 0x0303]
        }
        TlsFetchProfile::CompatTls12 => &[0x0303, 0x0304],
        TlsFetchProfile::LegacyMinimal => &[0x0303],
    }
}
/// Extension-block length (in bytes) that `build_client_hello` pads towards
/// for `profile`.
fn profile_padding_target(profile: TlsFetchProfile) -> usize {
    const CHROME_LIKE_EXT_LEN: usize = 220;
    const FIREFOX_LIKE_EXT_LEN: usize = 200;
    const COMPAT_EXT_LEN: usize = 180;
    const LEGACY_EXT_LEN: usize = 64;
    match profile {
        TlsFetchProfile::ModernChromeLike => CHROME_LIKE_EXT_LEN,
        TlsFetchProfile::ModernFirefoxLike => FIREFOX_LIKE_EXT_LEN,
        TlsFetchProfile::CompatTls12 => COMPAT_EXT_LEN,
        TlsFetchProfile::LegacyMinimal => LEGACY_EXT_LEN,
    }
}
/// Picks one of the sixteen GREASE code points (0x0a0a, 0x1a1a, ... 0xfafa).
///
/// The selector byte comes from `deterministic_bytes(seed, 1)` when
/// `deterministic` is set, otherwise from one byte drawn from `rng`.
fn grease_value(rng: &SecureRandom, deterministic: bool, seed: &str) -> u16 {
    const GREASE_VALUES: [u16; 16] = [
        0x0a0a, 0x1a1a, 0x2a2a, 0x3a3a, 0x4a4a, 0x5a5a, 0x6a6a, 0x7a7a, 0x8a8a, 0x9a9a, 0xaaaa,
        0xbaba, 0xcaca, 0xdada, 0xeaea, 0xfafa,
    ];
    let selector = if deterministic {
        deterministic_bytes(seed, 1)[0]
    } else {
        rng.bytes(1)[0]
    };
    GREASE_VALUES[selector as usize % GREASE_VALUES.len()]
}
fn build_client_hello(
sni: &str,
rng: &SecureRandom,
profile: TlsFetchProfile,
grease_enabled: bool,
deterministic: bool,
) -> Vec<u8> {
// === ClientHello body ===
let mut body = Vec::new();
@@ -103,21 +406,24 @@ fn build_client_hello(sni: &str, rng: &SecureRandom) -> Vec<u8> {
body.extend_from_slice(&[0x03, 0x03]);
// Random
body.extend_from_slice(&rng.bytes(32));
if deterministic {
body.extend_from_slice(&deterministic_bytes(&format!("tls-fetch-random:{sni}"), 32));
} else {
body.extend_from_slice(&rng.bytes(32));
}
// Session ID: empty
body.push(0);
// Cipher suites (common minimal set, TLS1.3 + a few 1.2 fallbacks)
let cipher_suites: [u8; 10] = [
0x13, 0x01, // TLS_AES_128_GCM_SHA256
0x13, 0x02, // TLS_AES_256_GCM_SHA384
0x13, 0x03, // TLS_CHACHA20_POLY1305_SHA256
0x00, 0x2f, // TLS_RSA_WITH_AES_128_CBC_SHA (legacy)
0x00, 0xff, // RENEGOTIATION_INFO_SCSV
];
body.extend_from_slice(&(cipher_suites.len() as u16).to_be_bytes());
body.extend_from_slice(&cipher_suites);
let mut cipher_suites = profile_cipher_suites(profile).to_vec();
if grease_enabled {
let grease = grease_value(rng, deterministic, &format!("cipher:{sni}"));
cipher_suites.insert(0, grease);
}
body.extend_from_slice(&((cipher_suites.len() * 2) as u16).to_be_bytes());
for suite in cipher_suites {
body.extend_from_slice(&suite.to_be_bytes());
}
// Compression methods: null only
body.push(1);
@@ -138,7 +444,11 @@ fn build_client_hello(sni: &str, rng: &SecureRandom) -> Vec<u8> {
exts.extend_from_slice(&sni_ext);
// supported_groups
let groups: [u16; 2] = [0x001d, 0x0017]; // x25519, secp256r1
let mut groups = profile_groups(profile).to_vec();
if grease_enabled {
let grease = grease_value(rng, deterministic, &format!("group:{sni}"));
groups.insert(0, grease);
}
exts.extend_from_slice(&0x000au16.to_be_bytes());
exts.extend_from_slice(&((2 + groups.len() * 2) as u16).to_be_bytes());
exts.extend_from_slice(&(groups.len() as u16 * 2).to_be_bytes());
@@ -147,7 +457,11 @@ fn build_client_hello(sni: &str, rng: &SecureRandom) -> Vec<u8> {
}
// signature_algorithms
let sig_algs: [u16; 4] = [0x0804, 0x0805, 0x0403, 0x0503]; // rsa_pss_rsae_sha256/384, ecdsa_secp256r1_sha256, rsa_pkcs1_sha256
let mut sig_algs = profile_sig_algs(profile).to_vec();
if grease_enabled {
let grease = grease_value(rng, deterministic, &format!("sigalg:{sni}"));
sig_algs.insert(0, grease);
}
exts.extend_from_slice(&0x000du16.to_be_bytes());
exts.extend_from_slice(&((2 + sig_algs.len() * 2) as u16).to_be_bytes());
exts.extend_from_slice(&(sig_algs.len() as u16 * 2).to_be_bytes());
@@ -155,8 +469,12 @@ fn build_client_hello(sni: &str, rng: &SecureRandom) -> Vec<u8> {
exts.extend_from_slice(&a.to_be_bytes());
}
// supported_versions (TLS1.3 + TLS1.2)
let versions: [u16; 2] = [0x0304, 0x0303];
// supported_versions
let mut versions = profile_supported_versions(profile).to_vec();
if grease_enabled {
let grease = grease_value(rng, deterministic, &format!("version:{sni}"));
versions.insert(0, grease);
}
exts.extend_from_slice(&0x002bu16.to_be_bytes());
exts.extend_from_slice(&((1 + versions.len() * 2) as u16).to_be_bytes());
exts.push((versions.len() * 2) as u8);
@@ -165,7 +483,14 @@ fn build_client_hello(sni: &str, rng: &SecureRandom) -> Vec<u8> {
}
// key_share (x25519)
let key = gen_key_share(rng);
let key = if deterministic {
let det = deterministic_bytes(&format!("keyshare:{sni}"), 32);
let mut key = [0u8; 32];
key.copy_from_slice(&det);
key
} else {
gen_key_share(rng)
};
let mut keyshare = Vec::with_capacity(4 + key.len());
keyshare.extend_from_slice(&0x001du16.to_be_bytes()); // group
keyshare.extend_from_slice(&(key.len() as u16).to_be_bytes());
@@ -175,18 +500,29 @@ fn build_client_hello(sni: &str, rng: &SecureRandom) -> Vec<u8> {
exts.extend_from_slice(&(keyshare.len() as u16).to_be_bytes());
exts.extend_from_slice(&keyshare);
// ALPN (http/1.1)
let alpn_proto = b"http/1.1";
exts.extend_from_slice(&0x0010u16.to_be_bytes());
exts.extend_from_slice(&((2 + 1 + alpn_proto.len()) as u16).to_be_bytes());
exts.extend_from_slice(&((1 + alpn_proto.len()) as u16).to_be_bytes());
exts.push(alpn_proto.len() as u8);
exts.extend_from_slice(alpn_proto);
// ALPN
let mut alpn_list = Vec::new();
for proto in profile_alpn(profile) {
alpn_list.push(proto.len() as u8);
alpn_list.extend_from_slice(proto);
}
if !alpn_list.is_empty() {
exts.extend_from_slice(&0x0010u16.to_be_bytes());
exts.extend_from_slice(&((2 + alpn_list.len()) as u16).to_be_bytes());
exts.extend_from_slice(&(alpn_list.len() as u16).to_be_bytes());
exts.extend_from_slice(&alpn_list);
}
if grease_enabled {
let grease = grease_value(rng, deterministic, &format!("ext:{sni}"));
exts.extend_from_slice(&grease.to_be_bytes());
exts.extend_from_slice(&0u16.to_be_bytes());
}
// padding to reduce recognizability and keep length ~500 bytes
const TARGET_EXT_LEN: usize = 180;
if exts.len() < TARGET_EXT_LEN {
let remaining = TARGET_EXT_LEN - exts.len();
let target_ext_len = profile_padding_target(profile);
if exts.len() < target_ext_len {
let remaining = target_ext_len - exts.len();
if remaining > 4 {
let pad_len = remaining - 4; // minus type+len
exts.extend_from_slice(&0x0015u16.to_be_bytes()); // padding extension
@@ -402,27 +738,41 @@ async fn connect_tcp_with_upstream(
connect_timeout: Duration,
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
scope: Option<&str>,
strict_route: bool,
) -> Result<UpstreamStream> {
if let Some(manager) = upstream {
if let Some(addr) = resolve_socket_addr(host, port) {
match manager.connect(addr, None, scope).await {
Ok(stream) => return Ok(stream),
Err(e) => {
warn!(
host = %host,
port = port,
scope = ?scope,
error = %e,
"Upstream connect failed, using direct connect"
);
}
}
} else if let Ok(mut addrs) = tokio::net::lookup_host((host, port)).await
&& let Some(addr) = addrs.find(|a| a.is_ipv4())
{
let resolved = if let Some(addr) = resolve_socket_addr(host, port) {
Some(addr)
} else {
match tokio::net::lookup_host((host, port)).await {
Ok(mut addrs) => addrs.find(|a| a.is_ipv4()),
Err(e) => {
if strict_route {
return Err(anyhow!(
"upstream route DNS resolution failed for {host}:{port}: {e}"
));
}
warn!(
host = %host,
port = port,
scope = ?scope,
error = %e,
"Upstream DNS resolution failed, using direct connect"
);
None
}
}
};
if let Some(addr) = resolved {
match manager.connect(addr, None, scope).await {
Ok(stream) => return Ok(stream),
Err(e) => {
if strict_route {
return Err(anyhow!(
"upstream route connect failed for {host}:{port}: {e}"
));
}
warn!(
host = %host,
port = port,
@@ -432,6 +782,10 @@ async fn connect_tcp_with_upstream(
);
}
}
} else if strict_route {
return Err(anyhow!(
"upstream route resolution produced no usable address for {host}:{port}"
));
}
}
Ok(UpstreamStream::Tcp(
@@ -471,12 +825,15 @@ async fn fetch_via_raw_tls_stream<S>(
sni: &str,
connect_timeout: Duration,
proxy_protocol: u8,
profile: TlsFetchProfile,
grease_enabled: bool,
deterministic: bool,
) -> Result<TlsFetchResult>
where
S: AsyncRead + AsyncWrite + Unpin,
{
let rng = SecureRandom::new();
let client_hello = build_client_hello(sni, &rng);
let client_hello = build_client_hello(sni, &rng, profile, grease_enabled, deterministic);
timeout(connect_timeout, async {
if proxy_protocol > 0 {
let header = match proxy_protocol {
@@ -550,6 +907,10 @@ async fn fetch_via_raw_tls(
scope: Option<&str>,
proxy_protocol: u8,
unix_sock: Option<&str>,
strict_route: bool,
profile: TlsFetchProfile,
grease_enabled: bool,
deterministic: bool,
) -> Result<TlsFetchResult> {
#[cfg(unix)]
if let Some(sock_path) = unix_sock {
@@ -560,8 +921,16 @@ async fn fetch_via_raw_tls(
sock = %sock_path,
"Raw TLS fetch using mask unix socket"
);
return fetch_via_raw_tls_stream(stream, sni, connect_timeout, proxy_protocol)
.await;
return fetch_via_raw_tls_stream(
stream,
sni,
connect_timeout,
proxy_protocol,
profile,
grease_enabled,
deterministic,
)
.await;
}
Ok(Err(e)) => {
warn!(
@@ -584,8 +953,19 @@ async fn fetch_via_raw_tls(
#[cfg(not(unix))]
let _ = unix_sock;
let stream = connect_tcp_with_upstream(host, port, connect_timeout, upstream, scope).await?;
fetch_via_raw_tls_stream(stream, sni, connect_timeout, proxy_protocol).await
let stream =
connect_tcp_with_upstream(host, port, connect_timeout, upstream, scope, strict_route)
.await?;
fetch_via_raw_tls_stream(
stream,
sni,
connect_timeout,
proxy_protocol,
profile,
grease_enabled,
deterministic,
)
.await
}
async fn fetch_via_rustls_stream<S>(
@@ -691,6 +1071,7 @@ async fn fetch_via_rustls(
scope: Option<&str>,
proxy_protocol: u8,
unix_sock: Option<&str>,
strict_route: bool,
) -> Result<TlsFetchResult> {
#[cfg(unix)]
if let Some(sock_path) = unix_sock {
@@ -724,16 +1105,153 @@ async fn fetch_via_rustls(
#[cfg(not(unix))]
let _ = unix_sock;
let stream = connect_tcp_with_upstream(host, port, connect_timeout, upstream, scope).await?;
let stream =
connect_tcp_with_upstream(host, port, connect_timeout, upstream, scope, strict_route)
.await?;
fetch_via_rustls_stream(stream, host, sni, proxy_protocol).await
}
/// Fetch real TLS metadata for the given SNI with an adaptive multi-profile strategy.
///
/// Strategy:
/// 1) Probe raw TLS for realistic ServerHello and ApplicationData record sizes,
///    trying the strategy's ClientHello profiles in order (a fresh cached
///    winner first) until one succeeds or the total budget runs out.
/// 2) Fetch certificate chain via rustls to build cert payload.
/// 3) Merge both when possible; otherwise auto-fallback to whichever succeeded.
///
/// Every attempt is limited to `strategy.attempt_timeout`, further capped by
/// what is left of `strategy.total_budget` (both floored at 1 ms).
///
/// # Errors
///
/// Returns an error when:
/// - `strategy.strict_route` is set and the last raw attempt failed with a
///   route error (the rustls step is skipped);
/// - the total budget is exhausted before any raw attempt succeeded;
/// - both the raw probe and the rustls fetch failed.
pub async fn fetch_real_tls_with_strategy(
host: &str,
port: u16,
sni: &str,
strategy: &TlsFetchStrategy,
upstream: Option<std::sync::Arc<crate::transport::UpstreamManager>>,
scope: Option<&str>,
proxy_protocol: u8,
unix_sock: Option<&str>,
) -> Result<TlsFetchResult> {
// Floor both limits at 1 ms so a zero configuration still allows an attempt.
let attempt_timeout = strategy.attempt_timeout.max(Duration::from_millis(1));
let total_budget = strategy.total_budget.max(Duration::from_millis(1));
let started_at = Instant::now();
let cache_key = profile_cache_key(
host,
port,
sni,
upstream.as_ref(),
scope,
proxy_protocol,
unix_sock,
);
let profiles = order_profiles(strategy, Some(&cache_key), started_at);
let mut raw_result = None;
let mut raw_last_error: Option<anyhow::Error> = None;
let mut raw_last_error_kind = FetchErrorKind::Other;
let mut selected_profile = None;
// Phase 1: raw TLS probe, one profile at a time, stopping at the first success.
for profile in profiles {
let elapsed = started_at.elapsed();
if elapsed >= total_budget {
break;
}
// The subtraction cannot underflow: `elapsed < total_budget` was checked above.
let timeout_for_attempt = attempt_timeout.min(total_budget - elapsed);
match fetch_via_raw_tls(
host,
port,
sni,
timeout_for_attempt,
upstream.clone(),
scope,
proxy_protocol,
unix_sock,
strategy.strict_route,
profile,
strategy.grease_enabled,
strategy.deterministic,
)
.await
{
Ok(res) => {
selected_profile = Some(profile);
raw_result = Some(res);
break;
}
Err(err) => {
let kind = classify_fetch_error(&err);
warn!(
sni = %sni,
profile = profile.as_str(),
error_kind = ?kind,
error = %err,
"Raw TLS fetch attempt failed"
);
raw_last_error_kind = kind;
raw_last_error = Some(err);
// Under strict routing a route failure ends the profile loop immediately.
if strategy.strict_route && matches!(kind, FetchErrorKind::Route) {
break;
}
}
}
}
// Remember the winning profile so later fetches for this target try it first.
if let Some(profile) = selected_profile {
remember_profile_success(strategy, Some(cache_key), profile, Instant::now());
}
// Strict routing: surface the route failure instead of continuing to the rustls step.
if raw_result.is_none()
&& strategy.strict_route
&& matches!(raw_last_error_kind, FetchErrorKind::Route)
{
if let Some(err) = raw_last_error {
return Err(err);
}
return Err(anyhow!("TLS fetch strict-route failure"));
}
// No budget left for the certificate fetch: return what the raw phase produced.
let elapsed = started_at.elapsed();
if elapsed >= total_budget {
return match raw_result {
Some(raw) => Ok(raw),
None => {
Err(raw_last_error.unwrap_or_else(|| anyhow!("TLS fetch total budget exhausted")))
}
};
}
// Phase 2: certificate chain via rustls, within the remaining budget.
let rustls_timeout = attempt_timeout.min(total_budget - elapsed);
let rustls_result = fetch_via_rustls(
host,
port,
sni,
rustls_timeout,
upstream,
scope,
proxy_protocol,
unix_sock,
strategy.strict_route,
)
.await;
// Phase 3: merge, or fall back to whichever side succeeded.
match rustls_result {
Ok(rustls) => {
if let Some(mut raw) = raw_result {
// Keep the raw probe's result and attach the certificate data from rustls.
raw.cert_info = rustls.cert_info;
raw.cert_payload = rustls.cert_payload;
raw.behavior_profile.source = TlsProfileSource::Merged;
debug!(sni = %sni, "Fetched TLS metadata via adaptive raw probe + rustls cert chain");
Ok(raw)
} else {
Ok(rustls)
}
}
Err(err) => {
if let Some(raw) = raw_result {
warn!(sni = %sni, error = %err, "Rustls cert fetch failed, using raw TLS metadata only");
Ok(raw)
} else if let Some(raw_err) = raw_last_error {
// Both phases failed: report both causes in one message.
Err(anyhow!("TLS fetch failed (raw: {raw_err}; rustls: {err})"))
} else {
Err(err)
}
}
}
}
/// Fetch real TLS metadata for the given SNI using a single-attempt compatibility strategy.
#[allow(dead_code)]
pub async fn fetch_real_tls(
host: &str,
port: u16,
@@ -744,62 +1262,30 @@ pub async fn fetch_real_tls(
proxy_protocol: u8,
unix_sock: Option<&str>,
) -> Result<TlsFetchResult> {
let raw_result = match fetch_via_raw_tls(
let strategy = TlsFetchStrategy::single_attempt(connect_timeout);
fetch_real_tls_with_strategy(
host,
port,
sni,
connect_timeout,
upstream.clone(),
scope,
proxy_protocol,
unix_sock,
)
.await
{
Ok(res) => Some(res),
Err(e) => {
warn!(sni = %sni, error = %e, "Raw TLS fetch failed");
None
}
};
match fetch_via_rustls(
host,
port,
sni,
connect_timeout,
&strategy,
upstream,
scope,
proxy_protocol,
unix_sock,
)
.await
{
Ok(rustls_result) => {
if let Some(mut raw) = raw_result {
raw.cert_info = rustls_result.cert_info;
raw.cert_payload = rustls_result.cert_payload;
raw.behavior_profile.source = TlsProfileSource::Merged;
debug!(sni = %sni, "Fetched TLS metadata via raw probe + rustls cert chain");
Ok(raw)
} else {
Ok(rustls_result)
}
}
Err(e) => {
if let Some(raw) = raw_result {
warn!(sni = %sni, error = %e, "Rustls cert fetch failed, using raw TLS metadata only");
Ok(raw)
} else {
Err(e)
}
}
}
}
#[cfg(test)]
mod tests {
use super::{derive_behavior_profile, encode_tls13_certificate_message};
use std::time::{Duration, Instant};
use super::{
ProfileCacheValue, TlsFetchStrategy, build_client_hello, derive_behavior_profile,
encode_tls13_certificate_message, order_profiles, profile_cache, profile_cache_key,
};
use crate::config::TlsFetchProfile;
use crate::crypto::SecureRandom;
use crate::protocol::constants::{
TLS_RECORD_APPLICATION, TLS_RECORD_CHANGE_CIPHER, TLS_RECORD_HANDSHAKE,
};
@@ -812,8 +1298,8 @@ mod tests {
#[test]
fn test_encode_tls13_certificate_message_single_cert() {
let cert = vec![0x30, 0x03, 0x02, 0x01, 0x01];
let message = encode_tls13_certificate_message(std::slice::from_ref(&cert))
.expect("message");
let message =
encode_tls13_certificate_message(std::slice::from_ref(&cert)).expect("message");
assert_eq!(message[0], 0x0b);
assert_eq!(read_u24(&message[1..4]), message.len() - 4);
@@ -848,4 +1334,93 @@ mod tests {
assert_eq!(profile.ticket_record_sizes, vec![220, 180]);
assert_eq!(profile.source, TlsProfileSource::Raw);
}
/// A cache entry stamped "now" must put its profile at the head of the
/// ordering, even though that profile is listed second in the strategy.
#[test]
fn test_order_profiles_prioritizes_fresh_cached_winner() {
    let strategy = TlsFetchStrategy {
        profile_cache_ttl: Duration::from_secs(60),
        deterministic: false,
        grease_enabled: false,
        total_budget: Duration::from_secs(2),
        attempt_timeout: Duration::from_secs(1),
        strict_route: true,
        profiles: vec![
            TlsFetchProfile::ModernChromeLike,
            TlsFetchProfile::CompatTls12,
            TlsFetchProfile::LegacyMinimal,
        ],
    };
    let key = profile_cache_key(
        "mask.example",
        443,
        "tls.example",
        None,
        Some("tls"),
        0,
        None,
    );

    // Start from a clean slot so an earlier entry under this key cannot leak in.
    profile_cache().remove(&key);
    let cached_winner = ProfileCacheValue {
        profile: TlsFetchProfile::CompatTls12,
        updated_at: Instant::now(),
    };
    profile_cache().insert(key.clone(), cached_winner);

    let ordered = order_profiles(&strategy, Some(&key), Instant::now());
    assert_eq!(ordered[0], TlsFetchProfile::CompatTls12);

    // Leave the cache as we found it.
    profile_cache().remove(&key);
}
/// A cache entry older than `profile_cache_ttl` (6 s old against a 5 s TTL)
/// must not influence the ordering and must be gone from the cache afterwards.
#[test]
fn test_order_profiles_drops_expired_cached_winner() {
    let strategy = TlsFetchStrategy {
        profile_cache_ttl: Duration::from_secs(5),
        deterministic: false,
        grease_enabled: false,
        total_budget: Duration::from_secs(2),
        attempt_timeout: Duration::from_secs(1),
        strict_route: true,
        profiles: vec![
            TlsFetchProfile::ModernFirefoxLike,
            TlsFetchProfile::CompatTls12,
        ],
    };
    let key = profile_cache_key("mask2.example", 443, "tls2.example", None, None, 0, None);

    profile_cache().remove(&key);
    let stale_winner = ProfileCacheValue {
        profile: TlsFetchProfile::CompatTls12,
        // Back-date the entry past the TTL configured above.
        updated_at: Instant::now() - Duration::from_secs(6),
    };
    profile_cache().insert(key.clone(), stale_winner);

    let ordered = order_profiles(&strategy, Some(&key), Instant::now());

    // The configured order wins, and the stale entry has been evicted.
    assert_eq!(ordered[0], TlsFetchProfile::ModernFirefoxLike);
    assert!(profile_cache().get(&key).is_none());
}
/// Building a ClientHello twice with identical arguments and the same RNG
/// handle must yield equal output.
#[test]
fn test_deterministic_client_hello_is_stable() {
    let rng = SecureRandom::new();
    // NOTE(review): the two trailing `true` flags are presumably
    // `grease_enabled` and `deterministic` — confirm against `build_client_hello`.
    let make_hello = || {
        build_client_hello(
            "stable.example",
            &rng,
            TlsFetchProfile::ModernChromeLike,
            true,
            true,
        )
    };

    let first = make_hello();
    let second = make_hello();
    assert_eq!(first, second);
}
}
+39 -63
View File
@@ -11,17 +11,19 @@ use tracing::{debug, info, warn};
use crate::config::ProxyConfig;
use crate::error::Result;
use crate::transport::UpstreamManager;
use super::MePool;
use super::http_fetch::https_get;
use super::rotation::{MeReinitTrigger, enqueue_reinit_trigger};
use super::secret::download_proxy_secret_with_max_len;
use super::secret::download_proxy_secret_with_max_len_via_upstream;
use super::selftest::record_timeskew_sample;
use std::time::SystemTime;
async fn retry_fetch(url: &str) -> Option<ProxyConfigData> {
async fn retry_fetch(url: &str, upstream: Option<Arc<UpstreamManager>>) -> Option<ProxyConfigData> {
let delays = [1u64, 5, 15];
for (i, d) in delays.iter().enumerate() {
match fetch_proxy_config(url).await {
match fetch_proxy_config_via_upstream(url, upstream.clone()).await {
Ok(cfg) => return Some(cfg),
Err(e) => {
if i == delays.len() - 1 {
@@ -95,14 +97,19 @@ pub async fn save_proxy_config_cache(path: &str, raw_text: &str) -> Result<()> {
Ok(())
}
#[allow(dead_code)]
pub async fn fetch_proxy_config_with_raw(url: &str) -> Result<(ProxyConfigData, String)> {
let resp = reqwest::get(url).await.map_err(|e| {
crate::error::ProxyError::Proxy(format!("fetch_proxy_config GET failed: {e}"))
})?;
let http_status = resp.status().as_u16();
fetch_proxy_config_with_raw_via_upstream(url, None).await
}
if let Some(date) = resp.headers().get(reqwest::header::DATE)
&& let Ok(date_str) = date.to_str()
pub async fn fetch_proxy_config_with_raw_via_upstream(
url: &str,
upstream: Option<Arc<UpstreamManager>>,
) -> Result<(ProxyConfigData, String)> {
let resp = https_get(url, upstream).await?;
let http_status = resp.status;
if let Some(date_str) = resp.date_header.as_deref()
&& let Ok(server_time) = httpdate::parse_http_date(date_str)
&& let Ok(skew) = SystemTime::now()
.duration_since(server_time)
@@ -123,9 +130,7 @@ pub async fn fetch_proxy_config_with_raw(url: &str) -> Result<(ProxyConfigData,
}
}
let text = resp.text().await.map_err(|e| {
crate::error::ProxyError::Proxy(format!("fetch_proxy_config read failed: {e}"))
})?;
let text = String::from_utf8_lossy(&resp.body).into_owned();
let parsed = parse_proxy_config_text(&text, http_status);
Ok((parsed, text))
}
@@ -260,8 +265,16 @@ fn parse_proxy_line(line: &str) -> Option<(i32, IpAddr, u16)> {
Some((dc, ip, port))
}
#[allow(dead_code)]
pub async fn fetch_proxy_config(url: &str) -> Result<ProxyConfigData> {
fetch_proxy_config_with_raw(url)
fetch_proxy_config_via_upstream(url, None).await
}
pub async fn fetch_proxy_config_via_upstream(
url: &str,
upstream: Option<Arc<UpstreamManager>>,
) -> Result<ProxyConfigData> {
fetch_proxy_config_with_raw_via_upstream(url, upstream)
.await
.map(|(parsed, _raw)| parsed)
}
@@ -300,53 +313,7 @@ async fn run_update_cycle(
state: &mut UpdaterState,
reinit_tx: &mpsc::Sender<MeReinitTrigger>,
) {
pool.update_runtime_reinit_policy(
cfg.general.hardswap,
cfg.general.me_pool_drain_ttl_secs,
cfg.general.me_instadrain,
cfg.general.me_pool_drain_threshold,
cfg.general.me_pool_drain_soft_evict_enabled,
cfg.general.me_pool_drain_soft_evict_grace_secs,
cfg.general.me_pool_drain_soft_evict_per_writer,
cfg.general.me_pool_drain_soft_evict_budget_per_core,
cfg.general.me_pool_drain_soft_evict_cooldown_ms,
cfg.general.effective_me_pool_force_close_secs(),
cfg.general.me_pool_min_fresh_ratio,
cfg.general.me_hardswap_warmup_delay_min_ms,
cfg.general.me_hardswap_warmup_delay_max_ms,
cfg.general.me_hardswap_warmup_extra_passes,
cfg.general.me_hardswap_warmup_pass_backoff_base_ms,
cfg.general.me_bind_stale_mode,
cfg.general.me_bind_stale_ttl_secs,
cfg.general.me_secret_atomic_snapshot,
cfg.general.me_deterministic_writer_sort,
cfg.general.me_writer_pick_mode,
cfg.general.me_writer_pick_sample_size,
cfg.general.me_single_endpoint_shadow_writers,
cfg.general.me_single_endpoint_outage_mode_enabled,
cfg.general.me_single_endpoint_outage_disable_quarantine,
cfg.general.me_single_endpoint_outage_backoff_min_ms,
cfg.general.me_single_endpoint_outage_backoff_max_ms,
cfg.general.me_single_endpoint_shadow_rotate_every_secs,
cfg.general.me_floor_mode,
cfg.general.me_adaptive_floor_idle_secs,
cfg.general.me_adaptive_floor_min_writers_single_endpoint,
cfg.general.me_adaptive_floor_min_writers_multi_endpoint,
cfg.general.me_adaptive_floor_recover_grace_secs,
cfg.general.me_adaptive_floor_writers_per_core_total,
cfg.general.me_adaptive_floor_cpu_cores_override,
cfg.general
.me_adaptive_floor_max_extra_writers_single_per_core,
cfg.general
.me_adaptive_floor_max_extra_writers_multi_per_core,
cfg.general.me_adaptive_floor_max_active_writers_per_core,
cfg.general.me_adaptive_floor_max_warm_writers_per_core,
cfg.general.me_adaptive_floor_max_active_writers_global,
cfg.general.me_adaptive_floor_max_warm_writers_global,
cfg.general.me_health_interval_ms_unhealthy,
cfg.general.me_health_interval_ms_healthy,
cfg.general.me_warn_rate_limit_ms,
);
let upstream = pool.upstream.clone();
let required_cfg_snapshots = cfg.general.me_config_stable_snapshots.max(1);
let required_secret_snapshots = cfg.general.proxy_secret_stable_snapshots.max(1);
@@ -354,7 +321,7 @@ async fn run_update_cycle(
let mut maps_changed = false;
let mut ready_v4: Option<(ProxyConfigData, u64)> = None;
let cfg_v4 = retry_fetch("https://core.telegram.org/getProxyConfig").await;
let cfg_v4 = retry_fetch("https://core.telegram.org/getProxyConfig", upstream.clone()).await;
if let Some(cfg_v4) = cfg_v4
&& snapshot_passes_guards(cfg, &cfg_v4, "getProxyConfig")
{
@@ -378,7 +345,11 @@ async fn run_update_cycle(
}
let mut ready_v6: Option<(ProxyConfigData, u64)> = None;
let cfg_v6 = retry_fetch("https://core.telegram.org/getProxyConfigV6").await;
let cfg_v6 = retry_fetch(
"https://core.telegram.org/getProxyConfigV6",
upstream.clone(),
)
.await;
if let Some(cfg_v6) = cfg_v6
&& snapshot_passes_guards(cfg, &cfg_v6, "getProxyConfigV6")
{
@@ -456,7 +427,12 @@ async fn run_update_cycle(
pool.reset_stun_state();
if cfg.general.proxy_secret_rotate_runtime {
match download_proxy_secret_with_max_len(cfg.general.proxy_secret_len_max).await {
match download_proxy_secret_with_max_len_via_upstream(
cfg.general.proxy_secret_len_max,
upstream,
)
.await
{
Ok(secret) => {
let secret_hash = hash_secret(&secret);
let stable_hits = state.secret.observe(secret_hash);
+7 -4
View File
@@ -161,7 +161,7 @@ impl MePool {
} else {
let connect_fut = async {
if addr.is_ipv6()
&& let Some(v6) = self.detected_ipv6
&& let Some(v6) = self.nat_runtime.detected_ipv6
{
match TcpSocket::new_v6() {
Ok(sock) => {
@@ -305,7 +305,7 @@ impl MePool {
}
MeSocksKdfPolicy::Compat => {
self.stats.increment_me_socks_kdf_compat_fallback();
if self.nat_probe {
if self.nat_runtime.nat_probe {
let bind_ip = Self::direct_bind_ip_for_stun(family, upstream_egress);
self.maybe_reflect_public_addr(family, bind_ip).await
} else {
@@ -313,7 +313,7 @@ impl MePool {
}
}
}
} else if self.nat_probe {
} else if self.nat_runtime.nat_probe {
let bind_ip = Self::direct_bind_ip_for_stun(family, upstream_egress);
self.maybe_reflect_public_addr(family, bind_ip).await
} else {
@@ -343,7 +343,10 @@ impl MePool {
.unwrap_or_default()
.as_secs() as u32;
let secret_atomic_snapshot = self.secret_atomic_snapshot.load(Ordering::Relaxed);
let secret_atomic_snapshot = self
.writer_selection_policy
.secret_atomic_snapshot
.load(Ordering::Relaxed);
let (ks, secret) = if secret_atomic_snapshot {
let snapshot = self.secret_snapshot().await;
(snapshot.key_selector, snapshot.secret)
+256 -100
View File
@@ -7,6 +7,8 @@ use std::sync::Arc;
use std::time::{Duration, Instant};
use rand::RngExt;
use tokio::sync::Semaphore;
use tokio::task::JoinSet;
use tracing::{debug, info, warn};
use crate::config::MeFloorMode;
@@ -14,6 +16,7 @@ use crate::crypto::SecureRandom;
use crate::network::IpFamily;
use super::MePool;
use super::pool::MeFamilyRuntimeState;
const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff
#[allow(dead_code)]
@@ -27,6 +30,9 @@ const HEALTH_RECONNECT_BUDGET_PER_CORE: usize = 2;
const HEALTH_RECONNECT_BUDGET_PER_DC: usize = 1;
const HEALTH_RECONNECT_BUDGET_MIN: usize = 4;
const HEALTH_RECONNECT_BUDGET_MAX: usize = 128;
const FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD: u32 = 5;
const FAMILY_SUPPRESS_DURATION_SECS: u64 = 60;
const FAMILY_RECOVER_SUCCESS_STREAK_TARGET: u32 = 2;
const HEALTH_DRAIN_CLOSE_BUDGET_PER_CORE: usize = 16;
const HEALTH_DRAIN_CLOSE_BUDGET_MIN: usize = 16;
const HEALTH_DRAIN_CLOSE_BUDGET_MAX: usize = 256;
@@ -56,6 +62,17 @@ struct FamilyFloorPlan {
target_writers_total: usize,
}
/// Result produced by one spawned per-DC reconnect task and consumed by the
/// join loop that updates backoff, next-attempt and inflight bookkeeping.
#[derive(Debug)]
struct FamilyReconnectOutcome {
/// `(dc, family)` key used to index the backoff / next-attempt / inflight maps.
key: (i32, IpFamily),
/// DC identifier, reported in log fields.
dc: i32,
/// IP family the reconnect was performed for, reported in log fields.
family: IpFamily,
/// Alive writer count captured before the task ran; the consumer adds
/// `restored` to it to get the current alive count.
alive: usize,
/// Writer count the DC must reach for its floor to count as restored.
required: usize,
/// Number of endpoints the task was given for this DC.
endpoint_count: usize,
/// Writers the task brought back (successful connects plus idle-writer swaps).
restored: usize,
}
pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_connections: usize) {
let mut backoff: HashMap<(i32, IpFamily), u64> = HashMap::new();
let mut next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new();
@@ -78,6 +95,7 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
};
tokio::time::sleep(interval).await;
pool.prune_closed_writers().await;
pool.sweep_endpoint_quarantine().await;
reap_draining_writers(&pool, &mut drain_warn_next_allowed).await;
let v4_degraded = check_family(
IpFamily::V4,
@@ -113,6 +131,8 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
&mut floor_warn_next_allowed,
)
.await;
update_family_runtime_state(&pool, IpFamily::V4, v4_degraded);
update_family_runtime_state(&pool, IpFamily::V6, v6_degraded);
degraded_interval = v4_degraded || v6_degraded;
}
}
@@ -135,9 +155,11 @@ pub(super) async fn reap_draining_writers(
let now_epoch_secs = MePool::now_epoch_secs();
let now = Instant::now();
let drain_ttl_secs = pool
.drain_runtime
.me_pool_drain_ttl_secs
.load(std::sync::atomic::Ordering::Relaxed);
let drain_threshold = pool
.drain_runtime
.me_pool_drain_threshold
.load(std::sync::atomic::Ordering::Relaxed);
let activity = pool.registry.writer_activity_snapshot().await;
@@ -221,7 +243,10 @@ pub(super) async fn reap_draining_writers(
endpoint = %writer.addr,
generation = writer.generation,
drain_ttl_secs,
force_close_secs = pool.me_pool_force_close_secs.load(std::sync::atomic::Ordering::Relaxed),
force_close_secs = pool
.drain_runtime
.me_pool_force_close_secs
.load(std::sync::atomic::Ordering::Relaxed),
allow_drain_fallback = writer.allow_drain_fallback,
"ME draining writer remains non-empty past drain TTL"
);
@@ -365,7 +390,8 @@ async fn check_family(
endpoints.sort_unstable();
endpoints.dedup();
}
let mut reconnect_budget = health_reconnect_budget(pool, dc_endpoints.len());
let reconnect_budget = health_reconnect_budget(pool, dc_endpoints.len());
let reconnect_sem = Arc::new(Semaphore::new(reconnect_budget));
if pool.floor_mode() == MeFloorMode::Static {
adaptive_idle_since.clear();
@@ -422,6 +448,10 @@ async fn check_family(
floor_plan.active_writers_current,
floor_plan.warm_writers_current,
);
let live_writer_ids_by_addr = Arc::new(live_writer_ids_by_addr);
let writer_idle_since = Arc::new(writer_idle_since);
let bound_clients_by_writer = Arc::new(bound_clients_by_writer);
let mut reconnect_set = JoinSet::<FamilyReconnectOutcome>::new();
for (dc, endpoints) in dc_endpoints {
if endpoints.is_empty() {
@@ -461,7 +491,7 @@ async fn check_family(
required,
outage_backoff,
outage_next_attempt,
&mut reconnect_budget,
&reconnect_sem,
)
.await;
continue;
@@ -495,9 +525,9 @@ async fn check_family(
&endpoints,
alive,
required,
&live_writer_ids_by_addr,
&writer_idle_since,
&bound_clients_by_writer,
live_writer_ids_by_addr.as_ref(),
writer_idle_since.as_ref(),
bound_clients_by_writer.as_ref(),
idle_refresh_next_attempt,
)
.await;
@@ -510,8 +540,8 @@ async fn check_family(
&endpoints,
alive,
required,
&live_writer_ids_by_addr,
&bound_clients_by_writer,
live_writer_ids_by_addr.as_ref(),
bound_clients_by_writer.as_ref(),
shadow_rotate_deadline,
)
.await;
@@ -521,8 +551,8 @@ async fn check_family(
family_degraded = true;
let now = Instant::now();
if reconnect_budget == 0 {
let base_ms = pool.me_reconnect_backoff_base.as_millis() as u64;
if reconnect_sem.available_permits() == 0 {
let base_ms = pool.reconnect_runtime.me_reconnect_backoff_base.as_millis() as u64;
let next_ms = (*backoff.get(&key).unwrap_or(&base_ms)).max(base_ms);
let jitter = next_ms / JITTER_FRAC_NUM;
let wait = Duration::from_millis(next_ms)
@@ -545,7 +575,10 @@ async fn check_family(
continue;
}
let max_concurrent = pool.me_reconnect_max_concurrent_per_dc.max(1) as usize;
let max_concurrent = pool
.reconnect_runtime
.me_reconnect_max_concurrent_per_dc
.max(1) as usize;
if *inflight.get(&key).unwrap_or(&0) >= max_concurrent {
continue;
}
@@ -564,117 +597,165 @@ async fn check_family(
continue;
}
*inflight.entry(key).or_insert(0) += 1;
let mut restored = 0usize;
for _ in 0..missing {
if reconnect_budget == 0 {
break;
}
reconnect_budget = reconnect_budget.saturating_sub(1);
if pool.active_contour_writer_count_total().await
>= floor_plan.active_cap_effective_total
{
let swapped = maybe_swap_idle_writer_for_cap(
pool,
rng,
dc,
family,
&endpoints,
&live_writer_ids_by_addr,
&writer_idle_since,
&bound_clients_by_writer,
let pool_for_reconnect = pool.clone();
let rng_for_reconnect = rng.clone();
let reconnect_sem_for_dc = reconnect_sem.clone();
let endpoints_for_dc = endpoints.clone();
let live_writer_ids_by_addr_for_dc = live_writer_ids_by_addr.clone();
let writer_idle_since_for_dc = writer_idle_since.clone();
let bound_clients_by_writer_for_dc = bound_clients_by_writer.clone();
let active_cap_effective_total = floor_plan.active_cap_effective_total;
reconnect_set.spawn(async move {
let mut restored = 0usize;
for _ in 0..missing {
let Ok(reconnect_permit) = reconnect_sem_for_dc.clone().try_acquire_owned() else {
break;
};
if pool_for_reconnect.active_contour_writer_count_total().await
>= active_cap_effective_total
{
let swapped = maybe_swap_idle_writer_for_cap(
&pool_for_reconnect,
&rng_for_reconnect,
dc,
family,
&endpoints_for_dc,
live_writer_ids_by_addr_for_dc.as_ref(),
writer_idle_since_for_dc.as_ref(),
bound_clients_by_writer_for_dc.as_ref(),
)
.await;
if swapped {
pool_for_reconnect
.stats
.increment_me_floor_swap_idle_total();
restored += 1;
continue;
}
pool_for_reconnect
.stats
.increment_me_floor_cap_block_total();
pool_for_reconnect
.stats
.increment_me_floor_swap_idle_failed_total();
debug!(
dc = %dc,
?family,
alive,
required,
active_cap_effective_total,
"Adaptive floor cap reached, reconnect attempt blocked"
);
break;
}
let res = tokio::time::timeout(
pool_for_reconnect.reconnect_runtime.me_one_timeout,
pool_for_reconnect.connect_endpoints_round_robin(
dc,
&endpoints_for_dc,
rng_for_reconnect.as_ref(),
),
)
.await;
if swapped {
pool.stats.increment_me_floor_swap_idle_total();
restored += 1;
continue;
match res {
Ok(true) => {
restored += 1;
pool_for_reconnect.stats.increment_me_reconnect_success();
}
Ok(false) => {
pool_for_reconnect.stats.increment_me_reconnect_attempt();
debug!(dc = %dc, ?family, "ME round-robin reconnect failed")
}
Err(_) => {
pool_for_reconnect.stats.increment_me_reconnect_attempt();
debug!(dc = %dc, ?family, "ME reconnect timed out");
}
}
pool.stats.increment_me_floor_cap_block_total();
pool.stats.increment_me_floor_swap_idle_failed_total();
debug!(
dc = %dc,
?family,
alive,
required,
active_cap_effective_total = floor_plan.active_cap_effective_total,
"Adaptive floor cap reached, reconnect attempt blocked"
);
break;
drop(reconnect_permit);
}
let res = tokio::time::timeout(
pool.me_one_timeout,
pool.connect_endpoints_round_robin(dc, &endpoints, rng.as_ref()),
)
.await;
match res {
Ok(true) => {
restored += 1;
pool.stats.increment_me_reconnect_success();
}
Ok(false) => {
pool.stats.increment_me_reconnect_attempt();
debug!(dc = %dc, ?family, "ME round-robin reconnect failed")
}
Err(_) => {
pool.stats.increment_me_reconnect_attempt();
debug!(dc = %dc, ?family, "ME reconnect timed out");
}
}
}
let now_alive = alive + restored;
if now_alive >= required {
info!(
dc = %dc,
?family,
alive = now_alive,
FamilyReconnectOutcome {
key,
dc,
family,
alive,
required,
endpoint_count = endpoints.len(),
endpoint_count: endpoints_for_dc.len(),
restored,
}
});
}
while let Some(joined) = reconnect_set.join_next().await {
let outcome = match joined {
Ok(outcome) => outcome,
Err(join_error) => {
debug!(error = %join_error, "Health reconnect task failed");
continue;
}
};
let now = Instant::now();
let now_alive = outcome.alive + outcome.restored;
if now_alive >= outcome.required {
info!(
dc = %outcome.dc,
family = ?outcome.family,
alive = now_alive,
required = outcome.required,
endpoint_count = outcome.endpoint_count,
"ME writer floor restored for DC"
);
backoff.insert(key, pool.me_reconnect_backoff_base.as_millis() as u64);
let jitter = pool.me_reconnect_backoff_base.as_millis() as u64 / JITTER_FRAC_NUM;
let wait = pool.me_reconnect_backoff_base
backoff.insert(
outcome.key,
pool.reconnect_runtime.me_reconnect_backoff_base.as_millis() as u64,
);
let jitter = pool.reconnect_runtime.me_reconnect_backoff_base.as_millis() as u64
/ JITTER_FRAC_NUM;
let wait = pool.reconnect_runtime.me_reconnect_backoff_base
+ Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
next_attempt.insert(key, now + wait);
next_attempt.insert(outcome.key, now + wait);
} else {
let curr = *backoff
.get(&key)
.unwrap_or(&(pool.me_reconnect_backoff_base.as_millis() as u64));
let next_ms =
(curr.saturating_mul(2)).min(pool.me_reconnect_backoff_cap.as_millis() as u64);
backoff.insert(key, next_ms);
.get(&outcome.key)
.unwrap_or(&(pool.reconnect_runtime.me_reconnect_backoff_base.as_millis() as u64));
let next_ms = (curr.saturating_mul(2))
.min(pool.reconnect_runtime.me_reconnect_backoff_cap.as_millis() as u64);
backoff.insert(outcome.key, next_ms);
let jitter = next_ms / JITTER_FRAC_NUM;
let wait = Duration::from_millis(next_ms)
+ Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
next_attempt.insert(key, now + wait);
next_attempt.insert(outcome.key, now + wait);
if pool.is_runtime_ready() {
let warn_cooldown = pool.warn_rate_limit_duration();
if should_emit_rate_limited_warn(floor_warn_next_allowed, key, now, warn_cooldown) {
if should_emit_rate_limited_warn(
floor_warn_next_allowed,
outcome.key,
now,
warn_cooldown,
) {
warn!(
dc = %dc,
?family,
dc = %outcome.dc,
family = ?outcome.family,
alive = now_alive,
required,
endpoint_count = endpoints.len(),
required = outcome.required,
endpoint_count = outcome.endpoint_count,
backoff_ms = next_ms,
"DC writer floor is below required level, scheduled reconnect"
);
}
} else {
info!(
dc = %dc,
?family,
dc = %outcome.dc,
family = ?outcome.family,
alive = now_alive,
required,
endpoint_count = endpoints.len(),
required = outcome.required,
endpoint_count = outcome.endpoint_count,
backoff_ms = next_ms,
"DC writer floor is below required level during startup, scheduled reconnect"
);
}
}
if let Some(v) = inflight.get_mut(&key) {
if let Some(v) = inflight.get_mut(&outcome.key) {
*v = v.saturating_sub(1);
}
}
@@ -691,6 +772,68 @@ fn health_reconnect_budget(pool: &Arc<MePool>, dc_groups: usize) -> usize {
.clamp(HEALTH_RECONNECT_BUDGET_MIN, HEALTH_RECONNECT_BUDGET_MAX)
}
/// Advances the per-family runtime state machine by one health-check result.
///
/// Transitions, evaluated in this order:
/// * While the stored suppression deadline is still in the future the family
///   stays `Suppressed`; a degraded tick still bumps the fail streak.
/// * A degraded tick bumps the fail streak; reaching
///   `FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD` suppresses the family for
///   `FAMILY_SUPPRESS_DURATION_SECS`, otherwise the family is `Degraded`.
/// * A non-degraded tick keeps a `Healthy` family healthy; any other previous
///   state accumulates a recovery streak and becomes `Healthy` once it reaches
///   `FAMILY_RECOVER_SUCCESS_STREAK_TARGET`, staying `Recovering` until then.
///
/// The "state since" timestamp is reset whenever the state changes or when it
/// was never set (stored as `0`).
fn update_family_runtime_state(pool: &Arc<MePool>, family: IpFamily, degraded: bool) {
    let now = MePool::now_epoch_secs();

    // Snapshot of what the pool currently records for this family.
    let prior_state = pool.family_runtime_state(family);
    let recorded_since = pool.family_runtime_state_since_epoch_secs(family);
    let prior_suppressed_until = pool.family_suppressed_until_epoch_secs(family);
    let prior_fail_streak = pool.family_fail_streak(family);
    let prior_recover_streak = pool.family_recover_success_streak(family);

    let suppression_active = prior_suppressed_until > now;
    // Fail streak as it would be after counting this tick as a failure.
    let bumped_fail_streak = prior_fail_streak.saturating_add(1);

    let (next_state, suppressed_until, fail_streak, recover_streak) =
        match (suppression_active, degraded) {
            (true, true) => (
                MeFamilyRuntimeState::Suppressed,
                prior_suppressed_until,
                bumped_fail_streak,
                0,
            ),
            (true, false) => (
                MeFamilyRuntimeState::Suppressed,
                prior_suppressed_until,
                prior_fail_streak,
                0,
            ),
            (false, true) if bumped_fail_streak >= FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD => (
                MeFamilyRuntimeState::Suppressed,
                now.saturating_add(FAMILY_SUPPRESS_DURATION_SECS),
                bumped_fail_streak,
                0,
            ),
            (false, true) => (MeFamilyRuntimeState::Degraded, 0, bumped_fail_streak, 0),
            (false, false) => {
                let bumped_recover_streak = prior_recover_streak.saturating_add(1);
                let already_healthy = matches!(prior_state, MeFamilyRuntimeState::Healthy);
                if already_healthy
                    || bumped_recover_streak >= FAMILY_RECOVER_SUCCESS_STREAK_TARGET
                {
                    (MeFamilyRuntimeState::Healthy, 0, 0, 0)
                } else {
                    (MeFamilyRuntimeState::Recovering, 0, 0, bumped_recover_streak)
                }
            }
        };

    let state_since = if next_state != prior_state || recorded_since == 0 {
        now
    } else {
        recorded_since
    };

    pool.set_family_runtime_state(
        family,
        next_state,
        state_since,
        suppressed_until,
        fail_streak,
        recover_streak,
    );
}
fn should_emit_rate_limited_warn(
next_allowed: &mut HashMap<(i32, IpFamily), Instant>,
key: (i32, IpFamily),
@@ -715,6 +858,7 @@ fn adaptive_floor_class_min(
) -> usize {
if endpoint_count <= 1 {
let min_single = (pool
.floor_runtime
.me_adaptive_floor_min_writers_single_endpoint
.load(std::sync::atomic::Ordering::Relaxed) as usize)
.max(1);
@@ -971,7 +1115,7 @@ async fn maybe_swap_idle_writer_for_cap(
};
let connected = match tokio::time::timeout(
pool.me_one_timeout,
pool.reconnect_runtime.me_one_timeout,
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
)
.await
@@ -1077,7 +1221,7 @@ async fn maybe_refresh_idle_writer_for_dc(
};
let rotate_ok = match tokio::time::timeout(
pool.me_one_timeout,
pool.reconnect_runtime.me_one_timeout,
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
)
.await
@@ -1188,7 +1332,7 @@ async fn recover_single_endpoint_outage(
required: usize,
outage_backoff: &mut HashMap<(i32, IpFamily), u64>,
outage_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
reconnect_budget: &mut usize,
reconnect_sem: &Arc<Semaphore>,
) {
let now = Instant::now();
if let Some(ts) = outage_next_attempt.get(&key)
@@ -1198,7 +1342,7 @@ async fn recover_single_endpoint_outage(
}
let (min_backoff_ms, max_backoff_ms) = pool.single_endpoint_outage_backoff_bounds_ms();
if *reconnect_budget == 0 {
if reconnect_sem.available_permits() == 0 {
outage_next_attempt.insert(key, now + Duration::from_millis(min_backoff_ms.max(250)));
debug!(
dc = %key.0,
@@ -1209,7 +1353,17 @@ async fn recover_single_endpoint_outage(
);
return;
}
*reconnect_budget = (*reconnect_budget).saturating_sub(1);
let Ok(_reconnect_permit) = reconnect_sem.clone().try_acquire_owned() else {
outage_next_attempt.insert(key, now + Duration::from_millis(min_backoff_ms.max(250)));
debug!(
dc = %key.0,
family = ?key.1,
%endpoint,
required,
"Single-endpoint outage reconnect deferred by semaphore saturation"
);
return;
};
pool.stats
.increment_me_single_endpoint_outage_reconnect_attempt_total();
@@ -1218,7 +1372,7 @@ async fn recover_single_endpoint_outage(
pool.stats
.increment_me_single_endpoint_quarantine_bypass_total();
match tokio::time::timeout(
pool.me_one_timeout,
pool.reconnect_runtime.me_one_timeout,
pool.connect_one_for_dc(endpoint, key.0, rng.as_ref()),
)
.await
@@ -1247,7 +1401,7 @@ async fn recover_single_endpoint_outage(
} else {
let one_endpoint = [endpoint];
match tokio::time::timeout(
pool.me_one_timeout,
pool.reconnect_runtime.me_one_timeout,
pool.connect_endpoints_round_robin(key.0, &one_endpoint, rng.as_ref()),
)
.await
@@ -1372,7 +1526,7 @@ async fn maybe_rotate_single_endpoint_shadow(
};
let rotate_ok = match tokio::time::timeout(
pool.me_one_timeout,
pool.reconnect_runtime.me_one_timeout,
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
)
.await
@@ -1687,6 +1841,8 @@ mod tests {
general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms,
)
+183
View File
@@ -0,0 +1,183 @@
use std::sync::Arc;
use std::time::Duration;
use http_body_util::{BodyExt, Empty};
use hyper::header::{CONNECTION, DATE, HOST, USER_AGENT};
use hyper::{Method, Request};
use hyper_util::rt::TokioIo;
use rustls::pki_types::ServerName;
use tokio::net::TcpStream;
use tokio::time::timeout;
use tokio_rustls::TlsConnector;
use tracing::debug;
use crate::error::{ProxyError, Result};
use crate::network::dns_overrides::resolve_socket_addr;
use crate::transport::{UpstreamManager, UpstreamStream};
const HTTP_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
const HTTP_REQUEST_TIMEOUT: Duration = Duration::from_secs(15);
/// Fully buffered result of a single HTTPS GET performed by `https_get`.
pub(crate) struct HttpsGetResponse {
/// Numeric HTTP status code of the response.
pub(crate) status: u16,
/// Value of the `Date` response header, when present and representable as a string.
pub(crate) date_header: Option<String>,
/// Complete response body bytes.
pub(crate) body: Vec<u8>,
}
/// Builds the rustls client configuration used for HTTPS fetches.
///
/// Trust anchors come from the bundled `webpki_roots` set, the crypto provider
/// is `ring`, TLS 1.3 and TLS 1.2 are enabled, and no client certificate is
/// presented.
///
/// # Panics
/// Panics if the provider rejects the requested protocol versions.
fn build_tls_client_config() -> Arc<rustls::ClientConfig> {
    let trust_anchors = {
        let mut store = rustls::RootCertStore::empty();
        store.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
        store
    };
    let crypto_provider = Arc::new(rustls::crypto::ring::default_provider());

    Arc::new(
        rustls::ClientConfig::builder_with_provider(crypto_provider)
            .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])
            .expect("HTTPS fetch rustls protocol versions must be valid")
            .with_root_certificates(trust_anchors)
            .with_no_client_auth(),
    )
}
/// Splits an `https://` URL into `(host, port, path-with-query)`.
///
/// # Errors
/// Returns `ProxyError::Proxy` when the URL does not parse, uses a scheme
/// other than `https`, has no host, or has no explicit or scheme-default port.
fn extract_host_port_path(url: &str) -> Result<(String, u16, String)> {
    let parsed = match url::Url::parse(url) {
        Ok(parsed) => parsed,
        Err(e) => return Err(ProxyError::Proxy(format!("invalid URL '{url}': {e}"))),
    };

    if parsed.scheme() != "https" {
        return Err(ProxyError::Proxy(format!(
            "unsupported URL scheme '{}': only https is supported",
            parsed.scheme()
        )));
    }

    let Some(host) = parsed.host_str() else {
        return Err(ProxyError::Proxy(format!("URL has no host: {url}")));
    };
    let Some(port) = parsed.port_or_known_default() else {
        return Err(ProxyError::Proxy(format!("URL has no known port: {url}")));
    };

    // Request target: the path (never empty on the wire) plus an optional query.
    let raw_path = parsed.path();
    let mut target = if raw_path.is_empty() {
        String::from("/")
    } else {
        raw_path.to_string()
    };
    if let Some(query) = parsed.query() {
        target.push('?');
        target.push_str(query);
    }

    Ok((host.to_string(), port, target))
}
/// Resolves `host:port` to a single socket address.
///
/// An address returned by `resolve_socket_addr` takes precedence. Otherwise the
/// host is looked up via `tokio::net::lookup_host`, preferring the first IPv4
/// result and falling back to the first result of any family.
///
/// # Errors
/// Returns `ProxyError::Proxy` when the lookup fails or yields no addresses.
async fn resolve_target_addr(host: &str, port: u16) -> Result<std::net::SocketAddr> {
    if let Some(overridden) = resolve_socket_addr(host, port) {
        return Ok(overridden);
    }

    let candidates = match tokio::net::lookup_host((host, port)).await {
        Ok(found) => found.collect::<Vec<std::net::SocketAddr>>(),
        Err(e) => {
            return Err(ProxyError::Proxy(format!(
                "DNS resolve failed for {host}:{port}: {e}"
            )));
        }
    };

    candidates
        .iter()
        .find(|candidate| candidate.is_ipv4())
        .or_else(|| candidates.first())
        .copied()
        .ok_or_else(|| ProxyError::Proxy(format!("DNS returned no addresses for {host}:{port}")))
}
async fn connect_https_transport(
host: &str,
port: u16,
upstream: Option<Arc<UpstreamManager>>,
) -> Result<UpstreamStream> {
if let Some(manager) = upstream {
let target = resolve_target_addr(host, port).await?;
return timeout(HTTP_CONNECT_TIMEOUT, manager.connect(target, None, None))
.await
.map_err(|_| ProxyError::Proxy(format!("upstream connect timeout for {host}:{port}")))?
.map_err(|e| {
ProxyError::Proxy(format!("upstream connect failed for {host}:{port}: {e}"))
});
}
if let Some(addr) = resolve_socket_addr(host, port) {
let stream = timeout(HTTP_CONNECT_TIMEOUT, TcpStream::connect(addr))
.await
.map_err(|_| ProxyError::Proxy(format!("connect timeout for {host}:{port}")))?
.map_err(|e| ProxyError::Proxy(format!("connect failed for {host}:{port}: {e}")))?;
return Ok(UpstreamStream::Tcp(stream));
}
let stream = timeout(HTTP_CONNECT_TIMEOUT, TcpStream::connect((host, port)))
.await
.map_err(|_| ProxyError::Proxy(format!("connect timeout for {host}:{port}")))?
.map_err(|e| ProxyError::Proxy(format!("connect failed for {host}:{port}: {e}")))?;
Ok(UpstreamStream::Tcp(stream))
}
/// Performs one HTTPS `GET` for `url` and returns the status code, the `Date` response
/// header (when present and valid as a string), and the full response body.
///
/// The transport comes from `connect_https_transport`, optionally routed through
/// `upstream`. The TLS handshake, the request, and the body read are each bounded by
/// `HTTP_REQUEST_TIMEOUT`. The request is sent with `Connection: close`.
///
/// # Errors
/// Returns `ProxyError::Proxy` for an invalid TLS server name, a failed or timed-out TLS
/// or HTTP handshake, a request build failure, or a failed or timed-out request/body read.
/// Errors from URL parsing and transport setup are propagated as-is.
pub(crate) async fn https_get(
    url: &str,
    upstream: Option<Arc<UpstreamManager>>,
) -> Result<HttpsGetResponse> {
    let (host, port, path_and_query) = extract_host_port_path(url)?;
    let transport = connect_https_transport(&host, port, upstream).await?;

    // TLS on top of the transport stream.
    let server_name = ServerName::try_from(host.clone())
        .map_err(|_| ProxyError::Proxy(format!("invalid TLS server name: {host}")))?;
    let tls_connector = TlsConnector::from(build_tls_client_config());
    let tls_outcome = timeout(
        HTTP_REQUEST_TIMEOUT,
        tls_connector.connect(server_name, transport),
    )
    .await;
    let tls_stream = tls_outcome
        .map_err(|_| ProxyError::Proxy(format!("TLS handshake timeout for {host}:{port}")))?
        .map_err(|e| ProxyError::Proxy(format!("TLS handshake failed for {host}:{port}: {e}")))?;

    // HTTP/1 client connection; the connection future is driven on its own task.
    let (mut sender, connection) = hyper::client::conn::http1::handshake(TokioIo::new(tls_stream))
        .await
        .map_err(|e| ProxyError::Proxy(format!("HTTP handshake failed for {host}:{port}: {e}")))?;
    tokio::spawn(async move {
        if let Err(e) = connection.await {
            debug!(error = %e, "HTTPS fetch connection task failed");
        }
    });

    // The port is omitted from the Host header only for 443.
    let host_header = match port {
        443 => host.clone(),
        _ => format!("{host}:{port}"),
    };
    let request = Request::builder()
        .method(Method::GET)
        .uri(path_and_query)
        .header(HOST, host_header)
        .header(USER_AGENT, "telemt-middle-proxy/1")
        .header(CONNECTION, "close")
        .body(Empty::<bytes::Bytes>::new())
        .map_err(|e| ProxyError::Proxy(format!("build HTTP request failed for {url}: {e}")))?;

    let send_outcome = timeout(HTTP_REQUEST_TIMEOUT, sender.send_request(request)).await;
    let response = send_outcome
        .map_err(|_| ProxyError::Proxy(format!("HTTP request timeout for {url}")))?
        .map_err(|e| ProxyError::Proxy(format!("HTTP request failed for {url}: {e}")))?;

    // Capture the head fields before the body consumes the response.
    let status = response.status().as_u16();
    let date_header = response
        .headers()
        .get(DATE)
        .and_then(|value| value.to_str().ok())
        .map(str::to_string);

    let body_outcome = timeout(HTTP_REQUEST_TIMEOUT, response.into_body().collect()).await;
    let body = body_outcome
        .map_err(|_| ProxyError::Proxy(format!("HTTP body read timeout for {url}")))?
        .map_err(|e| ProxyError::Proxy(format!("HTTP body read failed for {url}: {e}")))?
        .to_bytes()
        .to_vec();

    Ok(HttpsGetResponse {
        status,
        date_header,
        body,
    })
}
+5 -2
View File
@@ -13,6 +13,7 @@ mod health_integration_tests;
#[cfg(test)]
#[path = "tests/health_regression_tests.rs"]
mod health_regression_tests;
mod http_fetch;
mod ping;
mod pool;
mod pool_config;
@@ -44,7 +45,8 @@ use bytes::Bytes;
#[allow(unused_imports)]
pub use config_updater::{
ProxyConfigData, fetch_proxy_config, fetch_proxy_config_with_raw, load_proxy_config_cache,
ProxyConfigData, fetch_proxy_config, fetch_proxy_config_via_upstream,
fetch_proxy_config_with_raw, fetch_proxy_config_with_raw_via_upstream, load_proxy_config_cache,
me_config_updater, save_proxy_config_cache,
};
pub use health::{me_drain_timeout_enforcer, me_health_monitor, me_zombie_writer_watchdog};
@@ -57,7 +59,8 @@ pub use pool::MePool;
pub use pool_nat::{detect_public_ip, stun_probe};
pub use registry::ConnRegistry;
pub use rotation::{MeReinitTrigger, me_reinit_scheduler, me_rotation_task};
pub use secret::fetch_proxy_secret;
#[allow(unused_imports)]
pub use secret::{fetch_proxy_secret, fetch_proxy_secret_with_upstream};
pub(crate) use selftest::{bnd_snapshot, timeskew_snapshot, upstream_bnd_snapshots};
pub use wire::proto_flags_for_tag;
File diff suppressed because it is too large Load Diff
+2 -2
View File
@@ -72,7 +72,7 @@ impl MePool {
}
if changed {
self.rebuild_endpoint_dc_map().await;
self.writer_available.notify_waiters();
self.notify_writer_epoch();
}
if changed {
SnapshotApplyOutcome::AppliedChanged
@@ -112,7 +112,7 @@ impl MePool {
pub async fn reconnect_all(self: &Arc<Self>) {
let ws = self.writers.read().await.clone();
for w in ws {
for w in ws.iter() {
if let Ok(()) = self
.connect_one_for_dc(w.addr, w.writer_dc, self.rng.as_ref())
.await
+10 -5
View File
@@ -14,7 +14,10 @@ use super::pool::MePool;
impl MePool {
pub async fn init(self: &Arc<Self>, pool_size: usize, rng: &Arc<SecureRandom>) -> Result<()> {
let family_order = self.family_order();
let connect_concurrency = self.me_reconnect_max_concurrent_per_dc.max(1) as usize;
let connect_concurrency = self
.reconnect_runtime
.me_reconnect_max_concurrent_per_dc
.max(1) as usize;
let ks = self.key_selector().await;
info!(
me_servers = self.proxy_map_v4.read().await.len(),
@@ -250,10 +253,12 @@ impl MePool {
return false;
}
if self.me_warmup_stagger_enabled {
let jitter =
rand::rng().random_range(0..=self.me_warmup_step_jitter.as_millis() as u64);
let delay_ms = self.me_warmup_step_delay.as_millis() as u64 + jitter;
if self.reconnect_runtime.me_warmup_stagger_enabled {
let jitter = rand::rng().random_range(
0..=self.reconnect_runtime.me_warmup_step_jitter.as_millis() as u64,
);
let delay_ms =
self.reconnect_runtime.me_warmup_step_delay.as_millis() as u64 + jitter;
tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
}
}
+37 -27
View File
@@ -42,10 +42,10 @@ pub async fn detect_public_ip() -> Option<IpAddr> {
impl MePool {
fn configured_stun_servers(&self) -> Vec<String> {
if !self.nat_stun_servers.is_empty() {
return self.nat_stun_servers.clone();
if !self.nat_runtime.nat_stun_servers.is_empty() {
return self.nat_runtime.nat_stun_servers.clone();
}
if let Some(s) = &self.nat_stun
if let Some(s) = &self.nat_runtime.nat_stun
&& !s.trim().is_empty()
{
return vec![s.clone()];
@@ -64,7 +64,7 @@ impl MePool {
let mut next_idx = 0usize;
let mut live_servers = Vec::new();
let mut best_by_ip: HashMap<IpAddr, (usize, std::net::SocketAddr)> = HashMap::new();
let concurrency = self.nat_probe_concurrency.max(1);
let concurrency = self.nat_runtime.nat_probe_concurrency.max(1);
while next_idx < servers.len() || !join_set.is_empty() {
while next_idx < servers.len() && join_set.len() < concurrency {
@@ -137,9 +137,13 @@ impl MePool {
}
pub(super) fn translate_ip_for_nat(&self, ip: IpAddr) -> IpAddr {
let nat_ip = self
.nat_ip_cfg
.or_else(|| self.nat_ip_detected.try_read().ok().and_then(|g| *g));
let nat_ip = self.nat_runtime.nat_ip_cfg.or_else(|| {
self.nat_runtime
.nat_ip_detected
.try_read()
.ok()
.and_then(|g| *g)
});
let Some(nat_ip) = nat_ip else {
return ip;
@@ -163,7 +167,7 @@ impl MePool {
addr: std::net::SocketAddr,
reflected: Option<std::net::SocketAddr>,
) -> std::net::SocketAddr {
let ip = if let Some(nat_ip) = self.nat_ip_cfg {
let ip = if let Some(nat_ip) = self.nat_runtime.nat_ip_cfg {
match (addr.ip(), nat_ip) {
(IpAddr::V4(_), IpAddr::V4(dst)) => IpAddr::V4(dst),
(IpAddr::V6(_), IpAddr::V6(dst)) => IpAddr::V6(dst),
@@ -185,22 +189,22 @@ impl MePool {
}
pub(super) async fn maybe_detect_nat_ip(&self, local_ip: IpAddr) -> Option<IpAddr> {
if self.nat_ip_cfg.is_some() {
return self.nat_ip_cfg;
if self.nat_runtime.nat_ip_cfg.is_some() {
return self.nat_runtime.nat_ip_cfg;
}
if !(is_bogon(local_ip) || local_ip.is_loopback() || local_ip.is_unspecified()) {
return None;
}
if let Some(ip) = *self.nat_ip_detected.read().await {
if let Some(ip) = *self.nat_runtime.nat_ip_detected.read().await {
return Some(ip);
}
match fetch_public_ipv4_with_retry().await {
Ok(Some(ip)) => {
{
let mut guard = self.nat_ip_detected.write().await;
let mut guard = self.nat_runtime.nat_ip_detected.write().await;
*guard = Some(IpAddr::V4(ip));
}
info!(public_ip = %ip, "Auto-detected public IP for NAT translation");
@@ -231,10 +235,10 @@ impl MePool {
}
// Backoff window
if use_shared_cache
&& let Some(until) = *self.stun_backoff_until.read().await
&& let Some(until) = *self.nat_runtime.stun_backoff_until.read().await
&& Instant::now() < until
{
if let Ok(cache) = self.nat_reflection_cache.try_lock() {
if let Ok(cache) = self.nat_runtime.nat_reflection_cache.try_lock() {
let slot = match family {
IpFamily::V4 => cache.v4,
IpFamily::V6 => cache.v6,
@@ -244,7 +248,8 @@ impl MePool {
return None;
}
if use_shared_cache && let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
if use_shared_cache && let Ok(mut cache) = self.nat_runtime.nat_reflection_cache.try_lock()
{
let slot = match family {
IpFamily::V4 => &mut cache.v4,
IpFamily::V6 => &mut cache.v6,
@@ -258,18 +263,18 @@ impl MePool {
let _singleflight_guard = if use_shared_cache {
Some(match family {
IpFamily::V4 => self.nat_reflection_singleflight_v4.lock().await,
IpFamily::V6 => self.nat_reflection_singleflight_v6.lock().await,
IpFamily::V4 => self.nat_runtime.nat_reflection_singleflight_v4.lock().await,
IpFamily::V6 => self.nat_runtime.nat_reflection_singleflight_v6.lock().await,
})
} else {
None
};
if use_shared_cache
&& let Some(until) = *self.stun_backoff_until.read().await
&& let Some(until) = *self.nat_runtime.stun_backoff_until.read().await
&& Instant::now() < until
{
if let Ok(cache) = self.nat_reflection_cache.try_lock() {
if let Ok(cache) = self.nat_runtime.nat_reflection_cache.try_lock() {
let slot = match family {
IpFamily::V4 => cache.v4,
IpFamily::V6 => cache.v6,
@@ -279,7 +284,8 @@ impl MePool {
return None;
}
if use_shared_cache && let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
if use_shared_cache && let Ok(mut cache) = self.nat_runtime.nat_reflection_cache.try_lock()
{
let slot = match family {
IpFamily::V4 => &mut cache.v4,
IpFamily::V6 => &mut cache.v6,
@@ -292,13 +298,14 @@ impl MePool {
}
let attempt = if use_shared_cache {
self.nat_probe_attempts
self.nat_runtime
.nat_probe_attempts
.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
} else {
0
};
let configured_servers = self.configured_stun_servers();
let live_snapshot = self.nat_stun_live_servers.read().await.clone();
let live_snapshot = self.nat_runtime.nat_stun_live_servers.read().await.clone();
let primary_servers = if live_snapshot.is_empty() {
configured_servers.clone()
} else {
@@ -322,14 +329,15 @@ impl MePool {
let live_server_count = live_servers.len();
if !live_servers.is_empty() {
*self.nat_stun_live_servers.write().await = live_servers;
*self.nat_runtime.nat_stun_live_servers.write().await = live_servers;
} else {
self.nat_stun_live_servers.write().await.clear();
self.nat_runtime.nat_stun_live_servers.write().await.clear();
}
if let Some(reflected_addr) = selected_reflected {
if use_shared_cache {
self.nat_probe_attempts
self.nat_runtime
.nat_probe_attempts
.store(0, std::sync::atomic::Ordering::Relaxed);
}
info!(
@@ -338,7 +346,9 @@ impl MePool {
"STUN-Quorum reached, IP: {}",
reflected_addr.ip()
);
if use_shared_cache && let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
if use_shared_cache
&& let Ok(mut cache) = self.nat_runtime.nat_reflection_cache.try_lock()
{
let slot = match family {
IpFamily::V4 => &mut cache.v4,
IpFamily::V6 => &mut cache.v6,
@@ -350,7 +360,7 @@ impl MePool {
if use_shared_cache {
let backoff = Duration::from_secs(60 * 2u64.pow((attempt as u32).min(6)));
*self.stun_backoff_until.write().await = Some(Instant::now() + backoff);
*self.nat_runtime.stun_backoff_until.write().await = Some(Instant::now() + backoff);
}
None
}
+38 -1
View File
@@ -13,13 +13,40 @@ use super::pool::{MePool, RefillDcKey, RefillEndpointKey, WriterContour};
const ME_FLAP_UPTIME_THRESHOLD_SECS: u64 = 20;
const ME_FLAP_QUARANTINE_SECS: u64 = 25;
const ME_FLAP_MIN_UPTIME_MILLIS: u64 = 500;
const ME_REFILL_TOTAL_ATTEMPT_CAP: u32 = 20;
impl MePool {
/// Prunes the endpoint quarantine table.
///
/// An entry is kept only while its expiry is still in the future and its address is
/// still a key of `endpoint_dc_map`; everything else is dropped.
pub(super) async fn sweep_endpoint_quarantine(&self) {
    // Snapshot the configured endpoints first so the map's read guard is released
    // before the quarantine lock is taken.
    let configured: HashSet<SocketAddr> = {
        let dc_map = self.endpoint_dc_map.read().await;
        dc_map.keys().copied().collect()
    };
    let now = Instant::now();
    self.endpoint_quarantine
        .lock()
        .await
        .retain(|addr, expiry| configured.contains(addr) && *expiry > now);
}
pub(super) async fn maybe_quarantine_flapping_endpoint(
&self,
addr: SocketAddr,
uptime: Duration,
reason: &'static str,
) {
if uptime < Duration::from_millis(ME_FLAP_MIN_UPTIME_MILLIS) {
debug!(
%addr,
reason,
uptime_ms = uptime.as_millis(),
min_uptime_ms = ME_FLAP_MIN_UPTIME_MILLIS,
"Skipping flap quarantine for ultra-short writer lifetime"
);
return;
}
if uptime > Duration::from_secs(ME_FLAP_UPTIME_THRESHOLD_SECS) {
return;
}
@@ -31,6 +58,7 @@ impl MePool {
self.stats.increment_me_endpoint_quarantine_total();
warn!(
%addr,
reason,
uptime_ms = uptime.as_millis(),
quarantine_secs = ME_FLAP_QUARANTINE_SECS,
"ME endpoint temporarily quarantined due to rapid writer flap"
@@ -205,11 +233,16 @@ impl MePool {
}
async fn refill_writer_after_loss(self: &Arc<Self>, addr: SocketAddr, writer_dc: i32) -> bool {
let fast_retries = self.me_reconnect_fast_retry_count.max(1);
let fast_retries = self.reconnect_runtime.me_reconnect_fast_retry_count.max(1);
let mut total_attempts = 0u32;
let same_endpoint_quarantined = self.is_endpoint_quarantined(addr).await;
if !same_endpoint_quarantined {
for attempt in 0..fast_retries {
if total_attempts >= ME_REFILL_TOTAL_ATTEMPT_CAP {
break;
}
total_attempts = total_attempts.saturating_add(1);
self.stats.increment_me_reconnect_attempt();
match self
.connect_one_for_dc(addr, writer_dc, self.rng.as_ref())
@@ -250,6 +283,10 @@ impl MePool {
}
for attempt in 0..fast_retries {
if total_attempts >= ME_REFILL_TOTAL_ATTEMPT_CAP {
break;
}
total_attempts = total_attempts.saturating_add(1);
self.stats.increment_me_reconnect_attempt();
if self
.connect_endpoints_round_robin(writer_dc, &dc_endpoints, self.rng.as_ref())
+50 -20
View File
@@ -37,16 +37,23 @@ impl MePool {
}
fn clear_pending_hardswap_state(&self) {
self.pending_hardswap_generation.store(0, Ordering::Relaxed);
self.pending_hardswap_started_at_epoch_secs
self.reinit
.pending_hardswap_generation
.store(0, Ordering::Relaxed);
self.pending_hardswap_map_hash.store(0, Ordering::Relaxed);
self.warm_generation.store(0, Ordering::Relaxed);
self.reinit
.pending_hardswap_started_at_epoch_secs
.store(0, Ordering::Relaxed);
self.reinit
.pending_hardswap_map_hash
.store(0, Ordering::Relaxed);
self.reinit.warm_generation.store(0, Ordering::Relaxed);
}
async fn promote_warm_generation_to_active(&self, generation: u64) {
self.active_generation.store(generation, Ordering::Relaxed);
self.warm_generation.store(0, Ordering::Relaxed);
self.reinit
.active_generation
.store(generation, Ordering::Relaxed);
self.reinit.warm_generation.store(0, Ordering::Relaxed);
let ws = self.writers.read().await;
for writer in ws.iter() {
@@ -184,8 +191,14 @@ impl MePool {
}
fn hardswap_warmup_connect_delay_ms(&self) -> u64 {
let min_ms = self.me_hardswap_warmup_delay_min_ms.load(Ordering::Relaxed);
let max_ms = self.me_hardswap_warmup_delay_max_ms.load(Ordering::Relaxed);
let min_ms = self
.reinit
.me_hardswap_warmup_delay_min_ms
.load(Ordering::Relaxed);
let max_ms = self
.reinit
.me_hardswap_warmup_delay_max_ms
.load(Ordering::Relaxed);
let (min_ms, max_ms) = if min_ms <= max_ms {
(min_ms, max_ms)
} else {
@@ -199,9 +212,11 @@ impl MePool {
fn hardswap_warmup_backoff_ms(&self, pass_idx: usize) -> u64 {
let base_ms = self
.reinit
.me_hardswap_warmup_pass_backoff_base_ms
.load(Ordering::Relaxed);
let cap_ms = (self.me_reconnect_backoff_cap.as_millis() as u64).max(base_ms);
let cap_ms =
(self.reconnect_runtime.me_reconnect_backoff_cap.as_millis() as u64).max(base_ms);
let shift = (pass_idx as u32).min(20);
let scaled = base_ms.saturating_mul(1u64 << shift);
let core = scaled.min(cap_ms);
@@ -244,6 +259,7 @@ impl MePool {
desired_by_dc: &HashMap<i32, HashSet<SocketAddr>>,
) {
let extra_passes = self
.reinit
.me_hardswap_warmup_extra_passes
.load(Ordering::Relaxed)
.min(10) as usize;
@@ -369,13 +385,20 @@ impl MePool {
let desired_map_hash = Self::desired_map_hash(&desired_by_dc);
let previous_generation = self.current_generation();
let hardswap = self.hardswap.load(Ordering::Relaxed);
let hardswap = self.reinit.hardswap.load(Ordering::Relaxed);
let generation = if hardswap {
let pending_generation = self.pending_hardswap_generation.load(Ordering::Relaxed);
let pending_generation = self
.reinit
.pending_hardswap_generation
.load(Ordering::Relaxed);
let pending_started_at = self
.reinit
.pending_hardswap_started_at_epoch_secs
.load(Ordering::Relaxed);
let pending_map_hash = self.pending_hardswap_map_hash.load(Ordering::Relaxed);
let pending_map_hash = self
.reinit
.pending_hardswap_map_hash
.load(Ordering::Relaxed);
let pending_age_secs = now_epoch_secs.saturating_sub(pending_started_at);
let pending_ttl_expired =
pending_started_at > 0 && pending_age_secs > ME_HARDSWAP_PENDING_TTL_SECS;
@@ -405,24 +428,30 @@ impl MePool {
"ME hardswap pending generation expired by TTL; starting fresh generation"
);
}
let next_generation = self.generation.fetch_add(1, Ordering::Relaxed) + 1;
self.pending_hardswap_generation
let next_generation = self.reinit.generation.fetch_add(1, Ordering::Relaxed) + 1;
self.reinit
.pending_hardswap_generation
.store(next_generation, Ordering::Relaxed);
self.pending_hardswap_started_at_epoch_secs
self.reinit
.pending_hardswap_started_at_epoch_secs
.store(now_epoch_secs, Ordering::Relaxed);
self.pending_hardswap_map_hash
self.reinit
.pending_hardswap_map_hash
.store(desired_map_hash, Ordering::Relaxed);
self.warm_generation
self.reinit
.warm_generation
.store(next_generation, Ordering::Relaxed);
next_generation
}
} else {
self.clear_pending_hardswap_state();
self.generation.fetch_add(1, Ordering::Relaxed) + 1
self.reinit.generation.fetch_add(1, Ordering::Relaxed) + 1
};
if hardswap {
self.warm_generation.store(generation, Ordering::Relaxed);
self.reinit
.warm_generation
.store(generation, Ordering::Relaxed);
self.warmup_generation_for_all_dcs(rng, generation, &desired_by_dc)
.await;
} else {
@@ -436,7 +465,8 @@ impl MePool {
.map(|w| (w.writer_dc, w.addr))
.collect();
let min_ratio = Self::permille_to_ratio(
self.me_pool_min_fresh_ratio_permille
self.drain_runtime
.me_pool_min_fresh_ratio_permille
.load(Ordering::Relaxed),
);
let (coverage_ratio, missing_dc) =
@@ -94,9 +94,9 @@ impl MePool {
pub(crate) async fn api_nat_stun_snapshot(&self) -> MeApiNatStunSnapshot {
let now = Instant::now();
let mut configured_servers = if !self.nat_stun_servers.is_empty() {
self.nat_stun_servers.clone()
} else if let Some(stun) = &self.nat_stun {
let mut configured_servers = if !self.nat_runtime.nat_stun_servers.is_empty() {
self.nat_runtime.nat_stun_servers.clone()
} else if let Some(stun) = &self.nat_runtime.nat_stun {
if stun.trim().is_empty() {
Vec::new()
} else {
@@ -108,11 +108,11 @@ impl MePool {
configured_servers.sort();
configured_servers.dedup();
let mut live_servers = self.nat_stun_live_servers.read().await.clone();
let mut live_servers = self.nat_runtime.nat_stun_live_servers.read().await.clone();
live_servers.sort();
live_servers.dedup();
let reflection = self.nat_reflection_cache.lock().await;
let reflection = self.nat_runtime.nat_reflection_cache.lock().await;
let reflection_v4 = reflection.v4.map(|(ts, addr)| MeApiNatReflectionSnapshot {
addr,
age_secs: now.saturating_duration_since(ts).as_secs(),
@@ -123,17 +123,19 @@ impl MePool {
});
drop(reflection);
let backoff_until = *self.stun_backoff_until.read().await;
let backoff_until = *self.nat_runtime.stun_backoff_until.read().await;
let stun_backoff_remaining_ms = backoff_until.and_then(|until| {
(until > now).then_some(until.duration_since(now).as_millis() as u64)
});
MeApiNatStunSnapshot {
nat_probe_enabled: self.nat_probe,
nat_probe_enabled: self.nat_runtime.nat_probe,
nat_probe_disabled_runtime: self
.nat_runtime
.nat_probe_disabled
.load(std::sync::atomic::Ordering::Relaxed),
nat_probe_attempts: self
.nat_runtime
.nat_probe_attempts
.load(std::sync::atomic::Ordering::Relaxed),
configured_servers,
+91 -28
View File
@@ -160,7 +160,7 @@ impl MePool {
let writers = self.writers.read().await.clone();
let mut live_writers_by_dc = HashMap::<i16, usize>::new();
for writer in writers {
for writer in writers.iter() {
if writer.draining.load(Ordering::Relaxed) {
continue;
}
@@ -197,7 +197,7 @@ impl MePool {
let writers = self.writers.read().await.clone();
let mut live_writers_by_dc = HashMap::<i16, usize>::new();
for writer in writers {
for writer in writers.iter() {
if writer.draining.load(Ordering::Relaxed) {
continue;
}
@@ -224,7 +224,10 @@ impl MePool {
pub(crate) async fn api_status_snapshot(&self) -> MeApiStatusSnapshot {
let now_epoch_secs = Self::now_epoch_secs();
let active_generation = self.current_generation();
let drain_ttl_secs = self.me_pool_drain_ttl_secs.load(Ordering::Relaxed);
let drain_ttl_secs = self
.drain_runtime
.me_pool_drain_ttl_secs
.load(Ordering::Relaxed);
let mut endpoints_by_dc = BTreeMap::<i16, BTreeSet<SocketAddr>>::new();
if self.decision.ipv4_me {
@@ -255,7 +258,7 @@ impl MePool {
let mut dc_rtt_agg = HashMap::<i16, (f64, u64)>::new();
let mut writer_rows = Vec::<MeApiWriterStatusSnapshot>::with_capacity(writers.len());
for writer in writers {
for writer in writers.iter() {
let endpoint = writer.addr;
let dc = i16::try_from(writer.writer_dc).ok();
let draining = writer.draining.load(Ordering::Relaxed);
@@ -293,9 +296,7 @@ impl MePool {
WriterContour::Draining => "draining",
};
if !draining
&& let Some(dc_idx) = dc
{
if !draining && let Some(dc_idx) = dc {
*live_writers_by_dc_endpoint
.entry((dc_idx, endpoint))
.or_insert(0) += 1;
@@ -338,6 +339,7 @@ impl MePool {
let mut fresh_alive_writers = 0usize;
let floor_mode = self.floor_mode();
let adaptive_cpu_cores = (self
.floor_runtime
.me_adaptive_floor_cpu_cores_effective
.load(Ordering::Relaxed) as usize)
.max(1);
@@ -352,22 +354,26 @@ impl MePool {
self.required_writers_for_dc_with_floor_mode(endpoint_count, false);
let floor_min = if endpoint_count <= 1 {
(self
.floor_runtime
.me_adaptive_floor_min_writers_single_endpoint
.load(Ordering::Relaxed) as usize)
.max(1)
.min(base_required.max(1))
} else {
(self
.floor_runtime
.me_adaptive_floor_min_writers_multi_endpoint
.load(Ordering::Relaxed) as usize)
.max(1)
.min(base_required.max(1))
};
let extra_per_core = if endpoint_count <= 1 {
self.me_adaptive_floor_max_extra_writers_single_per_core
self.floor_runtime
.me_adaptive_floor_max_extra_writers_single_per_core
.load(Ordering::Relaxed) as usize
} else {
self.me_adaptive_floor_max_extra_writers_multi_per_core
self.floor_runtime
.me_adaptive_floor_max_extra_writers_multi_per_core
.load(Ordering::Relaxed) as usize
};
let floor_max =
@@ -438,6 +444,7 @@ impl MePool {
let now = Instant::now();
let now_epoch_secs = Self::now_epoch_secs();
let pending_started_at = self
.reinit
.pending_hardswap_started_at_epoch_secs
.load(Ordering::Relaxed);
let pending_hardswap_age_secs =
@@ -479,119 +486,175 @@ impl MePool {
}
MeApiRuntimeSnapshot {
active_generation: self.active_generation.load(Ordering::Relaxed),
warm_generation: self.warm_generation.load(Ordering::Relaxed),
pending_hardswap_generation: self.pending_hardswap_generation.load(Ordering::Relaxed),
active_generation: self.reinit.active_generation.load(Ordering::Relaxed),
warm_generation: self.reinit.warm_generation.load(Ordering::Relaxed),
pending_hardswap_generation: self
.reinit
.pending_hardswap_generation
.load(Ordering::Relaxed),
pending_hardswap_age_secs,
hardswap_enabled: self.hardswap.load(Ordering::Relaxed),
hardswap_enabled: self.reinit.hardswap.load(Ordering::Relaxed),
floor_mode: floor_mode_label(self.floor_mode()),
adaptive_floor_idle_secs: self.me_adaptive_floor_idle_secs.load(Ordering::Relaxed),
adaptive_floor_idle_secs: self
.floor_runtime
.me_adaptive_floor_idle_secs
.load(Ordering::Relaxed),
adaptive_floor_min_writers_single_endpoint: self
.floor_runtime
.me_adaptive_floor_min_writers_single_endpoint
.load(Ordering::Relaxed),
adaptive_floor_min_writers_multi_endpoint: self
.floor_runtime
.me_adaptive_floor_min_writers_multi_endpoint
.load(Ordering::Relaxed),
adaptive_floor_recover_grace_secs: self
.floor_runtime
.me_adaptive_floor_recover_grace_secs
.load(Ordering::Relaxed),
adaptive_floor_writers_per_core_total: self
.floor_runtime
.me_adaptive_floor_writers_per_core_total
.load(Ordering::Relaxed) as u16,
adaptive_floor_cpu_cores_override: self
.floor_runtime
.me_adaptive_floor_cpu_cores_override
.load(Ordering::Relaxed) as u16,
adaptive_floor_max_extra_writers_single_per_core: self
.floor_runtime
.me_adaptive_floor_max_extra_writers_single_per_core
.load(Ordering::Relaxed)
as u16,
adaptive_floor_max_extra_writers_multi_per_core: self
.floor_runtime
.me_adaptive_floor_max_extra_writers_multi_per_core
.load(Ordering::Relaxed)
as u16,
adaptive_floor_max_active_writers_per_core: self
.floor_runtime
.me_adaptive_floor_max_active_writers_per_core
.load(Ordering::Relaxed)
as u16,
adaptive_floor_max_warm_writers_per_core: self
.floor_runtime
.me_adaptive_floor_max_warm_writers_per_core
.load(Ordering::Relaxed)
as u16,
adaptive_floor_max_active_writers_global: self
.floor_runtime
.me_adaptive_floor_max_active_writers_global
.load(Ordering::Relaxed),
adaptive_floor_max_warm_writers_global: self
.floor_runtime
.me_adaptive_floor_max_warm_writers_global
.load(Ordering::Relaxed),
adaptive_floor_cpu_cores_detected: self
.floor_runtime
.me_adaptive_floor_cpu_cores_detected
.load(Ordering::Relaxed),
adaptive_floor_cpu_cores_effective: self
.floor_runtime
.me_adaptive_floor_cpu_cores_effective
.load(Ordering::Relaxed),
adaptive_floor_global_cap_raw: self
.floor_runtime
.me_adaptive_floor_global_cap_raw
.load(Ordering::Relaxed),
adaptive_floor_global_cap_effective: self
.floor_runtime
.me_adaptive_floor_global_cap_effective
.load(Ordering::Relaxed),
adaptive_floor_target_writers_total: self
.floor_runtime
.me_adaptive_floor_target_writers_total
.load(Ordering::Relaxed),
adaptive_floor_active_cap_configured: self
.floor_runtime
.me_adaptive_floor_active_cap_configured
.load(Ordering::Relaxed),
adaptive_floor_active_cap_effective: self
.floor_runtime
.me_adaptive_floor_active_cap_effective
.load(Ordering::Relaxed),
adaptive_floor_warm_cap_configured: self
.floor_runtime
.me_adaptive_floor_warm_cap_configured
.load(Ordering::Relaxed),
adaptive_floor_warm_cap_effective: self
.floor_runtime
.me_adaptive_floor_warm_cap_effective
.load(Ordering::Relaxed),
adaptive_floor_active_writers_current: self
.floor_runtime
.me_adaptive_floor_active_writers_current
.load(Ordering::Relaxed),
adaptive_floor_warm_writers_current: self
.floor_runtime
.me_adaptive_floor_warm_writers_current
.load(Ordering::Relaxed),
me_keepalive_enabled: self.me_keepalive_enabled,
me_keepalive_interval_secs: self.me_keepalive_interval.as_secs(),
me_keepalive_jitter_secs: self.me_keepalive_jitter.as_secs(),
me_keepalive_payload_random: self.me_keepalive_payload_random,
rpc_proxy_req_every_secs: self.rpc_proxy_req_every_secs.load(Ordering::Relaxed),
me_reconnect_max_concurrent_per_dc: self.me_reconnect_max_concurrent_per_dc,
me_reconnect_backoff_base_ms: self.me_reconnect_backoff_base.as_millis() as u64,
me_reconnect_backoff_cap_ms: self.me_reconnect_backoff_cap.as_millis() as u64,
me_reconnect_fast_retry_count: self.me_reconnect_fast_retry_count,
me_pool_drain_ttl_secs: self.me_pool_drain_ttl_secs.load(Ordering::Relaxed),
me_pool_force_close_secs: self.me_pool_force_close_secs.load(Ordering::Relaxed),
me_keepalive_enabled: self.writer_lifecycle.me_keepalive_enabled,
me_keepalive_interval_secs: self.writer_lifecycle.me_keepalive_interval.as_secs(),
me_keepalive_jitter_secs: self.writer_lifecycle.me_keepalive_jitter.as_secs(),
me_keepalive_payload_random: self.writer_lifecycle.me_keepalive_payload_random,
rpc_proxy_req_every_secs: self
.writer_lifecycle
.rpc_proxy_req_every_secs
.load(Ordering::Relaxed),
me_reconnect_max_concurrent_per_dc: self
.reconnect_runtime
.me_reconnect_max_concurrent_per_dc,
me_reconnect_backoff_base_ms: self
.reconnect_runtime
.me_reconnect_backoff_base
.as_millis() as u64,
me_reconnect_backoff_cap_ms: self.reconnect_runtime.me_reconnect_backoff_cap.as_millis()
as u64,
me_reconnect_fast_retry_count: self.reconnect_runtime.me_reconnect_fast_retry_count,
me_pool_drain_ttl_secs: self
.drain_runtime
.me_pool_drain_ttl_secs
.load(Ordering::Relaxed),
me_pool_force_close_secs: self
.drain_runtime
.me_pool_force_close_secs
.load(Ordering::Relaxed),
me_pool_min_fresh_ratio: Self::permille_to_ratio(
self.me_pool_min_fresh_ratio_permille
self.drain_runtime
.me_pool_min_fresh_ratio_permille
.load(Ordering::Relaxed),
),
me_bind_stale_mode: bind_stale_mode_label(self.bind_stale_mode()),
me_bind_stale_ttl_secs: self.me_bind_stale_ttl_secs.load(Ordering::Relaxed),
me_bind_stale_ttl_secs: self
.binding_policy
.me_bind_stale_ttl_secs
.load(Ordering::Relaxed),
me_single_endpoint_shadow_writers: self
.single_endpoint_runtime
.me_single_endpoint_shadow_writers
.load(Ordering::Relaxed),
me_single_endpoint_outage_mode_enabled: self
.single_endpoint_runtime
.me_single_endpoint_outage_mode_enabled
.load(Ordering::Relaxed),
me_single_endpoint_outage_disable_quarantine: self
.single_endpoint_runtime
.me_single_endpoint_outage_disable_quarantine
.load(Ordering::Relaxed),
me_single_endpoint_outage_backoff_min_ms: self
.single_endpoint_runtime
.me_single_endpoint_outage_backoff_min_ms
.load(Ordering::Relaxed),
me_single_endpoint_outage_backoff_max_ms: self
.single_endpoint_runtime
.me_single_endpoint_outage_backoff_max_ms
.load(Ordering::Relaxed),
me_single_endpoint_shadow_rotate_every_secs: self
.single_endpoint_runtime
.me_single_endpoint_shadow_rotate_every_secs
.load(Ordering::Relaxed),
me_deterministic_writer_sort: self.me_deterministic_writer_sort.load(Ordering::Relaxed),
me_deterministic_writer_sort: self
.writer_selection_policy
.me_deterministic_writer_sort
.load(Ordering::Relaxed),
me_writer_pick_mode: writer_pick_mode_label(self.writer_pick_mode()),
me_writer_pick_sample_size: self.writer_pick_sample_size() as u8,
me_socks_kdf_policy: socks_kdf_policy_label(self.socks_kdf_policy()),
+366 -339
View File
@@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::io::ErrorKind;
use std::net::SocketAddr;
use std::sync::Arc;
@@ -25,6 +26,7 @@ const ME_ACTIVE_PING_SECS: u64 = 25;
const ME_ACTIVE_PING_JITTER_SECS: i64 = 5;
const ME_IDLE_KEEPALIVE_MAX_SECS: u64 = 5;
const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700;
const ME_PING_TRACKER_CLEANUP_EVERY: u32 = 32;
#[derive(Clone, Copy)]
enum WriterTeardownMode {
@@ -36,6 +38,240 @@ fn is_me_peer_closed_error(error: &ProxyError) -> bool {
matches!(error, ProxyError::Io(ioe) if ioe.kind() == ErrorKind::UnexpectedEof)
}
/// Tags which part of a writer's lifecycle finished.
///
/// NOTE(review): the use site is not visible in this excerpt; presumably each variant
/// corresponds to one branch of the `select!` that supervises a writer's tasks — confirm.
enum WriterLifecycleExit {
/// Reader side finished; carries its result.
Reader(Result<()>),
/// Writer side finished; carries its result.
Writer(Result<()>),
/// Ping task finished (presumably `ping_loop` — confirm).
Ping,
/// Signal task finished (presumably `rpc_proxy_req_signal_loop` — confirm).
Signal,
/// Lifecycle was cancelled.
Cancelled,
}
async fn writer_command_loop(
mut rx: mpsc::Receiver<WriterCommand>,
mut rpc_writer: RpcWriter,
cancel: CancellationToken,
) -> Result<()> {
loop {
tokio::select! {
cmd = rx.recv() => {
match cmd {
Some(WriterCommand::Data(payload)) => {
rpc_writer.send(&payload).await?;
}
Some(WriterCommand::DataAndFlush(payload)) => {
rpc_writer.send_and_flush(&payload).await?;
}
Some(WriterCommand::Close) | None => return Ok(()),
}
}
_ = cancel.cancelled() => return Ok(()),
}
}
}
/// Computes how long the ping task should sleep before its next step.
///
/// With keepalive disabled the result is `ME_ACTIVE_PING_SECS` plus or minus up to
/// `ME_ACTIVE_PING_JITTER_SECS`, never below 5 seconds. With keepalive enabled the
/// interval is capped at `ME_IDLE_KEEPALIVE_MAX_SECS` while the registry reports the
/// writer as empty, and a random jitter is drawn, limited to half the interval and at
/// least 1 ms. The jitter is returned alone when `include_interval` is false and added
/// to the interval otherwise.
///
/// Returns `None` when the pool has already been dropped.
async fn ping_loop_wait(
    pool_ping: &std::sync::Weak<MePool>,
    writer_id: u64,
    keepalive_enabled: bool,
    keepalive_interval: Duration,
    keepalive_jitter: Duration,
    include_interval: bool,
) -> Option<Duration> {
    if !keepalive_enabled {
        let offset_secs =
            rand::rng().random_range(-ME_ACTIVE_PING_JITTER_SECS..=ME_ACTIVE_PING_JITTER_SECS);
        let secs = (ME_ACTIVE_PING_SECS as i64 + offset_secs).max(5) as u64;
        return Some(Duration::from_secs(secs));
    }

    let pool = pool_ping.upgrade()?;
    let interval = if pool.registry.is_writer_empty(writer_id).await {
        keepalive_interval.min(Duration::from_secs(ME_IDLE_KEEPALIVE_MAX_SECS))
    } else {
        keepalive_interval
    };
    let jitter_cap_ms = interval.as_millis() / 2;
    let effective_jitter_ms = keepalive_jitter.as_millis().min(jitter_cap_ms).max(1);
    let jitter = Duration::from_millis(rand::rng().random_range(0..=effective_jitter_ms as u64));
    Some(if include_interval {
        interval + jitter
    } else {
        jitter
    })
}

/// Periodically queues RPC ping frames for one writer until cancelled.
///
/// After a jittered startup delay the loop sleeps for the computed wait, records the
/// ping id and send time in `ping_tracker_ping`, and queues the frame on `tx_ping` as
/// `WriterCommand::DataAndFlush`. Every `ME_PING_TRACKER_CLEANUP_EVERY` iterations,
/// tracker entries older than 120 seconds are dropped and counted as keepalive timeouts.
///
/// The task returns when `cancel_ping_token` fires, when the pool is gone (keepalive
/// mode), or when the command channel is closed.
#[allow(clippy::too_many_arguments)]
async fn ping_loop(
    pool_ping: std::sync::Weak<MePool>,
    writer_id: u64,
    tx_ping: mpsc::Sender<WriterCommand>,
    ping_tracker_ping: Arc<tokio::sync::Mutex<HashMap<i64, Instant>>>,
    stats_ping: Arc<crate::stats::Stats>,
    keepalive_enabled: bool,
    keepalive_interval: Duration,
    keepalive_jitter: Duration,
    cancel_ping_token: CancellationToken,
) {
    let mut ping_id: i64 = rand::random::<i64>();
    let mut cleanup_tick: u32 = 0;

    // Per-writer jittered start to avoid phase sync.
    let Some(startup_delay) = ping_loop_wait(
        &pool_ping,
        writer_id,
        keepalive_enabled,
        keepalive_interval,
        keepalive_jitter,
        false,
    )
    .await
    else {
        return;
    };
    tokio::select! {
        _ = cancel_ping_token.cancelled() => return,
        _ = tokio::time::sleep(startup_delay) => {}
    }

    loop {
        let Some(wait) = ping_loop_wait(
            &pool_ping,
            writer_id,
            keepalive_enabled,
            keepalive_interval,
            keepalive_jitter,
            true,
        )
        .await
        else {
            return;
        };
        tokio::select! {
            _ = cancel_ping_token.cancelled() => return,
            _ = tokio::time::sleep(wait) => {}
        }

        // Frame layout: ping constant followed by the ping id, both little-endian.
        let sent_id = ping_id;
        ping_id = ping_id.wrapping_add(1);
        let mut frame = Vec::with_capacity(12);
        frame.extend_from_slice(&RPC_PING_U32.to_le_bytes());
        frame.extend_from_slice(&sent_id.to_le_bytes());

        {
            // Scoped so the tracker lock is released before the channel send.
            let mut tracker = ping_tracker_ping.lock().await;
            cleanup_tick = cleanup_tick.wrapping_add(1);
            if cleanup_tick.is_multiple_of(ME_PING_TRACKER_CLEANUP_EVERY) {
                let stale_after = Duration::from_secs(120);
                let before = tracker.len();
                tracker.retain(|_, sent_at| sent_at.elapsed() < stale_after);
                let expired = before.saturating_sub(tracker.len());
                if expired > 0 {
                    stats_ping.increment_me_keepalive_timeout_by(expired as u64);
                }
            }
            tracker.insert(sent_id, std::time::Instant::now());
        }

        stats_ping.increment_me_keepalive_sent();
        let delivery = tx_ping
            .send(WriterCommand::DataAndFlush(Bytes::from(frame)))
            .await;
        if delivery.is_err() {
            stats_ping.increment_me_keepalive_failed();
            debug!("ME ping failed, removing dead writer");
            return;
        }
    }
}
/// Periodically pushes a service `RPC_PROXY_REQ` / `RPC_CLOSE_EXT` pair
/// through one ME writer.
///
/// Each round:
/// 1. waits `rpc_proxy_req_every_secs` plus a random jitter (bounded by
///    `keepalive_jitter_signal` and by half of the interval),
/// 2. fetches the metadata returned by `get_last_writer_meta(writer_id)`; the
///    round is skipped (and counted) when there is none,
/// 3. registers a fresh connection id in the pool registry and queues a proxy
///    request frame built from that metadata on `tx_signal`,
/// 4. waits up to `ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS` for any message on the
///    registered receiver and counts a response if one arrives,
/// 5. queues the matching close frame and unregisters the connection id.
///
/// With `rpc_proxy_req_every_secs == 0` the loop is disabled and simply parks
/// until `cancel_signal` fires.
///
/// The function returns when `cancel_signal` is cancelled (checked while
/// sleeping), when the pool behind `pool_signal` has been dropped, or when the
/// command channel behind `tx_signal` rejects a frame.
// The argument list mirrors the per-writer handles and settings that the
// caller passes in individually, hence the lint exception.
#[allow(clippy::too_many_arguments)]
async fn rpc_proxy_req_signal_loop(
    pool_signal: std::sync::Weak<MePool>,
    writer_id: u64,
    tx_signal: mpsc::Sender<WriterCommand>,
    stats_signal: Arc<crate::stats::Stats>,
    cancel_signal: CancellationToken,
    keepalive_jitter_signal: Duration,
    rpc_proxy_req_every_secs: u64,
) {
    if rpc_proxy_req_every_secs == 0 {
        // Disabled service signal loop must stay parked until writer cancellation.
        // Returning immediately here would complete `select!` and tear down writer lifecycle.
        cancel_signal.cancelled().await;
        return;
    }

    let interval = Duration::from_secs(rpc_proxy_req_every_secs);
    // The jitter bound depends only on the fixed interval and the configured
    // jitter, so it is computed once instead of on every iteration.
    let max_jitter_ms = {
        let jitter_cap_ms = interval.as_millis() / 2;
        let effective_jitter_ms = keepalive_jitter_signal
            .as_millis()
            .min(jitter_cap_ms)
            .max(1);
        // Saturate instead of silently truncating the u128 millisecond count.
        u64::try_from(effective_jitter_ms).unwrap_or(u64::MAX)
    };
    let response_wait = Duration::from_millis(ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS);

    // Jittered start so that writers do not emit their signals in lockstep.
    let startup_jitter = Duration::from_millis(rand::rng().random_range(0..=max_jitter_ms));
    tokio::select! {
        _ = cancel_signal.cancelled() => return,
        _ = tokio::time::sleep(startup_jitter) => {}
    }

    loop {
        let wait = interval + Duration::from_millis(rand::rng().random_range(0..=max_jitter_ms));
        tokio::select! {
            _ = cancel_signal.cancelled() => return,
            _ = tokio::time::sleep(wait) => {}
        }

        let Some(pool) = pool_signal.upgrade() else {
            return;
        };
        let Some(meta) = pool.registry.get_last_writer_meta(writer_id).await else {
            stats_signal.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
            continue;
        };

        // NOTE(review): this future is raced inside a `select!` by the writer
        // lifecycle task; if it is dropped at one of the awaits between
        // `register()` and `unregister()` below, the `unregister` call is never
        // reached. Confirm the registry reclaims such ids on writer removal.
        let (conn_id, mut service_rx) = pool.registry.register().await;

        // Service RPC_PROXY_REQ signal path is intentionally route-only:
        // do not bind synthetic conn_id into regular writer/client accounting.
        let payload = build_proxy_req_payload(
            conn_id,
            meta.client_addr,
            meta.our_addr,
            &[],
            pool.proxy_tag.as_deref(),
            meta.proto_flags,
        );
        if tx_signal
            .send(WriterCommand::DataAndFlush(payload))
            .await
            .is_err()
        {
            stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
            let _ = pool.registry.unregister(conn_id).await;
            return;
        }
        stats_signal.increment_me_rpc_proxy_req_signal_sent_total();

        // Any message on the service receiver within the wait window counts
        // as a response; a timeout or a closed receiver does not.
        let got_response = matches!(
            tokio::time::timeout(response_wait, service_rx.recv()).await,
            Ok(Some(_))
        );
        if got_response {
            stats_signal.increment_me_rpc_proxy_req_signal_response_total();
        }

        // Close frame: 4-byte opcode followed by the connection id, little-endian.
        let mut close_payload = Vec::with_capacity(12);
        close_payload.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
        close_payload.extend_from_slice(&conn_id.to_le_bytes());
        if tx_signal
            .send(WriterCommand::DataAndFlush(Bytes::from(close_payload)))
            .await
            .is_err()
        {
            stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
            let _ = pool.registry.unregister(conn_id).await;
            return;
        }
        stats_signal.increment_me_rpc_proxy_req_signal_close_sent_total();
        let _ = pool.registry.unregister(conn_id).await;
    }
}
impl MePool {
pub(crate) async fn prune_closed_writers(self: &Arc<Self>) {
let closed_writer_ids: Vec<u64> = {
@@ -136,46 +372,15 @@ impl MePool {
let draining_started_at_epoch_secs = Arc::new(AtomicU64::new(0));
let drain_deadline_epoch_secs = Arc::new(AtomicU64::new(0));
let allow_drain_fallback = Arc::new(AtomicBool::new(false));
let (tx, mut rx) = mpsc::channel::<WriterCommand>(self.writer_cmd_channel_capacity);
let mut rpc_writer = RpcWriter {
let (tx, rx) =
mpsc::channel::<WriterCommand>(self.writer_lifecycle.writer_cmd_channel_capacity);
let rpc_writer = RpcWriter {
writer: hs.wr,
key: hs.write_key,
iv: hs.write_iv,
seq_no: 0,
crc_mode: hs.crc_mode,
};
let cancel_wr = cancel.clone();
let cleanup_done = Arc::new(AtomicBool::new(false));
let cleanup_for_writer = cleanup_done.clone();
let pool_writer_task = Arc::downgrade(self);
tokio::spawn(async move {
loop {
tokio::select! {
cmd = rx.recv() => {
match cmd {
Some(WriterCommand::Data(payload)) => {
if rpc_writer.send(&payload).await.is_err() { break; }
}
Some(WriterCommand::DataAndFlush(payload)) => {
if rpc_writer.send_and_flush(&payload).await.is_err() { break; }
}
Some(WriterCommand::Close) | None => break,
}
}
_ = cancel_wr.cancelled() => break,
}
}
if cleanup_for_writer
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
if let Some(pool) = pool_writer_task.upgrade() {
pool.remove_writer_and_close_clients(writer_id).await;
} else {
cancel_wr.cancel();
}
}
});
let writer = MeWriter {
id: writer_id,
addr,
@@ -193,329 +398,135 @@ impl MePool {
drain_deadline_epoch_secs: drain_deadline_epoch_secs.clone(),
allow_drain_fallback: allow_drain_fallback.clone(),
};
self.writers.write().await.push(writer.clone());
self.writers
.update(|writers| writers.push(writer.clone()))
.await;
self.registry.register_writer(writer_id, tx.clone()).await;
self.registry.mark_writer_idle(writer_id).await;
self.conn_count.fetch_add(1, Ordering::Relaxed);
self.writer_available.notify_one();
self.notify_writer_epoch();
let reg = self.registry.clone();
let writers_arc = self.writers_arc();
let ping_tracker = self.ping_tracker.clone();
let ping_tracker = Arc::new(tokio::sync::Mutex::new(HashMap::<i64, Instant>::new()));
let ping_tracker_reader = ping_tracker.clone();
let ping_tracker_ping = ping_tracker.clone();
let rtt_stats = self.rtt_stats.clone();
let stats_reader = self.stats.clone();
let stats_reader_close = self.stats.clone();
let stats_ping = self.stats.clone();
let pool = Arc::downgrade(self);
let cancel_ping = cancel.clone();
let tx_ping = tx.clone();
let ping_tracker_ping = ping_tracker.clone();
let cleanup_for_reader = cleanup_done.clone();
let cleanup_for_ping = cleanup_done.clone();
let keepalive_enabled = self.me_keepalive_enabled;
let keepalive_interval = self.me_keepalive_interval;
let keepalive_jitter = self.me_keepalive_jitter;
let rpc_proxy_req_every_secs = self.rpc_proxy_req_every_secs.load(Ordering::Relaxed);
let tx_signal = tx.clone();
let stats_signal = self.stats.clone();
let cancel_signal = cancel.clone();
let cleanup_for_signal = cleanup_done.clone();
let pool_signal = Arc::downgrade(self);
let keepalive_jitter_signal = self.me_keepalive_jitter;
let cancel_reader_token = cancel.clone();
let cancel_ping_token = cancel_ping.clone();
let reader_route_data_wait_ms = self.me_reader_route_data_wait_ms.clone();
tokio::spawn(async move {
let res = reader_loop(
hs.rd,
hs.read_key,
hs.read_iv,
hs.crc_mode,
reg.clone(),
BytesMut::new(),
BytesMut::new(),
tx.clone(),
ping_tracker_reader,
rtt_stats.clone(),
stats_reader,
writer_id,
degraded.clone(),
rtt_ema_ms_x10.clone(),
reader_route_data_wait_ms,
cancel_reader_token.clone(),
)
.await;
let idle_close_by_peer = if let Err(e) = res.as_ref() {
is_me_peer_closed_error(e) && reg.is_writer_empty(writer_id).await
} else {
false
};
if idle_close_by_peer {
stats_reader_close.increment_me_idle_close_by_peer_total();
info!(writer_id, "ME socket closed by peer on idle writer");
}
if cleanup_for_reader
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
if let Some(pool) = pool.upgrade() {
pool.remove_writer_and_close_clients(writer_id).await;
} else {
// Fallback for shutdown races: make writer task exit quickly so stale
// channels are observable by periodic prune.
cancel_reader_token.cancel();
}
}
if let Err(e) = res
&& !idle_close_by_peer
{
warn!(error = %e, "ME reader ended");
}
let remaining = writers_arc.read().await.len();
debug!(writer_id, remaining, "ME reader task finished");
});
let pool_lifecycle = Arc::downgrade(self);
let pool_ping = Arc::downgrade(self);
let pool_signal = Arc::downgrade(self);
let tx_reader = tx.clone();
let tx_ping = tx.clone();
let tx_signal = tx.clone();
let keepalive_enabled = self.writer_lifecycle.me_keepalive_enabled;
let keepalive_interval = self.writer_lifecycle.me_keepalive_interval;
let keepalive_jitter = self.writer_lifecycle.me_keepalive_jitter;
let keepalive_jitter_signal = self.writer_lifecycle.me_keepalive_jitter;
let rpc_proxy_req_every_secs = self
.writer_lifecycle
.rpc_proxy_req_every_secs
.load(Ordering::Relaxed);
let cancel_reader = cancel.clone();
let cancel_writer = cancel.clone();
let cancel_ping = cancel.clone();
let cancel_signal = cancel.clone();
let cancel_select = cancel.clone();
let cancel_cleanup = cancel.clone();
let reader_route_data_wait_ms = self.transport_policy.me_reader_route_data_wait_ms.clone();
tokio::spawn(async move {
let mut ping_id: i64 = rand::random::<i64>();
let idle_interval_cap = Duration::from_secs(ME_IDLE_KEEPALIVE_MAX_SECS);
// Per-writer jittered start to avoid phase sync.
let startup_jitter = if keepalive_enabled {
let mut interval = keepalive_interval;
if let Some(pool) = pool_ping.upgrade() {
if pool.registry.is_writer_empty(writer_id).await {
interval = interval.min(idle_interval_cap);
}
} else {
return;
// Reader MUST be the first branch in biased select! to avoid read starvation.
let exit = tokio::select! {
biased;
reader_res = reader_loop(
hs.rd,
hs.read_key,
hs.read_iv,
hs.crc_mode,
reg.clone(),
BytesMut::new(),
BytesMut::new(),
tx_reader,
ping_tracker_reader,
rtt_stats,
stats_reader,
writer_id,
degraded,
rtt_ema_ms_x10,
reader_route_data_wait_ms,
cancel_reader,
) => WriterLifecycleExit::Reader(reader_res),
writer_res = writer_command_loop(rx, rpc_writer, cancel_writer) => {
WriterLifecycleExit::Writer(writer_res)
}
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms = keepalive_jitter.as_millis().min(jitter_cap_ms).max(1);
Duration::from_millis(rand::rng().random_range(0..=effective_jitter_ms as u64))
} else {
let jitter = rand::rng()
.random_range(-ME_ACTIVE_PING_JITTER_SECS..=ME_ACTIVE_PING_JITTER_SECS);
let wait = (ME_ACTIVE_PING_SECS as i64 + jitter).max(5) as u64;
Duration::from_secs(wait)
_ = ping_loop(
pool_ping,
writer_id,
tx_ping,
ping_tracker_ping,
stats_ping,
keepalive_enabled,
keepalive_interval,
keepalive_jitter,
cancel_ping,
) => WriterLifecycleExit::Ping,
_ = rpc_proxy_req_signal_loop(
pool_signal,
writer_id,
tx_signal,
stats_signal,
cancel_signal,
keepalive_jitter_signal,
rpc_proxy_req_every_secs,
) => WriterLifecycleExit::Signal,
_ = cancel_select.cancelled() => WriterLifecycleExit::Cancelled,
};
tokio::select! {
_ = cancel_ping_token.cancelled() => return,
_ = tokio::time::sleep(startup_jitter) => {}
}
loop {
let wait = if keepalive_enabled {
let mut interval = keepalive_interval;
if let Some(pool) = pool_ping.upgrade() {
if pool.registry.is_writer_empty(writer_id).await {
interval = interval.min(idle_interval_cap);
}
match exit {
WriterLifecycleExit::Reader(res) => {
let idle_close_by_peer = if let Err(e) = res.as_ref() {
is_me_peer_closed_error(e) && reg.is_writer_empty(writer_id).await
} else {
break;
false
};
if idle_close_by_peer {
stats_reader_close.increment_me_idle_close_by_peer_total();
info!(writer_id, "ME socket closed by peer on idle writer");
}
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms =
keepalive_jitter.as_millis().min(jitter_cap_ms).max(1);
interval
+ Duration::from_millis(
rand::rng().random_range(0..=effective_jitter_ms as u64),
)
} else {
let jitter = rand::rng()
.random_range(-ME_ACTIVE_PING_JITTER_SECS..=ME_ACTIVE_PING_JITTER_SECS);
let secs = (ME_ACTIVE_PING_SECS as i64 + jitter).max(5) as u64;
Duration::from_secs(secs)
};
tokio::select! {
_ = cancel_ping_token.cancelled() => {
break;
}
_ = tokio::time::sleep(wait) => {}
}
let sent_id = ping_id;
let mut p = Vec::with_capacity(12);
p.extend_from_slice(&RPC_PING_U32.to_le_bytes());
p.extend_from_slice(&sent_id.to_le_bytes());
{
let mut tracker = ping_tracker_ping.lock().await;
let now_epoch_ms = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as u64;
let mut run_cleanup = false;
if let Some(pool) = pool_ping.upgrade() {
let last_cleanup_ms = pool
.ping_tracker_last_cleanup_epoch_ms
.load(Ordering::Relaxed);
if now_epoch_ms.saturating_sub(last_cleanup_ms) >= 30_000
&& pool
.ping_tracker_last_cleanup_epoch_ms
.compare_exchange(
last_cleanup_ms,
now_epoch_ms,
Ordering::AcqRel,
Ordering::Relaxed,
)
.is_ok()
{
run_cleanup = true;
}
}
if run_cleanup {
let before = tracker.len();
tracker.retain(|_, (ts, _)| ts.elapsed() < Duration::from_secs(120));
let expired = before.saturating_sub(tracker.len());
if expired > 0 {
stats_ping.increment_me_keepalive_timeout_by(expired as u64);
}
}
tracker.insert(sent_id, (std::time::Instant::now(), writer_id));
}
ping_id = ping_id.wrapping_add(1);
stats_ping.increment_me_keepalive_sent();
if tx_ping
.send(WriterCommand::DataAndFlush(Bytes::from(p)))
.await
.is_err()
{
stats_ping.increment_me_keepalive_failed();
debug!("ME ping failed, removing dead writer");
cancel_ping.cancel();
if cleanup_for_ping
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
&& let Some(pool) = pool_ping.upgrade()
if let Err(e) = res
&& !idle_close_by_peer
{
pool.remove_writer_and_close_clients(writer_id).await;
warn!(error = %e, "ME reader ended");
}
break;
}
}
});
tokio::spawn(async move {
if rpc_proxy_req_every_secs == 0 {
return;
}
let interval = Duration::from_secs(rpc_proxy_req_every_secs);
let startup_jitter_ms = {
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms = keepalive_jitter_signal
.as_millis()
.min(jitter_cap_ms)
.max(1);
rand::rng().random_range(0..=effective_jitter_ms as u64)
};
tokio::select! {
_ = cancel_signal.cancelled() => return,
_ = tokio::time::sleep(Duration::from_millis(startup_jitter_ms)) => {}
}
loop {
let wait = {
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms = keepalive_jitter_signal
.as_millis()
.min(jitter_cap_ms)
.max(1);
interval
+ Duration::from_millis(
rand::rng().random_range(0..=effective_jitter_ms as u64),
)
};
tokio::select! {
_ = cancel_signal.cancelled() => break,
_ = tokio::time::sleep(wait) => {}
}
let Some(pool) = pool_signal.upgrade() else {
break;
};
let Some(meta) = pool.registry.get_last_writer_meta(writer_id).await else {
stats_signal.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
continue;
};
let (conn_id, mut service_rx) = pool.registry.register().await;
if !pool
.registry
.bind_writer(conn_id, writer_id, meta.clone())
.await
{
let _ = pool.registry.unregister(conn_id).await;
stats_signal.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
continue;
}
let payload = build_proxy_req_payload(
conn_id,
meta.client_addr,
meta.our_addr,
&[],
pool.proxy_tag.as_deref(),
meta.proto_flags,
);
if tx_signal
.send(WriterCommand::DataAndFlush(payload))
.await
.is_err()
{
stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
let _ = pool.registry.unregister(conn_id).await;
cancel_signal.cancel();
if cleanup_for_signal
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
pool.remove_writer_and_close_clients(writer_id).await;
WriterLifecycleExit::Writer(res) => {
if let Err(e) = res {
warn!(error = %e, "ME writer command loop ended");
}
break;
}
stats_signal.increment_me_rpc_proxy_req_signal_sent_total();
if matches!(
tokio::time::timeout(
Duration::from_millis(ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS),
service_rx.recv(),
)
.await,
Ok(Some(_))
) {
stats_signal.increment_me_rpc_proxy_req_signal_response_total();
WriterLifecycleExit::Ping => {
debug!(writer_id, "ME ping loop finished");
}
let mut close_payload = Vec::with_capacity(12);
close_payload.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
close_payload.extend_from_slice(&conn_id.to_le_bytes());
if tx_signal
.send(WriterCommand::DataAndFlush(Bytes::from(close_payload)))
.await
.is_err()
{
stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
let _ = pool.registry.unregister(conn_id).await;
cancel_signal.cancel();
if cleanup_for_signal
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
pool.remove_writer_and_close_clients(writer_id).await;
}
break;
WriterLifecycleExit::Signal => {
debug!(writer_id, "ME rpc_proxy_req signal loop finished");
}
stats_signal.increment_me_rpc_proxy_req_signal_close_sent_total();
let _ = pool.registry.unregister(conn_id).await;
WriterLifecycleExit::Cancelled => {}
}
if let Some(pool) = pool_lifecycle.upgrade() {
pool.remove_writer_and_close_clients(writer_id).await;
} else {
// Fallback for shutdown races: make lifecycle exit observable by prune.
cancel_cleanup.cancel();
}
let remaining = writers_arc.read().await.len();
debug!(writer_id, remaining, "ME writer lifecycle task finished");
});
Ok(())
@@ -594,23 +605,36 @@ impl MePool {
// The close command below is only a best-effort accelerator for task shutdown.
// Cleanup progress must never depend on command-channel availability.
let _ = self.registry.writer_lost(writer_id).await;
{
let mut tracker = self.ping_tracker.lock().await;
tracker.retain(|_, (_, wid)| *wid != writer_id);
}
self.rtt_stats.lock().await.remove(&writer_id);
if let Some(tx) = close_tx {
let _ = tx.send(WriterCommand::Close).await;
// Keep teardown critical path non-blocking: close is best-effort only.
let _ = tx.try_send(WriterCommand::Close);
}
if let Some(addr) = removed_addr {
if let Some(uptime) = removed_uptime {
// Quarantine flapping endpoints regardless of draining state.
self.maybe_quarantine_flapping_endpoint(addr, uptime).await;
// Quarantine contract: only unexpected removals are considered endpoint flap.
if trigger_refill {
self.stats
.increment_me_endpoint_quarantine_unexpected_total();
self.maybe_quarantine_flapping_endpoint(addr, uptime, "unexpected")
.await;
} else {
self.stats
.increment_me_endpoint_quarantine_draining_suppressed_total();
debug!(
%addr,
uptime_ms = uptime.as_millis(),
"Skipping endpoint quarantine for draining writer removal"
);
}
}
if trigger_refill && let Some(writer_dc) = removed_dc {
self.trigger_immediate_refill_for_dc(addr, writer_dc);
}
}
if removed {
self.notify_writer_epoch();
}
removed
}
@@ -676,7 +700,10 @@ impl MePool {
MeBindStaleMode::Never => false,
MeBindStaleMode::Always => true,
MeBindStaleMode::Ttl => {
let ttl_secs = self.me_bind_stale_ttl_secs.load(Ordering::Relaxed);
let ttl_secs = self
.binding_policy
.me_bind_stale_ttl_secs
.load(Ordering::Relaxed);
if ttl_secs == 0 {
return true;
}

Some files were not shown because too many files have changed in this diff Show More