Compare commits

..

No commits in common. "7de822dd153aa5f197601e25e60c6ae562bea603" and "80cb1bc221033523c2a6febebd71208b19e69c3c" have entirely different histories.

37 changed files with 1300 additions and 2401 deletions

View File

@ -5,68 +5,26 @@ on:
tags: tags:
- '[0-9]+.[0-9]+.[0-9]+' - '[0-9]+.[0-9]+.[0-9]+'
workflow_dispatch: workflow_dispatch:
inputs:
tag:
description: 'Release tag (example: 3.3.15)'
required: true
type: string
concurrency: concurrency:
group: release-${{ github.ref_name }}-${{ github.event.inputs.tag || 'auto' }} group: release-${{ github.ref }}
cancel-in-progress: true cancel-in-progress: true
permissions: permissions:
contents: read contents: read
packages: write
env: env:
CARGO_TERM_COLOR: always CARGO_TERM_COLOR: always
BINARY_NAME: telemt BINARY_NAME: telemt
jobs: jobs:
prepare:
name: Prepare
runs-on: ubuntu-latest
outputs:
version: ${{ steps.vars.outputs.version }}
prerelease: ${{ steps.vars.outputs.prerelease }}
steps:
- name: Resolve version
id: vars
shell: bash
run: |
set -euo pipefail
if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
VERSION="${{ github.event.inputs.tag }}"
else
VERSION="${GITHUB_REF#refs/tags/}"
fi
VERSION="${VERSION#refs/tags/}"
if [ -z "${VERSION}" ]; then
echo "Release version is empty" >&2
exit 1
fi
if [[ "${VERSION}" == *-* ]]; then
PRERELEASE=true
else
PRERELEASE=false
fi
echo "version=${VERSION}" >> "${GITHUB_OUTPUT}"
echo "prerelease=${PRERELEASE}" >> "${GITHUB_OUTPUT}"
# ========================== # ==========================
# GNU / glibc # GNU / glibc
# ========================== # ==========================
build-gnu: build-gnu:
name: GNU ${{ matrix.asset }} name: GNU ${{ matrix.asset }}
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: prepare
container: container:
image: rust:slim-bookworm image: rust:slim-bookworm
@ -111,23 +69,18 @@ jobs:
- uses: actions/cache@v4 - uses: actions/cache@v4
with: with:
path: | path: |
/usr/local/cargo/registry ~/.cargo/registry
/usr/local/cargo/git ~/.cargo/git
target target
key: gnu-${{ matrix.asset }}-${{ hashFiles('**/Cargo.lock') }} key: gnu-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
gnu-${{ matrix.asset }}-
gnu-
- name: Build - name: Build
shell: bash
run: | run: |
set -euo pipefail
if [ "${{ matrix.target }}" = "aarch64-unknown-linux-gnu" ]; then if [ "${{ matrix.target }}" = "aarch64-unknown-linux-gnu" ]; then
export CC=aarch64-linux-gnu-gcc export CC=aarch64-linux-gnu-gcc
export CXX=aarch64-linux-gnu-g++ export CXX=aarch64-linux-gnu-g++
export RUSTFLAGS="-C linker=aarch64-linux-gnu-gcc -C lto=fat -C panic=abort" export RUSTFLAGS="-C linker=aarch64-linux-gnu-gcc -C lto=fat -C panic=abort"
else else
export CC=clang export CC=clang
export CXX=clang++ export CXX=clang++
@ -138,25 +91,22 @@ jobs:
CPU_FLAGS="-C target-cpu=x86-64" CPU_FLAGS="-C target-cpu=x86-64"
fi fi
export RUSTFLAGS="-C linker=clang -C link-arg=-fuse-ld=lld -C lto=fat -C panic=abort ${CPU_FLAGS}" export RUSTFLAGS="-C linker=clang -C link-arg=-fuse-ld=lld -C lto=fat -C panic=abort $CPU_FLAGS"
fi fi
cargo build --release --target ${{ matrix.target }} -j "$(nproc)" cargo build --release --target ${{ matrix.target }}
- name: Package - name: Package
shell: bash
run: | run: |
set -euo pipefail
mkdir -p dist mkdir -p dist
cp "target/${{ matrix.target }}/release/${{ env.BINARY_NAME }}" dist/telemt cp target/${{ matrix.target }}/release/${{ env.BINARY_NAME }} dist/telemt
cd dist cd dist
tar -czf "${{ matrix.asset }}.tar.gz" \ tar -czf ${{ matrix.asset }}.tar.gz \
--owner=0 --group=0 --numeric-owner \ --owner=0 --group=0 --numeric-owner \
telemt telemt
sha256sum "${{ matrix.asset }}.tar.gz" > "${{ matrix.asset }}.tar.gz.sha256" sha256sum ${{ matrix.asset }}.tar.gz > ${{ matrix.asset }}.sha256
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
@ -169,7 +119,6 @@ jobs:
build-musl: build-musl:
name: MUSL ${{ matrix.asset }} name: MUSL ${{ matrix.asset }}
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: prepare
container: container:
image: rust:slim-bookworm image: rust:slim-bookworm
@ -209,29 +158,30 @@ jobs:
- name: Install aarch64 musl toolchain - name: Install aarch64 musl toolchain
if: matrix.target == 'aarch64-unknown-linux-musl' if: matrix.target == 'aarch64-unknown-linux-musl'
shell: bash
run: | run: |
set -euo pipefail set -e
TOOLCHAIN_DIR="$HOME/.musl-aarch64" TOOLCHAIN_DIR="$HOME/.musl-aarch64"
ARCHIVE="aarch64-linux-musl-cross.tgz" ARCHIVE="aarch64-linux-musl-cross.tgz"
URL="https://github.com/telemt/telemt/releases/download/toolchains/${ARCHIVE}" URL="https://github.com/telemt/telemt/releases/download/toolchains/$ARCHIVE"
if [ -x "${TOOLCHAIN_DIR}/bin/aarch64-linux-musl-gcc" ]; then if [ -x "$TOOLCHAIN_DIR/bin/aarch64-linux-musl-gcc" ]; then
echo "MUSL toolchain cached" echo "MUSL toolchain cached"
else else
echo "⬇️ Downloading MUSL toolchain..."
curl -fL \ curl -fL \
--retry 5 \ --retry 5 \
--retry-delay 3 \ --retry-delay 3 \
--connect-timeout 10 \ --connect-timeout 10 \
--max-time 120 \ --max-time 120 \
-o "${ARCHIVE}" "${URL}" -o "$ARCHIVE" "$URL"
mkdir -p "${TOOLCHAIN_DIR}" mkdir -p "$TOOLCHAIN_DIR"
tar -xzf "${ARCHIVE}" --strip-components=1 -C "${TOOLCHAIN_DIR}" tar -xzf "$ARCHIVE" --strip-components=1 -C "$TOOLCHAIN_DIR"
fi fi
echo "${TOOLCHAIN_DIR}/bin" >> "${GITHUB_PATH}" echo "$TOOLCHAIN_DIR/bin" >> $GITHUB_PATH
- name: Add rust target - name: Add rust target
run: rustup target add ${{ matrix.target }} run: rustup target add ${{ matrix.target }}
@ -242,20 +192,15 @@ jobs:
/usr/local/cargo/registry /usr/local/cargo/registry
/usr/local/cargo/git /usr/local/cargo/git
target target
key: musl-${{ matrix.asset }}-${{ hashFiles('**/Cargo.lock') }} key: musl-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
musl-${{ matrix.asset }}-
musl-
- name: Build - name: Build
shell: bash
run: | run: |
set -euo pipefail
if [ "${{ matrix.target }}" = "aarch64-unknown-linux-musl" ]; then if [ "${{ matrix.target }}" = "aarch64-unknown-linux-musl" ]; then
export CC=aarch64-linux-musl-gcc export CC=aarch64-linux-musl-gcc
export CC_aarch64_unknown_linux_musl=aarch64-linux-musl-gcc export CC_aarch64_unknown_linux_musl=aarch64-linux-musl-gcc
export RUSTFLAGS="-C target-feature=+crt-static -C linker=aarch64-linux-musl-gcc -C lto=fat -C panic=abort" export RUSTFLAGS="-C target-feature=+crt-static -C linker=aarch64-linux-musl-gcc -C lto=fat -C panic=abort"
else else
export CC=musl-gcc export CC=musl-gcc
export CC_x86_64_unknown_linux_musl=musl-gcc export CC_x86_64_unknown_linux_musl=musl-gcc
@ -266,25 +211,22 @@ jobs:
CPU_FLAGS="-C target-cpu=x86-64" CPU_FLAGS="-C target-cpu=x86-64"
fi fi
export RUSTFLAGS="-C target-feature=+crt-static -C lto=fat -C panic=abort ${CPU_FLAGS}" export RUSTFLAGS="-C target-feature=+crt-static -C lto=fat -C panic=abort $CPU_FLAGS"
fi fi
cargo build --release --target ${{ matrix.target }} -j "$(nproc)" cargo build --release --target ${{ matrix.target }}
- name: Package - name: Package
shell: bash
run: | run: |
set -euo pipefail
mkdir -p dist mkdir -p dist
cp "target/${{ matrix.target }}/release/${{ env.BINARY_NAME }}" dist/telemt cp target/${{ matrix.target }}/release/${{ env.BINARY_NAME }} dist/telemt
cd dist cd dist
tar -czf "${{ matrix.asset }}.tar.gz" \ tar -czf ${{ matrix.asset }}.tar.gz \
--owner=0 --group=0 --numeric-owner \ --owner=0 --group=0 --numeric-owner \
telemt telemt
sha256sum "${{ matrix.asset }}.tar.gz" > "${{ matrix.asset }}.tar.gz.sha256" sha256sum ${{ matrix.asset }}.tar.gz > ${{ matrix.asset }}.sha256
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
@ -297,7 +239,7 @@ jobs:
release: release:
name: Release name: Release
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: [prepare, build-gnu, build-musl] needs: [build-gnu, build-musl]
permissions: permissions:
contents: write contents: write
@ -307,22 +249,17 @@ jobs:
with: with:
path: artifacts path: artifacts
- name: Flatten artifacts - name: Flatten
shell: bash
run: | run: |
set -euo pipefail mkdir dist
mkdir -p dist
find artifacts -type f -exec cp {} dist/ \; find artifacts -type f -exec cp {} dist/ \;
- name: Create GitHub Release - name: Create Release
uses: softprops/action-gh-release@v2 uses: softprops/action-gh-release@v2
with: with:
tag_name: ${{ needs.prepare.outputs.version }}
target_commitish: ${{ github.sha }}
files: dist/* files: dist/*
generate_release_notes: true generate_release_notes: true
prerelease: ${{ needs.prepare.outputs.prerelease == 'true' }} prerelease: ${{ contains(github.ref, '-') }}
overwrite_files: true
# ========================== # ==========================
# Docker # Docker
@ -330,7 +267,7 @@ jobs:
docker: docker:
name: Docker name: Docker
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: [prepare, release] needs: [build-gnu, build-musl]
permissions: permissions:
contents: read contents: read
@ -339,8 +276,21 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: docker/setup-qemu-action@v3 - uses: actions/download-artifact@v4
with:
path: dist
- name: Extract binaries
run: |
mkdir bin
tar -xzf dist/telemt-x86_64-linux-musl/telemt-x86_64-linux-musl.tar.gz -C bin
mv bin/telemt bin/telemt-amd64
tar -xzf dist/telemt-aarch64-linux-musl/telemt-aarch64-linux-musl.tar.gz -C bin
mv bin/telemt bin/telemt-arm64
- uses: docker/setup-qemu-action@v3
- uses: docker/setup-buildx-action@v3 - uses: docker/setup-buildx-action@v3
- uses: docker/login-action@v3 - uses: docker/login-action@v3
@ -349,57 +299,19 @@ jobs:
username: ${{ github.actor }} username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }} password: ${{ secrets.GITHUB_TOKEN }}
- name: Probe release assets - name: Extract version
shell: bash id: vars
env: run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
VERSION: ${{ needs.prepare.outputs.version }}
run: |
set -euo pipefail
for asset in \ - name: Build & Push (multi-arch)
telemt-x86_64-linux-musl.tar.gz \
telemt-x86_64-linux-musl.tar.gz.sha256 \
telemt-aarch64-linux-musl.tar.gz \
telemt-aarch64-linux-musl.tar.gz.sha256
do
curl -fsIL \
--retry 10 \
--retry-delay 3 \
"https://github.com/${GITHUB_REPOSITORY}/releases/download/${VERSION}/${asset}" \
> /dev/null
done
- name: Compute image tags
id: meta
shell: bash
env:
VERSION: ${{ needs.prepare.outputs.version }}
run: |
set -euo pipefail
IMAGE="$(echo "ghcr.io/${GITHUB_REPOSITORY}" | tr '[:upper:]' '[:lower:]')"
TAGS="${IMAGE}:${VERSION}"
if [[ "${VERSION}" != *-* ]]; then
TAGS="${TAGS}"$'\n'"${IMAGE}:latest"
fi
{
echo "tags<<EOF"
printf '%s\n' "${TAGS}"
echo "EOF"
} >> "${GITHUB_OUTPUT}"
- name: Build & Push
uses: docker/build-push-action@v6 uses: docker/build-push-action@v6
with: with:
context: . context: .
push: true push: true
pull: true
platforms: linux/amd64,linux/arm64 platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }} tags: |
ghcr.io/${{ github.repository }}:${{ steps.vars.outputs.VERSION }}
ghcr.io/${{ github.repository }}:latest
build-args: | build-args: |
TELEMT_REPOSITORY=${{ github.repository }} BINARY_AMD64=bin/telemt-amd64
TELEMT_VERSION=${{ needs.prepare.outputs.version }} BINARY_ARM64=bin/telemt-arm64
cache-from: type=gha
cache-to: type=gha,mode=max

2
Cargo.lock generated
View File

@ -2793,7 +2793,7 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
[[package]] [[package]]
name = "telemt" name = "telemt"
version = "3.3.32" version = "3.3.31"
dependencies = [ dependencies = [
"aes", "aes",
"anyhow", "anyhow",

View File

@ -1,8 +1,11 @@
[package] [package]
name = "telemt" name = "telemt"
version = "3.3.32" version = "3.3.31"
edition = "2024" edition = "2024"
[profile.release]
codegen-units = 1
[features] [features]
redteam_offline_expected_fail = [] redteam_offline_expected_fail = []

View File

@ -1,7 +1,8 @@
# syntax=docker/dockerfile:1 # syntax=docker/dockerfile:1
ARG TELEMT_REPOSITORY=telemt/telemt ARG TARGETARCH
ARG TELEMT_VERSION=latest ARG BINARY_AMD64
ARG BINARY_ARM64
# ========================== # ==========================
# Minimal Image # Minimal Image
@ -9,51 +10,54 @@ ARG TELEMT_VERSION=latest
FROM debian:12-slim AS minimal FROM debian:12-slim AS minimal
ARG TARGETARCH ARG TARGETARCH
ARG TELEMT_REPOSITORY ARG BINARY_AMD64
ARG TELEMT_VERSION ARG BINARY_ARM64
RUN set -eux; \ RUN set -eux; \
apt-get update; \ apt-get update; \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
binutils \ binutils \
ca-certificates \
curl \ curl \
tar; \ xz-utils \
ca-certificates; \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# --- Select correct binary ---
RUN set -eux; \ RUN set -eux; \
case "${TARGETARCH}" in \ case "${TARGETARCH}" in \
amd64) ASSET="telemt-x86_64-linux-musl.tar.gz" ;; \ amd64) BIN="${BINARY_AMD64}" ;; \
arm64) ASSET="telemt-aarch64-linux-musl.tar.gz" ;; \ arm64) BIN="${BINARY_ARM64}" ;; \
*) echo "Unsupported TARGETARCH: ${TARGETARCH}" >&2; exit 1 ;; \ *) echo "Unsupported TARGETARCH: ${TARGETARCH}" >&2; exit 1 ;; \
esac; \ esac; \
VERSION="${TELEMT_VERSION#refs/tags/}"; \ echo "Using binary: $BIN"; \
if [ -z "${VERSION}" ] || [ "${VERSION}" = "latest" ]; then \ test -f "$BIN"; \
BASE_URL="https://github.com/${TELEMT_REPOSITORY}/releases/latest/download"; \ cp "$BIN" /telemt
else \
BASE_URL="https://github.com/${TELEMT_REPOSITORY}/releases/download/${VERSION}"; \ # --- Install UPX (arch-aware) ---
fi; \ RUN set -eux; \
case "${TARGETARCH}" in \
amd64) UPX_ARCH="amd64" ;; \
arm64) UPX_ARCH="arm64" ;; \
*) echo "Unsupported TARGETARCH: ${TARGETARCH}" >&2; exit 1 ;; \
esac; \
\
curl -fL \ curl -fL \
--retry 5 \ --retry 5 \
--retry-delay 3 \ --retry-delay 3 \
--connect-timeout 10 \ --connect-timeout 10 \
--max-time 120 \ --max-time 120 \
-o "/tmp/${ASSET}" \ -o /tmp/upx.tar.xz \
"${BASE_URL}/${ASSET}"; \ "https://github.com/telemt/telemt/releases/download/toolchains/upx-${UPX_ARCH}_linux.tar.xz"; \
curl -fL \ \
--retry 5 \ tar -xf /tmp/upx.tar.xz -C /tmp; \
--retry-delay 3 \ install -m 0755 /tmp/upx*/upx /usr/local/bin/upx; \
--connect-timeout 10 \ rm -rf /tmp/upx*
--max-time 120 \
-o "/tmp/${ASSET}.sha256" \ # --- Optimize binary ---
"${BASE_URL}/${ASSET}.sha256"; \ RUN set -eux; \
cd /tmp; \ test -f /telemt; \
sha256sum -c "${ASSET}.sha256"; \
tar -xzf "${ASSET}" -C /tmp; \
test -f /tmp/telemt; \
install -m 0755 /tmp/telemt /telemt; \
strip --strip-unneeded /telemt || true; \ strip --strip-unneeded /telemt || true; \
rm -f "/tmp/${ASSET}" "/tmp/${ASSET}.sha256" /tmp/telemt upx --best --lzma /telemt || true
# ========================== # ==========================
# Debug Image # Debug Image

View File

@ -50,8 +50,6 @@ This document lists all configuration keys accepted by `config.toml`.
| me_d2c_flush_batch_max_bytes | `usize` | `131072` | `4096..=2_097_152`. | Max ME->client payload bytes coalesced before flush. | | me_d2c_flush_batch_max_bytes | `usize` | `131072` | `4096..=2_097_152`. | Max ME->client payload bytes coalesced before flush. |
| me_d2c_flush_batch_max_delay_us | `u64` | `500` | `0..=5000`. | Max microsecond wait for coalescing more ME->client frames (`0` disables timed coalescing). | | me_d2c_flush_batch_max_delay_us | `u64` | `500` | `0..=5000`. | Max microsecond wait for coalescing more ME->client frames (`0` disables timed coalescing). |
| me_d2c_ack_flush_immediate | `bool` | `true` | — | Flushes client writer immediately after quick-ack write. | | me_d2c_ack_flush_immediate | `bool` | `true` | — | Flushes client writer immediately after quick-ack write. |
| me_quota_soft_overshoot_bytes | `u64` | `65536` | `0..=16_777_216`. | Extra per-route quota allowance (bytes) tolerated before writer-side quota enforcement drops route data. |
| me_d2c_frame_buf_shrink_threshold_bytes | `usize` | `262144` | `4096..=16_777_216`. | Threshold for shrinking oversized ME->client frame-aggregation buffers after flush. |
| direct_relay_copy_buf_c2s_bytes | `usize` | `65536` | `4096..=1_048_576`. | Copy buffer size for client->DC direction in direct relay. | | direct_relay_copy_buf_c2s_bytes | `usize` | `65536` | `4096..=1_048_576`. | Copy buffer size for client->DC direction in direct relay. |
| direct_relay_copy_buf_s2c_bytes | `usize` | `262144` | `8192..=2_097_152`. | Copy buffer size for DC->client direction in direct relay. | | direct_relay_copy_buf_s2c_bytes | `usize` | `262144` | `8192..=2_097_152`. | Copy buffer size for DC->client direction in direct relay. |
| crypto_pending_buffer | `usize` | `262144` | — | Max pending ciphertext buffer per client writer (bytes). | | crypto_pending_buffer | `usize` | `262144` | — | Max pending ciphertext buffer per client writer (bytes). |
@ -245,10 +243,6 @@ Note: When `server.proxy_protocol` is enabled, incoming PROXY protocol headers a
| Parameter | Type | Default | Constraints / validation | Description | | Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---| |---|---|---|---|---|
| client_handshake | `u64` | `30` | — | Client handshake timeout. | | client_handshake | `u64` | `30` | — | Client handshake timeout. |
| relay_idle_policy_v2_enabled | `bool` | `true` | — | Enables soft/hard middle-relay client idle policy. |
| relay_client_idle_soft_secs | `u64` | `120` | Must be `> 0`; must be `<= relay_client_idle_hard_secs`. | Soft idle threshold for middle-relay client uplink inactivity (seconds). |
| relay_client_idle_hard_secs | `u64` | `360` | Must be `> 0`; must be `>= relay_client_idle_soft_secs`. | Hard idle threshold for middle-relay client uplink inactivity (seconds). |
| relay_idle_grace_after_downstream_activity_secs | `u64` | `30` | Must be `<= relay_client_idle_hard_secs`. | Extra hard-idle grace after recent downstream activity (seconds). |
| tg_connect | `u64` | `10` | — | Upstream Telegram connect timeout. | | tg_connect | `u64` | `10` | — | Upstream Telegram connect timeout. |
| client_keepalive | `u64` | `15` | — | Client keepalive timeout. | | client_keepalive | `u64` | `15` | — | Client keepalive timeout. |
| client_ack | `u64` | `90` | — | Client ACK timeout. | | client_ack | `u64` | `90` | — | Client ACK timeout. |
@ -261,9 +255,6 @@ Note: When `server.proxy_protocol` is enabled, incoming PROXY protocol headers a
|---|---|---|---|---| |---|---|---|---|---|
| tls_domain | `String` | `"petrovich.ru"` | — | Primary TLS domain used in fake TLS handshake profile. | | tls_domain | `String` | `"petrovich.ru"` | — | Primary TLS domain used in fake TLS handshake profile. |
| tls_domains | `String[]` | `[]` | — | Additional TLS domains for generating multiple links. | | tls_domains | `String[]` | `[]` | — | Additional TLS domains for generating multiple links. |
| unknown_sni_action | `"drop" \| "mask"` | `"drop"` | — | Action for TLS ClientHello with unknown/non-configured SNI. |
| tls_fetch_scope | `String` | `""` | Value is trimmed during load; empty keeps default upstream routing behavior. | Upstream scope tag used for TLS-front metadata fetches. |
| tls_fetch | `Table` | built-in defaults | See `[censorship.tls_fetch]` section below. | TLS-front metadata fetch strategy settings. |
| mask | `bool` | `true` | — | Enables masking/fronting relay mode. | | mask | `bool` | `true` | — | Enables masking/fronting relay mode. |
| mask_host | `String \| null` | `null` | — | Upstream mask host for TLS fronting relay. | | mask_host | `String \| null` | `null` | — | Upstream mask host for TLS fronting relay. |
| mask_port | `u16` | `443` | — | Upstream mask port for TLS fronting relay. | | mask_port | `u16` | `443` | — | Upstream mask port for TLS fronting relay. |
@ -289,18 +280,6 @@ Note: When `server.proxy_protocol` is enabled, incoming PROXY protocol headers a
| mask_timing_normalization_floor_ms | `u64` | `0` | Must be `> 0` when timing normalization is enabled; must be `<= ceiling`. | Lower bound (ms) for masking outcome normalization target. | | mask_timing_normalization_floor_ms | `u64` | `0` | Must be `> 0` when timing normalization is enabled; must be `<= ceiling`. | Lower bound (ms) for masking outcome normalization target. |
| mask_timing_normalization_ceiling_ms | `u64` | `0` | Must be `>= floor`; must be `<= 60000`. | Upper bound (ms) for masking outcome normalization target. | | mask_timing_normalization_ceiling_ms | `u64` | `0` | Must be `>= floor`; must be `<= 60000`. | Upper bound (ms) for masking outcome normalization target. |
## [censorship.tls_fetch]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| profiles | `("modern_chrome_like" \| "modern_firefox_like" \| "compat_tls12" \| "legacy_minimal")[]` | `["modern_chrome_like", "modern_firefox_like", "compat_tls12", "legacy_minimal"]` | Empty list falls back to defaults; values are deduplicated preserving order. | Ordered ClientHello profile fallback chain for TLS-front metadata fetch. |
| strict_route | `bool` | `true` | — | Fails closed on upstream-route connect errors instead of falling back to direct TCP when route is configured. |
| attempt_timeout_ms | `u64` | `5000` | Must be `> 0`. | Timeout budget per one TLS-fetch profile attempt (ms). |
| total_budget_ms | `u64` | `15000` | Must be `> 0`. | Total wall-clock budget across all TLS-fetch attempts (ms). |
| grease_enabled | `bool` | `false` | — | Enables GREASE-style random values in selected ClientHello extensions for fetch traffic. |
| deterministic | `bool` | `false` | — | Enables deterministic ClientHello randomness for debugging/tests. |
| profile_cache_ttl_secs | `u64` | `600` | `0` disables cache. | TTL for winner-profile cache entries used by TLS fetch path. |
### Shape-channel hardening notes (`[censorship]`) ### Shape-channel hardening notes (`[censorship]`)
These parameters are designed to reduce one specific fingerprint source during masking: the exact number of bytes sent from proxy to `mask_host` for invalid or probing traffic. These parameters are designed to reduce one specific fingerprint source during masking: the exact number of bytes sent from proxy to `mask_host` for invalid or probing traffic.

View File

@ -63,12 +63,9 @@ user3 = "00000000000000000000000000000003"
curl -s http://127.0.0.1:9091/v1/users | jq curl -s http://127.0.0.1:9091/v1/users | jq
``` ```
## "Unknown TLS SNI" Error
You probably updated tls_domain, but users are still connecting via old links with the previous domain.
## How to view metrics ## How to view metrics
1. Open the config `nano /etc/telemt/telemt.toml` 1. Open the config `nano /etc/telemt.toml`
2. Add the following parameters 2. Add the following parameters
```toml ```toml
[server] [server]

View File

@ -64,12 +64,9 @@ user3 = "00000000000000000000000000000003"
curl -s http://127.0.0.1:9091/v1/users | jq curl -s http://127.0.0.1:9091/v1/users | jq
``` ```
## Ошибка "Unknown TLS SNI"
Возможно, вы обновили tls_domain, но пользователи всё ещё пытаются подключаться по старым ссылкам с прежним доменом.
## Как посмотреть метрики ## Как посмотреть метрики
1. Открыть конфиг `nano /etc/telemt/telemt.toml` 1. Открыть конфиг `nano /etc/telemt.toml`
2. Добавить следующие параметры 2. Добавить следующие параметры
```toml ```toml
[server] [server]

View File

@ -27,12 +27,12 @@ chmod +x /bin/telemt
**0. Check port and generate secrets** **0. Check port and generate secrets**
The port you have selected for use should not be in the list: The port you have selected for use should be MISSING from the list, when:
```bash ```bash
netstat -lnp netstat -lnp
``` ```
Generate 16 bytes/32 characters in HEX format with OpenSSL or another way: Generate 16 bytes/32 characters HEX with OpenSSL or another way:
```bash ```bash
openssl rand -hex 16 openssl rand -hex 16
``` ```
@ -50,7 +50,7 @@ Save the obtained result somewhere. You will need it later!
**1. Place your config in /etc/telemt/telemt.toml** **1. Place your config in /etc/telemt/telemt.toml**
Create the config directory: Create config directory:
```bash ```bash
mkdir /etc/telemt mkdir /etc/telemt
``` ```
@ -59,7 +59,7 @@ Open nano
```bash ```bash
nano /etc/telemt/telemt.toml nano /etc/telemt/telemt.toml
``` ```
Insert your configuration: paste your config
```toml ```toml
# === General Settings === # === General Settings ===
@ -94,8 +94,7 @@ then Ctrl+S -> Ctrl+X to save
> [!WARNING] > [!WARNING]
> Replace the value of the hello parameter with the value you obtained in step 0. > Replace the value of the hello parameter with the value you obtained in step 0.
> Additionally, change the value of the tls_domain parameter to a different website. > Replace the value of the tls_domain parameter with another website.
> Changing the tls_domain parameter will break all links that use the old domain!
--- ---
@ -106,14 +105,14 @@ useradd -d /opt/telemt -m -r -U telemt
chown -R telemt:telemt /etc/telemt chown -R telemt:telemt /etc/telemt
``` ```
**3. Create service in /etc/systemd/system/telemt.service** **3. Create service on /etc/systemd/system/telemt.service**
Open nano Open nano
```bash ```bash
nano /etc/systemd/system/telemt.service nano /etc/systemd/system/telemt.service
``` ```
Insert this Systemd module: paste this Systemd Module
```bash ```bash
[Unit] [Unit]
Description=Telemt Description=Telemt
@ -148,16 +147,13 @@ systemctl daemon-reload
**6.** For automatic startup at system boot, enter `systemctl enable telemt` **6.** For automatic startup at system boot, enter `systemctl enable telemt`
**7.** To get the link(s), enter: **7.** To get the link(s), enter
```bash ```bash
curl -s http://127.0.0.1:9091/v1/users | jq curl -s http://127.0.0.1:9091/v1/users | jq
``` ```
> Any number of people can use one link. > Any number of people can use one link.
> [!WARNING]
> Only the command from step 7 can provide a working link. Do not try to create it yourself or copy it from anywhere if you are not sure what you are doing!
--- ---
# Telemt via Docker Compose # Telemt via Docker Compose

View File

@ -95,7 +95,6 @@ hello = "00000000000000000000000000000000"
> [!WARNING] > [!WARNING]
> Замените значение параметра hello на значение, которое вы получили в пункте 0. > Замените значение параметра hello на значение, которое вы получили в пункте 0.
> Также замените значение параметра tls_domain на другой сайт. > Также замените значение параметра tls_domain на другой сайт.
> Изменение параметра tls_domain сделает нерабочими все ссылки, использующие старый домен!
--- ---

View File

@ -651,9 +651,6 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
} }
if old.general.me_route_no_writer_mode != new.general.me_route_no_writer_mode if old.general.me_route_no_writer_mode != new.general.me_route_no_writer_mode
|| old.general.me_route_no_writer_wait_ms != new.general.me_route_no_writer_wait_ms || old.general.me_route_no_writer_wait_ms != new.general.me_route_no_writer_wait_ms
|| old.general.me_route_hybrid_max_wait_ms != new.general.me_route_hybrid_max_wait_ms
|| old.general.me_route_blocking_send_timeout_ms
!= new.general.me_route_blocking_send_timeout_ms
|| old.general.me_route_inline_recovery_attempts || old.general.me_route_inline_recovery_attempts
!= new.general.me_route_inline_recovery_attempts != new.general.me_route_inline_recovery_attempts
|| old.general.me_route_inline_recovery_wait_ms || old.general.me_route_inline_recovery_wait_ms

View File

@ -277,8 +277,6 @@ pub(crate) async fn initialize_me_pool(
config.general.me_warn_rate_limit_ms, config.general.me_warn_rate_limit_ms,
config.general.me_route_no_writer_mode, config.general.me_route_no_writer_mode,
config.general.me_route_no_writer_wait_ms, config.general.me_route_no_writer_wait_ms,
config.general.me_route_hybrid_max_wait_ms,
config.general.me_route_blocking_send_timeout_ms,
config.general.me_route_inline_recovery_attempts, config.general.me_route_inline_recovery_attempts,
config.general.me_route_inline_recovery_wait_ms, config.general.me_route_inline_recovery_wait_ms,
); );

View File

@ -115,7 +115,8 @@ pub async fn run() -> std::result::Result<(), Box<dyn std::error::Error>> {
); );
std::process::exit(1); std::process::exit(1);
} }
} else if let Err(e) = std::fs::create_dir_all(data_path) { } else {
if let Err(e) = std::fs::create_dir_all(data_path) {
eprintln!( eprintln!(
"[telemt] Can't create data_path {}: {}", "[telemt] Can't create data_path {}: {}",
data_path.display(), data_path.display(),
@ -123,6 +124,7 @@ pub async fn run() -> std::result::Result<(), Box<dyn std::error::Error>> {
); );
std::process::exit(1); std::process::exit(1);
} }
}
if let Err(e) = std::env::set_current_dir(data_path) { if let Err(e) = std::env::set_current_dir(data_path) {
eprintln!( eprintln!(

View File

@ -1558,40 +1558,6 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
0 0
} }
); );
let _ = writeln!(
out,
"# HELP telemt_me_endpoint_quarantine_unexpected_total ME endpoint quarantines caused by unexpected writer removals"
);
let _ = writeln!(
out,
"# TYPE telemt_me_endpoint_quarantine_unexpected_total counter"
);
let _ = writeln!(
out,
"telemt_me_endpoint_quarantine_unexpected_total {}",
if me_allows_normal {
stats.get_me_endpoint_quarantine_unexpected_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_endpoint_quarantine_draining_suppressed_total Draining writer removals that skipped endpoint quarantine"
);
let _ = writeln!(
out,
"# TYPE telemt_me_endpoint_quarantine_draining_suppressed_total counter"
);
let _ = writeln!(
out,
"telemt_me_endpoint_quarantine_draining_suppressed_total {}",
if me_allows_normal {
stats.get_me_endpoint_quarantine_draining_suppressed_total()
} else {
0
}
);
let _ = writeln!( let _ = writeln!(
out, out,
@ -2352,20 +2318,6 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
0 0
} }
); );
let _ = writeln!(
out,
"# HELP telemt_me_hybrid_timeout_total ME hybrid route timeouts after bounded retry window"
);
let _ = writeln!(out, "# TYPE telemt_me_hybrid_timeout_total counter");
let _ = writeln!(
out,
"telemt_me_hybrid_timeout_total {}",
if me_allows_normal {
stats.get_me_hybrid_timeout_total()
} else {
0
}
);
let _ = writeln!( let _ = writeln!(
out, out,
"# HELP telemt_me_async_recovery_trigger_total Async ME recovery trigger attempts from route path" "# HELP telemt_me_async_recovery_trigger_total Async ME recovery trigger attempts from route path"
@ -2656,9 +2608,6 @@ mod tests {
stats.increment_me_d2c_write_mode(crate::stats::MeD2cWriteMode::Coalesced); stats.increment_me_d2c_write_mode(crate::stats::MeD2cWriteMode::Coalesced);
stats.increment_me_d2c_quota_reject_total(crate::stats::MeD2cQuotaRejectStage::PostWrite); stats.increment_me_d2c_quota_reject_total(crate::stats::MeD2cQuotaRejectStage::PostWrite);
stats.observe_me_d2c_frame_buf_shrink(4096); stats.observe_me_d2c_frame_buf_shrink(4096);
stats.increment_me_endpoint_quarantine_total();
stats.increment_me_endpoint_quarantine_unexpected_total();
stats.increment_me_endpoint_quarantine_draining_suppressed_total();
stats.increment_user_connects("alice"); stats.increment_user_connects("alice");
stats.increment_user_curr_connects("alice"); stats.increment_user_curr_connects("alice");
stats.add_user_octets_from("alice", 1024); stats.add_user_octets_from("alice", 1024);
@ -2709,9 +2658,6 @@ mod tests {
assert!(output.contains("telemt_me_d2c_quota_reject_total{stage=\"post_write\"} 1")); assert!(output.contains("telemt_me_d2c_quota_reject_total{stage=\"post_write\"} 1"));
assert!(output.contains("telemt_me_d2c_frame_buf_shrink_total 1")); assert!(output.contains("telemt_me_d2c_frame_buf_shrink_total 1"));
assert!(output.contains("telemt_me_d2c_frame_buf_shrink_bytes_total 4096")); assert!(output.contains("telemt_me_d2c_frame_buf_shrink_bytes_total 4096"));
assert!(output.contains("telemt_me_endpoint_quarantine_total 1"));
assert!(output.contains("telemt_me_endpoint_quarantine_unexpected_total 1"));
assert!(output.contains("telemt_me_endpoint_quarantine_draining_suppressed_total 1"));
assert!(output.contains("telemt_user_connections_total{user=\"alice\"} 1")); assert!(output.contains("telemt_user_connections_total{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_connections_current{user=\"alice\"} 1")); assert!(output.contains("telemt_user_connections_current{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_octets_from_client{user=\"alice\"} 1024")); assert!(output.contains("telemt_user_octets_from_client{user=\"alice\"} 1024"));
@ -2778,12 +2724,6 @@ mod tests {
assert!(output.contains("# TYPE telemt_me_d2c_write_mode_total counter")); assert!(output.contains("# TYPE telemt_me_d2c_write_mode_total counter"));
assert!(output.contains("# TYPE telemt_me_d2c_batch_frames_bucket_total counter")); assert!(output.contains("# TYPE telemt_me_d2c_batch_frames_bucket_total counter"));
assert!(output.contains("# TYPE telemt_me_d2c_flush_duration_us_bucket_total counter")); assert!(output.contains("# TYPE telemt_me_d2c_flush_duration_us_bucket_total counter"));
assert!(output.contains("# TYPE telemt_me_endpoint_quarantine_total counter"));
assert!(output.contains("# TYPE telemt_me_endpoint_quarantine_unexpected_total counter"));
assert!(
output
.contains("# TYPE telemt_me_endpoint_quarantine_draining_suppressed_total counter")
);
assert!(output.contains("# TYPE telemt_me_writer_removed_total counter")); assert!(output.contains("# TYPE telemt_me_writer_removed_total counter"));
assert!( assert!(
output output

View File

@ -4,7 +4,7 @@ use std::collections::{BTreeSet, HashMap};
use std::future::Future; use std::future::Future;
use std::hash::{BuildHasher, Hash}; use std::hash::{BuildHasher, Hash};
use std::net::{IpAddr, SocketAddr}; use std::net::{IpAddr, SocketAddr};
use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::{Arc, Mutex, OnceLock}; use std::sync::{Arc, Mutex, OnceLock};
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
@ -36,6 +36,7 @@ enum C2MeCommand {
const DESYNC_DEDUP_WINDOW: Duration = Duration::from_secs(60); const DESYNC_DEDUP_WINDOW: Duration = Duration::from_secs(60);
const DESYNC_DEDUP_MAX_ENTRIES: usize = 65_536; const DESYNC_DEDUP_MAX_ENTRIES: usize = 65_536;
const DESYNC_DEDUP_PRUNE_SCAN_LIMIT: usize = 1024;
const DESYNC_FULL_CACHE_EMIT_MIN_INTERVAL: Duration = Duration::from_millis(1000); const DESYNC_FULL_CACHE_EMIT_MIN_INTERVAL: Duration = Duration::from_millis(1000);
const DESYNC_ERROR_CLASS: &str = "frame_too_large_crypto_desync"; const DESYNC_ERROR_CLASS: &str = "frame_too_large_crypto_desync";
const C2ME_CHANNEL_CAPACITY_FALLBACK: usize = 128; const C2ME_CHANNEL_CAPACITY_FALLBACK: usize = 128;
@ -56,21 +57,12 @@ const ME_D2C_FRAME_BUF_SHRINK_HYSTERESIS_FACTOR: usize = 2;
const ME_D2C_SINGLE_WRITE_COALESCE_MAX_BYTES: usize = 128 * 1024; const ME_D2C_SINGLE_WRITE_COALESCE_MAX_BYTES: usize = 128 * 1024;
const QUOTA_RESERVE_SPIN_RETRIES: usize = 32; const QUOTA_RESERVE_SPIN_RETRIES: usize = 32;
static DESYNC_DEDUP: OnceLock<DashMap<u64, Instant>> = OnceLock::new(); static DESYNC_DEDUP: OnceLock<DashMap<u64, Instant>> = OnceLock::new();
static DESYNC_DEDUP_PREVIOUS: OnceLock<DashMap<u64, Instant>> = OnceLock::new();
static DESYNC_HASHER: OnceLock<RandomState> = OnceLock::new(); static DESYNC_HASHER: OnceLock<RandomState> = OnceLock::new();
static DESYNC_FULL_CACHE_LAST_EMIT_AT: OnceLock<Mutex<Option<Instant>>> = OnceLock::new(); static DESYNC_FULL_CACHE_LAST_EMIT_AT: OnceLock<Mutex<Option<Instant>>> = OnceLock::new();
static DESYNC_DEDUP_ROTATION_STATE: OnceLock<Mutex<DesyncDedupRotationState>> = OnceLock::new(); static DESYNC_DEDUP_EVER_SATURATED: OnceLock<AtomicBool> = OnceLock::new();
// Invariant for async callers:
// this std::sync::Mutex is allowed only because critical sections are short,
// synchronous, and MUST never cross an `.await`.
static RELAY_IDLE_CANDIDATE_REGISTRY: OnceLock<Mutex<RelayIdleCandidateRegistry>> = OnceLock::new(); static RELAY_IDLE_CANDIDATE_REGISTRY: OnceLock<Mutex<RelayIdleCandidateRegistry>> = OnceLock::new();
static RELAY_IDLE_MARK_SEQ: AtomicU64 = AtomicU64::new(0); static RELAY_IDLE_MARK_SEQ: AtomicU64 = AtomicU64::new(0);
#[derive(Default)]
struct DesyncDedupRotationState {
current_started_at: Option<Instant>,
}
struct RelayForensicsState { struct RelayForensicsState {
trace_id: u64, trace_id: u64,
conn_id: u64, conn_id: u64,
@ -103,7 +95,6 @@ fn relay_idle_candidate_registry() -> &'static Mutex<RelayIdleCandidateRegistry>
fn relay_idle_candidate_registry_lock() -> std::sync::MutexGuard<'static, RelayIdleCandidateRegistry> fn relay_idle_candidate_registry_lock() -> std::sync::MutexGuard<'static, RelayIdleCandidateRegistry>
{ {
// Keep lock scope narrow and synchronous: callers must drop guard before any `.await`.
let registry = relay_idle_candidate_registry(); let registry = relay_idle_candidate_registry();
match registry.lock() { match registry.lock() {
Ok(guard) => guard, Ok(guard) => guard,
@ -321,76 +312,64 @@ fn should_emit_full_desync(key: u64, all_full: bool, now: Instant) -> bool {
return true; return true;
} }
let dedup_current = DESYNC_DEDUP.get_or_init(DashMap::new); let dedup = DESYNC_DEDUP.get_or_init(DashMap::new);
let dedup_previous = DESYNC_DEDUP_PREVIOUS.get_or_init(DashMap::new); let saturated_before = dedup.len() >= DESYNC_DEDUP_MAX_ENTRIES;
let rotation_state = let ever_saturated = DESYNC_DEDUP_EVER_SATURATED.get_or_init(|| AtomicBool::new(false));
DESYNC_DEDUP_ROTATION_STATE.get_or_init(|| Mutex::new(DesyncDedupRotationState::default())); if saturated_before {
ever_saturated.store(true, Ordering::Relaxed);
let mut state = match rotation_state.lock() {
Ok(guard) => guard,
Err(poisoned) => {
let mut guard = poisoned.into_inner();
*guard = DesyncDedupRotationState::default();
rotation_state.clear_poison();
guard
}
};
let rotate_now = match state.current_started_at {
Some(current_started_at) => match now.checked_duration_since(current_started_at) {
Some(elapsed) => elapsed >= DESYNC_DEDUP_WINDOW,
None => true,
},
None => true,
};
if rotate_now {
dedup_previous.clear();
for entry in dedup_current.iter() {
dedup_previous.insert(*entry.key(), *entry.value());
}
dedup_current.clear();
state.current_started_at = Some(now);
} }
if let Some(seen_at) = dedup_current.get(&key).map(|entry| *entry.value()) { if let Some(mut seen_at) = dedup.get_mut(&key) {
let within_window = match now.checked_duration_since(seen_at) { if now.duration_since(*seen_at) >= DESYNC_DEDUP_WINDOW {
Some(elapsed) => elapsed < DESYNC_DEDUP_WINDOW, *seen_at = now;
None => true,
};
if within_window {
return false;
}
dedup_current.insert(key, now);
return true; return true;
} }
if let Some(seen_at) = dedup_previous.get(&key).map(|entry| *entry.value()) {
let within_window = match now.checked_duration_since(seen_at) {
Some(elapsed) => elapsed < DESYNC_DEDUP_WINDOW,
None => true,
};
if within_window {
// Keep the original timestamp when promoting from previous bucket,
// so dedup expiry remains tied to first-seen time.
dedup_current.insert(key, seen_at);
return false; return false;
} }
dedup_previous.remove(&key);
if dedup.len() >= DESYNC_DEDUP_MAX_ENTRIES {
let mut stale_keys = Vec::new();
let mut oldest_candidate: Option<(u64, Instant)> = None;
for entry in dedup.iter().take(DESYNC_DEDUP_PRUNE_SCAN_LIMIT) {
let key = *entry.key();
let seen_at = *entry.value();
match oldest_candidate {
Some((_, oldest_seen)) if seen_at >= oldest_seen => {}
_ => oldest_candidate = Some((key, seen_at)),
} }
if dedup_current.len() >= DESYNC_DEDUP_MAX_ENTRIES { if now.duration_since(seen_at) >= DESYNC_DEDUP_WINDOW {
// Bounded eviction path: rotate buckets instead of scanning/evicting stale_keys.push(*entry.key());
// arbitrary entries from a saturated single map.
dedup_previous.clear();
for entry in dedup_current.iter() {
dedup_previous.insert(*entry.key(), *entry.value());
} }
dedup_current.clear(); }
state.current_started_at = Some(now); for stale_key in stale_keys {
dedup_current.insert(key, now); dedup.remove(&stale_key);
}
if dedup.len() >= DESYNC_DEDUP_MAX_ENTRIES {
let Some((evict_key, _)) = oldest_candidate else {
return false;
};
dedup.remove(&evict_key);
dedup.insert(key, now);
return should_emit_full_desync_full_cache(now);
}
}
dedup.insert(key, now);
let saturated_after = dedup.len() >= DESYNC_DEDUP_MAX_ENTRIES;
// Preserve the first sequential insert that reaches capacity as a normal
// emit, while still gating concurrent newcomer churn after the cache has
// ever been observed at saturation.
let was_ever_saturated = if saturated_after {
ever_saturated.swap(true, Ordering::Relaxed)
} else {
ever_saturated.load(Ordering::Relaxed)
};
if saturated_before || (saturated_after && was_ever_saturated) {
should_emit_full_desync_full_cache(now) should_emit_full_desync_full_cache(now)
} else { } else {
dedup_current.insert(key, now);
true true
} }
} }
@ -426,20 +405,8 @@ fn clear_desync_dedup_for_testing() {
if let Some(dedup) = DESYNC_DEDUP.get() { if let Some(dedup) = DESYNC_DEDUP.get() {
dedup.clear(); dedup.clear();
} }
if let Some(dedup_previous) = DESYNC_DEDUP_PREVIOUS.get() { if let Some(ever_saturated) = DESYNC_DEDUP_EVER_SATURATED.get() {
dedup_previous.clear(); ever_saturated.store(false, Ordering::Relaxed);
}
if let Some(rotation_state) = DESYNC_DEDUP_ROTATION_STATE.get() {
match rotation_state.lock() {
Ok(mut guard) => {
*guard = DesyncDedupRotationState::default();
}
Err(poisoned) => {
let mut guard = poisoned.into_inner();
*guard = DesyncDedupRotationState::default();
rotation_state.clear_poison();
}
}
} }
if let Some(last_emit_at) = DESYNC_FULL_CACHE_LAST_EMIT_AT.get() { if let Some(last_emit_at) = DESYNC_FULL_CACHE_LAST_EMIT_AT.get() {
match last_emit_at.lock() { match last_emit_at.lock() {

View File

@ -128,8 +128,6 @@ pub struct Stats {
me_crc_mismatch: AtomicU64, me_crc_mismatch: AtomicU64,
me_seq_mismatch: AtomicU64, me_seq_mismatch: AtomicU64,
me_endpoint_quarantine_total: AtomicU64, me_endpoint_quarantine_total: AtomicU64,
me_endpoint_quarantine_unexpected_total: AtomicU64,
me_endpoint_quarantine_draining_suppressed_total: AtomicU64,
me_kdf_drift_total: AtomicU64, me_kdf_drift_total: AtomicU64,
me_kdf_port_only_drift_total: AtomicU64, me_kdf_port_only_drift_total: AtomicU64,
me_hardswap_pending_reuse_total: AtomicU64, me_hardswap_pending_reuse_total: AtomicU64,
@ -236,7 +234,6 @@ pub struct Stats {
me_writer_restored_same_endpoint_total: AtomicU64, me_writer_restored_same_endpoint_total: AtomicU64,
me_writer_restored_fallback_total: AtomicU64, me_writer_restored_fallback_total: AtomicU64,
me_no_writer_failfast_total: AtomicU64, me_no_writer_failfast_total: AtomicU64,
me_hybrid_timeout_total: AtomicU64,
me_async_recovery_trigger_total: AtomicU64, me_async_recovery_trigger_total: AtomicU64,
me_inline_recovery_total: AtomicU64, me_inline_recovery_total: AtomicU64,
ip_reservation_rollback_tcp_limit_total: AtomicU64, ip_reservation_rollback_tcp_limit_total: AtomicU64,
@ -1206,11 +1203,6 @@ impl Stats {
.fetch_add(1, Ordering::Relaxed); .fetch_add(1, Ordering::Relaxed);
} }
} }
pub fn increment_me_hybrid_timeout_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_hybrid_timeout_total.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_async_recovery_trigger_total(&self) { pub fn increment_me_async_recovery_trigger_total(&self) {
if self.telemetry_me_allows_normal() { if self.telemetry_me_allows_normal() {
self.me_async_recovery_trigger_total self.me_async_recovery_trigger_total
@ -1253,18 +1245,6 @@ impl Stats {
.fetch_add(1, Ordering::Relaxed); .fetch_add(1, Ordering::Relaxed);
} }
} }
pub fn increment_me_endpoint_quarantine_unexpected_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_endpoint_quarantine_unexpected_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_endpoint_quarantine_draining_suppressed_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_endpoint_quarantine_draining_suppressed_total
.fetch_add(1, Ordering::Relaxed);
}
}
pub fn increment_me_kdf_drift_total(&self) { pub fn increment_me_kdf_drift_total(&self) {
if self.telemetry_me_allows_normal() { if self.telemetry_me_allows_normal() {
self.me_kdf_drift_total.fetch_add(1, Ordering::Relaxed); self.me_kdf_drift_total.fetch_add(1, Ordering::Relaxed);
@ -1517,14 +1497,6 @@ impl Stats {
pub fn get_me_endpoint_quarantine_total(&self) -> u64 { pub fn get_me_endpoint_quarantine_total(&self) -> u64 {
self.me_endpoint_quarantine_total.load(Ordering::Relaxed) self.me_endpoint_quarantine_total.load(Ordering::Relaxed)
} }
pub fn get_me_endpoint_quarantine_unexpected_total(&self) -> u64 {
self.me_endpoint_quarantine_unexpected_total
.load(Ordering::Relaxed)
}
pub fn get_me_endpoint_quarantine_draining_suppressed_total(&self) -> u64 {
self.me_endpoint_quarantine_draining_suppressed_total
.load(Ordering::Relaxed)
}
pub fn get_me_kdf_drift_total(&self) -> u64 { pub fn get_me_kdf_drift_total(&self) -> u64 {
self.me_kdf_drift_total.load(Ordering::Relaxed) self.me_kdf_drift_total.load(Ordering::Relaxed)
} }
@ -1904,9 +1876,6 @@ impl Stats {
pub fn get_me_no_writer_failfast_total(&self) -> u64 { pub fn get_me_no_writer_failfast_total(&self) -> u64 {
self.me_no_writer_failfast_total.load(Ordering::Relaxed) self.me_no_writer_failfast_total.load(Ordering::Relaxed)
} }
pub fn get_me_hybrid_timeout_total(&self) -> u64 {
self.me_hybrid_timeout_total.load(Ordering::Relaxed)
}
pub fn get_me_async_recovery_trigger_total(&self) -> u64 { pub fn get_me_async_recovery_trigger_total(&self) -> u64 {
self.me_async_recovery_trigger_total.load(Ordering::Relaxed) self.me_async_recovery_trigger_total.load(Ordering::Relaxed)
} }

View File

@ -244,11 +244,12 @@ fn order_profiles(
if let Some(pos) = ordered if let Some(pos) = ordered
.iter() .iter()
.position(|profile| *profile == cached.profile) .position(|profile| *profile == cached.profile)
&& pos != 0
{ {
if pos != 0 {
ordered.swap(0, pos); ordered.swap(0, pos);
} }
} }
}
ordered ordered
} }

View File

@ -314,6 +314,53 @@ async fn run_update_cycle(
reinit_tx: &mpsc::Sender<MeReinitTrigger>, reinit_tx: &mpsc::Sender<MeReinitTrigger>,
) { ) {
let upstream = pool.upstream.clone(); let upstream = pool.upstream.clone();
pool.update_runtime_reinit_policy(
cfg.general.hardswap,
cfg.general.me_pool_drain_ttl_secs,
cfg.general.me_instadrain,
cfg.general.me_pool_drain_threshold,
cfg.general.me_pool_drain_soft_evict_enabled,
cfg.general.me_pool_drain_soft_evict_grace_secs,
cfg.general.me_pool_drain_soft_evict_per_writer,
cfg.general.me_pool_drain_soft_evict_budget_per_core,
cfg.general.me_pool_drain_soft_evict_cooldown_ms,
cfg.general.effective_me_pool_force_close_secs(),
cfg.general.me_pool_min_fresh_ratio,
cfg.general.me_hardswap_warmup_delay_min_ms,
cfg.general.me_hardswap_warmup_delay_max_ms,
cfg.general.me_hardswap_warmup_extra_passes,
cfg.general.me_hardswap_warmup_pass_backoff_base_ms,
cfg.general.me_bind_stale_mode,
cfg.general.me_bind_stale_ttl_secs,
cfg.general.me_secret_atomic_snapshot,
cfg.general.me_deterministic_writer_sort,
cfg.general.me_writer_pick_mode,
cfg.general.me_writer_pick_sample_size,
cfg.general.me_single_endpoint_shadow_writers,
cfg.general.me_single_endpoint_outage_mode_enabled,
cfg.general.me_single_endpoint_outage_disable_quarantine,
cfg.general.me_single_endpoint_outage_backoff_min_ms,
cfg.general.me_single_endpoint_outage_backoff_max_ms,
cfg.general.me_single_endpoint_shadow_rotate_every_secs,
cfg.general.me_floor_mode,
cfg.general.me_adaptive_floor_idle_secs,
cfg.general.me_adaptive_floor_min_writers_single_endpoint,
cfg.general.me_adaptive_floor_min_writers_multi_endpoint,
cfg.general.me_adaptive_floor_recover_grace_secs,
cfg.general.me_adaptive_floor_writers_per_core_total,
cfg.general.me_adaptive_floor_cpu_cores_override,
cfg.general
.me_adaptive_floor_max_extra_writers_single_per_core,
cfg.general
.me_adaptive_floor_max_extra_writers_multi_per_core,
cfg.general.me_adaptive_floor_max_active_writers_per_core,
cfg.general.me_adaptive_floor_max_warm_writers_per_core,
cfg.general.me_adaptive_floor_max_active_writers_global,
cfg.general.me_adaptive_floor_max_warm_writers_global,
cfg.general.me_health_interval_ms_unhealthy,
cfg.general.me_health_interval_ms_healthy,
cfg.general.me_warn_rate_limit_ms,
);
let required_cfg_snapshots = cfg.general.me_config_stable_snapshots.max(1); let required_cfg_snapshots = cfg.general.me_config_stable_snapshots.max(1);
let required_secret_snapshots = cfg.general.proxy_secret_stable_snapshots.max(1); let required_secret_snapshots = cfg.general.proxy_secret_stable_snapshots.max(1);

View File

@ -161,7 +161,7 @@ impl MePool {
} else { } else {
let connect_fut = async { let connect_fut = async {
if addr.is_ipv6() if addr.is_ipv6()
&& let Some(v6) = self.nat_runtime.detected_ipv6 && let Some(v6) = self.detected_ipv6
{ {
match TcpSocket::new_v6() { match TcpSocket::new_v6() {
Ok(sock) => { Ok(sock) => {
@ -305,7 +305,7 @@ impl MePool {
} }
MeSocksKdfPolicy::Compat => { MeSocksKdfPolicy::Compat => {
self.stats.increment_me_socks_kdf_compat_fallback(); self.stats.increment_me_socks_kdf_compat_fallback();
if self.nat_runtime.nat_probe { if self.nat_probe {
let bind_ip = Self::direct_bind_ip_for_stun(family, upstream_egress); let bind_ip = Self::direct_bind_ip_for_stun(family, upstream_egress);
self.maybe_reflect_public_addr(family, bind_ip).await self.maybe_reflect_public_addr(family, bind_ip).await
} else { } else {
@ -313,7 +313,7 @@ impl MePool {
} }
} }
} }
} else if self.nat_runtime.nat_probe { } else if self.nat_probe {
let bind_ip = Self::direct_bind_ip_for_stun(family, upstream_egress); let bind_ip = Self::direct_bind_ip_for_stun(family, upstream_egress);
self.maybe_reflect_public_addr(family, bind_ip).await self.maybe_reflect_public_addr(family, bind_ip).await
} else { } else {
@ -343,10 +343,7 @@ impl MePool {
.unwrap_or_default() .unwrap_or_default()
.as_secs() as u32; .as_secs() as u32;
let secret_atomic_snapshot = self let secret_atomic_snapshot = self.secret_atomic_snapshot.load(Ordering::Relaxed);
.writer_selection_policy
.secret_atomic_snapshot
.load(Ordering::Relaxed);
let (ks, secret) = if secret_atomic_snapshot { let (ks, secret) = if secret_atomic_snapshot {
let snapshot = self.secret_snapshot().await; let snapshot = self.secret_snapshot().await;
(snapshot.key_selector, snapshot.secret) (snapshot.key_selector, snapshot.secret)

View File

@ -7,8 +7,6 @@ use std::sync::Arc;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use rand::RngExt; use rand::RngExt;
use tokio::sync::Semaphore;
use tokio::task::JoinSet;
use tracing::{debug, info, warn}; use tracing::{debug, info, warn};
use crate::config::MeFloorMode; use crate::config::MeFloorMode;
@ -16,7 +14,6 @@ use crate::crypto::SecureRandom;
use crate::network::IpFamily; use crate::network::IpFamily;
use super::MePool; use super::MePool;
use super::pool::MeFamilyRuntimeState;
const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff
#[allow(dead_code)] #[allow(dead_code)]
@ -30,9 +27,6 @@ const HEALTH_RECONNECT_BUDGET_PER_CORE: usize = 2;
const HEALTH_RECONNECT_BUDGET_PER_DC: usize = 1; const HEALTH_RECONNECT_BUDGET_PER_DC: usize = 1;
const HEALTH_RECONNECT_BUDGET_MIN: usize = 4; const HEALTH_RECONNECT_BUDGET_MIN: usize = 4;
const HEALTH_RECONNECT_BUDGET_MAX: usize = 128; const HEALTH_RECONNECT_BUDGET_MAX: usize = 128;
const FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD: u32 = 5;
const FAMILY_SUPPRESS_DURATION_SECS: u64 = 60;
const FAMILY_RECOVER_SUCCESS_STREAK_TARGET: u32 = 2;
const HEALTH_DRAIN_CLOSE_BUDGET_PER_CORE: usize = 16; const HEALTH_DRAIN_CLOSE_BUDGET_PER_CORE: usize = 16;
const HEALTH_DRAIN_CLOSE_BUDGET_MIN: usize = 16; const HEALTH_DRAIN_CLOSE_BUDGET_MIN: usize = 16;
const HEALTH_DRAIN_CLOSE_BUDGET_MAX: usize = 256; const HEALTH_DRAIN_CLOSE_BUDGET_MAX: usize = 256;
@ -62,17 +56,6 @@ struct FamilyFloorPlan {
target_writers_total: usize, target_writers_total: usize,
} }
#[derive(Debug)]
struct FamilyReconnectOutcome {
key: (i32, IpFamily),
dc: i32,
family: IpFamily,
alive: usize,
required: usize,
endpoint_count: usize,
restored: usize,
}
pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_connections: usize) { pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_connections: usize) {
let mut backoff: HashMap<(i32, IpFamily), u64> = HashMap::new(); let mut backoff: HashMap<(i32, IpFamily), u64> = HashMap::new();
let mut next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new(); let mut next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new();
@ -95,7 +78,6 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
}; };
tokio::time::sleep(interval).await; tokio::time::sleep(interval).await;
pool.prune_closed_writers().await; pool.prune_closed_writers().await;
pool.sweep_endpoint_quarantine().await;
reap_draining_writers(&pool, &mut drain_warn_next_allowed).await; reap_draining_writers(&pool, &mut drain_warn_next_allowed).await;
let v4_degraded = check_family( let v4_degraded = check_family(
IpFamily::V4, IpFamily::V4,
@ -131,8 +113,6 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
&mut floor_warn_next_allowed, &mut floor_warn_next_allowed,
) )
.await; .await;
update_family_runtime_state(&pool, IpFamily::V4, v4_degraded);
update_family_runtime_state(&pool, IpFamily::V6, v6_degraded);
degraded_interval = v4_degraded || v6_degraded; degraded_interval = v4_degraded || v6_degraded;
} }
} }
@ -155,11 +135,9 @@ pub(super) async fn reap_draining_writers(
let now_epoch_secs = MePool::now_epoch_secs(); let now_epoch_secs = MePool::now_epoch_secs();
let now = Instant::now(); let now = Instant::now();
let drain_ttl_secs = pool let drain_ttl_secs = pool
.drain_runtime
.me_pool_drain_ttl_secs .me_pool_drain_ttl_secs
.load(std::sync::atomic::Ordering::Relaxed); .load(std::sync::atomic::Ordering::Relaxed);
let drain_threshold = pool let drain_threshold = pool
.drain_runtime
.me_pool_drain_threshold .me_pool_drain_threshold
.load(std::sync::atomic::Ordering::Relaxed); .load(std::sync::atomic::Ordering::Relaxed);
let activity = pool.registry.writer_activity_snapshot().await; let activity = pool.registry.writer_activity_snapshot().await;
@ -243,10 +221,7 @@ pub(super) async fn reap_draining_writers(
endpoint = %writer.addr, endpoint = %writer.addr,
generation = writer.generation, generation = writer.generation,
drain_ttl_secs, drain_ttl_secs,
force_close_secs = pool force_close_secs = pool.me_pool_force_close_secs.load(std::sync::atomic::Ordering::Relaxed),
.drain_runtime
.me_pool_force_close_secs
.load(std::sync::atomic::Ordering::Relaxed),
allow_drain_fallback = writer.allow_drain_fallback, allow_drain_fallback = writer.allow_drain_fallback,
"ME draining writer remains non-empty past drain TTL" "ME draining writer remains non-empty past drain TTL"
); );
@ -390,8 +365,7 @@ async fn check_family(
endpoints.sort_unstable(); endpoints.sort_unstable();
endpoints.dedup(); endpoints.dedup();
} }
let reconnect_budget = health_reconnect_budget(pool, dc_endpoints.len()); let mut reconnect_budget = health_reconnect_budget(pool, dc_endpoints.len());
let reconnect_sem = Arc::new(Semaphore::new(reconnect_budget));
if pool.floor_mode() == MeFloorMode::Static { if pool.floor_mode() == MeFloorMode::Static {
adaptive_idle_since.clear(); adaptive_idle_since.clear();
@ -448,10 +422,6 @@ async fn check_family(
floor_plan.active_writers_current, floor_plan.active_writers_current,
floor_plan.warm_writers_current, floor_plan.warm_writers_current,
); );
let live_writer_ids_by_addr = Arc::new(live_writer_ids_by_addr);
let writer_idle_since = Arc::new(writer_idle_since);
let bound_clients_by_writer = Arc::new(bound_clients_by_writer);
let mut reconnect_set = JoinSet::<FamilyReconnectOutcome>::new();
for (dc, endpoints) in dc_endpoints { for (dc, endpoints) in dc_endpoints {
if endpoints.is_empty() { if endpoints.is_empty() {
@ -491,7 +461,7 @@ async fn check_family(
required, required,
outage_backoff, outage_backoff,
outage_next_attempt, outage_next_attempt,
&reconnect_sem, &mut reconnect_budget,
) )
.await; .await;
continue; continue;
@ -525,9 +495,9 @@ async fn check_family(
&endpoints, &endpoints,
alive, alive,
required, required,
live_writer_ids_by_addr.as_ref(), &live_writer_ids_by_addr,
writer_idle_since.as_ref(), &writer_idle_since,
bound_clients_by_writer.as_ref(), &bound_clients_by_writer,
idle_refresh_next_attempt, idle_refresh_next_attempt,
) )
.await; .await;
@ -540,8 +510,8 @@ async fn check_family(
&endpoints, &endpoints,
alive, alive,
required, required,
live_writer_ids_by_addr.as_ref(), &live_writer_ids_by_addr,
bound_clients_by_writer.as_ref(), &bound_clients_by_writer,
shadow_rotate_deadline, shadow_rotate_deadline,
) )
.await; .await;
@ -551,8 +521,8 @@ async fn check_family(
family_degraded = true; family_degraded = true;
let now = Instant::now(); let now = Instant::now();
if reconnect_sem.available_permits() == 0 { if reconnect_budget == 0 {
let base_ms = pool.reconnect_runtime.me_reconnect_backoff_base.as_millis() as u64; let base_ms = pool.me_reconnect_backoff_base.as_millis() as u64;
let next_ms = (*backoff.get(&key).unwrap_or(&base_ms)).max(base_ms); let next_ms = (*backoff.get(&key).unwrap_or(&base_ms)).max(base_ms);
let jitter = next_ms / JITTER_FRAC_NUM; let jitter = next_ms / JITTER_FRAC_NUM;
let wait = Duration::from_millis(next_ms) let wait = Duration::from_millis(next_ms)
@ -575,10 +545,7 @@ async fn check_family(
continue; continue;
} }
let max_concurrent = pool let max_concurrent = pool.me_reconnect_max_concurrent_per_dc.max(1) as usize;
.reconnect_runtime
.me_reconnect_max_concurrent_per_dc
.max(1) as usize;
if *inflight.get(&key).unwrap_or(&0) >= max_concurrent { if *inflight.get(&key).unwrap_or(&0) >= max_concurrent {
continue; continue;
} }
@ -597,165 +564,117 @@ async fn check_family(
continue; continue;
} }
*inflight.entry(key).or_insert(0) += 1; *inflight.entry(key).or_insert(0) += 1;
let pool_for_reconnect = pool.clone();
let rng_for_reconnect = rng.clone();
let reconnect_sem_for_dc = reconnect_sem.clone();
let endpoints_for_dc = endpoints.clone();
let live_writer_ids_by_addr_for_dc = live_writer_ids_by_addr.clone();
let writer_idle_since_for_dc = writer_idle_since.clone();
let bound_clients_by_writer_for_dc = bound_clients_by_writer.clone();
let active_cap_effective_total = floor_plan.active_cap_effective_total;
reconnect_set.spawn(async move {
let mut restored = 0usize; let mut restored = 0usize;
for _ in 0..missing { for _ in 0..missing {
let Ok(reconnect_permit) = reconnect_sem_for_dc.clone().try_acquire_owned() else { if reconnect_budget == 0 {
break; break;
}; }
if pool_for_reconnect.active_contour_writer_count_total().await reconnect_budget = reconnect_budget.saturating_sub(1);
>= active_cap_effective_total if pool.active_contour_writer_count_total().await
>= floor_plan.active_cap_effective_total
{ {
let swapped = maybe_swap_idle_writer_for_cap( let swapped = maybe_swap_idle_writer_for_cap(
&pool_for_reconnect, pool,
&rng_for_reconnect, rng,
dc, dc,
family, family,
&endpoints_for_dc, &endpoints,
live_writer_ids_by_addr_for_dc.as_ref(), &live_writer_ids_by_addr,
writer_idle_since_for_dc.as_ref(), &writer_idle_since,
bound_clients_by_writer_for_dc.as_ref(), &bound_clients_by_writer,
) )
.await; .await;
if swapped { if swapped {
pool_for_reconnect pool.stats.increment_me_floor_swap_idle_total();
.stats
.increment_me_floor_swap_idle_total();
restored += 1; restored += 1;
continue; continue;
} }
pool_for_reconnect pool.stats.increment_me_floor_cap_block_total();
.stats pool.stats.increment_me_floor_swap_idle_failed_total();
.increment_me_floor_cap_block_total();
pool_for_reconnect
.stats
.increment_me_floor_swap_idle_failed_total();
debug!( debug!(
dc = %dc, dc = %dc,
?family, ?family,
alive, alive,
required, required,
active_cap_effective_total, active_cap_effective_total = floor_plan.active_cap_effective_total,
"Adaptive floor cap reached, reconnect attempt blocked" "Adaptive floor cap reached, reconnect attempt blocked"
); );
break; break;
} }
let res = tokio::time::timeout( let res = tokio::time::timeout(
pool_for_reconnect.reconnect_runtime.me_one_timeout, pool.me_one_timeout,
pool_for_reconnect.connect_endpoints_round_robin( pool.connect_endpoints_round_robin(dc, &endpoints, rng.as_ref()),
dc,
&endpoints_for_dc,
rng_for_reconnect.as_ref(),
),
) )
.await; .await;
match res { match res {
Ok(true) => { Ok(true) => {
restored += 1; restored += 1;
pool_for_reconnect.stats.increment_me_reconnect_success(); pool.stats.increment_me_reconnect_success();
} }
Ok(false) => { Ok(false) => {
pool_for_reconnect.stats.increment_me_reconnect_attempt(); pool.stats.increment_me_reconnect_attempt();
debug!(dc = %dc, ?family, "ME round-robin reconnect failed") debug!(dc = %dc, ?family, "ME round-robin reconnect failed")
} }
Err(_) => { Err(_) => {
pool_for_reconnect.stats.increment_me_reconnect_attempt(); pool.stats.increment_me_reconnect_attempt();
debug!(dc = %dc, ?family, "ME reconnect timed out"); debug!(dc = %dc, ?family, "ME reconnect timed out");
} }
} }
drop(reconnect_permit);
} }
FamilyReconnectOutcome { let now_alive = alive + restored;
key, if now_alive >= required {
dc,
family,
alive,
required,
endpoint_count: endpoints_for_dc.len(),
restored,
}
});
}
while let Some(joined) = reconnect_set.join_next().await {
let outcome = match joined {
Ok(outcome) => outcome,
Err(join_error) => {
debug!(error = %join_error, "Health reconnect task failed");
continue;
}
};
let now = Instant::now();
let now_alive = outcome.alive + outcome.restored;
if now_alive >= outcome.required {
info!( info!(
dc = %outcome.dc, dc = %dc,
family = ?outcome.family, ?family,
alive = now_alive, alive = now_alive,
required = outcome.required, required,
endpoint_count = outcome.endpoint_count, endpoint_count = endpoints.len(),
"ME writer floor restored for DC" "ME writer floor restored for DC"
); );
backoff.insert( backoff.insert(key, pool.me_reconnect_backoff_base.as_millis() as u64);
outcome.key, let jitter = pool.me_reconnect_backoff_base.as_millis() as u64 / JITTER_FRAC_NUM;
pool.reconnect_runtime.me_reconnect_backoff_base.as_millis() as u64, let wait = pool.me_reconnect_backoff_base
);
let jitter = pool.reconnect_runtime.me_reconnect_backoff_base.as_millis() as u64
/ JITTER_FRAC_NUM;
let wait = pool.reconnect_runtime.me_reconnect_backoff_base
+ Duration::from_millis(rand::rng().random_range(0..=jitter.max(1))); + Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
next_attempt.insert(outcome.key, now + wait); next_attempt.insert(key, now + wait);
} else { } else {
let curr = *backoff let curr = *backoff
.get(&outcome.key) .get(&key)
.unwrap_or(&(pool.reconnect_runtime.me_reconnect_backoff_base.as_millis() as u64)); .unwrap_or(&(pool.me_reconnect_backoff_base.as_millis() as u64));
let next_ms = (curr.saturating_mul(2)) let next_ms =
.min(pool.reconnect_runtime.me_reconnect_backoff_cap.as_millis() as u64); (curr.saturating_mul(2)).min(pool.me_reconnect_backoff_cap.as_millis() as u64);
backoff.insert(outcome.key, next_ms); backoff.insert(key, next_ms);
let jitter = next_ms / JITTER_FRAC_NUM; let jitter = next_ms / JITTER_FRAC_NUM;
let wait = Duration::from_millis(next_ms) let wait = Duration::from_millis(next_ms)
+ Duration::from_millis(rand::rng().random_range(0..=jitter.max(1))); + Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
next_attempt.insert(outcome.key, now + wait); next_attempt.insert(key, now + wait);
if pool.is_runtime_ready() { if pool.is_runtime_ready() {
let warn_cooldown = pool.warn_rate_limit_duration(); let warn_cooldown = pool.warn_rate_limit_duration();
if should_emit_rate_limited_warn( if should_emit_rate_limited_warn(floor_warn_next_allowed, key, now, warn_cooldown) {
floor_warn_next_allowed,
outcome.key,
now,
warn_cooldown,
) {
warn!( warn!(
dc = %outcome.dc, dc = %dc,
family = ?outcome.family, ?family,
alive = now_alive, alive = now_alive,
required = outcome.required, required,
endpoint_count = outcome.endpoint_count, endpoint_count = endpoints.len(),
backoff_ms = next_ms, backoff_ms = next_ms,
"DC writer floor is below required level, scheduled reconnect" "DC writer floor is below required level, scheduled reconnect"
); );
} }
} else { } else {
info!( info!(
dc = %outcome.dc, dc = %dc,
family = ?outcome.family, ?family,
alive = now_alive, alive = now_alive,
required = outcome.required, required,
endpoint_count = outcome.endpoint_count, endpoint_count = endpoints.len(),
backoff_ms = next_ms, backoff_ms = next_ms,
"DC writer floor is below required level during startup, scheduled reconnect" "DC writer floor is below required level during startup, scheduled reconnect"
); );
} }
} }
if let Some(v) = inflight.get_mut(&outcome.key) { if let Some(v) = inflight.get_mut(&key) {
*v = v.saturating_sub(1); *v = v.saturating_sub(1);
} }
} }
@ -772,68 +691,6 @@ fn health_reconnect_budget(pool: &Arc<MePool>, dc_groups: usize) -> usize {
.clamp(HEALTH_RECONNECT_BUDGET_MIN, HEALTH_RECONNECT_BUDGET_MAX) .clamp(HEALTH_RECONNECT_BUDGET_MIN, HEALTH_RECONNECT_BUDGET_MAX)
} }
/// Recomputes and stores the runtime state of one IP family of the pool.
///
/// Transition rules, evaluated in this order:
/// 1. While the stored suppression deadline is still in the future, the family
///    stays `Suppressed` with the same deadline; the fail streak grows only
///    when `degraded` is set, and the recovery streak is reset to zero.
/// 2. Otherwise a degraded family bumps its fail streak. Once the streak
///    reaches `FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD` it becomes `Suppressed`
///    until `now + FAMILY_SUPPRESS_DURATION_SECS`; below that it is `Degraded`.
/// 3. A non-degraded family that was already `Healthy` stays `Healthy` with
///    all counters cleared.
/// 4. Any other non-degraded family bumps its recovery streak and is
///    `Recovering` until the streak reaches
///    `FAMILY_RECOVER_SUCCESS_STREAK_TARGET`, at which point it is `Healthy`.
///
/// The "state since" timestamp is replaced with the current epoch seconds
/// whenever the state changes or the stored timestamp is still zero.
fn update_family_runtime_state(pool: &Arc<MePool>, family: IpFamily, degraded: bool) {
    let now = MePool::now_epoch_secs();
    let prior_state = pool.family_runtime_state(family);
    let prior_since = pool.family_runtime_state_since_epoch_secs(family);
    let prior_suppressed_until = pool.family_suppressed_until_epoch_secs(family);
    let prior_fail_streak = pool.family_fail_streak(family);
    let prior_recover_streak = pool.family_recover_success_streak(family);

    let still_suppressed = prior_suppressed_until > now;
    let bumped_fail_streak = prior_fail_streak.saturating_add(1);

    let (state, suppressed_until, fail_streak, recover_streak) =
        match (still_suppressed, degraded) {
            // An active suppression window always wins; only the fail streak may move.
            (true, true) => (
                MeFamilyRuntimeState::Suppressed,
                prior_suppressed_until,
                bumped_fail_streak,
                0,
            ),
            (true, false) => (
                MeFamilyRuntimeState::Suppressed,
                prior_suppressed_until,
                prior_fail_streak,
                0,
            ),
            // Too many consecutive failures: open a fresh suppression window.
            (false, true) if bumped_fail_streak >= FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD => (
                MeFamilyRuntimeState::Suppressed,
                now.saturating_add(FAMILY_SUPPRESS_DURATION_SECS),
                bumped_fail_streak,
                0,
            ),
            (false, true) => (MeFamilyRuntimeState::Degraded, 0, bumped_fail_streak, 0),
            (false, false) if matches!(prior_state, MeFamilyRuntimeState::Healthy) => {
                (MeFamilyRuntimeState::Healthy, 0, 0, 0)
            }
            // Not degraded, but not yet healthy: count successes towards recovery.
            (false, false) => {
                let streak = prior_recover_streak.saturating_add(1);
                if streak >= FAMILY_RECOVER_SUCCESS_STREAK_TARGET {
                    (MeFamilyRuntimeState::Healthy, 0, 0, 0)
                } else {
                    (MeFamilyRuntimeState::Recovering, 0, 0, streak)
                }
            }
        };

    let state_since = if state != prior_state || prior_since == 0 {
        now
    } else {
        prior_since
    };

    pool.set_family_runtime_state(
        family,
        state,
        state_since,
        suppressed_until,
        fail_streak,
        recover_streak,
    );
}
fn should_emit_rate_limited_warn( fn should_emit_rate_limited_warn(
next_allowed: &mut HashMap<(i32, IpFamily), Instant>, next_allowed: &mut HashMap<(i32, IpFamily), Instant>,
key: (i32, IpFamily), key: (i32, IpFamily),
@ -858,7 +715,6 @@ fn adaptive_floor_class_min(
) -> usize { ) -> usize {
if endpoint_count <= 1 { if endpoint_count <= 1 {
let min_single = (pool let min_single = (pool
.floor_runtime
.me_adaptive_floor_min_writers_single_endpoint .me_adaptive_floor_min_writers_single_endpoint
.load(std::sync::atomic::Ordering::Relaxed) as usize) .load(std::sync::atomic::Ordering::Relaxed) as usize)
.max(1); .max(1);
@ -1115,7 +971,7 @@ async fn maybe_swap_idle_writer_for_cap(
}; };
let connected = match tokio::time::timeout( let connected = match tokio::time::timeout(
pool.reconnect_runtime.me_one_timeout, pool.me_one_timeout,
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()), pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
) )
.await .await
@ -1221,7 +1077,7 @@ async fn maybe_refresh_idle_writer_for_dc(
}; };
let rotate_ok = match tokio::time::timeout( let rotate_ok = match tokio::time::timeout(
pool.reconnect_runtime.me_one_timeout, pool.me_one_timeout,
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()), pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
) )
.await .await
@ -1332,7 +1188,7 @@ async fn recover_single_endpoint_outage(
required: usize, required: usize,
outage_backoff: &mut HashMap<(i32, IpFamily), u64>, outage_backoff: &mut HashMap<(i32, IpFamily), u64>,
outage_next_attempt: &mut HashMap<(i32, IpFamily), Instant>, outage_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
reconnect_sem: &Arc<Semaphore>, reconnect_budget: &mut usize,
) { ) {
let now = Instant::now(); let now = Instant::now();
if let Some(ts) = outage_next_attempt.get(&key) if let Some(ts) = outage_next_attempt.get(&key)
@ -1342,7 +1198,7 @@ async fn recover_single_endpoint_outage(
} }
let (min_backoff_ms, max_backoff_ms) = pool.single_endpoint_outage_backoff_bounds_ms(); let (min_backoff_ms, max_backoff_ms) = pool.single_endpoint_outage_backoff_bounds_ms();
if reconnect_sem.available_permits() == 0 { if *reconnect_budget == 0 {
outage_next_attempt.insert(key, now + Duration::from_millis(min_backoff_ms.max(250))); outage_next_attempt.insert(key, now + Duration::from_millis(min_backoff_ms.max(250)));
debug!( debug!(
dc = %key.0, dc = %key.0,
@ -1353,17 +1209,7 @@ async fn recover_single_endpoint_outage(
); );
return; return;
} }
let Ok(_reconnect_permit) = reconnect_sem.clone().try_acquire_owned() else { *reconnect_budget = (*reconnect_budget).saturating_sub(1);
outage_next_attempt.insert(key, now + Duration::from_millis(min_backoff_ms.max(250)));
debug!(
dc = %key.0,
family = ?key.1,
%endpoint,
required,
"Single-endpoint outage reconnect deferred by semaphore saturation"
);
return;
};
pool.stats pool.stats
.increment_me_single_endpoint_outage_reconnect_attempt_total(); .increment_me_single_endpoint_outage_reconnect_attempt_total();
@ -1372,7 +1218,7 @@ async fn recover_single_endpoint_outage(
pool.stats pool.stats
.increment_me_single_endpoint_quarantine_bypass_total(); .increment_me_single_endpoint_quarantine_bypass_total();
match tokio::time::timeout( match tokio::time::timeout(
pool.reconnect_runtime.me_one_timeout, pool.me_one_timeout,
pool.connect_one_for_dc(endpoint, key.0, rng.as_ref()), pool.connect_one_for_dc(endpoint, key.0, rng.as_ref()),
) )
.await .await
@ -1401,7 +1247,7 @@ async fn recover_single_endpoint_outage(
} else { } else {
let one_endpoint = [endpoint]; let one_endpoint = [endpoint];
match tokio::time::timeout( match tokio::time::timeout(
pool.reconnect_runtime.me_one_timeout, pool.me_one_timeout,
pool.connect_endpoints_round_robin(key.0, &one_endpoint, rng.as_ref()), pool.connect_endpoints_round_robin(key.0, &one_endpoint, rng.as_ref()),
) )
.await .await
@ -1526,7 +1372,7 @@ async fn maybe_rotate_single_endpoint_shadow(
}; };
let rotate_ok = match tokio::time::timeout( let rotate_ok = match tokio::time::timeout(
pool.reconnect_runtime.me_one_timeout, pool.me_one_timeout,
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()), pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
) )
.await .await
@ -1841,8 +1687,6 @@ mod tests {
general.me_warn_rate_limit_ms, general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(), MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms, general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts, general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms, general.me_route_inline_recovery_wait_ms,
) )

File diff suppressed because it is too large Load Diff

View File

@ -72,7 +72,7 @@ impl MePool {
} }
if changed { if changed {
self.rebuild_endpoint_dc_map().await; self.rebuild_endpoint_dc_map().await;
self.notify_writer_epoch(); self.writer_available.notify_waiters();
} }
if changed { if changed {
SnapshotApplyOutcome::AppliedChanged SnapshotApplyOutcome::AppliedChanged
@ -112,7 +112,7 @@ impl MePool {
pub async fn reconnect_all(self: &Arc<Self>) { pub async fn reconnect_all(self: &Arc<Self>) {
let ws = self.writers.read().await.clone(); let ws = self.writers.read().await.clone();
for w in ws.iter() { for w in ws {
if let Ok(()) = self if let Ok(()) = self
.connect_one_for_dc(w.addr, w.writer_dc, self.rng.as_ref()) .connect_one_for_dc(w.addr, w.writer_dc, self.rng.as_ref())
.await .await

View File

@ -14,10 +14,7 @@ use super::pool::MePool;
impl MePool { impl MePool {
pub async fn init(self: &Arc<Self>, pool_size: usize, rng: &Arc<SecureRandom>) -> Result<()> { pub async fn init(self: &Arc<Self>, pool_size: usize, rng: &Arc<SecureRandom>) -> Result<()> {
let family_order = self.family_order(); let family_order = self.family_order();
let connect_concurrency = self let connect_concurrency = self.me_reconnect_max_concurrent_per_dc.max(1) as usize;
.reconnect_runtime
.me_reconnect_max_concurrent_per_dc
.max(1) as usize;
let ks = self.key_selector().await; let ks = self.key_selector().await;
info!( info!(
me_servers = self.proxy_map_v4.read().await.len(), me_servers = self.proxy_map_v4.read().await.len(),
@ -253,12 +250,10 @@ impl MePool {
return false; return false;
} }
if self.reconnect_runtime.me_warmup_stagger_enabled { if self.me_warmup_stagger_enabled {
let jitter = rand::rng().random_range( let jitter =
0..=self.reconnect_runtime.me_warmup_step_jitter.as_millis() as u64, rand::rng().random_range(0..=self.me_warmup_step_jitter.as_millis() as u64);
); let delay_ms = self.me_warmup_step_delay.as_millis() as u64 + jitter;
let delay_ms =
self.reconnect_runtime.me_warmup_step_delay.as_millis() as u64 + jitter;
tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await; tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
} }
} }

View File

@ -42,10 +42,10 @@ pub async fn detect_public_ip() -> Option<IpAddr> {
impl MePool { impl MePool {
fn configured_stun_servers(&self) -> Vec<String> { fn configured_stun_servers(&self) -> Vec<String> {
if !self.nat_runtime.nat_stun_servers.is_empty() { if !self.nat_stun_servers.is_empty() {
return self.nat_runtime.nat_stun_servers.clone(); return self.nat_stun_servers.clone();
} }
if let Some(s) = &self.nat_runtime.nat_stun if let Some(s) = &self.nat_stun
&& !s.trim().is_empty() && !s.trim().is_empty()
{ {
return vec![s.clone()]; return vec![s.clone()];
@ -64,7 +64,7 @@ impl MePool {
let mut next_idx = 0usize; let mut next_idx = 0usize;
let mut live_servers = Vec::new(); let mut live_servers = Vec::new();
let mut best_by_ip: HashMap<IpAddr, (usize, std::net::SocketAddr)> = HashMap::new(); let mut best_by_ip: HashMap<IpAddr, (usize, std::net::SocketAddr)> = HashMap::new();
let concurrency = self.nat_runtime.nat_probe_concurrency.max(1); let concurrency = self.nat_probe_concurrency.max(1);
while next_idx < servers.len() || !join_set.is_empty() { while next_idx < servers.len() || !join_set.is_empty() {
while next_idx < servers.len() && join_set.len() < concurrency { while next_idx < servers.len() && join_set.len() < concurrency {
@ -137,13 +137,9 @@ impl MePool {
} }
pub(super) fn translate_ip_for_nat(&self, ip: IpAddr) -> IpAddr { pub(super) fn translate_ip_for_nat(&self, ip: IpAddr) -> IpAddr {
let nat_ip = self.nat_runtime.nat_ip_cfg.or_else(|| { let nat_ip = self
self.nat_runtime .nat_ip_cfg
.nat_ip_detected .or_else(|| self.nat_ip_detected.try_read().ok().and_then(|g| *g));
.try_read()
.ok()
.and_then(|g| *g)
});
let Some(nat_ip) = nat_ip else { let Some(nat_ip) = nat_ip else {
return ip; return ip;
@ -167,7 +163,7 @@ impl MePool {
addr: std::net::SocketAddr, addr: std::net::SocketAddr,
reflected: Option<std::net::SocketAddr>, reflected: Option<std::net::SocketAddr>,
) -> std::net::SocketAddr { ) -> std::net::SocketAddr {
let ip = if let Some(nat_ip) = self.nat_runtime.nat_ip_cfg { let ip = if let Some(nat_ip) = self.nat_ip_cfg {
match (addr.ip(), nat_ip) { match (addr.ip(), nat_ip) {
(IpAddr::V4(_), IpAddr::V4(dst)) => IpAddr::V4(dst), (IpAddr::V4(_), IpAddr::V4(dst)) => IpAddr::V4(dst),
(IpAddr::V6(_), IpAddr::V6(dst)) => IpAddr::V6(dst), (IpAddr::V6(_), IpAddr::V6(dst)) => IpAddr::V6(dst),
@ -189,22 +185,22 @@ impl MePool {
} }
pub(super) async fn maybe_detect_nat_ip(&self, local_ip: IpAddr) -> Option<IpAddr> { pub(super) async fn maybe_detect_nat_ip(&self, local_ip: IpAddr) -> Option<IpAddr> {
if self.nat_runtime.nat_ip_cfg.is_some() { if self.nat_ip_cfg.is_some() {
return self.nat_runtime.nat_ip_cfg; return self.nat_ip_cfg;
} }
if !(is_bogon(local_ip) || local_ip.is_loopback() || local_ip.is_unspecified()) { if !(is_bogon(local_ip) || local_ip.is_loopback() || local_ip.is_unspecified()) {
return None; return None;
} }
if let Some(ip) = *self.nat_runtime.nat_ip_detected.read().await { if let Some(ip) = *self.nat_ip_detected.read().await {
return Some(ip); return Some(ip);
} }
match fetch_public_ipv4_with_retry().await { match fetch_public_ipv4_with_retry().await {
Ok(Some(ip)) => { Ok(Some(ip)) => {
{ {
let mut guard = self.nat_runtime.nat_ip_detected.write().await; let mut guard = self.nat_ip_detected.write().await;
*guard = Some(IpAddr::V4(ip)); *guard = Some(IpAddr::V4(ip));
} }
info!(public_ip = %ip, "Auto-detected public IP for NAT translation"); info!(public_ip = %ip, "Auto-detected public IP for NAT translation");
@ -235,10 +231,10 @@ impl MePool {
} }
// Backoff window // Backoff window
if use_shared_cache if use_shared_cache
&& let Some(until) = *self.nat_runtime.stun_backoff_until.read().await && let Some(until) = *self.stun_backoff_until.read().await
&& Instant::now() < until && Instant::now() < until
{ {
if let Ok(cache) = self.nat_runtime.nat_reflection_cache.try_lock() { if let Ok(cache) = self.nat_reflection_cache.try_lock() {
let slot = match family { let slot = match family {
IpFamily::V4 => cache.v4, IpFamily::V4 => cache.v4,
IpFamily::V6 => cache.v6, IpFamily::V6 => cache.v6,
@ -248,8 +244,7 @@ impl MePool {
return None; return None;
} }
if use_shared_cache && let Ok(mut cache) = self.nat_runtime.nat_reflection_cache.try_lock() if use_shared_cache && let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
{
let slot = match family { let slot = match family {
IpFamily::V4 => &mut cache.v4, IpFamily::V4 => &mut cache.v4,
IpFamily::V6 => &mut cache.v6, IpFamily::V6 => &mut cache.v6,
@ -263,18 +258,18 @@ impl MePool {
let _singleflight_guard = if use_shared_cache { let _singleflight_guard = if use_shared_cache {
Some(match family { Some(match family {
IpFamily::V4 => self.nat_runtime.nat_reflection_singleflight_v4.lock().await, IpFamily::V4 => self.nat_reflection_singleflight_v4.lock().await,
IpFamily::V6 => self.nat_runtime.nat_reflection_singleflight_v6.lock().await, IpFamily::V6 => self.nat_reflection_singleflight_v6.lock().await,
}) })
} else { } else {
None None
}; };
if use_shared_cache if use_shared_cache
&& let Some(until) = *self.nat_runtime.stun_backoff_until.read().await && let Some(until) = *self.stun_backoff_until.read().await
&& Instant::now() < until && Instant::now() < until
{ {
if let Ok(cache) = self.nat_runtime.nat_reflection_cache.try_lock() { if let Ok(cache) = self.nat_reflection_cache.try_lock() {
let slot = match family { let slot = match family {
IpFamily::V4 => cache.v4, IpFamily::V4 => cache.v4,
IpFamily::V6 => cache.v6, IpFamily::V6 => cache.v6,
@ -284,8 +279,7 @@ impl MePool {
return None; return None;
} }
if use_shared_cache && let Ok(mut cache) = self.nat_runtime.nat_reflection_cache.try_lock() if use_shared_cache && let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
{
let slot = match family { let slot = match family {
IpFamily::V4 => &mut cache.v4, IpFamily::V4 => &mut cache.v4,
IpFamily::V6 => &mut cache.v6, IpFamily::V6 => &mut cache.v6,
@ -298,14 +292,13 @@ impl MePool {
} }
let attempt = if use_shared_cache { let attempt = if use_shared_cache {
self.nat_runtime self.nat_probe_attempts
.nat_probe_attempts
.fetch_add(1, std::sync::atomic::Ordering::Relaxed) .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
} else { } else {
0 0
}; };
let configured_servers = self.configured_stun_servers(); let configured_servers = self.configured_stun_servers();
let live_snapshot = self.nat_runtime.nat_stun_live_servers.read().await.clone(); let live_snapshot = self.nat_stun_live_servers.read().await.clone();
let primary_servers = if live_snapshot.is_empty() { let primary_servers = if live_snapshot.is_empty() {
configured_servers.clone() configured_servers.clone()
} else { } else {
@ -329,15 +322,14 @@ impl MePool {
let live_server_count = live_servers.len(); let live_server_count = live_servers.len();
if !live_servers.is_empty() { if !live_servers.is_empty() {
*self.nat_runtime.nat_stun_live_servers.write().await = live_servers; *self.nat_stun_live_servers.write().await = live_servers;
} else { } else {
self.nat_runtime.nat_stun_live_servers.write().await.clear(); self.nat_stun_live_servers.write().await.clear();
} }
if let Some(reflected_addr) = selected_reflected { if let Some(reflected_addr) = selected_reflected {
if use_shared_cache { if use_shared_cache {
self.nat_runtime self.nat_probe_attempts
.nat_probe_attempts
.store(0, std::sync::atomic::Ordering::Relaxed); .store(0, std::sync::atomic::Ordering::Relaxed);
} }
info!( info!(
@ -346,9 +338,7 @@ impl MePool {
"STUN-Quorum reached, IP: {}", "STUN-Quorum reached, IP: {}",
reflected_addr.ip() reflected_addr.ip()
); );
if use_shared_cache if use_shared_cache && let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
&& let Ok(mut cache) = self.nat_runtime.nat_reflection_cache.try_lock()
{
let slot = match family { let slot = match family {
IpFamily::V4 => &mut cache.v4, IpFamily::V4 => &mut cache.v4,
IpFamily::V6 => &mut cache.v6, IpFamily::V6 => &mut cache.v6,
@ -360,7 +350,7 @@ impl MePool {
if use_shared_cache { if use_shared_cache {
let backoff = Duration::from_secs(60 * 2u64.pow((attempt as u32).min(6))); let backoff = Duration::from_secs(60 * 2u64.pow((attempt as u32).min(6)));
*self.nat_runtime.stun_backoff_until.write().await = Some(Instant::now() + backoff); *self.stun_backoff_until.write().await = Some(Instant::now() + backoff);
} }
None None
} }

View File

@ -13,27 +13,12 @@ use super::pool::{MePool, RefillDcKey, RefillEndpointKey, WriterContour};
const ME_FLAP_UPTIME_THRESHOLD_SECS: u64 = 20; const ME_FLAP_UPTIME_THRESHOLD_SECS: u64 = 20;
const ME_FLAP_QUARANTINE_SECS: u64 = 25; const ME_FLAP_QUARANTINE_SECS: u64 = 25;
const ME_REFILL_TOTAL_ATTEMPT_CAP: u32 = 20;
impl MePool { impl MePool {
/// Prunes the endpoint quarantine table.
///
/// An entry is kept only if its expiry instant is still in the future and its
/// address is currently a key of `endpoint_dc_map`; everything else is removed.
pub(super) async fn sweep_endpoint_quarantine(&self) {
    // Snapshot the configured addresses inside a scope so the read guard is
    // released before the quarantine lock is requested.
    let known_endpoints: HashSet<SocketAddr> = {
        let dc_map = self.endpoint_dc_map.read().await;
        dc_map.keys().copied().collect()
    };
    let cutoff = Instant::now();
    let mut quarantine = self.endpoint_quarantine.lock().await;
    quarantine.retain(|endpoint, expires_at| {
        let unexpired = *expires_at > cutoff;
        unexpired && known_endpoints.contains(endpoint)
    });
}
pub(super) async fn maybe_quarantine_flapping_endpoint( pub(super) async fn maybe_quarantine_flapping_endpoint(
&self, &self,
addr: SocketAddr, addr: SocketAddr,
uptime: Duration, uptime: Duration,
reason: &'static str,
) { ) {
if uptime > Duration::from_secs(ME_FLAP_UPTIME_THRESHOLD_SECS) { if uptime > Duration::from_secs(ME_FLAP_UPTIME_THRESHOLD_SECS) {
return; return;
@ -46,7 +31,6 @@ impl MePool {
self.stats.increment_me_endpoint_quarantine_total(); self.stats.increment_me_endpoint_quarantine_total();
warn!( warn!(
%addr, %addr,
reason,
uptime_ms = uptime.as_millis(), uptime_ms = uptime.as_millis(),
quarantine_secs = ME_FLAP_QUARANTINE_SECS, quarantine_secs = ME_FLAP_QUARANTINE_SECS,
"ME endpoint temporarily quarantined due to rapid writer flap" "ME endpoint temporarily quarantined due to rapid writer flap"
@ -221,16 +205,11 @@ impl MePool {
} }
async fn refill_writer_after_loss(self: &Arc<Self>, addr: SocketAddr, writer_dc: i32) -> bool { async fn refill_writer_after_loss(self: &Arc<Self>, addr: SocketAddr, writer_dc: i32) -> bool {
let fast_retries = self.reconnect_runtime.me_reconnect_fast_retry_count.max(1); let fast_retries = self.me_reconnect_fast_retry_count.max(1);
let mut total_attempts = 0u32;
let same_endpoint_quarantined = self.is_endpoint_quarantined(addr).await; let same_endpoint_quarantined = self.is_endpoint_quarantined(addr).await;
if !same_endpoint_quarantined { if !same_endpoint_quarantined {
for attempt in 0..fast_retries { for attempt in 0..fast_retries {
if total_attempts >= ME_REFILL_TOTAL_ATTEMPT_CAP {
break;
}
total_attempts = total_attempts.saturating_add(1);
self.stats.increment_me_reconnect_attempt(); self.stats.increment_me_reconnect_attempt();
match self match self
.connect_one_for_dc(addr, writer_dc, self.rng.as_ref()) .connect_one_for_dc(addr, writer_dc, self.rng.as_ref())
@ -271,10 +250,6 @@ impl MePool {
} }
for attempt in 0..fast_retries { for attempt in 0..fast_retries {
if total_attempts >= ME_REFILL_TOTAL_ATTEMPT_CAP {
break;
}
total_attempts = total_attempts.saturating_add(1);
self.stats.increment_me_reconnect_attempt(); self.stats.increment_me_reconnect_attempt();
if self if self
.connect_endpoints_round_robin(writer_dc, &dc_endpoints, self.rng.as_ref()) .connect_endpoints_round_robin(writer_dc, &dc_endpoints, self.rng.as_ref())

View File

@ -37,23 +37,16 @@ impl MePool {
} }
fn clear_pending_hardswap_state(&self) { fn clear_pending_hardswap_state(&self) {
self.reinit self.pending_hardswap_generation.store(0, Ordering::Relaxed);
.pending_hardswap_generation self.pending_hardswap_started_at_epoch_secs
.store(0, Ordering::Relaxed); .store(0, Ordering::Relaxed);
self.reinit self.pending_hardswap_map_hash.store(0, Ordering::Relaxed);
.pending_hardswap_started_at_epoch_secs self.warm_generation.store(0, Ordering::Relaxed);
.store(0, Ordering::Relaxed);
self.reinit
.pending_hardswap_map_hash
.store(0, Ordering::Relaxed);
self.reinit.warm_generation.store(0, Ordering::Relaxed);
} }
async fn promote_warm_generation_to_active(&self, generation: u64) { async fn promote_warm_generation_to_active(&self, generation: u64) {
self.reinit self.active_generation.store(generation, Ordering::Relaxed);
.active_generation self.warm_generation.store(0, Ordering::Relaxed);
.store(generation, Ordering::Relaxed);
self.reinit.warm_generation.store(0, Ordering::Relaxed);
let ws = self.writers.read().await; let ws = self.writers.read().await;
for writer in ws.iter() { for writer in ws.iter() {
@ -191,14 +184,8 @@ impl MePool {
} }
fn hardswap_warmup_connect_delay_ms(&self) -> u64 { fn hardswap_warmup_connect_delay_ms(&self) -> u64 {
let min_ms = self let min_ms = self.me_hardswap_warmup_delay_min_ms.load(Ordering::Relaxed);
.reinit let max_ms = self.me_hardswap_warmup_delay_max_ms.load(Ordering::Relaxed);
.me_hardswap_warmup_delay_min_ms
.load(Ordering::Relaxed);
let max_ms = self
.reinit
.me_hardswap_warmup_delay_max_ms
.load(Ordering::Relaxed);
let (min_ms, max_ms) = if min_ms <= max_ms { let (min_ms, max_ms) = if min_ms <= max_ms {
(min_ms, max_ms) (min_ms, max_ms)
} else { } else {
@ -212,11 +199,9 @@ impl MePool {
fn hardswap_warmup_backoff_ms(&self, pass_idx: usize) -> u64 { fn hardswap_warmup_backoff_ms(&self, pass_idx: usize) -> u64 {
let base_ms = self let base_ms = self
.reinit
.me_hardswap_warmup_pass_backoff_base_ms .me_hardswap_warmup_pass_backoff_base_ms
.load(Ordering::Relaxed); .load(Ordering::Relaxed);
let cap_ms = let cap_ms = (self.me_reconnect_backoff_cap.as_millis() as u64).max(base_ms);
(self.reconnect_runtime.me_reconnect_backoff_cap.as_millis() as u64).max(base_ms);
let shift = (pass_idx as u32).min(20); let shift = (pass_idx as u32).min(20);
let scaled = base_ms.saturating_mul(1u64 << shift); let scaled = base_ms.saturating_mul(1u64 << shift);
let core = scaled.min(cap_ms); let core = scaled.min(cap_ms);
@ -259,7 +244,6 @@ impl MePool {
desired_by_dc: &HashMap<i32, HashSet<SocketAddr>>, desired_by_dc: &HashMap<i32, HashSet<SocketAddr>>,
) { ) {
let extra_passes = self let extra_passes = self
.reinit
.me_hardswap_warmup_extra_passes .me_hardswap_warmup_extra_passes
.load(Ordering::Relaxed) .load(Ordering::Relaxed)
.min(10) as usize; .min(10) as usize;
@ -385,20 +369,13 @@ impl MePool {
let desired_map_hash = Self::desired_map_hash(&desired_by_dc); let desired_map_hash = Self::desired_map_hash(&desired_by_dc);
let previous_generation = self.current_generation(); let previous_generation = self.current_generation();
let hardswap = self.reinit.hardswap.load(Ordering::Relaxed); let hardswap = self.hardswap.load(Ordering::Relaxed);
let generation = if hardswap { let generation = if hardswap {
let pending_generation = self let pending_generation = self.pending_hardswap_generation.load(Ordering::Relaxed);
.reinit
.pending_hardswap_generation
.load(Ordering::Relaxed);
let pending_started_at = self let pending_started_at = self
.reinit
.pending_hardswap_started_at_epoch_secs .pending_hardswap_started_at_epoch_secs
.load(Ordering::Relaxed); .load(Ordering::Relaxed);
let pending_map_hash = self let pending_map_hash = self.pending_hardswap_map_hash.load(Ordering::Relaxed);
.reinit
.pending_hardswap_map_hash
.load(Ordering::Relaxed);
let pending_age_secs = now_epoch_secs.saturating_sub(pending_started_at); let pending_age_secs = now_epoch_secs.saturating_sub(pending_started_at);
let pending_ttl_expired = let pending_ttl_expired =
pending_started_at > 0 && pending_age_secs > ME_HARDSWAP_PENDING_TTL_SECS; pending_started_at > 0 && pending_age_secs > ME_HARDSWAP_PENDING_TTL_SECS;
@ -428,30 +405,24 @@ impl MePool {
"ME hardswap pending generation expired by TTL; starting fresh generation" "ME hardswap pending generation expired by TTL; starting fresh generation"
); );
} }
let next_generation = self.reinit.generation.fetch_add(1, Ordering::Relaxed) + 1; let next_generation = self.generation.fetch_add(1, Ordering::Relaxed) + 1;
self.reinit self.pending_hardswap_generation
.pending_hardswap_generation
.store(next_generation, Ordering::Relaxed); .store(next_generation, Ordering::Relaxed);
self.reinit self.pending_hardswap_started_at_epoch_secs
.pending_hardswap_started_at_epoch_secs
.store(now_epoch_secs, Ordering::Relaxed); .store(now_epoch_secs, Ordering::Relaxed);
self.reinit self.pending_hardswap_map_hash
.pending_hardswap_map_hash
.store(desired_map_hash, Ordering::Relaxed); .store(desired_map_hash, Ordering::Relaxed);
self.reinit self.warm_generation
.warm_generation
.store(next_generation, Ordering::Relaxed); .store(next_generation, Ordering::Relaxed);
next_generation next_generation
} }
} else { } else {
self.clear_pending_hardswap_state(); self.clear_pending_hardswap_state();
self.reinit.generation.fetch_add(1, Ordering::Relaxed) + 1 self.generation.fetch_add(1, Ordering::Relaxed) + 1
}; };
if hardswap { if hardswap {
self.reinit self.warm_generation.store(generation, Ordering::Relaxed);
.warm_generation
.store(generation, Ordering::Relaxed);
self.warmup_generation_for_all_dcs(rng, generation, &desired_by_dc) self.warmup_generation_for_all_dcs(rng, generation, &desired_by_dc)
.await; .await;
} else { } else {
@ -465,8 +436,7 @@ impl MePool {
.map(|w| (w.writer_dc, w.addr)) .map(|w| (w.writer_dc, w.addr))
.collect(); .collect();
let min_ratio = Self::permille_to_ratio( let min_ratio = Self::permille_to_ratio(
self.drain_runtime self.me_pool_min_fresh_ratio_permille
.me_pool_min_fresh_ratio_permille
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
); );
let (coverage_ratio, missing_dc) = let (coverage_ratio, missing_dc) =

View File

@ -94,9 +94,9 @@ impl MePool {
pub(crate) async fn api_nat_stun_snapshot(&self) -> MeApiNatStunSnapshot { pub(crate) async fn api_nat_stun_snapshot(&self) -> MeApiNatStunSnapshot {
let now = Instant::now(); let now = Instant::now();
let mut configured_servers = if !self.nat_runtime.nat_stun_servers.is_empty() { let mut configured_servers = if !self.nat_stun_servers.is_empty() {
self.nat_runtime.nat_stun_servers.clone() self.nat_stun_servers.clone()
} else if let Some(stun) = &self.nat_runtime.nat_stun { } else if let Some(stun) = &self.nat_stun {
if stun.trim().is_empty() { if stun.trim().is_empty() {
Vec::new() Vec::new()
} else { } else {
@ -108,11 +108,11 @@ impl MePool {
configured_servers.sort(); configured_servers.sort();
configured_servers.dedup(); configured_servers.dedup();
let mut live_servers = self.nat_runtime.nat_stun_live_servers.read().await.clone(); let mut live_servers = self.nat_stun_live_servers.read().await.clone();
live_servers.sort(); live_servers.sort();
live_servers.dedup(); live_servers.dedup();
let reflection = self.nat_runtime.nat_reflection_cache.lock().await; let reflection = self.nat_reflection_cache.lock().await;
let reflection_v4 = reflection.v4.map(|(ts, addr)| MeApiNatReflectionSnapshot { let reflection_v4 = reflection.v4.map(|(ts, addr)| MeApiNatReflectionSnapshot {
addr, addr,
age_secs: now.saturating_duration_since(ts).as_secs(), age_secs: now.saturating_duration_since(ts).as_secs(),
@ -123,19 +123,17 @@ impl MePool {
}); });
drop(reflection); drop(reflection);
let backoff_until = *self.nat_runtime.stun_backoff_until.read().await; let backoff_until = *self.stun_backoff_until.read().await;
let stun_backoff_remaining_ms = backoff_until.and_then(|until| { let stun_backoff_remaining_ms = backoff_until.and_then(|until| {
(until > now).then_some(until.duration_since(now).as_millis() as u64) (until > now).then_some(until.duration_since(now).as_millis() as u64)
}); });
MeApiNatStunSnapshot { MeApiNatStunSnapshot {
nat_probe_enabled: self.nat_runtime.nat_probe, nat_probe_enabled: self.nat_probe,
nat_probe_disabled_runtime: self nat_probe_disabled_runtime: self
.nat_runtime
.nat_probe_disabled .nat_probe_disabled
.load(std::sync::atomic::Ordering::Relaxed), .load(std::sync::atomic::Ordering::Relaxed),
nat_probe_attempts: self nat_probe_attempts: self
.nat_runtime
.nat_probe_attempts .nat_probe_attempts
.load(std::sync::atomic::Ordering::Relaxed), .load(std::sync::atomic::Ordering::Relaxed),
configured_servers, configured_servers,

View File

@ -160,7 +160,7 @@ impl MePool {
let writers = self.writers.read().await.clone(); let writers = self.writers.read().await.clone();
let mut live_writers_by_dc = HashMap::<i16, usize>::new(); let mut live_writers_by_dc = HashMap::<i16, usize>::new();
for writer in writers.iter() { for writer in writers {
if writer.draining.load(Ordering::Relaxed) { if writer.draining.load(Ordering::Relaxed) {
continue; continue;
} }
@ -197,7 +197,7 @@ impl MePool {
let writers = self.writers.read().await.clone(); let writers = self.writers.read().await.clone();
let mut live_writers_by_dc = HashMap::<i16, usize>::new(); let mut live_writers_by_dc = HashMap::<i16, usize>::new();
for writer in writers.iter() { for writer in writers {
if writer.draining.load(Ordering::Relaxed) { if writer.draining.load(Ordering::Relaxed) {
continue; continue;
} }
@ -224,10 +224,7 @@ impl MePool {
pub(crate) async fn api_status_snapshot(&self) -> MeApiStatusSnapshot { pub(crate) async fn api_status_snapshot(&self) -> MeApiStatusSnapshot {
let now_epoch_secs = Self::now_epoch_secs(); let now_epoch_secs = Self::now_epoch_secs();
let active_generation = self.current_generation(); let active_generation = self.current_generation();
let drain_ttl_secs = self let drain_ttl_secs = self.me_pool_drain_ttl_secs.load(Ordering::Relaxed);
.drain_runtime
.me_pool_drain_ttl_secs
.load(Ordering::Relaxed);
let mut endpoints_by_dc = BTreeMap::<i16, BTreeSet<SocketAddr>>::new(); let mut endpoints_by_dc = BTreeMap::<i16, BTreeSet<SocketAddr>>::new();
if self.decision.ipv4_me { if self.decision.ipv4_me {
@ -258,7 +255,7 @@ impl MePool {
let mut dc_rtt_agg = HashMap::<i16, (f64, u64)>::new(); let mut dc_rtt_agg = HashMap::<i16, (f64, u64)>::new();
let mut writer_rows = Vec::<MeApiWriterStatusSnapshot>::with_capacity(writers.len()); let mut writer_rows = Vec::<MeApiWriterStatusSnapshot>::with_capacity(writers.len());
for writer in writers.iter() { for writer in writers {
let endpoint = writer.addr; let endpoint = writer.addr;
let dc = i16::try_from(writer.writer_dc).ok(); let dc = i16::try_from(writer.writer_dc).ok();
let draining = writer.draining.load(Ordering::Relaxed); let draining = writer.draining.load(Ordering::Relaxed);
@ -339,7 +336,6 @@ impl MePool {
let mut fresh_alive_writers = 0usize; let mut fresh_alive_writers = 0usize;
let floor_mode = self.floor_mode(); let floor_mode = self.floor_mode();
let adaptive_cpu_cores = (self let adaptive_cpu_cores = (self
.floor_runtime
.me_adaptive_floor_cpu_cores_effective .me_adaptive_floor_cpu_cores_effective
.load(Ordering::Relaxed) as usize) .load(Ordering::Relaxed) as usize)
.max(1); .max(1);
@ -354,26 +350,22 @@ impl MePool {
self.required_writers_for_dc_with_floor_mode(endpoint_count, false); self.required_writers_for_dc_with_floor_mode(endpoint_count, false);
let floor_min = if endpoint_count <= 1 { let floor_min = if endpoint_count <= 1 {
(self (self
.floor_runtime
.me_adaptive_floor_min_writers_single_endpoint .me_adaptive_floor_min_writers_single_endpoint
.load(Ordering::Relaxed) as usize) .load(Ordering::Relaxed) as usize)
.max(1) .max(1)
.min(base_required.max(1)) .min(base_required.max(1))
} else { } else {
(self (self
.floor_runtime
.me_adaptive_floor_min_writers_multi_endpoint .me_adaptive_floor_min_writers_multi_endpoint
.load(Ordering::Relaxed) as usize) .load(Ordering::Relaxed) as usize)
.max(1) .max(1)
.min(base_required.max(1)) .min(base_required.max(1))
}; };
let extra_per_core = if endpoint_count <= 1 { let extra_per_core = if endpoint_count <= 1 {
self.floor_runtime self.me_adaptive_floor_max_extra_writers_single_per_core
.me_adaptive_floor_max_extra_writers_single_per_core
.load(Ordering::Relaxed) as usize .load(Ordering::Relaxed) as usize
} else { } else {
self.floor_runtime self.me_adaptive_floor_max_extra_writers_multi_per_core
.me_adaptive_floor_max_extra_writers_multi_per_core
.load(Ordering::Relaxed) as usize .load(Ordering::Relaxed) as usize
}; };
let floor_max = let floor_max =
@ -444,7 +436,6 @@ impl MePool {
let now = Instant::now(); let now = Instant::now();
let now_epoch_secs = Self::now_epoch_secs(); let now_epoch_secs = Self::now_epoch_secs();
let pending_started_at = self let pending_started_at = self
.reinit
.pending_hardswap_started_at_epoch_secs .pending_hardswap_started_at_epoch_secs
.load(Ordering::Relaxed); .load(Ordering::Relaxed);
let pending_hardswap_age_secs = let pending_hardswap_age_secs =
@ -486,175 +477,119 @@ impl MePool {
} }
MeApiRuntimeSnapshot { MeApiRuntimeSnapshot {
active_generation: self.reinit.active_generation.load(Ordering::Relaxed), active_generation: self.active_generation.load(Ordering::Relaxed),
warm_generation: self.reinit.warm_generation.load(Ordering::Relaxed), warm_generation: self.warm_generation.load(Ordering::Relaxed),
pending_hardswap_generation: self pending_hardswap_generation: self.pending_hardswap_generation.load(Ordering::Relaxed),
.reinit
.pending_hardswap_generation
.load(Ordering::Relaxed),
pending_hardswap_age_secs, pending_hardswap_age_secs,
hardswap_enabled: self.reinit.hardswap.load(Ordering::Relaxed), hardswap_enabled: self.hardswap.load(Ordering::Relaxed),
floor_mode: floor_mode_label(self.floor_mode()), floor_mode: floor_mode_label(self.floor_mode()),
adaptive_floor_idle_secs: self adaptive_floor_idle_secs: self.me_adaptive_floor_idle_secs.load(Ordering::Relaxed),
.floor_runtime
.me_adaptive_floor_idle_secs
.load(Ordering::Relaxed),
adaptive_floor_min_writers_single_endpoint: self adaptive_floor_min_writers_single_endpoint: self
.floor_runtime
.me_adaptive_floor_min_writers_single_endpoint .me_adaptive_floor_min_writers_single_endpoint
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_min_writers_multi_endpoint: self adaptive_floor_min_writers_multi_endpoint: self
.floor_runtime
.me_adaptive_floor_min_writers_multi_endpoint .me_adaptive_floor_min_writers_multi_endpoint
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_recover_grace_secs: self adaptive_floor_recover_grace_secs: self
.floor_runtime
.me_adaptive_floor_recover_grace_secs .me_adaptive_floor_recover_grace_secs
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_writers_per_core_total: self adaptive_floor_writers_per_core_total: self
.floor_runtime
.me_adaptive_floor_writers_per_core_total .me_adaptive_floor_writers_per_core_total
.load(Ordering::Relaxed) as u16, .load(Ordering::Relaxed) as u16,
adaptive_floor_cpu_cores_override: self adaptive_floor_cpu_cores_override: self
.floor_runtime
.me_adaptive_floor_cpu_cores_override .me_adaptive_floor_cpu_cores_override
.load(Ordering::Relaxed) as u16, .load(Ordering::Relaxed) as u16,
adaptive_floor_max_extra_writers_single_per_core: self adaptive_floor_max_extra_writers_single_per_core: self
.floor_runtime
.me_adaptive_floor_max_extra_writers_single_per_core .me_adaptive_floor_max_extra_writers_single_per_core
.load(Ordering::Relaxed) .load(Ordering::Relaxed)
as u16, as u16,
adaptive_floor_max_extra_writers_multi_per_core: self adaptive_floor_max_extra_writers_multi_per_core: self
.floor_runtime
.me_adaptive_floor_max_extra_writers_multi_per_core .me_adaptive_floor_max_extra_writers_multi_per_core
.load(Ordering::Relaxed) .load(Ordering::Relaxed)
as u16, as u16,
adaptive_floor_max_active_writers_per_core: self adaptive_floor_max_active_writers_per_core: self
.floor_runtime
.me_adaptive_floor_max_active_writers_per_core .me_adaptive_floor_max_active_writers_per_core
.load(Ordering::Relaxed) .load(Ordering::Relaxed)
as u16, as u16,
adaptive_floor_max_warm_writers_per_core: self adaptive_floor_max_warm_writers_per_core: self
.floor_runtime
.me_adaptive_floor_max_warm_writers_per_core .me_adaptive_floor_max_warm_writers_per_core
.load(Ordering::Relaxed) .load(Ordering::Relaxed)
as u16, as u16,
adaptive_floor_max_active_writers_global: self adaptive_floor_max_active_writers_global: self
.floor_runtime
.me_adaptive_floor_max_active_writers_global .me_adaptive_floor_max_active_writers_global
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_max_warm_writers_global: self adaptive_floor_max_warm_writers_global: self
.floor_runtime
.me_adaptive_floor_max_warm_writers_global .me_adaptive_floor_max_warm_writers_global
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_cpu_cores_detected: self adaptive_floor_cpu_cores_detected: self
.floor_runtime
.me_adaptive_floor_cpu_cores_detected .me_adaptive_floor_cpu_cores_detected
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_cpu_cores_effective: self adaptive_floor_cpu_cores_effective: self
.floor_runtime
.me_adaptive_floor_cpu_cores_effective .me_adaptive_floor_cpu_cores_effective
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_global_cap_raw: self adaptive_floor_global_cap_raw: self
.floor_runtime
.me_adaptive_floor_global_cap_raw .me_adaptive_floor_global_cap_raw
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_global_cap_effective: self adaptive_floor_global_cap_effective: self
.floor_runtime
.me_adaptive_floor_global_cap_effective .me_adaptive_floor_global_cap_effective
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_target_writers_total: self adaptive_floor_target_writers_total: self
.floor_runtime
.me_adaptive_floor_target_writers_total .me_adaptive_floor_target_writers_total
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_active_cap_configured: self adaptive_floor_active_cap_configured: self
.floor_runtime
.me_adaptive_floor_active_cap_configured .me_adaptive_floor_active_cap_configured
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_active_cap_effective: self adaptive_floor_active_cap_effective: self
.floor_runtime
.me_adaptive_floor_active_cap_effective .me_adaptive_floor_active_cap_effective
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_warm_cap_configured: self adaptive_floor_warm_cap_configured: self
.floor_runtime
.me_adaptive_floor_warm_cap_configured .me_adaptive_floor_warm_cap_configured
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_warm_cap_effective: self adaptive_floor_warm_cap_effective: self
.floor_runtime
.me_adaptive_floor_warm_cap_effective .me_adaptive_floor_warm_cap_effective
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_active_writers_current: self adaptive_floor_active_writers_current: self
.floor_runtime
.me_adaptive_floor_active_writers_current .me_adaptive_floor_active_writers_current
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
adaptive_floor_warm_writers_current: self adaptive_floor_warm_writers_current: self
.floor_runtime
.me_adaptive_floor_warm_writers_current .me_adaptive_floor_warm_writers_current
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
me_keepalive_enabled: self.writer_lifecycle.me_keepalive_enabled, me_keepalive_enabled: self.me_keepalive_enabled,
me_keepalive_interval_secs: self.writer_lifecycle.me_keepalive_interval.as_secs(), me_keepalive_interval_secs: self.me_keepalive_interval.as_secs(),
me_keepalive_jitter_secs: self.writer_lifecycle.me_keepalive_jitter.as_secs(), me_keepalive_jitter_secs: self.me_keepalive_jitter.as_secs(),
me_keepalive_payload_random: self.writer_lifecycle.me_keepalive_payload_random, me_keepalive_payload_random: self.me_keepalive_payload_random,
rpc_proxy_req_every_secs: self rpc_proxy_req_every_secs: self.rpc_proxy_req_every_secs.load(Ordering::Relaxed),
.writer_lifecycle me_reconnect_max_concurrent_per_dc: self.me_reconnect_max_concurrent_per_dc,
.rpc_proxy_req_every_secs me_reconnect_backoff_base_ms: self.me_reconnect_backoff_base.as_millis() as u64,
.load(Ordering::Relaxed), me_reconnect_backoff_cap_ms: self.me_reconnect_backoff_cap.as_millis() as u64,
me_reconnect_max_concurrent_per_dc: self me_reconnect_fast_retry_count: self.me_reconnect_fast_retry_count,
.reconnect_runtime me_pool_drain_ttl_secs: self.me_pool_drain_ttl_secs.load(Ordering::Relaxed),
.me_reconnect_max_concurrent_per_dc, me_pool_force_close_secs: self.me_pool_force_close_secs.load(Ordering::Relaxed),
me_reconnect_backoff_base_ms: self
.reconnect_runtime
.me_reconnect_backoff_base
.as_millis() as u64,
me_reconnect_backoff_cap_ms: self.reconnect_runtime.me_reconnect_backoff_cap.as_millis()
as u64,
me_reconnect_fast_retry_count: self.reconnect_runtime.me_reconnect_fast_retry_count,
me_pool_drain_ttl_secs: self
.drain_runtime
.me_pool_drain_ttl_secs
.load(Ordering::Relaxed),
me_pool_force_close_secs: self
.drain_runtime
.me_pool_force_close_secs
.load(Ordering::Relaxed),
me_pool_min_fresh_ratio: Self::permille_to_ratio( me_pool_min_fresh_ratio: Self::permille_to_ratio(
self.drain_runtime self.me_pool_min_fresh_ratio_permille
.me_pool_min_fresh_ratio_permille
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
), ),
me_bind_stale_mode: bind_stale_mode_label(self.bind_stale_mode()), me_bind_stale_mode: bind_stale_mode_label(self.bind_stale_mode()),
me_bind_stale_ttl_secs: self me_bind_stale_ttl_secs: self.me_bind_stale_ttl_secs.load(Ordering::Relaxed),
.binding_policy
.me_bind_stale_ttl_secs
.load(Ordering::Relaxed),
me_single_endpoint_shadow_writers: self me_single_endpoint_shadow_writers: self
.single_endpoint_runtime
.me_single_endpoint_shadow_writers .me_single_endpoint_shadow_writers
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
me_single_endpoint_outage_mode_enabled: self me_single_endpoint_outage_mode_enabled: self
.single_endpoint_runtime
.me_single_endpoint_outage_mode_enabled .me_single_endpoint_outage_mode_enabled
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
me_single_endpoint_outage_disable_quarantine: self me_single_endpoint_outage_disable_quarantine: self
.single_endpoint_runtime
.me_single_endpoint_outage_disable_quarantine .me_single_endpoint_outage_disable_quarantine
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
me_single_endpoint_outage_backoff_min_ms: self me_single_endpoint_outage_backoff_min_ms: self
.single_endpoint_runtime
.me_single_endpoint_outage_backoff_min_ms .me_single_endpoint_outage_backoff_min_ms
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
me_single_endpoint_outage_backoff_max_ms: self me_single_endpoint_outage_backoff_max_ms: self
.single_endpoint_runtime
.me_single_endpoint_outage_backoff_max_ms .me_single_endpoint_outage_backoff_max_ms
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
me_single_endpoint_shadow_rotate_every_secs: self me_single_endpoint_shadow_rotate_every_secs: self
.single_endpoint_runtime
.me_single_endpoint_shadow_rotate_every_secs .me_single_endpoint_shadow_rotate_every_secs
.load(Ordering::Relaxed), .load(Ordering::Relaxed),
me_deterministic_writer_sort: self me_deterministic_writer_sort: self.me_deterministic_writer_sort.load(Ordering::Relaxed),
.writer_selection_policy
.me_deterministic_writer_sort
.load(Ordering::Relaxed),
me_writer_pick_mode: writer_pick_mode_label(self.writer_pick_mode()), me_writer_pick_mode: writer_pick_mode_label(self.writer_pick_mode()),
me_writer_pick_sample_size: self.writer_pick_sample_size() as u8, me_writer_pick_sample_size: self.writer_pick_sample_size() as u8,
me_socks_kdf_policy: socks_kdf_policy_label(self.socks_kdf_policy()), me_socks_kdf_policy: socks_kdf_policy_label(self.socks_kdf_policy()),

View File

@ -1,4 +1,3 @@
use std::collections::HashMap;
use std::io::ErrorKind; use std::io::ErrorKind;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::sync::Arc; use std::sync::Arc;
@ -26,7 +25,6 @@ const ME_ACTIVE_PING_SECS: u64 = 25;
const ME_ACTIVE_PING_JITTER_SECS: i64 = 5; const ME_ACTIVE_PING_JITTER_SECS: i64 = 5;
const ME_IDLE_KEEPALIVE_MAX_SECS: u64 = 5; const ME_IDLE_KEEPALIVE_MAX_SECS: u64 = 5;
const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700; const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700;
const ME_PING_TRACKER_CLEANUP_EVERY: u32 = 32;
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
enum WriterTeardownMode { enum WriterTeardownMode {
@ -38,240 +36,6 @@ fn is_me_peer_closed_error(error: &ProxyError) -> bool {
matches!(error, ProxyError::Io(ioe) if ioe.kind() == ErrorKind::UnexpectedEof) matches!(error, ProxyError::Io(ioe) if ioe.kind() == ErrorKind::UnexpectedEof)
} }
/// Identifies which branch of the per-writer lifecycle `select!` completed first.
///
/// The lifecycle task matches on this value to decide what to log before the
/// writer is removed from the pool.
enum WriterLifecycleExit {
/// The reader loop returned; carries its result so errors can be reported.
Reader(Result<()>),
/// The writer command loop returned; carries its result so errors can be reported.
Writer(Result<()>),
/// The keepalive ping loop returned.
Ping,
/// The RPC_PROXY_REQ service-signal loop returned.
Signal,
/// The writer's cancellation token fired.
Cancelled,
}
async fn writer_command_loop(
mut rx: mpsc::Receiver<WriterCommand>,
mut rpc_writer: RpcWriter,
cancel: CancellationToken,
) -> Result<()> {
loop {
tokio::select! {
cmd = rx.recv() => {
match cmd {
Some(WriterCommand::Data(payload)) => {
rpc_writer.send(&payload).await?;
}
Some(WriterCommand::DataAndFlush(payload)) => {
rpc_writer.send_and_flush(&payload).await?;
}
Some(WriterCommand::Close) | None => return Ok(()),
}
}
_ = cancel.cancelled() => return Ok(()),
}
}
}
/// Periodically queues RPC ping frames for one writer and records when each was sent.
///
/// With `keepalive_enabled`, the delay between pings is `keepalive_interval`
/// (capped at `ME_IDLE_KEEPALIVE_MAX_SECS` while the registry reports the writer
/// as empty) plus a random jitter. Otherwise the delay is `ME_ACTIVE_PING_SECS`
/// shifted by up to `ME_ACTIVE_PING_JITTER_SECS` either way, never below 5 seconds.
///
/// The loop returns when `cancel_ping_token` fires, when the pool can no longer
/// be upgraded (only checked in keepalive mode), or when the command channel
/// rejects a ping.
#[allow(clippy::too_many_arguments)]
async fn ping_loop(
    pool_ping: std::sync::Weak<MePool>,
    writer_id: u64,
    tx_ping: mpsc::Sender<WriterCommand>,
    ping_tracker_ping: Arc<tokio::sync::Mutex<HashMap<i64, Instant>>>,
    stats_ping: Arc<crate::stats::Stats>,
    keepalive_enabled: bool,
    keepalive_interval: Duration,
    keepalive_jitter: Duration,
    cancel_ping_token: CancellationToken,
) {
    let mut ping_id: i64 = rand::random::<i64>();
    let mut cleanup_tick: u32 = 0;
    let idle_cap = Duration::from_secs(ME_IDLE_KEEPALIVE_MAX_SECS);

    // Delay used when keepalive is disabled: base period +/- jitter, floored at 5s.
    let fallback_delay = || {
        let offset =
            rand::rng().random_range(-ME_ACTIVE_PING_JITTER_SECS..=ME_ACTIVE_PING_JITTER_SECS);
        Duration::from_secs((ME_ACTIVE_PING_SECS as i64 + offset).max(5) as u64)
    };
    // Random jitter whose upper bound is the configured jitter, limited to half
    // of `base` and never smaller than 1ms.
    let jitter_for = move |base: Duration| {
        let upper_ms = keepalive_jitter
            .as_millis()
            .min(base.as_millis() / 2)
            .max(1);
        Duration::from_millis(rand::rng().random_range(0..=upper_ms as u64))
    };

    // Per-writer jittered start so writers do not ping in lockstep.
    let first_delay = if keepalive_enabled {
        let Some(pool) = pool_ping.upgrade() else {
            return;
        };
        let base = if pool.registry.is_writer_empty(writer_id).await {
            keepalive_interval.min(idle_cap)
        } else {
            keepalive_interval
        };
        jitter_for(base)
    } else {
        fallback_delay()
    };
    tokio::select! {
        _ = cancel_ping_token.cancelled() => return,
        _ = tokio::time::sleep(first_delay) => {}
    }

    loop {
        let delay = if keepalive_enabled {
            let Some(pool) = pool_ping.upgrade() else {
                return;
            };
            let base = if pool.registry.is_writer_empty(writer_id).await {
                keepalive_interval.min(idle_cap)
            } else {
                keepalive_interval
            };
            base + jitter_for(base)
        } else {
            fallback_delay()
        };
        tokio::select! {
            _ = cancel_ping_token.cancelled() => return,
            _ = tokio::time::sleep(delay) => {}
        }

        // Frame layout: 4-byte ping opcode followed by the 8-byte ping id.
        let sent_id = ping_id;
        let mut frame = Vec::with_capacity(12);
        frame.extend_from_slice(&RPC_PING_U32.to_le_bytes());
        frame.extend_from_slice(&sent_id.to_le_bytes());

        {
            let mut pending = ping_tracker_ping.lock().await;
            cleanup_tick = cleanup_tick.wrapping_add(1);
            if cleanup_tick.is_multiple_of(ME_PING_TRACKER_CLEANUP_EVERY) {
                // Entries tracked for 120s or longer are dropped and counted as timeouts.
                let tracked_before = pending.len();
                pending.retain(|_, sent_at| sent_at.elapsed() < Duration::from_secs(120));
                let expired = tracked_before.saturating_sub(pending.len());
                if expired > 0 {
                    stats_ping.increment_me_keepalive_timeout_by(expired as u64);
                }
            }
            pending.insert(sent_id, std::time::Instant::now());
        }

        ping_id = ping_id.wrapping_add(1);
        stats_ping.increment_me_keepalive_sent();

        let queued = tx_ping
            .send(WriterCommand::DataAndFlush(Bytes::from(frame)))
            .await
            .is_ok();
        if !queued {
            stats_ping.increment_me_keepalive_failed();
            debug!("ME ping failed, removing dead writer");
            return;
        }
    }
}
/// Periodically sends a service RPC_PROXY_REQ followed by RPC_CLOSE_EXT through one writer.
///
/// Each round registers a temporary connection id in the registry, queues a
/// proxy request built from the writer's last known metadata, waits up to
/// `ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS` for any reply, queues a close frame and
/// unregisters the id. Rounds without writer metadata are skipped and counted.
///
/// The loop returns when `cancel_signal` fires, when the pool can no longer be
/// upgraded, or when the command channel rejects a frame. When
/// `rpc_proxy_req_every_secs` is 0 it only waits for cancellation.
#[allow(clippy::too_many_arguments)]
async fn rpc_proxy_req_signal_loop(
    pool_signal: std::sync::Weak<MePool>,
    writer_id: u64,
    tx_signal: mpsc::Sender<WriterCommand>,
    stats_signal: Arc<crate::stats::Stats>,
    cancel_signal: CancellationToken,
    keepalive_jitter_signal: Duration,
    rpc_proxy_req_every_secs: u64,
) {
    if rpc_proxy_req_every_secs == 0 {
        // A disabled loop has to stay pending until cancellation: finishing
        // early would complete the caller's `select!` and tear the writer down.
        cancel_signal.cancelled().await;
        return;
    }

    let period = Duration::from_secs(rpc_proxy_req_every_secs);
    // Jitter upper bound: configured jitter, limited to half the period, at least 1ms.
    let jitter_upper_ms = keepalive_jitter_signal
        .as_millis()
        .min(period.as_millis() / 2)
        .max(1) as u64;
    let draw_jitter =
        move || Duration::from_millis(rand::rng().random_range(0..=jitter_upper_ms));

    let first_delay = draw_jitter();
    tokio::select! {
        _ = cancel_signal.cancelled() => return,
        _ = tokio::time::sleep(first_delay) => {}
    }

    loop {
        let delay = period + draw_jitter();
        tokio::select! {
            _ = cancel_signal.cancelled() => return,
            _ = tokio::time::sleep(delay) => {}
        }

        let Some(pool) = pool_signal.upgrade() else {
            return;
        };
        let meta = match pool.registry.get_last_writer_meta(writer_id).await {
            Some(meta) => meta,
            None => {
                stats_signal.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
                continue;
            }
        };

        // The service id is only registered for routing; it is deliberately not
        // bound to the writer, so it stays out of regular writer/client accounting.
        let (conn_id, mut service_rx) = pool.registry.register().await;
        let request = build_proxy_req_payload(
            conn_id,
            meta.client_addr,
            meta.our_addr,
            &[],
            pool.proxy_tag.as_deref(),
            meta.proto_flags,
        );

        let request_queued = tx_signal
            .send(WriterCommand::DataAndFlush(request))
            .await
            .is_ok();
        if !request_queued {
            stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
            let _ = pool.registry.unregister(conn_id).await;
            return;
        }
        stats_signal.increment_me_rpc_proxy_req_signal_sent_total();

        // Only a message delivered within the wait window counts as a response;
        // a timeout or a closed channel is silently ignored.
        let reply = tokio::time::timeout(
            Duration::from_millis(ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS),
            service_rx.recv(),
        )
        .await;
        if let Ok(Some(_)) = reply {
            stats_signal.increment_me_rpc_proxy_req_signal_response_total();
        }

        // Close frame layout: 4-byte close opcode followed by the 8-byte connection id.
        let mut close_frame = Vec::with_capacity(12);
        close_frame.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
        close_frame.extend_from_slice(&conn_id.to_le_bytes());

        let close_queued = tx_signal
            .send(WriterCommand::DataAndFlush(Bytes::from(close_frame)))
            .await
            .is_ok();
        if !close_queued {
            stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
            let _ = pool.registry.unregister(conn_id).await;
            return;
        }
        stats_signal.increment_me_rpc_proxy_req_signal_close_sent_total();
        let _ = pool.registry.unregister(conn_id).await;
    }
}
impl MePool { impl MePool {
pub(crate) async fn prune_closed_writers(self: &Arc<Self>) { pub(crate) async fn prune_closed_writers(self: &Arc<Self>) {
let closed_writer_ids: Vec<u64> = { let closed_writer_ids: Vec<u64> = {
@ -372,15 +136,46 @@ impl MePool {
let draining_started_at_epoch_secs = Arc::new(AtomicU64::new(0)); let draining_started_at_epoch_secs = Arc::new(AtomicU64::new(0));
let drain_deadline_epoch_secs = Arc::new(AtomicU64::new(0)); let drain_deadline_epoch_secs = Arc::new(AtomicU64::new(0));
let allow_drain_fallback = Arc::new(AtomicBool::new(false)); let allow_drain_fallback = Arc::new(AtomicBool::new(false));
let (tx, rx) = let (tx, mut rx) = mpsc::channel::<WriterCommand>(self.writer_cmd_channel_capacity);
mpsc::channel::<WriterCommand>(self.writer_lifecycle.writer_cmd_channel_capacity); let mut rpc_writer = RpcWriter {
let rpc_writer = RpcWriter {
writer: hs.wr, writer: hs.wr,
key: hs.write_key, key: hs.write_key,
iv: hs.write_iv, iv: hs.write_iv,
seq_no: 0, seq_no: 0,
crc_mode: hs.crc_mode, crc_mode: hs.crc_mode,
}; };
let cancel_wr = cancel.clone();
let cleanup_done = Arc::new(AtomicBool::new(false));
let cleanup_for_writer = cleanup_done.clone();
let pool_writer_task = Arc::downgrade(self);
tokio::spawn(async move {
loop {
tokio::select! {
cmd = rx.recv() => {
match cmd {
Some(WriterCommand::Data(payload)) => {
if rpc_writer.send(&payload).await.is_err() { break; }
}
Some(WriterCommand::DataAndFlush(payload)) => {
if rpc_writer.send_and_flush(&payload).await.is_err() { break; }
}
Some(WriterCommand::Close) | None => break,
}
}
_ = cancel_wr.cancelled() => break,
}
}
if cleanup_for_writer
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
if let Some(pool) = pool_writer_task.upgrade() {
pool.remove_writer_and_close_clients(writer_id).await;
} else {
cancel_wr.cancel();
}
}
});
let writer = MeWriter { let writer = MeWriter {
id: writer_id, id: writer_id,
addr, addr,
@ -398,52 +193,42 @@ impl MePool {
drain_deadline_epoch_secs: drain_deadline_epoch_secs.clone(), drain_deadline_epoch_secs: drain_deadline_epoch_secs.clone(),
allow_drain_fallback: allow_drain_fallback.clone(), allow_drain_fallback: allow_drain_fallback.clone(),
}; };
self.writers self.writers.write().await.push(writer.clone());
.update(|writers| writers.push(writer.clone()))
.await;
self.registry.register_writer(writer_id, tx.clone()).await; self.registry.register_writer(writer_id, tx.clone()).await;
self.registry.mark_writer_idle(writer_id).await; self.registry.mark_writer_idle(writer_id).await;
self.conn_count.fetch_add(1, Ordering::Relaxed); self.conn_count.fetch_add(1, Ordering::Relaxed);
self.notify_writer_epoch(); self.writer_available.notify_one();
let reg = self.registry.clone(); let reg = self.registry.clone();
let writers_arc = self.writers_arc(); let writers_arc = self.writers_arc();
let ping_tracker = Arc::new(tokio::sync::Mutex::new(HashMap::<i64, Instant>::new())); let ping_tracker = self.ping_tracker.clone();
let ping_tracker_reader = ping_tracker.clone(); let ping_tracker_reader = ping_tracker.clone();
let ping_tracker_ping = ping_tracker.clone();
let rtt_stats = self.rtt_stats.clone(); let rtt_stats = self.rtt_stats.clone();
let stats_reader = self.stats.clone(); let stats_reader = self.stats.clone();
let stats_reader_close = self.stats.clone(); let stats_reader_close = self.stats.clone();
let stats_ping = self.stats.clone(); let stats_ping = self.stats.clone();
let stats_signal = self.stats.clone(); let pool = Arc::downgrade(self);
let pool_lifecycle = Arc::downgrade(self);
let pool_ping = Arc::downgrade(self);
let pool_signal = Arc::downgrade(self);
let tx_reader = tx.clone();
let tx_ping = tx.clone();
let tx_signal = tx.clone();
let keepalive_enabled = self.writer_lifecycle.me_keepalive_enabled;
let keepalive_interval = self.writer_lifecycle.me_keepalive_interval;
let keepalive_jitter = self.writer_lifecycle.me_keepalive_jitter;
let keepalive_jitter_signal = self.writer_lifecycle.me_keepalive_jitter;
let rpc_proxy_req_every_secs = self
.writer_lifecycle
.rpc_proxy_req_every_secs
.load(Ordering::Relaxed);
let cancel_reader = cancel.clone();
let cancel_writer = cancel.clone();
let cancel_ping = cancel.clone(); let cancel_ping = cancel.clone();
let tx_ping = tx.clone();
let ping_tracker_ping = ping_tracker.clone();
let cleanup_for_reader = cleanup_done.clone();
let cleanup_for_ping = cleanup_done.clone();
let keepalive_enabled = self.me_keepalive_enabled;
let keepalive_interval = self.me_keepalive_interval;
let keepalive_jitter = self.me_keepalive_jitter;
let rpc_proxy_req_every_secs = self.rpc_proxy_req_every_secs.load(Ordering::Relaxed);
let tx_signal = tx.clone();
let stats_signal = self.stats.clone();
let cancel_signal = cancel.clone(); let cancel_signal = cancel.clone();
let cancel_select = cancel.clone(); let cleanup_for_signal = cleanup_done.clone();
let cancel_cleanup = cancel.clone(); let pool_signal = Arc::downgrade(self);
let reader_route_data_wait_ms = self.transport_policy.me_reader_route_data_wait_ms.clone(); let keepalive_jitter_signal = self.me_keepalive_jitter;
let cancel_reader_token = cancel.clone();
let cancel_ping_token = cancel_ping.clone();
let reader_route_data_wait_ms = self.me_reader_route_data_wait_ms.clone();
tokio::spawn(async move { tokio::spawn(async move {
// Reader MUST be the first branch in biased select! to avoid read starvation. let res = reader_loop(
let exit = tokio::select! {
biased;
reader_res = reader_loop(
hs.rd, hs.rd,
hs.read_key, hs.read_key,
hs.read_iv, hs.read_iv,
@ -451,44 +236,17 @@ impl MePool {
reg.clone(), reg.clone(),
BytesMut::new(), BytesMut::new(),
BytesMut::new(), BytesMut::new(),
tx_reader, tx.clone(),
ping_tracker_reader, ping_tracker_reader,
rtt_stats, rtt_stats.clone(),
stats_reader, stats_reader,
writer_id, writer_id,
degraded, degraded.clone(),
rtt_ema_ms_x10, rtt_ema_ms_x10.clone(),
reader_route_data_wait_ms, reader_route_data_wait_ms,
cancel_reader, cancel_reader_token.clone(),
) => WriterLifecycleExit::Reader(reader_res), )
writer_res = writer_command_loop(rx, rpc_writer, cancel_writer) => { .await;
WriterLifecycleExit::Writer(writer_res)
}
_ = ping_loop(
pool_ping,
writer_id,
tx_ping,
ping_tracker_ping,
stats_ping,
keepalive_enabled,
keepalive_interval,
keepalive_jitter,
cancel_ping,
) => WriterLifecycleExit::Ping,
_ = rpc_proxy_req_signal_loop(
pool_signal,
writer_id,
tx_signal,
stats_signal,
cancel_signal,
keepalive_jitter_signal,
rpc_proxy_req_every_secs,
) => WriterLifecycleExit::Signal,
_ = cancel_select.cancelled() => WriterLifecycleExit::Cancelled,
};
match exit {
WriterLifecycleExit::Reader(res) => {
let idle_close_by_peer = if let Err(e) = res.as_ref() { let idle_close_by_peer = if let Err(e) = res.as_ref() {
is_me_peer_closed_error(e) && reg.is_writer_empty(writer_id).await is_me_peer_closed_error(e) && reg.is_writer_empty(writer_id).await
} else { } else {
@ -498,35 +256,266 @@ impl MePool {
stats_reader_close.increment_me_idle_close_by_peer_total(); stats_reader_close.increment_me_idle_close_by_peer_total();
info!(writer_id, "ME socket closed by peer on idle writer"); info!(writer_id, "ME socket closed by peer on idle writer");
} }
if cleanup_for_reader
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
if let Some(pool) = pool.upgrade() {
pool.remove_writer_and_close_clients(writer_id).await;
} else {
// Fallback for shutdown races: make writer task exit quickly so stale
// channels are observable by periodic prune.
cancel_reader_token.cancel();
}
}
if let Err(e) = res if let Err(e) = res
&& !idle_close_by_peer && !idle_close_by_peer
{ {
warn!(error = %e, "ME reader ended"); warn!(error = %e, "ME reader ended");
} }
}
WriterLifecycleExit::Writer(res) => {
if let Err(e) = res {
warn!(error = %e, "ME writer command loop ended");
}
}
WriterLifecycleExit::Ping => {
debug!(writer_id, "ME ping loop finished");
}
WriterLifecycleExit::Signal => {
debug!(writer_id, "ME rpc_proxy_req signal loop finished");
}
WriterLifecycleExit::Cancelled => {}
}
if let Some(pool) = pool_lifecycle.upgrade() {
pool.remove_writer_and_close_clients(writer_id).await;
} else {
// Fallback for shutdown races: make lifecycle exit observable by prune.
cancel_cleanup.cancel();
}
let remaining = writers_arc.read().await.len(); let remaining = writers_arc.read().await.len();
debug!(writer_id, remaining, "ME writer lifecycle task finished"); debug!(writer_id, remaining, "ME reader task finished");
});
let pool_ping = Arc::downgrade(self);
tokio::spawn(async move {
let mut ping_id: i64 = rand::random::<i64>();
let idle_interval_cap = Duration::from_secs(ME_IDLE_KEEPALIVE_MAX_SECS);
// Per-writer jittered start to avoid phase sync.
let startup_jitter = if keepalive_enabled {
let mut interval = keepalive_interval;
if let Some(pool) = pool_ping.upgrade() {
if pool.registry.is_writer_empty(writer_id).await {
interval = interval.min(idle_interval_cap);
}
} else {
return;
}
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms = keepalive_jitter.as_millis().min(jitter_cap_ms).max(1);
Duration::from_millis(rand::rng().random_range(0..=effective_jitter_ms as u64))
} else {
let jitter = rand::rng()
.random_range(-ME_ACTIVE_PING_JITTER_SECS..=ME_ACTIVE_PING_JITTER_SECS);
let wait = (ME_ACTIVE_PING_SECS as i64 + jitter).max(5) as u64;
Duration::from_secs(wait)
};
tokio::select! {
_ = cancel_ping_token.cancelled() => return,
_ = tokio::time::sleep(startup_jitter) => {}
}
loop {
let wait = if keepalive_enabled {
let mut interval = keepalive_interval;
if let Some(pool) = pool_ping.upgrade() {
if pool.registry.is_writer_empty(writer_id).await {
interval = interval.min(idle_interval_cap);
}
} else {
break;
}
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms =
keepalive_jitter.as_millis().min(jitter_cap_ms).max(1);
interval
+ Duration::from_millis(
rand::rng().random_range(0..=effective_jitter_ms as u64),
)
} else {
let jitter = rand::rng()
.random_range(-ME_ACTIVE_PING_JITTER_SECS..=ME_ACTIVE_PING_JITTER_SECS);
let secs = (ME_ACTIVE_PING_SECS as i64 + jitter).max(5) as u64;
Duration::from_secs(secs)
};
tokio::select! {
_ = cancel_ping_token.cancelled() => {
break;
}
_ = tokio::time::sleep(wait) => {}
}
let sent_id = ping_id;
let mut p = Vec::with_capacity(12);
p.extend_from_slice(&RPC_PING_U32.to_le_bytes());
p.extend_from_slice(&sent_id.to_le_bytes());
{
let mut tracker = ping_tracker_ping.lock().await;
let now_epoch_ms = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as u64;
let mut run_cleanup = false;
if let Some(pool) = pool_ping.upgrade() {
let last_cleanup_ms = pool
.ping_tracker_last_cleanup_epoch_ms
.load(Ordering::Relaxed);
if now_epoch_ms.saturating_sub(last_cleanup_ms) >= 30_000
&& pool
.ping_tracker_last_cleanup_epoch_ms
.compare_exchange(
last_cleanup_ms,
now_epoch_ms,
Ordering::AcqRel,
Ordering::Relaxed,
)
.is_ok()
{
run_cleanup = true;
}
}
if run_cleanup {
let before = tracker.len();
tracker.retain(|_, (ts, _)| ts.elapsed() < Duration::from_secs(120));
let expired = before.saturating_sub(tracker.len());
if expired > 0 {
stats_ping.increment_me_keepalive_timeout_by(expired as u64);
}
}
tracker.insert(sent_id, (std::time::Instant::now(), writer_id));
}
ping_id = ping_id.wrapping_add(1);
stats_ping.increment_me_keepalive_sent();
if tx_ping
.send(WriterCommand::DataAndFlush(Bytes::from(p)))
.await
.is_err()
{
stats_ping.increment_me_keepalive_failed();
debug!("ME ping failed, removing dead writer");
cancel_ping.cancel();
if cleanup_for_ping
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
&& let Some(pool) = pool_ping.upgrade()
{
pool.remove_writer_and_close_clients(writer_id).await;
}
break;
}
}
});
// Background task that periodically sends a synthetic RPC proxy request through
// this writer, optionally observes a response, and then closes the synthetic
// connection again. Each step is reflected in a `me_rpc_proxy_req_signal_*` counter.
tokio::spawn(async move {
// A period of zero disables the task entirely.
if rpc_proxy_req_every_secs == 0 {
return;
}
let interval = Duration::from_secs(rpc_proxy_req_every_secs);
// Startup delay: uniform in 0..=J ms, where J = min(keepalive_jitter_signal, interval / 2),
// raised to at least 1 ms.
let startup_jitter_ms = {
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms = keepalive_jitter_signal
.as_millis()
.min(jitter_cap_ms)
.max(1);
rand::rng().random_range(0..=effective_jitter_ms as u64)
};
// The startup sleep is abandoned as soon as the cancellation token fires.
tokio::select! {
_ = cancel_signal.cancelled() => return,
_ = tokio::time::sleep(Duration::from_millis(startup_jitter_ms)) => {}
}
loop {
// Per-round wait: the base interval plus a fresh random jitter computed
// with the same bounds as the startup jitter.
let wait = {
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms = keepalive_jitter_signal
.as_millis()
.min(jitter_cap_ms)
.max(1);
interval
+ Duration::from_millis(
rand::rng().random_range(0..=effective_jitter_ms as u64),
)
};
tokio::select! {
_ = cancel_signal.cancelled() => break,
_ = tokio::time::sleep(wait) => {}
}
// Stop once the pool handle can no longer be upgraded.
let Some(pool) = pool_signal.upgrade() else {
break;
};
// The request is built from the last connection metadata recorded for this
// writer; without it the round is skipped and counted.
let Some(meta) = pool.registry.get_last_writer_meta(writer_id).await else {
stats_signal.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
continue;
};
// Register a temporary connection id and bind it to this writer so that a
// reply can be received on `service_rx`.
let (conn_id, mut service_rx) = pool.registry.register().await;
if !pool
.registry
.bind_writer(conn_id, writer_id, meta.clone())
.await
{
// Binding was refused: release the temporary id. Note that this path
// increments the same "skipped_no_meta" counter as the missing-meta case.
let _ = pool.registry.unregister(conn_id).await;
stats_signal.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
continue;
}
// Proxy request with an empty data slice (`&[]`).
let payload = build_proxy_req_payload(
conn_id,
meta.client_addr,
meta.our_addr,
&[],
pool.proxy_tag.as_deref(),
meta.proto_flags,
);
if tx_signal
.send(WriterCommand::DataAndFlush(payload))
.await
.is_err()
{
// The writer command channel rejected the send: count the failure,
// release the temporary id, cancel the token, and remove the writer.
stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
let _ = pool.registry.unregister(conn_id).await;
cancel_signal.cancel();
// Only the caller that flips the flag from false to true performs the
// removal (presumably the flag is shared with sibling tasks — confirm).
if cleanup_for_signal
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
pool.remove_writer_and_close_clients(writer_id).await;
}
break;
}
stats_signal.increment_me_rpc_proxy_req_signal_sent_total();
// Wait a bounded time for any message on the temporary route. Only an actual
// message counts as a response; a timeout or a closed channel is not counted
// and does not abort the round.
if matches!(
tokio::time::timeout(
Duration::from_millis(ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS),
service_rx.recv(),
)
.await,
Ok(Some(_))
) {
stats_signal.increment_me_rpc_proxy_req_signal_response_total();
}
// Close message: RPC_CLOSE_EXT type followed by the connection id, both
// little-endian (capacity 12 presumably = 4-byte type + 8-byte id — confirm).
let mut close_payload = Vec::with_capacity(12);
close_payload.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
close_payload.extend_from_slice(&conn_id.to_le_bytes());
if tx_signal
.send(WriterCommand::DataAndFlush(Bytes::from(close_payload)))
.await
.is_err()
{
// Same failure handling as for the request send above.
stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
let _ = pool.registry.unregister(conn_id).await;
cancel_signal.cancel();
if cleanup_for_signal
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
pool.remove_writer_and_close_clients(writer_id).await;
}
break;
}
stats_signal.increment_me_rpc_proxy_req_signal_close_sent_total();
// Always release the temporary connection id at the end of a round.
let _ = pool.registry.unregister(conn_id).await;
}
}); });
Ok(()) Ok(())
@ -605,36 +594,23 @@ impl MePool {
// The close command below is only a best-effort accelerator for task shutdown. // The close command below is only a best-effort accelerator for task shutdown.
// Cleanup progress must never depend on command-channel availability. // Cleanup progress must never depend on command-channel availability.
let _ = self.registry.writer_lost(writer_id).await; let _ = self.registry.writer_lost(writer_id).await;
{
let mut tracker = self.ping_tracker.lock().await;
tracker.retain(|_, (_, wid)| *wid != writer_id);
}
self.rtt_stats.lock().await.remove(&writer_id); self.rtt_stats.lock().await.remove(&writer_id);
if let Some(tx) = close_tx { if let Some(tx) = close_tx {
// Keep teardown critical path non-blocking: close is best-effort only. let _ = tx.send(WriterCommand::Close).await;
let _ = tx.try_send(WriterCommand::Close);
} }
if let Some(addr) = removed_addr { if let Some(addr) = removed_addr {
if let Some(uptime) = removed_uptime { if let Some(uptime) = removed_uptime {
// Quarantine contract: only unexpected removals are considered endpoint flap. // Quarantine flapping endpoints regardless of draining state.
if trigger_refill { self.maybe_quarantine_flapping_endpoint(addr, uptime).await;
self.stats
.increment_me_endpoint_quarantine_unexpected_total();
self.maybe_quarantine_flapping_endpoint(addr, uptime, "unexpected")
.await;
} else {
self.stats
.increment_me_endpoint_quarantine_draining_suppressed_total();
debug!(
%addr,
uptime_ms = uptime.as_millis(),
"Skipping endpoint quarantine for draining writer removal"
);
}
} }
if trigger_refill && let Some(writer_dc) = removed_dc { if trigger_refill && let Some(writer_dc) = removed_dc {
self.trigger_immediate_refill_for_dc(addr, writer_dc); self.trigger_immediate_refill_for_dc(addr, writer_dc);
} }
} }
if removed {
self.notify_writer_epoch();
}
removed removed
} }
@ -700,10 +676,7 @@ impl MePool {
MeBindStaleMode::Never => false, MeBindStaleMode::Never => false,
MeBindStaleMode::Always => true, MeBindStaleMode::Always => true,
MeBindStaleMode::Ttl => { MeBindStaleMode::Ttl => {
let ttl_secs = self let ttl_secs = self.me_bind_stale_ttl_secs.load(Ordering::Relaxed);
.binding_policy
.me_bind_stale_ttl_secs
.load(Ordering::Relaxed);
if ttl_secs == 0 { if ttl_secs == 0 {
return true; return true;
} }

View File

@ -32,10 +32,10 @@ pub(crate) async fn reader_loop(
enc_leftover: BytesMut, enc_leftover: BytesMut,
mut dec: BytesMut, mut dec: BytesMut,
tx: mpsc::Sender<WriterCommand>, tx: mpsc::Sender<WriterCommand>,
ping_tracker: Arc<Mutex<HashMap<i64, Instant>>>, ping_tracker: Arc<Mutex<HashMap<i64, (Instant, u64)>>>,
rtt_stats: Arc<Mutex<HashMap<u64, (f64, f64)>>>, rtt_stats: Arc<Mutex<HashMap<u64, (f64, f64)>>>,
stats: Arc<Stats>, stats: Arc<Stats>,
writer_id: u64, _writer_id: u64,
degraded: Arc<AtomicBool>, degraded: Arc<AtomicBool>,
writer_rtt_ema_ms_x10: Arc<AtomicU32>, writer_rtt_ema_ms_x10: Arc<AtomicU32>,
reader_route_data_wait_ms: Arc<AtomicU64>, reader_route_data_wait_ms: Arc<AtomicU64>,
@ -45,7 +45,7 @@ pub(crate) async fn reader_loop(
let mut expected_seq: i32 = 0; let mut expected_seq: i32 = 0;
loop { loop {
let mut tmp = [0u8; 65_536]; let mut tmp = [0u8; 16_384];
let n = tokio::select! { let n = tokio::select! {
res = rd.read(&mut tmp) => res.map_err(ProxyError::Io)?, res = rd.read(&mut tmp) => res.map_err(ProxyError::Io)?,
_ = cancel.cancelled() => return Ok(()), _ = cancel.cancelled() => return Ok(()),
@ -203,13 +203,13 @@ pub(crate) async fn reader_loop(
} else if pt == RPC_PONG_U32 && body.len() >= 8 { } else if pt == RPC_PONG_U32 && body.len() >= 8 {
let ping_id = i64::from_le_bytes(body[0..8].try_into().unwrap()); let ping_id = i64::from_le_bytes(body[0..8].try_into().unwrap());
stats.increment_me_keepalive_pong(); stats.increment_me_keepalive_pong();
if let Some(sent) = { if let Some((sent, wid)) = {
let mut guard = ping_tracker.lock().await; let mut guard = ping_tracker.lock().await;
guard.remove(&ping_id) guard.remove(&ping_id)
} { } {
let rtt = sent.elapsed().as_secs_f64() * 1000.0; let rtt = sent.elapsed().as_secs_f64() * 1000.0;
let mut stats = rtt_stats.lock().await; let mut stats = rtt_stats.lock().await;
let entry = stats.entry(writer_id).or_insert((rtt, rtt)); let entry = stats.entry(wid).or_insert((rtt, rtt));
entry.1 = entry.1 * 0.8 + rtt * 0.2; entry.1 = entry.1 * 0.8 + rtt * 0.2;
if rtt < entry.0 { if rtt < entry.0 {
entry.0 = rtt; entry.0 = rtt;
@ -224,7 +224,7 @@ pub(crate) async fn reader_loop(
Ordering::Relaxed, Ordering::Relaxed,
); );
trace!( trace!(
writer_id, writer_id = wid,
rtt_ms = rtt, rtt_ms = rtt,
ema_ms = entry.1, ema_ms = entry.1,
base_ms = entry.0, base_ms = entry.0,

View File

@ -3,9 +3,8 @@ use std::net::SocketAddr;
use std::sync::atomic::{AtomicU8, AtomicU64, Ordering}; use std::sync::atomic::{AtomicU8, AtomicU64, Ordering};
use std::time::{Duration, SystemTime, UNIX_EPOCH}; use std::time::{Duration, SystemTime, UNIX_EPOCH};
use dashmap::DashMap;
use tokio::sync::mpsc::error::TrySendError; use tokio::sync::mpsc::error::TrySendError;
use tokio::sync::{Mutex, mpsc}; use tokio::sync::{RwLock, mpsc};
use super::MeResponse; use super::MeResponse;
use super::codec::WriterCommand; use super::codec::WriterCommand;
@ -51,15 +50,8 @@ pub(super) struct WriterActivitySnapshot {
pub active_sessions_by_target_dc: HashMap<i16, usize>, pub active_sessions_by_target_dc: HashMap<i16, usize>,
} }
struct RoutingTable { struct RegistryInner {
map: DashMap<u64, mpsc::Sender<MeResponse>>, map: HashMap<u64, mpsc::Sender<MeResponse>>,
}
struct BindingState {
inner: Mutex<BindingInner>,
}
struct BindingInner {
writers: HashMap<u64, mpsc::Sender<WriterCommand>>, writers: HashMap<u64, mpsc::Sender<WriterCommand>>,
writer_for_conn: HashMap<u64, u64>, writer_for_conn: HashMap<u64, u64>,
conns_for_writer: HashMap<u64, HashSet<u64>>, conns_for_writer: HashMap<u64, HashSet<u64>>,
@ -68,9 +60,10 @@ struct BindingInner {
writer_idle_since_epoch_secs: HashMap<u64, u64>, writer_idle_since_epoch_secs: HashMap<u64, u64>,
} }
impl BindingInner { impl RegistryInner {
fn new() -> Self { fn new() -> Self {
Self { Self {
map: HashMap::new(),
writers: HashMap::new(), writers: HashMap::new(),
writer_for_conn: HashMap::new(), writer_for_conn: HashMap::new(),
conns_for_writer: HashMap::new(), conns_for_writer: HashMap::new(),
@ -82,8 +75,7 @@ impl BindingInner {
} }
pub struct ConnRegistry { pub struct ConnRegistry {
routing: RoutingTable, inner: RwLock<RegistryInner>,
binding: BindingState,
next_id: AtomicU64, next_id: AtomicU64,
route_channel_capacity: usize, route_channel_capacity: usize,
route_backpressure_base_timeout_ms: AtomicU64, route_backpressure_base_timeout_ms: AtomicU64,
@ -102,12 +94,7 @@ impl ConnRegistry {
pub fn with_route_channel_capacity(route_channel_capacity: usize) -> Self { pub fn with_route_channel_capacity(route_channel_capacity: usize) -> Self {
let start = rand::random::<u64>() | 1; let start = rand::random::<u64>() | 1;
Self { Self {
routing: RoutingTable { inner: RwLock::new(RegistryInner::new()),
map: DashMap::new(),
},
binding: BindingState {
inner: Mutex::new(BindingInner::new()),
},
next_id: AtomicU64::new(start), next_id: AtomicU64::new(start),
route_channel_capacity: route_channel_capacity.max(1), route_channel_capacity: route_channel_capacity.max(1),
route_backpressure_base_timeout_ms: AtomicU64::new(ROUTE_BACKPRESSURE_BASE_TIMEOUT_MS), route_backpressure_base_timeout_ms: AtomicU64::new(ROUTE_BACKPRESSURE_BASE_TIMEOUT_MS),
@ -143,14 +130,14 @@ impl ConnRegistry {
/// Registers a new connection route.
///
/// Takes the next id from the atomic `next_id` counter, creates a bounded
/// response channel of `route_channel_capacity`, stores the sender in the
/// routing map under that id, and returns the id together with the receiver.
/// (The two revisions on these lines differ only in where the routing map lives.)
pub async fn register(&self) -> (u64, mpsc::Receiver<MeResponse>) { pub async fn register(&self) -> (u64, mpsc::Receiver<MeResponse>) {
let id = self.next_id.fetch_add(1, Ordering::Relaxed); let id = self.next_id.fetch_add(1, Ordering::Relaxed);
let (tx, rx) = mpsc::channel(self.route_channel_capacity); let (tx, rx) = mpsc::channel(self.route_channel_capacity);
self.routing.map.insert(id, tx); self.inner.write().await.map.insert(id, tx);
(id, rx) (id, rx)
} }
pub async fn register_writer(&self, writer_id: u64, tx: mpsc::Sender<WriterCommand>) { pub async fn register_writer(&self, writer_id: u64, tx: mpsc::Sender<WriterCommand>) {
let mut binding = self.binding.inner.lock().await; let mut inner = self.inner.write().await;
binding.writers.insert(writer_id, tx); inner.writers.insert(writer_id, tx);
binding inner
.conns_for_writer .conns_for_writer
.entry(writer_id) .entry(writer_id)
.or_insert_with(HashSet::new); .or_insert_with(HashSet::new);
@ -158,18 +145,18 @@ impl ConnRegistry {
/// Unregister connection, returning associated writer_id if any. /// Unregister connection, returning associated writer_id if any.
pub async fn unregister(&self, id: u64) -> Option<u64> { pub async fn unregister(&self, id: u64) -> Option<u64> {
self.routing.map.remove(&id); let mut inner = self.inner.write().await;
let mut binding = self.binding.inner.lock().await; inner.map.remove(&id);
binding.meta.remove(&id); inner.meta.remove(&id);
if let Some(writer_id) = binding.writer_for_conn.remove(&id) { if let Some(writer_id) = inner.writer_for_conn.remove(&id) {
let became_empty = if let Some(set) = binding.conns_for_writer.get_mut(&writer_id) { let became_empty = if let Some(set) = inner.conns_for_writer.get_mut(&writer_id) {
set.remove(&id); set.remove(&id);
set.is_empty() set.is_empty()
} else { } else {
false false
}; };
if became_empty { if became_empty {
binding inner
.writer_idle_since_epoch_secs .writer_idle_since_epoch_secs
.insert(writer_id, Self::now_epoch_secs()); .insert(writer_id, Self::now_epoch_secs());
} }
@ -180,7 +167,10 @@ impl ConnRegistry {
#[allow(dead_code)] #[allow(dead_code)]
pub async fn route(&self, id: u64, resp: MeResponse) -> RouteResult { pub async fn route(&self, id: u64, resp: MeResponse) -> RouteResult {
let tx = self.routing.map.get(&id).map(|entry| entry.value().clone()); let tx = {
let inner = self.inner.read().await;
inner.map.get(&id).cloned()
};
let Some(tx) = tx else { let Some(tx) = tx else {
return RouteResult::NoConn; return RouteResult::NoConn;
@ -233,7 +223,10 @@ impl ConnRegistry {
} }
pub async fn route_nowait(&self, id: u64, resp: MeResponse) -> RouteResult { pub async fn route_nowait(&self, id: u64, resp: MeResponse) -> RouteResult {
let tx = self.routing.map.get(&id).map(|entry| entry.value().clone()); let tx = {
let inner = self.inner.read().await;
inner.map.get(&id).cloned()
};
let Some(tx) = tx else { let Some(tx) = tx else {
return RouteResult::NoConn; return RouteResult::NoConn;
@ -256,7 +249,10 @@ impl ConnRegistry {
return self.route_nowait(id, resp).await; return self.route_nowait(id, resp).await;
} }
let tx = self.routing.map.get(&id).map(|entry| entry.value().clone()); let tx = {
let inner = self.inner.read().await;
inner.map.get(&id).cloned()
};
let Some(tx) = tx else { let Some(tx) = tx else {
return RouteResult::NoConn; return RouteResult::NoConn;
@ -295,39 +291,33 @@ impl ConnRegistry {
} }
pub async fn bind_writer(&self, conn_id: u64, writer_id: u64, meta: ConnMeta) -> bool { pub async fn bind_writer(&self, conn_id: u64, writer_id: u64, meta: ConnMeta) -> bool {
let mut binding = self.binding.inner.lock().await; let mut inner = self.inner.write().await;
// ROUTING IS THE SOURCE OF TRUTH: if !inner.writers.contains_key(&writer_id) {
// never keep/attach writer binding for a connection that is already
// absent from the routing table.
if !self.routing.map.contains_key(&conn_id) {
return false;
}
if !binding.writers.contains_key(&writer_id) {
return false; return false;
} }
let previous_writer_id = binding.writer_for_conn.insert(conn_id, writer_id); let previous_writer_id = inner.writer_for_conn.insert(conn_id, writer_id);
if let Some(previous_writer_id) = previous_writer_id if let Some(previous_writer_id) = previous_writer_id
&& previous_writer_id != writer_id && previous_writer_id != writer_id
{ {
let became_empty = let became_empty =
if let Some(set) = binding.conns_for_writer.get_mut(&previous_writer_id) { if let Some(set) = inner.conns_for_writer.get_mut(&previous_writer_id) {
set.remove(&conn_id); set.remove(&conn_id);
set.is_empty() set.is_empty()
} else { } else {
false false
}; };
if became_empty { if became_empty {
binding inner
.writer_idle_since_epoch_secs .writer_idle_since_epoch_secs
.insert(previous_writer_id, Self::now_epoch_secs()); .insert(previous_writer_id, Self::now_epoch_secs());
} }
} }
binding.meta.insert(conn_id, meta.clone()); inner.meta.insert(conn_id, meta.clone());
binding.last_meta_for_writer.insert(writer_id, meta); inner.last_meta_for_writer.insert(writer_id, meta);
binding.writer_idle_since_epoch_secs.remove(&writer_id); inner.writer_idle_since_epoch_secs.remove(&writer_id);
binding inner
.conns_for_writer .conns_for_writer
.entry(writer_id) .entry(writer_id)
.or_insert_with(HashSet::new) .or_insert_with(HashSet::new)
@ -336,32 +326,32 @@ impl ConnRegistry {
} }
/// Marks `writer_id` as idle.
///
/// Ensures the writer has an entry in `conns_for_writer` (an empty set is
/// created if missing) and records the current epoch seconds as its idle-since
/// time. An idle-since value that is already present is kept, not overwritten.
pub async fn mark_writer_idle(&self, writer_id: u64) { pub async fn mark_writer_idle(&self, writer_id: u64) {
let mut binding = self.binding.inner.lock().await; let mut inner = self.inner.write().await;
binding inner
.conns_for_writer .conns_for_writer
.entry(writer_id) .entry(writer_id)
.or_insert_with(HashSet::new); .or_insert_with(HashSet::new);
binding inner
.writer_idle_since_epoch_secs .writer_idle_since_epoch_secs
.entry(writer_id) .entry(writer_id)
.or_insert(Self::now_epoch_secs()); .or_insert(Self::now_epoch_secs());
} }
/// Returns a clone of the entry stored in `last_meta_for_writer` for
/// `writer_id`, or `None` when the writer has no recorded metadata.
pub async fn get_last_writer_meta(&self, writer_id: u64) -> Option<ConnMeta> { pub async fn get_last_writer_meta(&self, writer_id: u64) -> Option<ConnMeta> {
let binding = self.binding.inner.lock().await; let inner = self.inner.read().await;
binding.last_meta_for_writer.get(&writer_id).cloned() inner.last_meta_for_writer.get(&writer_id).cloned()
} }
/// Returns a cloned copy of the whole `writer_idle_since_epoch_secs` map
/// (writer id -> idle-since time in epoch seconds) as it is at the time of the call.
pub async fn writer_idle_since_snapshot(&self) -> HashMap<u64, u64> { pub async fn writer_idle_since_snapshot(&self) -> HashMap<u64, u64> {
let binding = self.binding.inner.lock().await; let inner = self.inner.read().await;
binding.writer_idle_since_epoch_secs.clone() inner.writer_idle_since_epoch_secs.clone()
} }
pub async fn writer_idle_since_for_writer_ids(&self, writer_ids: &[u64]) -> HashMap<u64, u64> { pub async fn writer_idle_since_for_writer_ids(&self, writer_ids: &[u64]) -> HashMap<u64, u64> {
let binding = self.binding.inner.lock().await; let inner = self.inner.read().await;
let mut out = HashMap::<u64, u64>::with_capacity(writer_ids.len()); let mut out = HashMap::<u64, u64>::with_capacity(writer_ids.len());
for writer_id in writer_ids { for writer_id in writer_ids {
if let Some(idle_since) = binding.writer_idle_since_epoch_secs.get(writer_id).copied() { if let Some(idle_since) = inner.writer_idle_since_epoch_secs.get(writer_id).copied() {
out.insert(*writer_id, idle_since); out.insert(*writer_id, idle_since);
} }
} }
@ -369,14 +359,14 @@ impl ConnRegistry {
} }
pub(super) async fn writer_activity_snapshot(&self) -> WriterActivitySnapshot { pub(super) async fn writer_activity_snapshot(&self) -> WriterActivitySnapshot {
let binding = self.binding.inner.lock().await; let inner = self.inner.read().await;
let mut bound_clients_by_writer = HashMap::<u64, usize>::new(); let mut bound_clients_by_writer = HashMap::<u64, usize>::new();
let mut active_sessions_by_target_dc = HashMap::<i16, usize>::new(); let mut active_sessions_by_target_dc = HashMap::<i16, usize>::new();
for (writer_id, conn_ids) in &binding.conns_for_writer { for (writer_id, conn_ids) in &inner.conns_for_writer {
bound_clients_by_writer.insert(*writer_id, conn_ids.len()); bound_clients_by_writer.insert(*writer_id, conn_ids.len());
} }
for conn_meta in binding.meta.values() { for conn_meta in inner.meta.values() {
if conn_meta.target_dc == 0 { if conn_meta.target_dc == 0 {
continue; continue;
} }
@ -392,39 +382,9 @@ impl ConnRegistry {
} }
pub async fn get_writer(&self, conn_id: u64) -> Option<ConnWriter> { pub async fn get_writer(&self, conn_id: u64) -> Option<ConnWriter> {
let mut binding = self.binding.inner.lock().await; let inner = self.inner.read().await;
// ROUTING IS THE SOURCE OF TRUTH: let writer_id = inner.writer_for_conn.get(&conn_id).cloned()?;
// stale bindings are ignored and lazily cleaned when routing no longer let writer = inner.writers.get(&writer_id).cloned()?;
// contains the connection.
if !self.routing.map.contains_key(&conn_id) {
binding.meta.remove(&conn_id);
if let Some(stale_writer_id) = binding.writer_for_conn.remove(&conn_id)
&& let Some(conns) = binding.conns_for_writer.get_mut(&stale_writer_id)
{
conns.remove(&conn_id);
if conns.is_empty() {
binding
.writer_idle_since_epoch_secs
.insert(stale_writer_id, Self::now_epoch_secs());
}
}
return None;
}
let writer_id = binding.writer_for_conn.get(&conn_id).copied()?;
let Some(writer) = binding.writers.get(&writer_id).cloned() else {
binding.writer_for_conn.remove(&conn_id);
binding.meta.remove(&conn_id);
if let Some(conns) = binding.conns_for_writer.get_mut(&writer_id) {
conns.remove(&conn_id);
if conns.is_empty() {
binding
.writer_idle_since_epoch_secs
.insert(writer_id, Self::now_epoch_secs());
}
}
return None;
};
Some(ConnWriter { Some(ConnWriter {
writer_id, writer_id,
tx: writer, tx: writer,
@ -432,16 +392,16 @@ impl ConnRegistry {
} }
/// Returns the ids of all connections that currently have a writer binding,
/// i.e. the keys of `writer_for_conn`. Connections that are registered for
/// routing but not bound to a writer are not included.
pub async fn active_conn_ids(&self) -> Vec<u64> { pub async fn active_conn_ids(&self) -> Vec<u64> {
let binding = self.binding.inner.lock().await; let inner = self.inner.read().await;
binding.writer_for_conn.keys().copied().collect() inner.writer_for_conn.keys().copied().collect()
} }
pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> { pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> {
let mut binding = self.binding.inner.lock().await; let mut inner = self.inner.write().await;
binding.writers.remove(&writer_id); inner.writers.remove(&writer_id);
binding.last_meta_for_writer.remove(&writer_id); inner.last_meta_for_writer.remove(&writer_id);
binding.writer_idle_since_epoch_secs.remove(&writer_id); inner.writer_idle_since_epoch_secs.remove(&writer_id);
let conns = binding let conns = inner
.conns_for_writer .conns_for_writer
.remove(&writer_id) .remove(&writer_id)
.unwrap_or_default() .unwrap_or_default()
@ -450,11 +410,11 @@ impl ConnRegistry {
let mut out = Vec::new(); let mut out = Vec::new();
for conn_id in conns { for conn_id in conns {
if binding.writer_for_conn.get(&conn_id).copied() != Some(writer_id) { if inner.writer_for_conn.get(&conn_id).copied() != Some(writer_id) {
continue; continue;
} }
binding.writer_for_conn.remove(&conn_id); inner.writer_for_conn.remove(&conn_id);
if let Some(m) = binding.meta.get(&conn_id) { if let Some(m) = inner.meta.get(&conn_id) {
out.push(BoundConn { out.push(BoundConn {
conn_id, conn_id,
meta: m.clone(), meta: m.clone(),
@ -466,13 +426,13 @@ impl ConnRegistry {
/// Returns a clone of the `ConnMeta` stored for `conn_id`, or `None` when no
/// metadata is recorded for that connection.
#[allow(dead_code)] #[allow(dead_code)]
pub async fn get_meta(&self, conn_id: u64) -> Option<ConnMeta> { pub async fn get_meta(&self, conn_id: u64) -> Option<ConnMeta> {
let binding = self.binding.inner.lock().await; let inner = self.inner.read().await;
binding.meta.get(&conn_id).cloned() inner.meta.get(&conn_id).cloned()
} }
pub async fn is_writer_empty(&self, writer_id: u64) -> bool { pub async fn is_writer_empty(&self, writer_id: u64) -> bool {
let binding = self.binding.inner.lock().await; let inner = self.inner.read().await;
binding inner
.conns_for_writer .conns_for_writer
.get(&writer_id) .get(&writer_id)
.map(|s| s.is_empty()) .map(|s| s.is_empty())
@ -481,8 +441,8 @@ impl ConnRegistry {
#[allow(dead_code)] #[allow(dead_code)]
pub async fn unregister_writer_if_empty(&self, writer_id: u64) -> bool { pub async fn unregister_writer_if_empty(&self, writer_id: u64) -> bool {
let mut binding = self.binding.inner.lock().await; let mut inner = self.inner.write().await;
let Some(conn_ids) = binding.conns_for_writer.get(&writer_id) else { let Some(conn_ids) = inner.conns_for_writer.get(&writer_id) else {
// Writer is already absent from the registry. // Writer is already absent from the registry.
return true; return true;
}; };
@ -490,19 +450,19 @@ impl ConnRegistry {
return false; return false;
} }
binding.writers.remove(&writer_id); inner.writers.remove(&writer_id);
binding.last_meta_for_writer.remove(&writer_id); inner.last_meta_for_writer.remove(&writer_id);
binding.writer_idle_since_epoch_secs.remove(&writer_id); inner.writer_idle_since_epoch_secs.remove(&writer_id);
binding.conns_for_writer.remove(&writer_id); inner.conns_for_writer.remove(&writer_id);
true true
} }
#[allow(dead_code)] #[allow(dead_code)]
pub(super) async fn non_empty_writer_ids(&self, writer_ids: &[u64]) -> HashSet<u64> { pub(super) async fn non_empty_writer_ids(&self, writer_ids: &[u64]) -> HashSet<u64> {
let binding = self.binding.inner.lock().await; let inner = self.inner.read().await;
let mut out = HashSet::<u64>::with_capacity(writer_ids.len()); let mut out = HashSet::<u64>::with_capacity(writer_ids.len());
for writer_id in writer_ids { for writer_id in writer_ids {
if let Some(conns) = binding.conns_for_writer.get(writer_id) if let Some(conns) = inner.conns_for_writer.get(writer_id)
&& !conns.is_empty() && !conns.is_empty()
{ {
out.insert(*writer_id); out.insert(*writer_id);

View File

@ -26,9 +26,6 @@ use rand::seq::SliceRandom;
const IDLE_WRITER_PENALTY_MID_SECS: u64 = 45; const IDLE_WRITER_PENALTY_MID_SECS: u64 = 45;
const IDLE_WRITER_PENALTY_HIGH_SECS: u64 = 55; const IDLE_WRITER_PENALTY_HIGH_SECS: u64 = 55;
const HYBRID_GLOBAL_BURST_PERIOD_ROUNDS: u32 = 4; const HYBRID_GLOBAL_BURST_PERIOD_ROUNDS: u32 = 4;
const HYBRID_RECENT_SUCCESS_WINDOW_MS: u64 = 120_000;
const HYBRID_TIMEOUT_WARN_RATE_LIMIT_MS: u64 = 5_000;
const HYBRID_RECOVERY_TRIGGER_MIN_INTERVAL_MS: u64 = 5_000;
const PICK_PENALTY_WARM: u64 = 200; const PICK_PENALTY_WARM: u64 = 200;
const PICK_PENALTY_DRAINING: u64 = 600; const PICK_PENALTY_DRAINING: u64 = 600;
const PICK_PENALTY_STALE: u64 = 300; const PICK_PENALTY_STALE: u64 = 300;
@ -71,11 +68,8 @@ impl MePool {
}, },
) )
}; };
let no_writer_mode = MeRouteNoWriterMode::from_u8( let no_writer_mode =
self.route_runtime MeRouteNoWriterMode::from_u8(self.me_route_no_writer_mode.load(Ordering::Relaxed));
.me_route_no_writer_mode
.load(Ordering::Relaxed),
);
let (routed_dc, unknown_target_dc) = let (routed_dc, unknown_target_dc) =
self.resolve_target_dc_for_routing(target_dc as i32).await; self.resolve_target_dc_for_routing(target_dc as i32).await;
let mut no_writer_deadline: Option<Instant> = None; let mut no_writer_deadline: Option<Instant> = None;
@ -83,11 +77,7 @@ impl MePool {
let mut async_recovery_triggered = false; let mut async_recovery_triggered = false;
let mut hybrid_recovery_round = 0u32; let mut hybrid_recovery_round = 0u32;
let mut hybrid_last_recovery_at: Option<Instant> = None; let mut hybrid_last_recovery_at: Option<Instant> = None;
let mut hybrid_total_deadline: Option<Instant> = None; let hybrid_wait_step = self.me_route_no_writer_wait.max(Duration::from_millis(50));
let hybrid_wait_step = self
.route_runtime
.me_route_no_writer_wait
.max(Duration::from_millis(50));
let mut hybrid_wait_current = hybrid_wait_step; let mut hybrid_wait_current = hybrid_wait_step;
loop { loop {
@ -102,13 +92,9 @@ impl MePool {
.tx .tx
.try_send(WriterCommand::Data(current_payload.clone())) .try_send(WriterCommand::Data(current_payload.clone()))
{ {
Ok(()) => { Ok(()) => return Ok(()),
self.note_hybrid_route_success();
return Ok(());
}
Err(TrySendError::Full(cmd)) => { Err(TrySendError::Full(cmd)) => {
if current.tx.send(cmd).await.is_ok() { if current.tx.send(cmd).await.is_ok() {
self.note_hybrid_route_success();
return Ok(()); return Ok(());
} }
warn!(writer_id = current.writer_id, "ME writer channel closed"); warn!(writer_id = current.writer_id, "ME writer channel closed");
@ -132,7 +118,7 @@ impl MePool {
match no_writer_mode { match no_writer_mode {
MeRouteNoWriterMode::AsyncRecoveryFailfast => { MeRouteNoWriterMode::AsyncRecoveryFailfast => {
let deadline = *no_writer_deadline.get_or_insert_with(|| { let deadline = *no_writer_deadline.get_or_insert_with(|| {
Instant::now() + self.route_runtime.me_route_no_writer_wait Instant::now() + self.me_route_no_writer_wait
}); });
if !async_recovery_triggered && !unknown_target_dc { if !async_recovery_triggered && !unknown_target_dc {
let triggered = let triggered =
@ -153,9 +139,7 @@ impl MePool {
MeRouteNoWriterMode::InlineRecoveryLegacy => { MeRouteNoWriterMode::InlineRecoveryLegacy => {
self.stats.increment_me_inline_recovery_total(); self.stats.increment_me_inline_recovery_total();
if !unknown_target_dc { if !unknown_target_dc {
for _ in for _ in 0..self.me_route_inline_recovery_attempts.max(1) {
0..self.route_runtime.me_route_inline_recovery_attempts.max(1)
{
for family in self.family_order() { for family in self.family_order() {
let map = match family { let map = match family {
IpFamily::V4 => self.proxy_map_v4.read().await.clone(), IpFamily::V4 => self.proxy_map_v4.read().await.clone(),
@ -184,7 +168,7 @@ impl MePool {
continue; continue;
} }
let deadline = *no_writer_deadline.get_or_insert_with(|| { let deadline = *no_writer_deadline.get_or_insert_with(|| {
Instant::now() + self.route_runtime.me_route_inline_recovery_wait Instant::now() + self.me_route_inline_recovery_wait
}); });
if !self.wait_for_writer_until(deadline).await { if !self.wait_for_writer_until(deadline).await {
if !self.writers.read().await.is_empty() { if !self.writers.read().await.is_empty() {
@ -198,15 +182,6 @@ impl MePool {
continue; continue;
} }
MeRouteNoWriterMode::HybridAsyncPersistent => { MeRouteNoWriterMode::HybridAsyncPersistent => {
let total_deadline = *hybrid_total_deadline.get_or_insert_with(|| {
Instant::now() + self.hybrid_total_wait_budget()
});
if Instant::now() >= total_deadline {
self.on_hybrid_timeout(total_deadline, routed_dc);
return Err(ProxyError::Proxy(
"ME writer not available within hybrid timeout".into(),
));
}
if !unknown_target_dc { if !unknown_target_dc {
self.maybe_trigger_hybrid_recovery( self.maybe_trigger_hybrid_recovery(
routed_dc, routed_dc,
@ -239,9 +214,8 @@ impl MePool {
let pick_mode = self.writer_pick_mode(); let pick_mode = self.writer_pick_mode();
match no_writer_mode { match no_writer_mode {
MeRouteNoWriterMode::AsyncRecoveryFailfast => { MeRouteNoWriterMode::AsyncRecoveryFailfast => {
let deadline = *no_writer_deadline.get_or_insert_with(|| { let deadline = *no_writer_deadline
Instant::now() + self.route_runtime.me_route_no_writer_wait .get_or_insert_with(|| Instant::now() + self.me_route_no_writer_wait);
});
if !async_recovery_triggered && !unknown_target_dc { if !async_recovery_triggered && !unknown_target_dc {
let triggered = let triggered =
self.trigger_async_recovery_for_target_dc(routed_dc).await; self.trigger_async_recovery_for_target_dc(routed_dc).await;
@ -264,7 +238,7 @@ impl MePool {
self.stats.increment_me_inline_recovery_total(); self.stats.increment_me_inline_recovery_total();
if unknown_target_dc { if unknown_target_dc {
let deadline = *no_writer_deadline.get_or_insert_with(|| { let deadline = *no_writer_deadline.get_or_insert_with(|| {
Instant::now() + self.route_runtime.me_route_inline_recovery_wait Instant::now() + self.me_route_inline_recovery_wait
}); });
if self.wait_for_candidate_until(routed_dc, deadline).await { if self.wait_for_candidate_until(routed_dc, deadline).await {
continue; continue;
@ -276,9 +250,7 @@ impl MePool {
"No ME writers available for target DC".into(), "No ME writers available for target DC".into(),
)); ));
} }
if emergency_attempts if emergency_attempts >= self.me_route_inline_recovery_attempts.max(1) {
>= self.route_runtime.me_route_inline_recovery_attempts.max(1)
{
self.stats self.stats
.increment_me_writer_pick_no_candidate_total(pick_mode); .increment_me_writer_pick_no_candidate_total(pick_mode);
self.stats.increment_me_no_writer_failfast_total(); self.stats.increment_me_no_writer_failfast_total();
@ -320,16 +292,6 @@ impl MePool {
} }
} }
MeRouteNoWriterMode::HybridAsyncPersistent => { MeRouteNoWriterMode::HybridAsyncPersistent => {
let total_deadline = *hybrid_total_deadline.get_or_insert_with(|| {
Instant::now() + self.hybrid_total_wait_budget()
});
if Instant::now() >= total_deadline {
self.on_hybrid_timeout(total_deadline, routed_dc);
return Err(ProxyError::Proxy(
"No ME writers available for target DC within hybrid timeout"
.into(),
));
}
if !unknown_target_dc { if !unknown_target_dc {
self.maybe_trigger_hybrid_recovery( self.maybe_trigger_hybrid_recovery(
routed_dc, routed_dc,
@ -370,11 +332,7 @@ impl MePool {
pick_sample_size, pick_sample_size,
) )
} else { } else {
if self if self.me_deterministic_writer_sort.load(Ordering::Relaxed) {
.writer_selection_policy
.me_deterministic_writer_sort
.load(Ordering::Relaxed)
{
candidate_indices.sort_by(|lhs, rhs| { candidate_indices.sort_by(|lhs, rhs| {
let left = &writers_snapshot[*lhs]; let left = &writers_snapshot[*lhs];
let right = &writers_snapshot[*rhs]; let right = &writers_snapshot[*rhs];
@ -465,7 +423,6 @@ impl MePool {
"Selected stale ME writer for fallback bind" "Selected stale ME writer for fallback bind"
); );
} }
self.note_hybrid_route_success();
return Ok(()); return Ok(());
} }
Err(TrySendError::Full(_)) => { Err(TrySendError::Full(_)) => {
@ -496,19 +453,7 @@ impl MePool {
.increment_me_writer_pick_blocking_fallback_total(); .increment_me_writer_pick_blocking_fallback_total();
let effective_our_addr = SocketAddr::new(w.source_ip, our_addr.port()); let effective_our_addr = SocketAddr::new(w.source_ip, our_addr.port());
let (payload, meta) = build_routed_payload(effective_our_addr); let (payload, meta) = build_routed_payload(effective_our_addr);
let reserve_result = match w.tx.clone().reserve_owned().await {
if let Some(timeout) = self.route_runtime.me_route_blocking_send_timeout {
match tokio::time::timeout(timeout, w.tx.clone().reserve_owned()).await {
Ok(result) => result,
Err(_) => {
self.stats.increment_me_writer_pick_full_total(pick_mode);
continue;
}
}
} else {
w.tx.clone().reserve_owned().await
};
match reserve_result {
Ok(permit) => { Ok(permit) => {
if !self.registry.bind_writer(conn_id, w.id, meta).await { if !self.registry.bind_writer(conn_id, w.id, meta).await {
debug!( debug!(
@ -526,7 +471,6 @@ impl MePool {
if w.generation < self.current_generation() { if w.generation < self.current_generation() {
self.stats.increment_pool_stale_pick_total(); self.stats.increment_pool_stale_pick_total();
} }
self.note_hybrid_route_success();
return Ok(()); return Ok(());
} }
Err(_) => { Err(_) => {
@ -539,7 +483,7 @@ impl MePool {
} }
async fn wait_for_writer_until(&self, deadline: Instant) -> bool { async fn wait_for_writer_until(&self, deadline: Instant) -> bool {
let mut rx = self.writer_epoch.subscribe(); let waiter = self.writer_available.notified();
if !self.writers.read().await.is_empty() { if !self.writers.read().await.is_empty() {
return true; return true;
} }
@ -548,14 +492,13 @@ impl MePool {
return !self.writers.read().await.is_empty(); return !self.writers.read().await.is_empty();
} }
let timeout = deadline.saturating_duration_since(now); let timeout = deadline.saturating_duration_since(now);
if tokio::time::timeout(timeout, rx.changed()).await.is_ok() { if tokio::time::timeout(timeout, waiter).await.is_ok() {
return !self.writers.read().await.is_empty(); return true;
} }
!self.writers.read().await.is_empty() !self.writers.read().await.is_empty()
} }
async fn wait_for_candidate_until(&self, routed_dc: i32, deadline: Instant) -> bool { async fn wait_for_candidate_until(&self, routed_dc: i32, deadline: Instant) -> bool {
let mut rx = self.writer_epoch.subscribe();
loop { loop {
if self.has_candidate_for_target_dc(routed_dc).await { if self.has_candidate_for_target_dc(routed_dc).await {
return true; return true;
@ -566,6 +509,7 @@ impl MePool {
return self.has_candidate_for_target_dc(routed_dc).await; return self.has_candidate_for_target_dc(routed_dc).await;
} }
let waiter = self.writer_available.notified();
if self.has_candidate_for_target_dc(routed_dc).await { if self.has_candidate_for_target_dc(routed_dc).await {
return true; return true;
} }
@ -573,7 +517,7 @@ impl MePool {
if remaining.is_zero() { if remaining.is_zero() {
return self.has_candidate_for_target_dc(routed_dc).await; return self.has_candidate_for_target_dc(routed_dc).await;
} }
if tokio::time::timeout(remaining, rx.changed()).await.is_err() { if tokio::time::timeout(remaining, waiter).await.is_err() {
return self.has_candidate_for_target_dc(routed_dc).await; return self.has_candidate_for_target_dc(routed_dc).await;
} }
} }
@ -643,9 +587,6 @@ impl MePool {
hybrid_last_recovery_at: &mut Option<Instant>, hybrid_last_recovery_at: &mut Option<Instant>,
hybrid_wait_step: Duration, hybrid_wait_step: Duration,
) { ) {
if !self.try_consume_hybrid_recovery_trigger_slot(HYBRID_RECOVERY_TRIGGER_MIN_INTERVAL_MS) {
return;
}
if let Some(last) = *hybrid_last_recovery_at if let Some(last) = *hybrid_last_recovery_at
&& last.elapsed() < hybrid_wait_step && last.elapsed() < hybrid_wait_step
{ {
@ -661,78 +602,6 @@ impl MePool {
*hybrid_last_recovery_at = Some(Instant::now()); *hybrid_last_recovery_at = Some(Instant::now());
} }
fn hybrid_total_wait_budget(&self) -> Duration {
let base = self
.route_runtime
.me_route_hybrid_max_wait
.max(Duration::from_millis(50));
let now_ms = Self::now_epoch_millis();
let last_success_ms = self
.route_runtime
.me_route_last_success_epoch_ms
.load(Ordering::Relaxed);
if last_success_ms != 0
&& now_ms.saturating_sub(last_success_ms) <= HYBRID_RECENT_SUCCESS_WINDOW_MS
{
return base.saturating_mul(2);
}
base
}
fn note_hybrid_route_success(&self) {
self.route_runtime
.me_route_last_success_epoch_ms
.store(Self::now_epoch_millis(), Ordering::Relaxed);
}
fn on_hybrid_timeout(&self, deadline: Instant, routed_dc: i32) {
self.stats.increment_me_hybrid_timeout_total();
let now_ms = Self::now_epoch_millis();
let mut last_warn_ms = self
.route_runtime
.me_route_hybrid_timeout_warn_epoch_ms
.load(Ordering::Relaxed);
while now_ms.saturating_sub(last_warn_ms) >= HYBRID_TIMEOUT_WARN_RATE_LIMIT_MS {
match self
.route_runtime
.me_route_hybrid_timeout_warn_epoch_ms
.compare_exchange_weak(last_warn_ms, now_ms, Ordering::AcqRel, Ordering::Relaxed)
{
Ok(_) => {
warn!(
routed_dc,
budget_ms = self.hybrid_total_wait_budget().as_millis() as u64,
elapsed_ms = deadline.elapsed().as_millis() as u64,
"ME hybrid route timeout reached"
);
break;
}
Err(actual) => last_warn_ms = actual,
}
}
}
fn try_consume_hybrid_recovery_trigger_slot(&self, min_interval_ms: u64) -> bool {
let now_ms = Self::now_epoch_millis();
let mut last_trigger_ms = self
.route_runtime
.me_async_recovery_last_trigger_epoch_ms
.load(Ordering::Relaxed);
loop {
if now_ms.saturating_sub(last_trigger_ms) < min_interval_ms {
return false;
}
match self
.route_runtime
.me_async_recovery_last_trigger_epoch_ms
.compare_exchange_weak(last_trigger_ms, now_ms, Ordering::AcqRel, Ordering::Relaxed)
{
Ok(_) => return true,
Err(actual) => last_trigger_ms = actual,
}
}
}
pub async fn send_close(self: &Arc<Self>, conn_id: u64) -> Result<()> { pub async fn send_close(self: &Arc<Self>, conn_id: u64) -> Result<()> {
if let Some(w) = self.registry.get_writer(conn_id).await { if let Some(w) = self.registry.get_writer(conn_id).await {
let mut p = Vec::with_capacity(12); let mut p = Vec::with_capacity(12);
@ -880,7 +749,7 @@ impl MePool {
(self.writer_idle_rank_for_selection(writer, idle_since_by_writer, now_epoch_secs) (self.writer_idle_rank_for_selection(writer, idle_since_by_writer, now_epoch_secs)
as u64) as u64)
* 100; * 100;
let queue_cap = self.writer_lifecycle.writer_cmd_channel_capacity.max(1) as u64; let queue_cap = self.writer_cmd_channel_capacity.max(1) as u64;
let queue_remaining = writer.tx.capacity() as u64; let queue_remaining = writer.tx.capacity() as u64;
let queue_used = queue_cap.saturating_sub(queue_remaining.min(queue_cap)); let queue_used = queue_cap.saturating_sub(queue_remaining.min(queue_cap));
let queue_util_pct = queue_used.saturating_mul(100) / queue_cap; let queue_util_pct = queue_used.saturating_mul(100) / queue_cap;

View File

@ -113,8 +113,6 @@ async fn make_pool(
general.me_warn_rate_limit_ms, general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(), MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms, general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts, general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms, general.me_route_inline_recovery_wait_ms,
); );

View File

@ -111,8 +111,6 @@ async fn make_pool(
general.me_warn_rate_limit_ms, general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(), MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms, general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts, general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms, general.me_route_inline_recovery_wait_ms,
); );

View File

@ -106,8 +106,6 @@ async fn make_pool(me_pool_drain_threshold: u64) -> Arc<MePool> {
general.me_warn_rate_limit_ms, general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(), MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms, general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts, general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms, general.me_route_inline_recovery_wait_ms,
) )

View File

@ -95,8 +95,6 @@ async fn make_pool() -> Arc<MePool> {
general.me_warn_rate_limit_ms, general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(), MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms, general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts, general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms, general.me_route_inline_recovery_wait_ms,
) )

View File

@ -35,7 +35,7 @@ async fn make_pool() -> Arc<MePool> {
NetworkDecision::default(), NetworkDecision::default(),
None, None,
Arc::new(SecureRandom::new()), Arc::new(SecureRandom::new()),
Arc::new(Stats::new()), Arc::new(Stats::default()),
general.me_keepalive_enabled, general.me_keepalive_enabled,
general.me_keepalive_interval_secs, general.me_keepalive_interval_secs,
general.me_keepalive_jitter_secs, general.me_keepalive_jitter_secs,
@ -100,8 +100,6 @@ async fn make_pool() -> Arc<MePool> {
general.me_warn_rate_limit_ms, general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(), MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms, general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts, general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms, general.me_route_inline_recovery_wait_ms,
) )
@ -173,15 +171,10 @@ async fn bind_conn_to_writer(pool: &Arc<MePool>, writer_id: u64, port: u16) -> u
} }
#[tokio::test] #[tokio::test]
async fn remove_draining_writer_does_not_quarantine_flapping_endpoint() { async fn remove_draining_writer_still_quarantines_flapping_endpoint() {
let pool = make_pool().await; let pool = make_pool().await;
let writer_id = 77; let writer_id = 77;
let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 12, 0, 77)), 443); let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 12, 0, 77)), 443);
let before_total = pool.stats.get_me_endpoint_quarantine_total();
let before_unexpected = pool.stats.get_me_endpoint_quarantine_unexpected_total();
let before_suppressed = pool
.stats
.get_me_endpoint_quarantine_draining_suppressed_total();
insert_writer( insert_writer(
&pool, &pool,
writer_id, writer_id,
@ -205,18 +198,8 @@ async fn remove_draining_writer_does_not_quarantine_flapping_endpoint() {
"writer must be removed from pool after cleanup" "writer must be removed from pool after cleanup"
); );
assert!( assert!(
!pool.is_endpoint_quarantined(addr).await, pool.is_endpoint_quarantined(addr).await,
"draining removals must not quarantine endpoint" "draining removals must still quarantine flapping endpoints"
);
assert_eq!(pool.stats.get_me_endpoint_quarantine_total(), before_total);
assert_eq!(
pool.stats.get_me_endpoint_quarantine_unexpected_total(),
before_unexpected
);
assert_eq!(
pool.stats
.get_me_endpoint_quarantine_draining_suppressed_total(),
before_suppressed + 1
); );
assert_eq!(pool.conn_count.load(Ordering::Relaxed), 0); assert_eq!(pool.conn_count.load(Ordering::Relaxed), 0);
} }
@ -272,21 +255,16 @@ async fn edge_draining_only_detach_rejects_active_writer() {
} }
#[tokio::test] #[tokio::test]
async fn adversarial_blackhat_single_unexpected_remove_establishes_single_quarantine_entry() { async fn adversarial_blackhat_single_remove_establishes_single_quarantine_entry() {
let pool = make_pool().await; let pool = make_pool().await;
let writer_id = 93; let writer_id = 93;
let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 12, 0, 93)), 443); let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 12, 0, 93)), 443);
let before_total = pool.stats.get_me_endpoint_quarantine_total();
let before_unexpected = pool.stats.get_me_endpoint_quarantine_unexpected_total();
let before_suppressed = pool
.stats
.get_me_endpoint_quarantine_draining_suppressed_total();
insert_writer( insert_writer(
&pool, &pool,
writer_id, writer_id,
2, 2,
addr, addr,
false, true,
Instant::now() - Duration::from_secs(1), Instant::now() - Duration::from_secs(1),
) )
.await; .await;
@ -294,19 +272,6 @@ async fn adversarial_blackhat_single_unexpected_remove_establishes_single_quaran
pool.remove_writer_and_close_clients(writer_id).await; pool.remove_writer_and_close_clients(writer_id).await;
assert!(pool.is_endpoint_quarantined(addr).await); assert!(pool.is_endpoint_quarantined(addr).await);
assert_eq!(pool.endpoint_quarantine.lock().await.len(), 1); assert_eq!(pool.endpoint_quarantine.lock().await.len(), 1);
assert_eq!(
pool.stats.get_me_endpoint_quarantine_total(),
before_total + 1
);
assert_eq!(
pool.stats.get_me_endpoint_quarantine_unexpected_total(),
before_unexpected + 1
);
assert_eq!(
pool.stats
.get_me_endpoint_quarantine_draining_suppressed_total(),
before_suppressed
);
} }
#[tokio::test] #[tokio::test]

View File

@ -106,8 +106,6 @@ async fn make_pool() -> (Arc<MePool>, Arc<SecureRandom>) {
general.me_warn_rate_limit_ms, general.me_warn_rate_limit_ms,
general.me_route_no_writer_mode, general.me_route_no_writer_mode,
general.me_route_no_writer_wait_ms, general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts, general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms, general.me_route_inline_recovery_wait_ms,
); );