mirror of
https://github.com/telemt/telemt.git
synced 2026-04-15 01:24:09 +03:00
Compare commits
207 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b8da986fd5 | ||
|
|
dd270258bf | ||
|
|
40dc6a39c1 | ||
|
|
8b5cbb7b4b | ||
|
|
0e476c71a5 | ||
|
|
be24b47300 | ||
|
|
8cd719da3f | ||
|
|
959d385015 | ||
|
|
6fa01d4c36 | ||
|
|
a383f3f1a3 | ||
|
|
7635aad1cb | ||
|
|
b315e84136 | ||
|
|
1d8de09a32 | ||
|
|
d2db9b8cf9 | ||
|
|
796279343e | ||
|
|
fabb3c45f1 | ||
|
|
161af51558 | ||
|
|
100ef0fa28 | ||
|
|
8994c27714 | ||
|
|
b950987229 | ||
|
|
a09b597fab | ||
|
|
c920dc6381 | ||
|
|
f4418d2d50 | ||
|
|
5ab3170f69 | ||
|
|
76fa06fa2e | ||
|
|
3a997fcf71 | ||
|
|
4b49b1b4f0 | ||
|
|
97926b05e8 | ||
|
|
6de17ae830 | ||
|
|
4c94f73546 | ||
|
|
d99df37ac5 | ||
|
|
d0f253b49b | ||
|
|
ef2ed3daa0 | ||
|
|
fc52cad109 | ||
|
|
98f365be44 | ||
|
|
b6c3cae2ad | ||
|
|
5f7fb15dd8 | ||
|
|
3a89f16332 | ||
|
|
aa3fcfbbe1 | ||
|
|
a616775f6d | ||
|
|
633af93b19 | ||
|
|
b41257f54e | ||
|
|
76b28aea74 | ||
|
|
aa315f5d72 | ||
|
|
c28b82a618 | ||
|
|
e7bdc80956 | ||
|
|
d641137537 | ||
|
|
4fd22b3219 | ||
|
|
fca0e3f619 | ||
|
|
9401c46727 | ||
|
|
6b3697ee87 | ||
|
|
c08160600e | ||
|
|
cd5c60ce1e | ||
|
|
ae1c97e27a | ||
|
|
cfee7de66b | ||
|
|
c942c492ad | ||
|
|
0e4be43b2b | ||
|
|
7eb2b60855 | ||
|
|
373ae3281e | ||
|
|
178630e3bf | ||
|
|
67f307cd43 | ||
|
|
ca2eaa9ead | ||
|
|
3c78daea0c | ||
|
|
d2baa8e721 | ||
|
|
a0cf4b4713 | ||
|
|
1bd249b0a9 | ||
|
|
2f47ec5797 | ||
|
|
80f3661b8e | ||
|
|
32eeb4a98c | ||
|
|
a74cc14ed9 | ||
|
|
5f77f83b48 | ||
|
|
d543dbca92 | ||
|
|
02f9d59f5a | ||
|
|
7b745bc7bc | ||
|
|
5ac0ef1ffd | ||
|
|
e1f3efb619 | ||
|
|
508eea0131 | ||
|
|
9e7f80b9b3 | ||
|
|
ee2def2e62 | ||
|
|
258191ab87 | ||
|
|
27e6dec018 | ||
|
|
26323dbebf | ||
|
|
484137793f | ||
|
|
24713feddc | ||
|
|
93f58524d1 | ||
|
|
0ff2e95e49 | ||
|
|
89222e7123 | ||
|
|
2468ee15e7 | ||
|
|
3440aa9fcd | ||
|
|
ce9698d39b | ||
|
|
ddfe7c5cfa | ||
|
|
01893f3712 | ||
|
|
8ae741ec72 | ||
|
|
6856466cef | ||
|
|
68292fbd26 | ||
|
|
e90c42ae68 | ||
|
|
9f9a5dce0d | ||
|
|
6739cd8d01 | ||
|
|
6cc8d9cb00 | ||
|
|
ce375b62e4 | ||
|
|
95971ac62c | ||
|
|
4ea2226dcd | ||
|
|
d752a440e5 | ||
|
|
5ce2ee2dae | ||
|
|
6fd9f0595d | ||
|
|
fcdd8a9796 | ||
|
|
640468d4e7 | ||
|
|
02fe89f7d0 | ||
|
|
24df865503 | ||
|
|
e9f8c79498 | ||
|
|
24ff75701e | ||
|
|
4221230969 | ||
|
|
d87196c105 | ||
|
|
da89415961 | ||
|
|
2d98ebf3c3 | ||
|
|
fb5e9947bd | ||
|
|
2ea85c00d3 | ||
|
|
2a3b6b917f | ||
|
|
83ed9065b0 | ||
|
|
44b825edf5 | ||
|
|
487e95a66e | ||
|
|
c465c200c4 | ||
|
|
d7716ad875 | ||
|
|
edce194948 | ||
|
|
13fdff750d | ||
|
|
bdcf110c87 | ||
|
|
dd12997744 | ||
|
|
fc160913bf | ||
|
|
92c22ef16d | ||
|
|
aff22d0855 | ||
|
|
b3d3bca15a | ||
|
|
92f38392eb | ||
|
|
30ef8df1b3 | ||
|
|
2e174adf16 | ||
|
|
4e803b1412 | ||
|
|
9b174318ce | ||
|
|
99edcbe818 | ||
|
|
ef7dc2b80f | ||
|
|
691607f269 | ||
|
|
55561a23bc | ||
|
|
f32c34f126 | ||
|
|
8f3bdaec2c | ||
|
|
69b02caf77 | ||
|
|
3854955069 | ||
|
|
9b84fc7a5b | ||
|
|
e7cb9238dc | ||
|
|
0e2cbe6178 | ||
|
|
cd076aeeeb | ||
|
|
d683faf922 | ||
|
|
0494f8ac8b | ||
|
|
48ce59900e | ||
|
|
84e95fd229 | ||
|
|
a80be78345 | ||
|
|
64130dd02e | ||
|
|
d62a6e0417 | ||
|
|
3260746785 | ||
|
|
8066ea2163 | ||
|
|
813f1df63e | ||
|
|
09bdafa718 | ||
|
|
fb0f75df43 | ||
|
|
39255df549 | ||
|
|
456495fd62 | ||
|
|
83cadc0bf3 | ||
|
|
0b1a8cd3f8 | ||
|
|
565b4ee923 | ||
|
|
7a9c1e79c2 | ||
|
|
02c6af4912 | ||
|
|
8ba4dea59f | ||
|
|
ccfda10713 | ||
|
|
bd1327592e | ||
|
|
30b22fe2bf | ||
|
|
651f257a5d | ||
|
|
a9209fd3c7 | ||
|
|
4ae4ca8ca8 | ||
|
|
8be1ddc0d8 | ||
|
|
b55fa5ec8f | ||
|
|
16c6ce850e | ||
|
|
12251e730f | ||
|
|
925b10f9fc | ||
|
|
306b653318 | ||
|
|
8791a52b7e | ||
|
|
0d9470a840 | ||
|
|
0d320c20e0 | ||
|
|
9b3ba2e1c6 | ||
|
|
dbadbf0221 | ||
|
|
173624c838 | ||
|
|
de2047adf2 | ||
|
|
5df2fe9f97 | ||
|
|
2510ebaa79 | ||
|
|
314f30a434 | ||
|
|
c86a511638 | ||
|
|
f1efaf4491 | ||
|
|
716b4adef2 | ||
|
|
5876623bb0 | ||
|
|
6b9c7f7862 | ||
|
|
7ea6387278 | ||
|
|
4c2bc2f41f | ||
|
|
c86f35f059 | ||
|
|
3492566842 | ||
|
|
349bbbb8fa | ||
|
|
ead08981e7 | ||
|
|
068cf825b9 | ||
|
|
7269dfbdc5 | ||
|
|
533708f885 | ||
|
|
5e93ce258f | ||
|
|
1236505502 | ||
|
|
f7d451e689 |
135
.github/instructions/rust_rules.instructions.md
vendored
Normal file
135
.github/instructions/rust_rules.instructions.md
vendored
Normal file
@@ -0,0 +1,135 @@
|
||||
---
|
||||
description: 'Rust programming language coding conventions and best practices'
|
||||
applyTo: '**/*.rs'
|
||||
---
|
||||
|
||||
# Rust Coding Conventions and Best Practices
|
||||
|
||||
Follow idiomatic Rust practices and community standards when writing Rust code.
|
||||
|
||||
These instructions are based on [The Rust Book](https://doc.rust-lang.org/book/), [Rust API Guidelines](https://rust-lang.github.io/api-guidelines/), [RFC 430 naming conventions](https://github.com/rust-lang/rfcs/blob/master/text/0430-finalizing-naming-conventions.md), and the broader Rust community at [users.rust-lang.org](https://users.rust-lang.org).
|
||||
|
||||
## General Instructions
|
||||
|
||||
- Always prioritize readability, safety, and maintainability.
|
||||
- Use strong typing and leverage Rust's ownership system for memory safety.
|
||||
- Break down complex functions into smaller, more manageable functions.
|
||||
- For algorithm-related code, include explanations of the approach used.
|
||||
- Write code with good maintainability practices, including comments on why certain design decisions were made.
|
||||
- Handle errors gracefully using `Result<T, E>` and provide meaningful error messages.
|
||||
- For external dependencies, mention their usage and purpose in documentation.
|
||||
- Use consistent naming conventions following [RFC 430](https://github.com/rust-lang/rfcs/blob/master/text/0430-finalizing-naming-conventions.md).
|
||||
- Write idiomatic, safe, and efficient Rust code that follows the borrow checker's rules.
|
||||
- Ensure code compiles without warnings.
|
||||
|
||||
## Patterns to Follow
|
||||
|
||||
- Use modules (`mod`) and public interfaces (`pub`) to encapsulate logic.
|
||||
- Handle errors properly using `?`, `match`, or `if let`.
|
||||
- Use `serde` for serialization and `thiserror` or `anyhow` for custom errors.
|
||||
- Implement traits to abstract services or external dependencies.
|
||||
- Structure async code using `async/await` and `tokio` or `async-std`.
|
||||
- Prefer enums over flags and states for type safety.
|
||||
- Use builders for complex object creation.
|
||||
- Split binary and library code (`main.rs` vs `lib.rs`) for testability and reuse.
|
||||
- Use `rayon` for data parallelism and CPU-bound tasks.
|
||||
- Use iterators instead of index-based loops as they're often faster and safer.
|
||||
- Use `&str` instead of `String` for function parameters when you don't need ownership.
|
||||
- Prefer borrowing and zero-copy operations to avoid unnecessary allocations.
|
||||
|
||||
### Ownership, Borrowing, and Lifetimes
|
||||
|
||||
- Prefer borrowing (`&T`) over cloning unless ownership transfer is necessary.
|
||||
- Use `&mut T` when you need to modify borrowed data.
|
||||
- Explicitly annotate lifetimes when the compiler cannot infer them.
|
||||
- Use `Rc<T>` for single-threaded reference counting and `Arc<T>` for thread-safe reference counting.
|
||||
- Use `RefCell<T>` for interior mutability in single-threaded contexts and `Mutex<T>` or `RwLock<T>` for multi-threaded contexts.
|
||||
|
||||
## Patterns to Avoid
|
||||
|
||||
- Don't use `unwrap()` or `expect()` unless absolutely necessary—prefer proper error handling.
|
||||
- Avoid panics in library code—return `Result` instead.
|
||||
- Don't rely on global mutable state—use dependency injection or thread-safe containers.
|
||||
- Avoid deeply nested logic—refactor with functions or combinators.
|
||||
- Don't ignore warnings—treat them as errors during CI.
|
||||
- Avoid `unsafe` unless required and fully documented.
|
||||
- Don't overuse `clone()`, use borrowing instead of cloning unless ownership transfer is needed.
|
||||
- Avoid premature `collect()`, keep iterators lazy until you actually need the collection.
|
||||
- Avoid unnecessary allocations—prefer borrowing and zero-copy operations.
|
||||
|
||||
## Code Style and Formatting
|
||||
|
||||
- Follow the Rust Style Guide and use `rustfmt` for automatic formatting.
|
||||
- Keep lines under 100 characters when possible.
|
||||
- Place function and struct documentation immediately before the item using `///`.
|
||||
- Use `cargo clippy` to catch common mistakes and enforce best practices.
|
||||
|
||||
## Error Handling
|
||||
|
||||
- Use `Result<T, E>` for recoverable errors and `panic!` only for unrecoverable errors.
|
||||
- Prefer `?` operator over `unwrap()` or `expect()` for error propagation.
|
||||
- Create custom error types using `thiserror` or implement `std::error::Error`.
|
||||
- Use `Option<T>` for values that may or may not exist.
|
||||
- Provide meaningful error messages and context.
|
||||
- Error types should be meaningful and well-behaved (implement standard traits).
|
||||
- Validate function arguments and return appropriate errors for invalid input.
|
||||
|
||||
## API Design Guidelines
|
||||
|
||||
### Common Traits Implementation
|
||||
Eagerly implement common traits where appropriate:
|
||||
- `Copy`, `Clone`, `Eq`, `PartialEq`, `Ord`, `PartialOrd`, `Hash`, `Debug`, `Display`, `Default`
|
||||
- Use standard conversion traits: `From`, `AsRef`, `AsMut`
|
||||
- Collections should implement `FromIterator` and `Extend`
|
||||
- Note: `Send` and `Sync` are auto-implemented by the compiler when safe; avoid manual implementation unless using `unsafe` code
|
||||
|
||||
### Type Safety and Predictability
|
||||
- Use newtypes to provide static distinctions
|
||||
- Arguments should convey meaning through types; prefer specific types over generic `bool` parameters
|
||||
- Use `Option<T>` appropriately for truly optional values
|
||||
- Functions with a clear receiver should be methods
|
||||
- Only smart pointers should implement `Deref` and `DerefMut`
|
||||
|
||||
### Future Proofing
|
||||
- Use sealed traits to protect against downstream implementations
|
||||
- Structs should have private fields
|
||||
- Functions should validate their arguments
|
||||
- All public types must implement `Debug`
|
||||
|
||||
## Testing and Documentation
|
||||
|
||||
- Write comprehensive unit tests using `#[cfg(test)]` modules and `#[test]` annotations.
|
||||
- Use test modules alongside the code they test (`mod tests { ... }`).
|
||||
- Write integration tests in `tests/` directory with descriptive filenames.
|
||||
- Write clear and concise comments for each function, struct, enum, and complex logic.
|
||||
- Ensure functions have descriptive names and include comprehensive documentation.
|
||||
- Document all public APIs with rustdoc (`///` comments) following the [API Guidelines](https://rust-lang.github.io/api-guidelines/).
|
||||
- Use `#[doc(hidden)]` to hide implementation details from public documentation.
|
||||
- Document error conditions, panic scenarios, and safety considerations.
|
||||
- Examples should use `?` operator, not `unwrap()` or deprecated `try!` macro.
|
||||
|
||||
## Project Organization
|
||||
|
||||
- Use semantic versioning in `Cargo.toml`.
|
||||
- Include comprehensive metadata: `description`, `license`, `repository`, `keywords`, `categories`.
|
||||
- Use feature flags for optional functionality.
|
||||
- Organize code into modules using `mod.rs` or named files.
|
||||
- Keep `main.rs` or `lib.rs` minimal - move logic to modules.
|
||||
|
||||
## Quality Checklist
|
||||
|
||||
Before publishing or reviewing Rust code, ensure:
|
||||
|
||||
### Core Requirements
|
||||
- [ ] **Naming**: Follows RFC 430 naming conventions
|
||||
- [ ] **Traits**: Implements `Debug`, `Clone`, `PartialEq` where appropriate
|
||||
- [ ] **Error Handling**: Uses `Result<T, E>` and provides meaningful error types
|
||||
- [ ] **Documentation**: All public items have rustdoc comments with examples
|
||||
- [ ] **Testing**: Comprehensive test coverage including edge cases
|
||||
|
||||
### Safety and Quality
|
||||
- [ ] **Safety**: No unnecessary `unsafe` code, proper error handling
|
||||
- [ ] **Performance**: Efficient use of iterators, minimal allocations
|
||||
- [ ] **API Design**: Functions are predictable, flexible, and type-safe
|
||||
- [ ] **Future Proofing**: Private fields in structs, sealed traits where appropriate
|
||||
- [ ] **Tooling**: Code passes `cargo fmt`, `cargo clippy`, and `cargo test`
|
||||
162
.github/instructions/self-explanatory-code-commenting.instructions.md
vendored
Normal file
162
.github/instructions/self-explanatory-code-commenting.instructions.md
vendored
Normal file
@@ -0,0 +1,162 @@
|
||||
---
|
||||
description: 'Guidelines for GitHub Copilot to write comments to achieve self-explanatory code with fewer comments. Examples are in JavaScript but it should work on any language that has comments.'
|
||||
applyTo: '**'
|
||||
---
|
||||
|
||||
# Self-explanatory Code Commenting Instructions
|
||||
|
||||
## Core Principle
|
||||
**Write code that speaks for itself. Comment only when necessary to explain WHY, not WHAT.**
|
||||
We do not need comments most of the time.
|
||||
|
||||
## Commenting Guidelines
|
||||
|
||||
### ❌ AVOID These Comment Types
|
||||
|
||||
**Obvious Comments**
|
||||
```javascript
|
||||
// Bad: States the obvious
|
||||
let counter = 0; // Initialize counter to zero
|
||||
counter++; // Increment counter by one
|
||||
```
|
||||
|
||||
**Redundant Comments**
|
||||
```javascript
|
||||
// Bad: Comment repeats the code
|
||||
function getUserName() {
|
||||
return user.name; // Return the user's name
|
||||
}
|
||||
```
|
||||
|
||||
**Outdated Comments**
|
||||
```javascript
|
||||
// Bad: Comment doesn't match the code
|
||||
// Calculate tax at 5% rate
|
||||
const tax = price * 0.08; // Actually 8%
|
||||
```
|
||||
|
||||
### ✅ WRITE These Comment Types
|
||||
|
||||
**Complex Business Logic**
|
||||
```javascript
|
||||
// Good: Explains WHY this specific calculation
|
||||
// Apply progressive tax brackets: 10% up to 10k, 20% above
|
||||
const tax = calculateProgressiveTax(income, [0.10, 0.20], [10000]);
|
||||
```
|
||||
|
||||
**Non-obvious Algorithms**
|
||||
```javascript
|
||||
// Good: Explains the algorithm choice
|
||||
// Using Floyd-Warshall for all-pairs shortest paths
|
||||
// because we need distances between all nodes
|
||||
for (let k = 0; k < vertices; k++) {
|
||||
for (let i = 0; i < vertices; i++) {
|
||||
for (let j = 0; j < vertices; j++) {
|
||||
// ... implementation
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Regex Patterns**
|
||||
```javascript
|
||||
// Good: Explains what the regex matches
|
||||
// Match email format: username@domain.extension
|
||||
const emailPattern = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;
|
||||
```
|
||||
|
||||
**API Constraints or Gotchas**
|
||||
```javascript
|
||||
// Good: Explains external constraint
|
||||
// GitHub API rate limit: 5000 requests/hour for authenticated users
|
||||
await rateLimiter.wait();
|
||||
const response = await fetch(githubApiUrl);
|
||||
```
|
||||
|
||||
## Decision Framework
|
||||
|
||||
Before writing a comment, ask:
|
||||
1. **Is the code self-explanatory?** → No comment needed
|
||||
2. **Would a better variable/function name eliminate the need?** → Refactor instead
|
||||
3. **Does this explain WHY, not WHAT?** → Good comment
|
||||
4. **Will this help future maintainers?** → Good comment
|
||||
|
||||
## Special Cases for Comments
|
||||
|
||||
### Public APIs
|
||||
```javascript
|
||||
/**
|
||||
* Calculate compound interest using the standard formula.
|
||||
*
|
||||
* @param {number} principal - Initial amount invested
|
||||
* @param {number} rate - Annual interest rate (as decimal, e.g., 0.05 for 5%)
|
||||
* @param {number} time - Time period in years
|
||||
* @param {number} compoundFrequency - How many times per year interest compounds (default: 1)
|
||||
* @returns {number} Final amount after compound interest
|
||||
*/
|
||||
function calculateCompoundInterest(principal, rate, time, compoundFrequency = 1) {
|
||||
// ... implementation
|
||||
}
|
||||
```
|
||||
|
||||
### Configuration and Constants
|
||||
```javascript
|
||||
// Good: Explains the source or reasoning
|
||||
const MAX_RETRIES = 3; // Based on network reliability studies
|
||||
const API_TIMEOUT = 5000; // AWS Lambda timeout is 15s, leaving buffer
|
||||
```
|
||||
|
||||
### Annotations
|
||||
```javascript
|
||||
// TODO: Replace with proper user authentication after security review
|
||||
// FIXME: Memory leak in production - investigate connection pooling
|
||||
// HACK: Workaround for bug in library v2.1.0 - remove after upgrade
|
||||
// NOTE: This implementation assumes UTC timezone for all calculations
|
||||
// WARNING: This function modifies the original array instead of creating a copy
|
||||
// PERF: Consider caching this result if called frequently in hot path
|
||||
// SECURITY: Validate input to prevent SQL injection before using in query
|
||||
// BUG: Edge case failure when array is empty - needs investigation
|
||||
// REFACTOR: Extract this logic into separate utility function for reusability
|
||||
// DEPRECATED: Use newApiFunction() instead - this will be removed in v3.0
|
||||
```
|
||||
|
||||
## Anti-Patterns to Avoid
|
||||
|
||||
### Dead Code Comments
|
||||
```javascript
|
||||
// Bad: Don't comment out code
|
||||
// const oldFunction = () => { ... };
|
||||
const newFunction = () => { ... };
|
||||
```
|
||||
|
||||
### Changelog Comments
|
||||
```javascript
|
||||
// Bad: Don't maintain history in comments
|
||||
// Modified by John on 2023-01-15
|
||||
// Fixed bug reported by Sarah on 2023-02-03
|
||||
function processData() {
|
||||
// ... implementation
|
||||
}
|
||||
```
|
||||
|
||||
### Divider Comments
|
||||
```javascript
|
||||
// Bad: Don't use decorative comments
|
||||
//=====================================
|
||||
// UTILITY FUNCTIONS
|
||||
//=====================================
|
||||
```
|
||||
|
||||
## Quality Checklist
|
||||
|
||||
Before committing, ensure your comments:
|
||||
- [ ] Explain WHY, not WHAT
|
||||
- [ ] Are grammatically correct and clear
|
||||
- [ ] Will remain accurate as code evolves
|
||||
- [ ] Add genuine value to code understanding
|
||||
- [ ] Are placed appropriately (above the code they describe)
|
||||
- [ ] Use proper spelling and professional language
|
||||
|
||||
## Summary
|
||||
|
||||
Remember: **The best comment is the one you don't need to write because the code is self-documenting.**
|
||||
@@ -1,3 +1,8 @@
|
||||
# Issues - Rules
|
||||
## What it is not
|
||||
- NOT Question and Answer
|
||||
- NOT Helpdesk
|
||||
|
||||
# Pull Requests - Rules
|
||||
## General
|
||||
- ONLY signed and verified commits
|
||||
|
||||
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -2087,7 +2087,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "telemt"
|
||||
version = "3.1.3"
|
||||
version = "3.3.15"
|
||||
dependencies = [
|
||||
"aes",
|
||||
"anyhow",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "telemt"
|
||||
version = "3.1.6"
|
||||
version = "3.3.15"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
@@ -73,3 +73,6 @@ futures = "0.3"
|
||||
[[bench]]
|
||||
name = "crypto_bench"
|
||||
harness = false
|
||||
|
||||
[profile.release]
|
||||
lto = "thin"
|
||||
|
||||
285
README.md
285
README.md
@@ -2,7 +2,12 @@
|
||||
|
||||
***Löst Probleme, bevor andere überhaupt wissen, dass sie existieren*** / ***It solves problems before others even realize they exist***
|
||||
|
||||
**Telemt** is a fast, secure, and feature-rich server written in Rust: it fully implements the official Telegram proxy algo and adds many production-ready improvements such as connection pooling, replay protection, detailed statistics, masking from "prying" eyes
|
||||
**Telemt** is a fast, secure, and feature-rich server written in Rust: it fully implements the official Telegram proxy algo and adds many production-ready improvements such as:
|
||||
- [ME Pool + Reader/Writer + Registry + Refill + Adaptive Floor + Trio-State + Generation Lifecycle](https://github.com/telemt/telemt/blob/main/docs/model/MODEL.en.md)
|
||||
- [Full-covered API w/ management](https://github.com/telemt/telemt/blob/main/docs/API.md)
|
||||
- Anti-Replay on Sliding Window
|
||||
- Prometheus-format Metrics
|
||||
- TLS-Fronting and TCP-Splicing for masking from "prying" eyes
|
||||
|
||||
[**Telemt Chat in Telegram**](https://t.me/telemtrs)
|
||||
|
||||
@@ -14,18 +19,24 @@
|
||||
|
||||
### 🇷🇺 RU
|
||||
|
||||
#### Релиз 3.0.15 — 25 февраля
|
||||
#### Релиз 3.3.5 LTS - 6 марта
|
||||
|
||||
25 февраля мы выпустили версию **3.0.15**
|
||||
6 марта мы выпустили Telemt **3.3.5**
|
||||
|
||||
Мы предполагаем, что она станет завершающей версией поколения 3.0 и уже сейчас мы рассматриваем её как **LTS-кандидата** для версии **3.1.0**!
|
||||
Это [3.3.5 - первая LTS-версия telemt](https://github.com/telemt/telemt/releases/tag/3.3.5)!
|
||||
|
||||
После нескольких дней детального анализа особенностей работы Middle-End мы спроектировали и реализовали продуманный режим **ротации ME Writer**. Данный режим позволяет поддерживать стабильно высокую производительность в long-run сценариях без возникновения ошибок, связанных с некорректной конфигурацией прокси
|
||||
В ней используется:
|
||||
- новый алгоритм ME NoWait для непревзойдённо быстрого восстановления пула
|
||||
- Adaptive Floor, поддерживающий количество ME Writer на оптимальном уровне
|
||||
- модель усовершенствованного доступа к KDF Fingerprint на RwLock
|
||||
- строгая привязка Middle-End к DC-ID с предсказуемым алгоритмом деградации и самовосстановления
|
||||
|
||||
Будем рады вашему фидбеку и предложениям по улучшению — особенно в части **статистики** и **UX**
|
||||
Telemt Control API V1 в 3.3.5 включает:
|
||||
- несколько режимов работы в зависимости от доступных ресурсов
|
||||
- снапшот-модель для живых метрик без вмешательства в hot-path
|
||||
- минималистичный набор запросов для управления пользователями
|
||||
|
||||
Релиз:
|
||||
[3.0.15](https://github.com/telemt/telemt/releases/tag/3.0.15)
|
||||
Будем рады вашему фидбеку и предложениям по улучшению — особенно в части **API**, **статистики**, **UX**
|
||||
|
||||
---
|
||||
|
||||
@@ -42,18 +53,24 @@
|
||||
|
||||
### 🇬🇧 EN
|
||||
|
||||
#### Release 3.0.15 — February 25
|
||||
#### Release 3.3.5 LTS - March 6
|
||||
|
||||
On February 25, we released version **3.0.15**
|
||||
On March 6, we released Telemt **3.3.5**
|
||||
|
||||
We expect this to become the final release of the 3.0 generation and at this point, we already see it as a strong **LTS candidate** for the upcoming **3.1.0** release!
|
||||
This is [3.3.5 - the first LTS release of telemt](https://github.com/telemt/telemt/releases/tag/3.3.5)
|
||||
|
||||
After several days of deep analysis of Middle-End behavior, we designed and implemented a well-engineered **ME Writer rotation mode**. This mode enables sustained high throughput in long-run scenarios while preventing proxy misconfiguration errors
|
||||
It introduces:
|
||||
- the new ME NoWait algorithm for exceptionally fast pool recovery
|
||||
- Adaptive Floor, which maintains the number of ME Writers at an optimal level
|
||||
- an improved KDF Fingerprint access model based on RwLock
|
||||
- strict binding of Middle-End instances to DC-ID with a predictable degradation and self-recovery algorithm
|
||||
|
||||
We are looking forward to your feedback and improvement proposals — especially regarding **statistics** and **UX**
|
||||
Telemt Control API V1 in version 3.3.5 includes:
|
||||
- multiple operating modes depending on available resources
|
||||
- a snapshot-based model for live metrics without interfering with the hot path
|
||||
- a minimalistic request set for user management
|
||||
|
||||
Release:
|
||||
[3.0.15](https://github.com/telemt/telemt/releases/tag/3.0.15)
|
||||
We are looking forward to your feedback and improvement proposals — especially regarding **API**, **statistics**, **UX**
|
||||
|
||||
---
|
||||
|
||||
@@ -76,31 +93,6 @@ We welcome ideas, architectural feedback, and pull requests.
|
||||
|
||||
⚓ Our ***Middle-End Pool*** is fastest by design in standard scenarios, compared to other implementations of connecting to the Middle-End Proxy: not dramatically, but usually
|
||||
|
||||
# GOTO
|
||||
- [Features](#features)
|
||||
- [Quick Start Guide](#quick-start-guide)
|
||||
- [How to use?](#how-to-use)
|
||||
- [Systemd Method](#telemt-via-systemd)
|
||||
- [Configuration](#configuration)
|
||||
- [Minimal Configuration](#minimal-configuration-for-first-start)
|
||||
- [Advanced](#advanced)
|
||||
- [Adtag](#adtag)
|
||||
- [Listening and Announce IPs](#listening-and-announce-ips)
|
||||
- [Upstream Manager](#upstream-manager)
|
||||
- [IP](#bind-on-ip)
|
||||
- [SOCKS](#socks45-as-upstream)
|
||||
- [FAQ](#faq)
|
||||
- [Recognizability for DPI + crawler](#recognizability-for-dpi-and-crawler)
|
||||
- [Telegram Calls](#telegram-calls-via-mtproxy)
|
||||
- [DPI](#how-does-dpi-see-mtproxy-tls)
|
||||
- [Whitelist on Network Level](#whitelist-on-ip)
|
||||
- [Too many open files](#too-many-open-files)
|
||||
- [Build](#build)
|
||||
- [Docker](#docker)
|
||||
- [Why Rust?](#why-rust)
|
||||
|
||||
## Features
|
||||
|
||||
- Full support for all official MTProto proxy modes:
|
||||
- Classic
|
||||
- Secure - with `dd` prefix
|
||||
@@ -111,158 +103,31 @@ We welcome ideas, architectural feedback, and pull requests.
|
||||
- Graceful shutdown on Ctrl+C
|
||||
- Extensive logging via `trace` and `debug` with `RUST_LOG` method
|
||||
|
||||
# GOTO
|
||||
- [Quick Start Guide](#quick-start-guide)
|
||||
- [FAQ](#faq)
|
||||
- [Recognizability for DPI and crawler](#recognizability-for-dpi-and-crawler)
|
||||
- [Client WITH secret-key accesses the MTProxy resource:](#client-with-secret-key-accesses-the-mtproxy-resource)
|
||||
- [Client WITHOUT secret-key gets transparent access to the specified resource:](#client-without-secret-key-gets-transparent-access-to-the-specified-resource)
|
||||
- [Telegram Calls via MTProxy](#telegram-calls-via-mtproxy)
|
||||
- [How does DPI see MTProxy TLS?](#how-does-dpi-see-mtproxy-tls)
|
||||
- [Whitelist on IP](#whitelist-on-ip)
|
||||
- [Too many open files](#too-many-open-files)
|
||||
- [Build](#build)
|
||||
- [Why Rust?](#why-rust)
|
||||
- [Issues](#issues)
|
||||
- [Roadmap](#roadmap)
|
||||
|
||||
|
||||
## Quick Start Guide
|
||||
**This software is designed for Debian-based OS: in addition to Debian, these are Ubuntu, Mint, Kali, MX and many other Linux**
|
||||
1. Download release
|
||||
```bash
|
||||
wget -qO- "https://github.com/telemt/telemt/releases/latest/download/telemt-$(uname -m)-linux-$(ldd --version 2>&1 | grep -iq musl && echo musl || echo gnu).tar.gz" | tar -xz
|
||||
```
|
||||
2. Move to Bin Folder
|
||||
```bash
|
||||
mv telemt /bin
|
||||
```
|
||||
4. Make Executable
|
||||
```bash
|
||||
chmod +x /bin/telemt
|
||||
```
|
||||
5. Go to [How to use?](#how-to-use) section for further steps
|
||||
|
||||
## How to use?
|
||||
### Telemt via Systemd
|
||||
**These instructions assume that you:**
|
||||
- logged in as root or executed `su -` / `sudo su`
|
||||
- you already have an assembled and executable `telemt` in /bin folder as a result of the [Quick Start Guide](#quick-start-guide) or [Build](#build)
|
||||
|
||||
**0. Check port and generate secrets**
|
||||
|
||||
The port you have selected for use should be MISSING from the list, when:
|
||||
```bash
|
||||
netstat -lnp
|
||||
```
|
||||
|
||||
Generate 16 bytes/32 characters HEX with OpenSSL or another way:
|
||||
```bash
|
||||
openssl rand -hex 16
|
||||
```
|
||||
OR
|
||||
```bash
|
||||
xxd -l 16 -p /dev/urandom
|
||||
```
|
||||
OR
|
||||
```bash
|
||||
python3 -c 'import os; print(os.urandom(16).hex())'
|
||||
```
|
||||
|
||||
**1. Place your config to /etc/telemt.toml**
|
||||
|
||||
Open nano
|
||||
```bash
|
||||
nano /etc/telemt.toml
|
||||
```
|
||||
paste your config from [Configuration](#configuration) section
|
||||
|
||||
then Ctrl+X -> Y -> Enter to save
|
||||
|
||||
**2. Create service on /etc/systemd/system/telemt.service**
|
||||
|
||||
Open nano
|
||||
```bash
|
||||
nano /etc/systemd/system/telemt.service
|
||||
```
|
||||
paste this Systemd Module
|
||||
```bash
|
||||
[Unit]
|
||||
Description=Telemt
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/bin
|
||||
ExecStart=/bin/telemt /etc/telemt.toml
|
||||
Restart=on-failure
|
||||
LimitNOFILE=65536
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
then Ctrl+X -> Y -> Enter to save
|
||||
|
||||
**3.** In Shell type `systemctl start telemt` - it must start with zero exit-code
|
||||
|
||||
**4.** In Shell type `systemctl status telemt` - there you can reach info about current MTProxy status
|
||||
|
||||
**5.** In Shell type `systemctl enable telemt` - then telemt will start with system startup, after the network is up
|
||||
|
||||
**6.** In Shell type `journalctl -u telemt -n -g "links" --no-pager -o cat | tac` - get the connection links
|
||||
|
||||
## Configuration
|
||||
### Minimal Configuration for First Start
|
||||
```toml
|
||||
# === General Settings ===
|
||||
[general]
|
||||
# ad_tag = "00000000000000000000000000000000"
|
||||
|
||||
[general.modes]
|
||||
classic = false
|
||||
secure = false
|
||||
tls = true
|
||||
|
||||
# === Anti-Censorship & Masking ===
|
||||
[censorship]
|
||||
tls_domain = "petrovich.ru"
|
||||
|
||||
[access.users]
|
||||
# format: "username" = "32_hex_chars_secret"
|
||||
hello = "00000000000000000000000000000000"
|
||||
|
||||
```
|
||||
### Advanced
|
||||
#### Adtag (per-user)
|
||||
To use channel advertising and usage statistics from Telegram, get an Adtag from [@mtproxybot](https://t.me/mtproxybot). Set it per user in `[access.user_ad_tags]` (32 hex chars):
|
||||
```toml
|
||||
[access.user_ad_tags]
|
||||
username1 = "11111111111111111111111111111111" # Replace with your tag from @mtproxybot
|
||||
username2 = "22222222222222222222222222222222"
|
||||
```
|
||||
#### Listening and Announce IPs
|
||||
To specify listening address and/or address in links, add to section `[[server.listeners]]` of config.toml:
|
||||
```toml
|
||||
[[server.listeners]]
|
||||
ip = "0.0.0.0" # 0.0.0.0 = all IPs; your IP = specific listening
|
||||
announce_ip = "1.2.3.4" # IP in links; comment with # if not used
|
||||
```
|
||||
#### Upstream Manager
|
||||
To specify upstream, add to section `[[upstreams]]` of config.toml:
|
||||
##### Bind on IP
|
||||
```toml
|
||||
[[upstreams]]
|
||||
type = "direct"
|
||||
weight = 1
|
||||
enabled = true
|
||||
interface = "192.168.1.100" # Change to your outgoing IP
|
||||
```
|
||||
##### SOCKS4/5 as Upstream
|
||||
- Without Auth:
|
||||
```toml
|
||||
[[upstreams]]
|
||||
type = "socks5" # Specify SOCKS4 or SOCKS5
|
||||
address = "1.2.3.4:1234" # SOCKS-server Address
|
||||
weight = 1 # Set Weight for Scenarios
|
||||
enabled = true
|
||||
```
|
||||
|
||||
- With Auth:
|
||||
```toml
|
||||
[[upstreams]]
|
||||
type = "socks5" # Specify SOCKS4 or SOCKS5
|
||||
address = "1.2.3.4:1234" # SOCKS-server Address
|
||||
username = "user" # Username for Auth on SOCKS-server
|
||||
password = "pass" # Password for Auth on SOCKS-server
|
||||
weight = 1 # Set Weight for Scenarios
|
||||
enabled = true
|
||||
```
|
||||
- [Quick Start Guide RU](docs/QUICK_START_GUIDE.ru.md)
|
||||
- [Quick Start Guide EN](docs/QUICK_START_GUIDE.en.md)
|
||||
|
||||
## FAQ
|
||||
|
||||
- [FAQ RU](docs/FAQ.ru.md)
|
||||
- [FAQ EN](docs/FAQ.en.md)
|
||||
|
||||
### Recognizability for DPI and crawler
|
||||
Since version 1.1.0.0, we have debugged masking perfectly: for all clients without "presenting" a key,
|
||||
we transparently direct traffic to the target host!
|
||||
@@ -399,6 +264,11 @@ git clone https://github.com/telemt/telemt
|
||||
cd telemt
|
||||
# Starting Release Build
|
||||
cargo build --release
|
||||
|
||||
# Low-RAM devices (1 GB, e.g. NanoPi Neo3 / Raspberry Pi Zero 2):
|
||||
# release profile uses lto = "thin" to reduce peak linker memory.
|
||||
# If your custom toolchain overrides profiles, avoid enabling fat LTO.
|
||||
|
||||
# Move to /bin
|
||||
mv ./target/release/telemt /bin
|
||||
# Make executable
|
||||
@@ -407,40 +277,11 @@ chmod +x /bin/telemt
|
||||
telemt config.toml
|
||||
```
|
||||
|
||||
## Docker
|
||||
**Quick start (Docker Compose)**
|
||||
### OpenBSD
|
||||
- Build and service setup guide: [OpenBSD Guide (EN)](docs/OPENBSD.en.md)
|
||||
- Example rc.d script: [contrib/openbsd/telemt.rcd](contrib/openbsd/telemt.rcd)
|
||||
- Status: OpenBSD sandbox hardening with `pledge(2)` and `unveil(2)` is not implemented yet.
|
||||
|
||||
1. Edit `config.toml` in repo root (at least: port, users secrets, tls_domain)
|
||||
2. Start container:
|
||||
```bash
|
||||
docker compose up -d --build
|
||||
```
|
||||
3. Check logs:
|
||||
```bash
|
||||
docker compose logs -f telemt
|
||||
```
|
||||
4. Stop:
|
||||
```bash
|
||||
docker compose down
|
||||
```
|
||||
|
||||
**Notes**
|
||||
- `docker-compose.yml` maps `./config.toml` to `/app/config.toml` (read-only)
|
||||
- By default it publishes `443:443` and runs with dropped capabilities (only `NET_BIND_SERVICE` is added)
|
||||
- If you really need host networking (usually only for some IPv6 setups) uncomment `network_mode: host`
|
||||
|
||||
**Run without Compose**
|
||||
```bash
|
||||
docker build -t telemt:local .
|
||||
docker run --name telemt --restart unless-stopped \
|
||||
-p 443:443 \
|
||||
-e RUST_LOG=info \
|
||||
-v "$PWD/config.toml:/app/config.toml:ro" \
|
||||
--read-only \
|
||||
--cap-drop ALL --cap-add NET_BIND_SERVICE \
|
||||
--ulimit nofile=65536:65536 \
|
||||
telemt:local
|
||||
```
|
||||
|
||||
## Why Rust?
|
||||
- Long-running reliability and idempotent behavior
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
# === General Settings ===
|
||||
[general]
|
||||
use_middle_proxy = false
|
||||
use_middle_proxy = true
|
||||
# Global ad_tag fallback when user has no per-user tag in [access.user_ad_tags]
|
||||
# ad_tag = "00000000000000000000000000000000"
|
||||
# Per-user ad_tag in [access.user_ad_tags] (32 hex from @MTProxybot)
|
||||
@@ -34,6 +34,13 @@ port = 443
|
||||
# metrics_port = 9090
|
||||
# metrics_whitelist = ["127.0.0.1", "::1", "0.0.0.0/0"]
|
||||
|
||||
[server.api]
|
||||
enabled = true
|
||||
listen = "0.0.0.0:9091"
|
||||
whitelist = ["127.0.0.0/8"]
|
||||
minimal_runtime_enabled = false
|
||||
minimal_runtime_cache_ttl_ms = 1000
|
||||
|
||||
# Listen on multiple interfaces/IPs - IPv4
|
||||
[[server.listeners]]
|
||||
ip = "0.0.0.0"
|
||||
|
||||
16
contrib/openbsd/telemt.rcd
Normal file
16
contrib/openbsd/telemt.rcd
Normal file
@@ -0,0 +1,16 @@
|
||||
#!/bin/ksh
|
||||
# /etc/rc.d/telemt
|
||||
#
|
||||
# rc.d(8) script for Telemt MTProxy daemon.
|
||||
# Tokio runtime does not daemonize itself, so rc_bg=YES is used.
|
||||
|
||||
daemon="/usr/local/bin/telemt"
|
||||
daemon_user="_telemt"
|
||||
daemon_flags="/etc/telemt/config.toml"
|
||||
|
||||
. /etc/rc.d/rc.subr
|
||||
|
||||
rc_bg=YES
|
||||
rc_reload=NO
|
||||
|
||||
rc_cmd $1
|
||||
1135
docs/API.md
Normal file
1135
docs/API.md
Normal file
File diff suppressed because it is too large
Load Diff
112
docs/FAQ.en.md
Normal file
112
docs/FAQ.en.md
Normal file
@@ -0,0 +1,112 @@
|
||||
## How to set up "proxy sponsor" channel and statistics via @MTProxybot bot
|
||||
|
||||
1. Go to @MTProxybot bot.
|
||||
2. Enter the command `/newproxy`
|
||||
3. Send the server IP and port. For example: 1.2.3.4:443
|
||||
4. Open the config `nano /etc/telemt.toml`.
|
||||
5. Copy and send the user secret from the [access.users] section to the bot.
|
||||
6. Copy the tag received from the bot. For example 1234567890abcdef1234567890abcdef.
|
||||
> [!WARNING]
|
||||
> The link provided by the bot will not work. Do not copy or use it!
|
||||
7. Uncomment the ad_tag parameter and enter the tag received from the bot.
|
||||
8. Uncomment/add the parameter `use_middle_proxy = true`.
|
||||
|
||||
Config example:
|
||||
```toml
|
||||
[general]
|
||||
ad_tag = "1234567890abcdef1234567890abcdef"
|
||||
use_middle_proxy = true
|
||||
```
|
||||
9. Save the config. Ctrl+S -> Ctrl+X.
|
||||
10. Restart telemt `systemctl restart telemt`.
|
||||
11. In the bot, send the command /myproxies and select the added server.
|
||||
12. Click the "Set promotion" button.
|
||||
13. Send a **public link** to the channel. Private channels cannot be added!
|
||||
14. Wait approximately 1 hour for the information to update on Telegram servers.
|
||||
> [!WARNING]
|
||||
> You will not see the "proxy sponsor" if you are already subscribed to the channel.
|
||||
|
||||
**You can also set up different channels for different users.**
|
||||
```toml
|
||||
[access.user_ad_tags]
|
||||
hello = "ad_tag"
|
||||
hello2 = "ad_tag2"
|
||||
```
|
||||
|
||||
## How many people can use 1 link
|
||||
|
||||
By default, 1 link can be used by any number of people.
|
||||
You can limit the number of IPs using the proxy.
|
||||
```toml
|
||||
[access.user_max_unique_ips]
|
||||
hello = 1
|
||||
```
|
||||
This parameter limits how many unique IPs can use 1 link simultaneously. If one user disconnects, a second user can connect. Also, multiple users can sit behind the same IP.
|
||||
|
||||
## How to create multiple different links
|
||||
|
||||
1. Generate the required number of secrets `openssl rand -hex 16`
|
||||
2. Open the config `nano /etc/telemt.toml`
|
||||
3. Add new users.
|
||||
```toml
|
||||
[access.users]
|
||||
user1 = "00000000000000000000000000000001"
|
||||
user2 = "00000000000000000000000000000002"
|
||||
user3 = "00000000000000000000000000000003"
|
||||
```
|
||||
4. Save the config. Ctrl+S -> Ctrl+X. You don't need to restart telemt.
|
||||
5. Get the links via `journalctl -u telemt -n -g "links" --no-pager -o cat | tac`
|
||||
|
||||
## How to view metrics
|
||||
|
||||
1. Open the config `nano /etc/telemt.toml`
|
||||
2. Add the following parameters
|
||||
```toml
|
||||
[server]
|
||||
metrics_port = 9090
|
||||
metrics_whitelist = ["127.0.0.1/32", "::1/128", "0.0.0.0/0"]
|
||||
```
|
||||
3. Save the config. Ctrl+S -> Ctrl+X.
|
||||
4. Metrics are available at SERVER_IP:9090/metrics.
|
||||
> [!WARNING]
|
||||
> "0.0.0.0/0" in metrics_whitelist opens access from any IP. Replace with your own IP. For example "1.2.3.4"
|
||||
|
||||
## Additional parameters
|
||||
|
||||
### Domain in link instead of IP
|
||||
To show a domain instead of the IP in generated links, add the following to the `[general.links]` section of the config file:
|
||||
```toml
|
||||
[general.links]
|
||||
public_host = "proxy.example.com"
|
||||
```
|
||||
|
||||
### Upstream Manager
|
||||
To specify an upstream, add an `[[upstreams]]` section like one of the following to the config.toml file:
|
||||
#### Binding to IP
|
||||
```toml
|
||||
[[upstreams]]
|
||||
type = "direct"
|
||||
weight = 1
|
||||
enabled = true
|
||||
interface = "192.168.1.100" # Change to your outgoing IP
|
||||
```
|
||||
#### SOCKS4/5 as Upstream
|
||||
- Without authentication:
|
||||
```toml
|
||||
[[upstreams]]
|
||||
type = "socks5" # Specify SOCKS4 or SOCKS5
|
||||
address = "1.2.3.4:1234" # SOCKS-server Address
|
||||
weight = 1 # Set Weight for Scenarios
|
||||
enabled = true
|
||||
```
|
||||
|
||||
- With authentication:
|
||||
```toml
|
||||
[[upstreams]]
|
||||
type = "socks5" # Specify SOCKS4 or SOCKS5
|
||||
address = "1.2.3.4:1234" # SOCKS-server Address
|
||||
username = "user" # Username for Auth on SOCKS-server
|
||||
password = "pass" # Password for Auth on SOCKS-server
|
||||
weight = 1 # Set Weight for Scenarios
|
||||
enabled = true
|
||||
```
|
||||
@@ -1,4 +1,4 @@
|
||||
## Как настроить канал "спонсор прокси"
|
||||
## Как настроить канал "спонсор прокси" и статистику через бота @MTProxybot
|
||||
|
||||
1. Зайти в бота @MTProxybot.
|
||||
2. Ввести команду `/newproxy`
|
||||
@@ -26,6 +26,13 @@ use_middle_proxy = true
|
||||
> [!WARNING]
|
||||
> У вас не будет отображаться "спонсор прокси" если вы уже подписаны на канал.
|
||||
|
||||
**Также вы можете настроить разные каналы для разных пользователей.**
|
||||
```toml
|
||||
[access.user_ad_tags]
|
||||
hello = "ad_tag"
|
||||
hello2 = "ad_tag2"
|
||||
```
|
||||
|
||||
## Сколько человек может пользоваться 1 ссылкой
|
||||
|
||||
По умолчанию 1 ссылкой может пользоваться сколько угодно человек.
|
||||
@@ -63,3 +70,43 @@ metrics_whitelist = ["127.0.0.1/32", "::1/128", "0.0.0.0/0"]
|
||||
4. Метрики доступны по адресу SERVER_IP:9090/metrics.
|
||||
> [!WARNING]
|
||||
> "0.0.0.0/0" в metrics_whitelist открывает доступ с любого IP. Замените на свой ip. Например "1.2.3.4"
|
||||
|
||||
## Дополнительные параметры
|
||||
|
||||
### Домен в ссылке вместо IP
|
||||
Чтобы указать домен в ссылках, добавьте в секцию `[general.links]` файла config.
|
||||
```toml
|
||||
[general.links]
|
||||
public_host = "proxy.example.com"
|
||||
```
|
||||
|
||||
### Upstream Manager
|
||||
Чтобы указать апстрим, добавьте в секцию `[[upstreams]]` файла config.toml:
|
||||
#### Привязка к IP
|
||||
```toml
|
||||
[[upstreams]]
|
||||
type = "direct"
|
||||
weight = 1
|
||||
enabled = true
|
||||
interface = "192.168.1.100" # Change to your outgoing IP
|
||||
```
|
||||
#### SOCKS4/5 как Upstream
|
||||
- Без авторизации:
|
||||
```toml
|
||||
[[upstreams]]
|
||||
type = "socks5" # Specify SOCKS4 or SOCKS5
|
||||
address = "1.2.3.4:1234" # SOCKS-server Address
|
||||
weight = 1 # Set Weight for Scenarios
|
||||
enabled = true
|
||||
```
|
||||
|
||||
- С авторизацией:
|
||||
```toml
|
||||
[[upstreams]]
|
||||
type = "socks5" # Specify SOCKS4 or SOCKS5
|
||||
address = "1.2.3.4:1234" # SOCKS-server Address
|
||||
username = "user" # Username for Auth on SOCKS-server
|
||||
password = "pass" # Password for Auth on SOCKS-server
|
||||
weight = 1 # Set Weight for Scenarios
|
||||
enabled = true
|
||||
```
|
||||
|
||||
132
docs/OPENBSD.en.md
Normal file
132
docs/OPENBSD.en.md
Normal file
@@ -0,0 +1,132 @@
|
||||
# Telemt on OpenBSD (Build, Run, and rc.d)
|
||||
|
||||
This guide covers a practical OpenBSD deployment flow for Telemt:
|
||||
- build from source,
|
||||
- install binary and config,
|
||||
- run as an rc.d daemon,
|
||||
- verify basic runtime behavior.
|
||||
|
||||
## 1. Prerequisites
|
||||
|
||||
Install required packages:
|
||||
|
||||
```sh
|
||||
doas pkg_add rust git
|
||||
```
|
||||
|
||||
Notes:
|
||||
- Telemt release installer (`install.sh`) is Linux-only.
|
||||
- On OpenBSD, use source build with `cargo`.
|
||||
|
||||
## 2. Build from source
|
||||
|
||||
```sh
|
||||
git clone https://github.com/telemt/telemt
|
||||
cd telemt
|
||||
cargo build --release
|
||||
./target/release/telemt --version
|
||||
```
|
||||
|
||||
For low-RAM systems, this repository already uses `lto = "thin"` in release profile.
|
||||
|
||||
## 3. Install binary and config
|
||||
|
||||
```sh
|
||||
doas install -d -m 0755 /usr/local/bin
|
||||
doas install -m 0755 ./target/release/telemt /usr/local/bin/telemt
|
||||
|
||||
doas install -d -m 0750 /etc/telemt
|
||||
doas install -m 0640 ./config.toml /etc/telemt/config.toml
|
||||
```
|
||||
|
||||
## 4. Create runtime user
|
||||
|
||||
```sh
|
||||
doas useradd -L daemon -s /sbin/nologin -d /var/empty _telemt
|
||||
```
|
||||
|
||||
If `_telemt` already exists, continue.
|
||||
|
||||
## 5. Install rc.d service
|
||||
|
||||
Install the provided script:
|
||||
|
||||
```sh
|
||||
doas install -m 0555 ./contrib/openbsd/telemt.rcd /etc/rc.d/telemt
|
||||
```
|
||||
|
||||
Enable and start:
|
||||
|
||||
```sh
|
||||
doas rcctl enable telemt
|
||||
# Optional: send daemon output to syslog
|
||||
#doas rcctl set telemt logger daemon.info
|
||||
|
||||
doas rcctl start telemt
|
||||
```
|
||||
|
||||
Service controls:
|
||||
|
||||
```sh
|
||||
doas rcctl check telemt
|
||||
doas rcctl restart telemt
|
||||
doas rcctl stop telemt
|
||||
```
|
||||
|
||||
## 6. Resource limits (recommended)
|
||||
|
||||
OpenBSD rc.d can apply limits via login class. Add class `telemt` and assign it to `_telemt`.
|
||||
|
||||
Example class entry:
|
||||
|
||||
```text
|
||||
telemt:\
|
||||
:openfiles-cur=8192:openfiles-max=16384:\
|
||||
:datasize-cur=768M:datasize-max=1024M:\
|
||||
:coredumpsize=0:\
|
||||
:tc=daemon:
|
||||
```
|
||||
|
||||
These values are conservative defaults for small and medium deployments.
|
||||
Increase `openfiles-*` only if logs show descriptor exhaustion under load.
|
||||
|
||||
Then rebuild database and assign class:
|
||||
|
||||
```sh
|
||||
doas cap_mkdb /etc/login.conf
|
||||
#doas usermod -L telemt _telemt
|
||||
```
|
||||
|
||||
Uncomment `usermod` if you want this class bound to the Telemt user.
|
||||
|
||||
## 7. Functional smoke test
|
||||
|
||||
1. Validate service state:
|
||||
|
||||
```sh
|
||||
doas rcctl check telemt
|
||||
```
|
||||
|
||||
2. Check listener is present (replace 443 if needed):
|
||||
|
||||
```sh
|
||||
netstat -n -f inet -p tcp | grep LISTEN | grep '\.443'
|
||||
```
|
||||
|
||||
3. Verify process user:
|
||||
|
||||
```sh
|
||||
ps -o user,pid,command -ax | grep telemt | grep -v grep
|
||||
```
|
||||
|
||||
4. If startup fails, debug in foreground:
|
||||
|
||||
```sh
|
||||
RUST_LOG=debug /usr/local/bin/telemt /etc/telemt/config.toml
|
||||
```
|
||||
|
||||
## 8. OpenBSD-specific caveats
|
||||
|
||||
- OpenBSD does not support per-socket keepalive retries/interval tuning in the same way as Linux.
|
||||
- Telemt source already uses target-aware cfg gates for keepalive setup.
|
||||
- Use rc.d/rcctl, not systemd.
|
||||
@@ -48,11 +48,16 @@ Save the obtained result somewhere. You will need it later!
|
||||
|
||||
---
|
||||
|
||||
**1. Place your config to /etc/telemt.toml**
|
||||
**1. Place your config in /etc/telemt/telemt.toml**
|
||||
|
||||
Create config directory:
|
||||
```bash
|
||||
mkdir /etc/telemt
|
||||
```
|
||||
|
||||
Open nano
|
||||
```bash
|
||||
nano /etc/telemt.toml
|
||||
nano /etc/telemt/telemt.toml
|
||||
```
|
||||
paste your config
|
||||
|
||||
@@ -60,12 +65,19 @@ paste your config
|
||||
# === General Settings ===
|
||||
[general]
|
||||
# ad_tag = "00000000000000000000000000000000"
|
||||
use_middle_proxy = false
|
||||
|
||||
[general.modes]
|
||||
classic = false
|
||||
secure = false
|
||||
tls = true
|
||||
|
||||
[server.api]
|
||||
enabled = true
|
||||
# listen = "127.0.0.1:9091"
|
||||
# whitelist = ["127.0.0.1/32"]
|
||||
# read_only = true
|
||||
|
||||
# === Anti-Censorship & Masking ===
|
||||
[censorship]
|
||||
tls_domain = "petrovich.ru"
|
||||
@@ -74,6 +86,7 @@ tls_domain = "petrovich.ru"
|
||||
# format: "username" = "32_hex_chars_secret"
|
||||
hello = "00000000000000000000000000000000"
|
||||
```
|
||||
|
||||
then Ctrl+S -> Ctrl+X to save
|
||||
|
||||
> [!WARNING]
|
||||
@@ -82,7 +95,14 @@ then Ctrl+S -> Ctrl+X to save
|
||||
|
||||
---
|
||||
|
||||
**2. Create service on /etc/systemd/system/telemt.service**
|
||||
**2. Create telemt user**
|
||||
|
||||
```bash
|
||||
useradd -d /opt/telemt -m -r -U telemt
|
||||
chown -R telemt:telemt /etc/telemt
|
||||
```
|
||||
|
||||
**3. Create the service at /etc/systemd/system/telemt.service**
|
||||
|
||||
Open nano
|
||||
```bash
|
||||
@@ -93,28 +113,43 @@ paste this Systemd Module
|
||||
```bash
|
||||
[Unit]
|
||||
Description=Telemt
|
||||
After=network.target
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/bin
|
||||
ExecStart=/bin/telemt /etc/telemt.toml
|
||||
User=telemt
|
||||
Group=telemt
|
||||
WorkingDirectory=/opt/telemt
|
||||
ExecStart=/bin/telemt /etc/telemt/telemt.toml
|
||||
Restart=on-failure
|
||||
LimitNOFILE=65536
|
||||
AmbientCapabilities=CAP_NET_BIND_SERVICE
|
||||
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
|
||||
NoNewPrivileges=true
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
then Ctrl+S -> Ctrl+X to save
|
||||
|
||||
reload systemd units
|
||||
```bash
|
||||
systemctl daemon-reload
|
||||
```
|
||||
|
||||
**3.** To start it, enter the command `systemctl start telemt`
|
||||
**4.** To start it, enter the command `systemctl start telemt`
|
||||
|
||||
**4.** To get status information, enter `systemctl status telemt`
|
||||
**5.** To get status information, enter `systemctl status telemt`
|
||||
|
||||
**5.** For automatic startup at system boot, enter `systemctl enable telemt`
|
||||
**6.** For automatic startup at system boot, enter `systemctl enable telemt`
|
||||
|
||||
**6.** To get the links, enter `journalctl -u telemt -n -g "links" --no-pager -o cat | tac`
|
||||
**7.** To get the link(s), enter
|
||||
```bash
|
||||
curl -s http://127.0.0.1:9091/v1/users | jq
|
||||
```
|
||||
|
||||
> Any number of people can use one link.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -48,11 +48,16 @@ python3 -c 'import os; print(os.urandom(16).hex())'
|
||||
|
||||
---
|
||||
|
||||
**1. Поместите свою конфигурацию в файл /etc/telemt.toml**
|
||||
**1. Поместите свою конфигурацию в файл /etc/telemt/telemt.toml**
|
||||
|
||||
Создаём директорию для конфига:
|
||||
```bash
|
||||
mkdir /etc/telemt
|
||||
```
|
||||
|
||||
Открываем nano
|
||||
```bash
|
||||
nano /etc/telemt.toml
|
||||
nano /etc/telemt/telemt.toml
|
||||
```
|
||||
Вставьте свою конфигурацию
|
||||
|
||||
@@ -60,12 +65,19 @@ nano /etc/telemt.toml
|
||||
# === General Settings ===
|
||||
[general]
|
||||
# ad_tag = "00000000000000000000000000000000"
|
||||
use_middle_proxy = false
|
||||
|
||||
[general.modes]
|
||||
classic = false
|
||||
secure = false
|
||||
tls = true
|
||||
|
||||
[server.api]
|
||||
enabled = true
|
||||
# listen = "127.0.0.1:9091"
|
||||
# whitelist = ["127.0.0.1/32"]
|
||||
# read_only = true
|
||||
|
||||
# === Anti-Censorship & Masking ===
|
||||
[censorship]
|
||||
tls_domain = "petrovich.ru"
|
||||
@@ -74,6 +86,7 @@ tls_domain = "petrovich.ru"
|
||||
# format: "username" = "32_hex_chars_secret"
|
||||
hello = "00000000000000000000000000000000"
|
||||
```
|
||||
|
||||
Затем нажмите Ctrl+S -> Ctrl+X, чтобы сохранить
|
||||
|
||||
> [!WARNING]
|
||||
@@ -82,7 +95,14 @@ hello = "00000000000000000000000000000000"
|
||||
|
||||
---
|
||||
|
||||
**2. Создайте службу в /etc/systemd/system/telemt.service**
|
||||
**2. Создайте пользователя для telemt**
|
||||
|
||||
```bash
|
||||
useradd -d /opt/telemt -m -r -U telemt
|
||||
chown -R telemt:telemt /etc/telemt
|
||||
```
|
||||
|
||||
**3. Создайте службу в /etc/systemd/system/telemt.service**
|
||||
|
||||
Открываем nano
|
||||
```bash
|
||||
@@ -93,30 +113,45 @@ nano /etc/systemd/system/telemt.service
|
||||
```bash
|
||||
[Unit]
|
||||
Description=Telemt
|
||||
After=network.target
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/bin
|
||||
ExecStart=/bin/telemt /etc/telemt.toml
|
||||
User=telemt
|
||||
Group=telemt
|
||||
WorkingDirectory=/opt/telemt
|
||||
ExecStart=/bin/telemt /etc/telemt/telemt.toml
|
||||
Restart=on-failure
|
||||
LimitNOFILE=65536
|
||||
AmbientCapabilities=CAP_NET_BIND_SERVICE
|
||||
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
|
||||
NoNewPrivileges=true
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
Затем нажмите Ctrl+S -> Ctrl+X, чтобы сохранить
|
||||
|
||||
перезагрузите конфигурацию systemd
|
||||
```bash
|
||||
systemctl daemon-reload
|
||||
```
|
||||
|
||||
**3.** Для запуска введите команду `systemctl start telemt`
|
||||
**4.** Для запуска введите команду `systemctl start telemt`
|
||||
|
||||
**4.** Для получения информации о статусе введите `systemctl status telemt`
|
||||
**5.** Для получения информации о статусе введите `systemctl status telemt`
|
||||
|
||||
**5.** Для автоматического запуска при запуске системы в введите `systemctl enable telemt`
|
||||
**6.** Для автоматического запуска при запуске системы введите `systemctl enable telemt`
|
||||
|
||||
**7.** Для получения ссылки/ссылок введите
|
||||
```bash
|
||||
curl -s http://127.0.0.1:9091/v1/users | jq
|
||||
```
|
||||
> Одной ссылкой может пользоваться сколько угодно человек.
|
||||
|
||||
**6.** Для получения ссылки введите `journalctl -u telemt -n -g "links" --no-pager -o cat | tac`
|
||||
> [!WARNING]
|
||||
> Рабочую ссылку может выдать только команда из 6 пункта. Не пытайтесь делать ее самостоятельно или копировать откуда-либо!
|
||||
> Рабочую ссылку может выдать только команда из 7 пункта. Не пытайтесь делать ее самостоятельно или копировать откуда-либо, если вы не уверены в том, что делаете!
|
||||
|
||||
---
|
||||
|
||||
|
||||
285
docs/model/MODEL.en.md
Normal file
285
docs/model/MODEL.en.md
Normal file
@@ -0,0 +1,285 @@
|
||||
# Telemt Runtime Model
|
||||
|
||||
## Scope
|
||||
This document defines runtime concepts used by the Middle-End (ME) transport pipeline and the orchestration logic around it.
|
||||
|
||||
It focuses on:
|
||||
- `ME Pool / Reader / Writer / Refill / Registry`
|
||||
- `Adaptive Floor`
|
||||
- `Trio-State`
|
||||
- `Generation Lifecycle`
|
||||
|
||||
## Core Entities
|
||||
|
||||
### ME Pool
|
||||
`ME Pool` is the runtime orchestrator for all Middle-End writers.
|
||||
|
||||
Responsibilities:
|
||||
- Holds writer inventory by DC/family/endpoint.
|
||||
- Maintains routing primitives and writer selection policy.
|
||||
- Tracks generation state (`active`, `warm`, `draining` context).
|
||||
- Applies runtime policies (floor mode, refill, reconnect, reinit, fallback behavior).
|
||||
- Exposes readiness gates used by admission logic (for conditional accept/cast behavior).
|
||||
|
||||
Non-goals:
|
||||
- It does not own client protocol decoding.
|
||||
- It does not own per-client business policy (quotas/limits).
|
||||
|
||||
### ME Writer
|
||||
`ME Writer` is a long-lived ME RPC tunnel bound to one concrete ME endpoint (`ip:port`), with:
|
||||
- Outbound command channel (send path).
|
||||
- Associated reader loop (inbound path).
|
||||
- Health/degraded flags.
|
||||
- Contour/state and generation metadata.
|
||||
|
||||
A writer is the actual data plane carrier for client sessions once bound.
|
||||
|
||||
### ME Reader
|
||||
`ME Reader` is the inbound parser/dispatcher for one writer:
|
||||
- Reads/decrypts ME RPC frames.
|
||||
- Validates sequence/checksum.
|
||||
- Routes payloads to client-connection channels via `Registry`.
|
||||
- Emits close/ack/data events and updates telemetry.
|
||||
|
||||
Design intent:
|
||||
- Reader must stay non-blocking as much as possible.
|
||||
- Backpressure on a single client route must not stall the whole writer stream.
|
||||
|
||||
### Refill
|
||||
`Refill` is the recovery mechanism that restores writer coverage when capacity drops:
|
||||
- Per-endpoint restore (same endpoint first).
|
||||
- Per-DC restore to satisfy required floor.
|
||||
- Optional outage-mode/shadow behavior for fragile single-endpoint DCs.
|
||||
|
||||
Refill works asynchronously and should not block hot routing paths.
|
||||
|
||||
### Registry
|
||||
`Registry` is the routing index between ME and client sessions:
|
||||
- `conn_id -> client response channel`
|
||||
- `conn_id <-> writer_id` binding map
|
||||
- writer activity snapshots and idle tracking
|
||||
|
||||
Main invariants:
|
||||
- A `conn_id` routes to at most one active response channel.
|
||||
- Writer loss triggers safe unbind/cleanup and close propagation.
|
||||
- Registry state is the source of truth for active ME-bound session mapping.
|
||||
|
||||
## Adaptive Floor
|
||||
|
||||
### What it is
|
||||
`Adaptive Floor` is a runtime policy that changes target writer count per DC based on observed activity, instead of always holding static peak floor.
|
||||
|
||||
### Why it exists
|
||||
Goals:
|
||||
- Reduce idle writer churn under low traffic.
|
||||
- Keep enough warm capacity to avoid client-visible stalls on burst recovery.
|
||||
- Limit needless reconnect storms on unstable endpoints.
|
||||
|
||||
### Behavioral model
|
||||
- Under activity: floor converges toward configured static requirement.
|
||||
- Under prolonged idle: floor can shrink to a safe minimum.
|
||||
- Recovery/grace windows prevent aggressive oscillation.
|
||||
|
||||
### Safety constraints
|
||||
- Never violate minimal survivability floor for a DC group.
|
||||
- Refill must still restore quickly on demand.
|
||||
- Floor adaptation must not force-drop already bound healthy sessions.
|
||||
|
||||
## Trio-State
|
||||
|
||||
`Trio-State` is writer contouring:
|
||||
- `Warm`
|
||||
- `Active`
|
||||
- `Draining`
|
||||
|
||||
### State semantics
|
||||
- `Warm`: connected and validated, not primary for new binds.
|
||||
- `Active`: preferred for new binds and normal traffic.
|
||||
- `Draining`: no new regular binds; existing sessions continue until graceful retirement rules apply.
|
||||
|
||||
### Transition intent
|
||||
- `Warm -> Active`: when coverage/readiness conditions are satisfied.
|
||||
- `Active -> Draining`: on generation swap, endpoint replacement, or controlled retirement.
|
||||
- `Draining -> removed`: after drain TTL/force-close policy (or when naturally empty).
|
||||
|
||||
This separation reduces single-point-of-failure (SPOF) risk and keeps cutovers predictable.
|
||||
|
||||
## Generation Lifecycle
|
||||
|
||||
Generation isolates pool epochs during reinit/reconfiguration.
|
||||
|
||||
### Lifecycle phases
|
||||
1. `Bootstrap`: initial writers are established.
|
||||
2. `Warmup`: next generation writers are created and validated.
|
||||
3. `Activation`: generation promoted to active when coverage gate passes.
|
||||
4. `Drain`: previous generation becomes draining, existing sessions are allowed to finish.
|
||||
5. `Retire`: old generation writers are removed after graceful rules.
|
||||
|
||||
### Operational guarantees
|
||||
- No partial generation activation without minimum coverage.
|
||||
- Existing healthy client sessions should not be dropped just because a new generation appears.
|
||||
- Draining generation exists to absorb in-flight traffic during swap.
|
||||
|
||||
### Readiness and admission
|
||||
Pool readiness is not equivalent to “all endpoints fully saturated”.
|
||||
Typical gating strategy:
|
||||
- Open admission when per-DC minimal alive coverage exists.
|
||||
- Continue background saturation for multi-endpoint DCs.
|
||||
|
||||
This keeps startup latency low while preserving eventual full capacity.
|
||||
|
||||
## Interactions Between Concepts
|
||||
|
||||
- `Generation` defines pool epochs.
|
||||
- `Trio-State` defines per-writer role inside/around those epochs.
|
||||
- `Adaptive Floor` defines how much capacity should be maintained right now.
|
||||
- `Refill` is the actuator that closes the gap between desired and current capacity.
|
||||
- `Registry` keeps per-session routing correctness while all of the above changes over time.
|
||||
|
||||
## Architectural Approach
|
||||
|
||||
### Layered Design
|
||||
The runtime is intentionally split into two planes:
|
||||
- `Control Plane`: decides desired topology and policy (`floor`, `generation swap`, `refill`, `fallback`).
|
||||
- `Data Plane`: executes packet/session transport (`reader`, `writer`, routing, acks, close propagation).
|
||||
|
||||
Architectural rule:
|
||||
- Control Plane may change writer inventory and policy.
|
||||
- Data Plane must remain stable and low-latency while those changes happen.
|
||||
|
||||
### Ownership Model
|
||||
Ownership is centered around explicit state domains:
|
||||
- `MePool` owns writer lifecycle and policy state.
|
||||
- `Registry` owns per-connection routing bindings.
|
||||
- `Writer task` owns outbound ME socket send progression.
|
||||
- `Reader task` owns inbound ME socket parsing and event dispatch.
|
||||
|
||||
This prevents accidental cross-layer mutation and keeps invariants local.
|
||||
|
||||
### Control Plane Responsibilities
|
||||
Control Plane is event-driven and policy-driven:
|
||||
- Startup initialization and readiness gates.
|
||||
- Runtime reinit (periodic or config-triggered).
|
||||
- Coverage checks per DC/family/endpoint group.
|
||||
- Floor enforcement (static/adaptive).
|
||||
- Refill scheduling and retry orchestration.
|
||||
- Generation transition (`warm -> active`, previous `active -> draining`).
|
||||
|
||||
Control Plane must prioritize determinism over short-term aggressiveness.
|
||||
|
||||
### Data Plane Responsibilities
|
||||
Data Plane is throughput-first and allocation-sensitive:
|
||||
- Session bind to writer.
|
||||
- Per-frame parsing/validation and dispatch.
|
||||
- Ack and close signal propagation.
|
||||
- Route drop behavior under missing connection or closed channel.
|
||||
- Minimal critical logging in hot path.
|
||||
|
||||
Data Plane should avoid waiting on operations that are not strictly required for frame correctness.
|
||||
|
||||
## Concurrency and Synchronization
|
||||
|
||||
### Concurrency Principles
|
||||
- Per-writer isolation: each writer has independent send/read task loops.
|
||||
- Per-connection isolation: client channel state is scoped by `conn_id`.
|
||||
- Asynchronous recovery: refill/reconnect runs outside the packet hot path.
|
||||
|
||||
### Synchronization Strategy
|
||||
- Shared maps use fine-grained, short-lived locking.
|
||||
- Read-mostly paths avoid broad write-lock windows.
|
||||
- Backpressure decisions are localized at route/channel boundary.
|
||||
|
||||
Design target:
|
||||
- A slow consumer should degrade only itself (or its route), not global writer progress.
|
||||
|
||||
### Cancellation and Shutdown
|
||||
Writer and reader loops are cancellation-aware:
|
||||
- explicit cancel token / close command support;
|
||||
- safe unbind and cleanup via registry;
|
||||
- deterministic order: stop admission -> drain/close -> release resources.
|
||||
|
||||
## Consistency Model
|
||||
|
||||
### Session Consistency
|
||||
For one `conn_id`:
|
||||
- exactly one active route target at a time;
|
||||
- close and unbind must be idempotent;
|
||||
- writer loss must not leave dangling bindings.
|
||||
|
||||
### Generation Consistency
|
||||
Generational consistency guarantees:
|
||||
- New generation is not promoted before minimum coverage gate.
|
||||
- Previous generation remains available in `draining` state during handover.
|
||||
- Forced retirement is policy-bound (`drain ttl`, optional force-close), not immediate.
|
||||
|
||||
### Policy Consistency
|
||||
Policy changes (`adaptive/static floor`, fallback mode, retries) should apply without violating established active-session routing invariants.
|
||||
|
||||
## Backpressure and Flow Control
|
||||
|
||||
### Route-Level Backpressure
|
||||
Route channels are bounded by design.
|
||||
When pressure increases:
|
||||
- short burst absorption is allowed;
|
||||
- prolonged congestion triggers controlled drop semantics;
|
||||
- drop accounting is explicit via metrics/counters.
|
||||
|
||||
### Reader Non-Blocking Priority
|
||||
Inbound ME reader path should never be serialized behind one congested client route.
|
||||
Practical implication:
|
||||
- prefer non-blocking route attempt in the parser loop;
|
||||
- move heavy recovery to async side paths.
|
||||
|
||||
## Failure Domain Strategy
|
||||
|
||||
### Endpoint-Level Failure
|
||||
Failure of one endpoint should trigger endpoint-scoped recovery first:
|
||||
- same endpoint reconnect;
|
||||
- endpoint replacement within same DC group if applicable.
|
||||
|
||||
### DC-Level Degradation
|
||||
If a DC group cannot satisfy floor:
|
||||
- keep service via remaining coverage if policy allows;
|
||||
- continue asynchronous refill saturation in background.
|
||||
|
||||
### Whole-Pool Readiness Loss
|
||||
If no sufficient ME coverage exists:
|
||||
- admission gate can hold new accepts (conditional policy);
|
||||
- existing sessions should continue when their path remains healthy.
|
||||
|
||||
## Performance Architecture Notes
|
||||
|
||||
### Hotpath Discipline
|
||||
Allowed in hotpath:
|
||||
- fixed-size parsing and cheap validation;
|
||||
- bounded channel operations;
|
||||
- precomputed or low-allocation access patterns.
|
||||
|
||||
Avoid in hotpath:
|
||||
- repeated expensive decoding;
|
||||
- broad locks with awaits inside critical sections;
|
||||
- verbose high-frequency logging.
|
||||
|
||||
### Throughput Stability Over Peak Spikes
|
||||
Architecture prefers stable throughput and predictable latency over short peak gains that increase churn or long-tail reconnect times.
|
||||
|
||||
## Evolution and Extension Rules
|
||||
|
||||
To evolve this model safely:
|
||||
- Add new policy knobs in Control Plane first.
|
||||
- Keep Data Plane contracts stable (`conn_id`, route semantics, close semantics).
|
||||
- Validate generation and registry invariants before enabling by default.
|
||||
- Introduce new retry/recovery strategies behind explicit config.
|
||||
|
||||
## Failure and Recovery Notes
|
||||
|
||||
- Single-endpoint DC failure is a normal degraded mode case; policy should prioritize fast reconnect and optional shadow/probing strategies.
|
||||
- Idle close by peer should be treated as expected when upstream enforces idle timeout.
|
||||
- Reconnect backoff must protect against synchronized churn while still allowing fast first retries.
|
||||
- Fallback (`ME -> direct DC`) is a policy switch, not a transport bug by itself.
|
||||
|
||||
## Terminology Summary
|
||||
- `Coverage`: enough live writers to satisfy per-DC acceptance policy.
|
||||
- `Floor`: target minimum writer count policy.
|
||||
- `Churn`: frequent writer reconnect/remove cycles.
|
||||
- `Hotpath`: per-packet/per-connection data path where extra waits/allocations are expensive.
|
||||
285
docs/model/MODEL.ru.md
Normal file
285
docs/model/MODEL.ru.md
Normal file
@@ -0,0 +1,285 @@
|
||||
# Runtime-модель Telemt
|
||||
|
||||
## Область описания
|
||||
Документ фиксирует ключевые runtime-понятия пайплайна Middle-End (ME) и оркестрации вокруг него.
|
||||
|
||||
Фокус:
|
||||
- `ME Pool / Reader / Writer / Refill / Registry`
|
||||
- `Adaptive Floor`
|
||||
- `Trio-State`
|
||||
- `Generation Lifecycle`
|
||||
|
||||
## Базовые сущности
|
||||
|
||||
### ME Pool
|
||||
`ME Pool` — центральный оркестратор всех Middle-End writer-ов.
|
||||
|
||||
Зона ответственности:
|
||||
- хранит инвентарь writer-ов по DC/family/endpoint;
|
||||
- управляет выбором writer-а и маршрутизацией;
|
||||
- ведёт состояние поколений (`active`, `warm`, `draining` контекст);
|
||||
- применяет runtime-политики (floor, refill, reconnect, reinit, fallback);
|
||||
- отдаёт сигналы готовности для admission-логики (conditional accept/cast).
|
||||
|
||||
Что не делает:
|
||||
- не декодирует клиентский протокол;
|
||||
- не реализует бизнес-политику пользователя (квоты/лимиты).
|
||||
|
||||
### ME Writer
|
||||
`ME Writer` — долгоживущий ME RPC-канал к конкретному endpoint (`ip:port`), у которого есть:
|
||||
- канал команд на отправку;
|
||||
- связанный reader loop для входящего потока;
|
||||
- флаги состояния/деградации;
|
||||
- метаданные contour/state и generation.
|
||||
|
||||
Writer — это фактический data-plane носитель клиентских сессий после бинда.
|
||||
|
||||
### ME Reader
|
||||
`ME Reader` — входной parser/dispatcher одного writer-а:
|
||||
- читает и расшифровывает ME RPC-фреймы;
|
||||
- проверяет sequence/checksum;
|
||||
- маршрутизирует payload в client-каналы через `Registry`;
|
||||
- обрабатывает close/ack/data и обновляет телеметрию.
|
||||
|
||||
Инженерный принцип:
|
||||
- Reader должен оставаться неблокирующим.
|
||||
- Backpressure одной клиентской сессии не должен останавливать весь поток writer-а.
|
||||
|
||||
### Refill
|
||||
`Refill` — механизм восстановления покрытия writer-ов при просадке:
|
||||
- восстановление на том же endpoint в первую очередь;
|
||||
- восстановление по DC до требуемого floor;
|
||||
- опциональные outage/shadow-режимы для хрупких single-endpoint DC.
|
||||
|
||||
Refill работает асинхронно и не должен блокировать hotpath.
|
||||
|
||||
### Registry
|
||||
`Registry` — маршрутизационный индекс между ME и клиентскими сессиями:
|
||||
- `conn_id -> канал ответа клиенту`;
|
||||
- map биндов `conn_id <-> writer_id`;
|
||||
- снимки активности writer-ов и idle-трекинг.
|
||||
|
||||
Ключевые инварианты:
|
||||
- один `conn_id` маршрутизируется максимум в один активный канал ответа;
|
||||
- потеря writer-а приводит к безопасному unbind/cleanup и отправке close;
|
||||
- именно `Registry` является источником истины по активным ME-биндам.
|
||||
|
||||
## Adaptive Floor
|
||||
|
||||
### Что это
|
||||
`Adaptive Floor` — runtime-политика, которая динамически меняет целевое число writer-ов на DC в зависимости от активности, а не держит всегда фиксированный статический floor.
|
||||
|
||||
### Зачем
|
||||
Цели:
|
||||
- уменьшить churn на idle-трафике;
|
||||
- сохранить достаточную прогретую ёмкость для быстрых всплесков;
|
||||
- снизить лишние reconnect-штормы на нестабильных endpoint.
|
||||
|
||||
### Модель поведения
|
||||
- при активности floor стремится к статическому требованию;
|
||||
- при длительном idle floor может снижаться до безопасного минимума;
|
||||
- grace/recovery окна не дают системе "флапать" слишком резко.
|
||||
|
||||
### Ограничения безопасности
|
||||
- нельзя нарушать минимальный floor выживаемости DC-группы;
|
||||
- refill обязан быстро нарастить покрытие по запросу;
|
||||
- адаптация не должна принудительно ронять уже привязанные healthy-сессии.
|
||||
|
||||
## Trio-State
|
||||
|
||||
`Trio-State` — контурная роль writer-а:
|
||||
- `Warm`
|
||||
- `Active`
|
||||
- `Draining`
|
||||
|
||||
### Семантика состояний
|
||||
- `Warm`: writer подключён и валиден, но не основной для новых биндов.
|
||||
- `Active`: приоритетный для новых биндов и обычного трафика.
|
||||
- `Draining`: новые обычные бинды не назначаются; текущие сессии живут до правил graceful-вывода.
|
||||
|
||||
### Логика переходов
|
||||
- `Warm -> Active`: когда достигнуты условия покрытия/готовности.
|
||||
- `Active -> Draining`: при swap поколения, замене endpoint или контролируемом выводе.
|
||||
- `Draining -> removed`: после drain TTL/force-close политики (или естественного опустошения).
|
||||
|
||||
Такое разделение снижает SPOF-риски и делает cutover предсказуемым.
|
||||
|
||||
## Generation Lifecycle
|
||||
|
||||
Generation изолирует эпохи пула при reinit/reconfiguration.
|
||||
|
||||
### Фазы жизненного цикла
|
||||
1. `Bootstrap`: поднимается начальный набор writer-ов.
|
||||
2. `Warmup`: создаётся и валидируется новое поколение.
|
||||
3. `Activation`: новое поколение становится active после прохождения coverage-gate.
|
||||
4. `Drain`: предыдущее поколение переводится в draining, текущим сессиям дают завершиться.
|
||||
5. `Retire`: старое поколение удаляется по graceful-правилам.
|
||||
|
||||
### Операционные гарантии
|
||||
- нельзя активировать поколение частично без минимального покрытия;
|
||||
- healthy-клиенты не должны теряться только из-за появления нового поколения;
|
||||
- draining-поколение служит буфером для in-flight трафика во время swap.
|
||||
|
||||
### Готовность и приём клиентов
|
||||
Готовность пула не равна "все endpoint полностью насыщены".
|
||||
Типичная стратегия:
|
||||
- открыть admission при минимально достаточном alive-покрытии по DC;
|
||||
- параллельно продолжать saturation для multi-endpoint DC.
|
||||
|
||||
Это уменьшает startup latency и сохраняет выход на полную ёмкость.
|
||||
|
||||
## Как понятия связаны между собой
|
||||
|
||||
- `Generation` задаёт эпохи пула.
|
||||
- `Trio-State` задаёт роль каждого writer-а внутри/между эпохами.
|
||||
- `Adaptive Floor` задаёт, сколько ёмкости нужно сейчас.
|
||||
- `Refill` — исполнитель, который закрывает разницу между desired и current capacity.
|
||||
- `Registry` гарантирует корректную маршрутизацию сессий, пока всё выше меняется.
|
||||
|
||||
## Архитектурный подход
|
||||
|
||||
### Слоистая модель
|
||||
Runtime специально разделён на две плоскости:
|
||||
- `Control Plane`: принимает решения о целевой топологии и политиках (`floor`, `generation swap`, `refill`, `fallback`).
|
||||
- `Data Plane`: исполняет транспорт сессий и пакетов (`reader`, `writer`, маршрутизация, ack, close).
|
||||
|
||||
Ключевое правило:
|
||||
- Control Plane может менять состав writer-ов и policy.
|
||||
- Data Plane должен оставаться стабильным и низколатентным в момент этих изменений.
|
||||
|
||||
### Модель владения состоянием
|
||||
Владение разделено по доменам:
|
||||
- `MePool` владеет жизненным циклом writer-ов и policy-state.
|
||||
- `Registry` владеет routing-биндами клиентских сессий.
|
||||
- `Writer task` владеет исходящей прогрессией ME-сокета.
|
||||
- `Reader task` владеет входящим парсингом и dispatch-событиями.
|
||||
|
||||
Это ограничивает побочные мутации и локализует инварианты.
|
||||
|
||||
### Обязанности Control Plane
|
||||
Control Plane работает событийно и policy-ориентированно:
|
||||
- стартовая инициализация и readiness-gate;
|
||||
- runtime reinit (периодический и/или по изменению конфигурации);
|
||||
- проверки покрытия по DC/family/endpoint group;
|
||||
- применение floor-политики (static/adaptive);
|
||||
- планирование refill и orchestration retry;
|
||||
- переходы поколений (`warm -> active`, прежний `active -> draining`).
|
||||
|
||||
Для него важнее детерминизм, чем агрессивная краткосрочная реакция.
|
||||
|
||||
### Обязанности Data Plane
|
||||
Data Plane ориентирован на пропускную способность и предсказуемую задержку:
|
||||
- bind клиентской сессии к writer-у;
|
||||
- per-frame parsing/validation/dispatch;
|
||||
- распространение ack/close;
|
||||
- корректная реакция на missing conn/closed channel;
|
||||
- минимальный лог-шум в hotpath.
|
||||
|
||||
Data Plane не должен ждать операций, не критичных для корректности текущего фрейма.
|
||||
|
||||
## Конкурентность и синхронизация
|
||||
|
||||
### Принципы конкурентности
|
||||
- Изоляция по writer-у: у каждого writer-а независимые send/read loop.
|
||||
- Изоляция по сессии: состояние канала локально для `conn_id`.
|
||||
- Асинхронное восстановление: refill/reconnect выполняются вне пакетного hotpath.
|
||||
|
||||
### Стратегия синхронизации
|
||||
- Для shared map используются короткие и узкие lock-секции.
|
||||
- Read-heavy пути избегают длительных write-lock окон.
|
||||
- Решения по backpressure локализованы на границе route/channel.
|
||||
|
||||
Цель:
|
||||
- медленный consumer должен деградировать локально, не останавливая глобальный прогресс writer-а.
|
||||
|
||||
### Cancellation и shutdown
|
||||
Reader/Writer loop должны быть cancellation-aware:
|
||||
- явные cancel token / close command;
|
||||
- безопасный unbind/cleanup через registry;
|
||||
- детерминированный порядок: stop admission -> drain/close -> release resources.
|
||||
|
||||
## Модель согласованности
|
||||
|
||||
### Согласованность сессии
|
||||
Для одного `conn_id`:
|
||||
- одновременно ровно один активный route-target;
|
||||
- close/unbind операции идемпотентны;
|
||||
- потеря writer-а не оставляет dangling-бинды.
|
||||
|
||||
### Согласованность поколения
|
||||
Гарантии generation:
|
||||
- новое поколение не активируется до прохождения минимального coverage-gate;
|
||||
- предыдущее поколение остаётся в `draining` на время handover;
|
||||
- принудительный вывод writer-ов ограничен policy (`drain ttl`, optional force-close), а не мгновенный.
|
||||
|
||||
### Согласованность политик
|
||||
Изменение policy (`adaptive/static floor`, fallback mode, retries) не должно ломать инварианты маршрутизации уже активных сессий.
|
||||
|
||||
## Backpressure и управление потоком
|
||||
|
||||
### Route-level backpressure
|
||||
Route-каналы намеренно bounded.
|
||||
При росте нагрузки:
|
||||
- кратковременный burst поглощается;
|
||||
- длительная перегрузка переходит в контролируемую drop-семантику;
|
||||
- все drop-сценарии должны быть прозрачно видны в метриках.
|
||||
|
||||
### Приоритет неблокирующего Reader
|
||||
Входящий ME-reader path не должен сериализоваться из-за одной перегруженной клиентской сессии.
|
||||
Практически это означает:
|
||||
- использовать неблокирующую попытку route в parser loop;
|
||||
- выносить тяжёлое восстановление в асинхронные side-path.
|
||||
|
||||
## Стратегия доменов отказа
|
||||
|
||||
### Отказ отдельного endpoint
|
||||
Сначала применяется endpoint-local recovery:
|
||||
- reconnect в тот же endpoint;
|
||||
- затем замена endpoint внутри той же DC-группы (если доступно).
|
||||
|
||||
### Деградация уровня DC
|
||||
Если DC-группа не набирает floor:
|
||||
- сервис сохраняется на остаточном покрытии (если policy разрешает);
|
||||
- saturation refill продолжается асинхронно в фоне.
|
||||
|
||||
### Потеря готовности всего пула
|
||||
Если достаточного ME-покрытия нет:
|
||||
- admission gate может временно закрыть приём новых подключений (conditional policy);
|
||||
- уже активные сессии продолжают работать, пока их маршрут остаётся healthy.
|
||||
|
||||
## Архитектурные заметки по производительности
|
||||
|
||||
### Дисциплина hotpath
|
||||
Допустимо в hotpath:
|
||||
- фиксированный и дешёвый parsing/validation;
|
||||
- bounded channel operations;
|
||||
- precomputed/low-allocation доступ к данным.
|
||||
|
||||
Нежелательно в hotpath:
|
||||
- повторные дорогие decode;
|
||||
- широкие lock-секции с `await` внутри;
|
||||
- высокочастотный подробный logging.
|
||||
|
||||
### Стабильность важнее пиков
|
||||
Архитектура приоритетно выбирает стабильную пропускную способность и предсказуемую latency, а не краткосрочные пики ценой churn и long-tail reconnect.
|
||||
|
||||
## Правила эволюции модели
|
||||
|
||||
Чтобы расширять модель безопасно:
|
||||
- новые policy knobs сначала внедрять в Control Plane;
|
||||
- контракты Data Plane (`conn_id`, route/close семантика) держать стабильными;
|
||||
- перед дефолтным включением проверять generation/registry инварианты;
|
||||
- новые recovery/retry стратегии вводить через явный config-флаг.
|
||||
|
||||
## Нюансы отказов и восстановления
|
||||
|
||||
- падение single-endpoint DC — штатный деградированный сценарий; приоритет: быстрый reconnect и, при необходимости, shadow/probing;
|
||||
- idle-close со стороны peer должен считаться нормальным событием при upstream idle-timeout;
|
||||
- backoff reconnect-логики должен ограничивать синхронный churn, но сохранять быстрые первые попытки;
|
||||
- fallback (`ME -> direct DC`) — это переключаемая policy-ветка, а не автоматический признак бага транспорта.
|
||||
|
||||
## Краткий словарь
|
||||
- `Coverage`: достаточное число живых writer-ов для политики приёма по DC.
|
||||
- `Floor`: целевая минимальная ёмкость writer-ов.
|
||||
- `Churn`: частые циклы reconnect/remove writer-ов.
|
||||
- `Hotpath`: пер-пакетный/пер-коннектный путь, где любые лишние ожидания и аллокации особенно дороги.
|
||||
160
install.sh
160
install.sh
@@ -1,73 +1,115 @@
|
||||
sudo bash -c '
|
||||
set -e
|
||||
#!/bin/sh
|
||||
set -eu
|
||||
|
||||
# --- Проверка на существующую установку ---
|
||||
if systemctl list-unit-files | grep -q telemt.service; then
|
||||
# --- РЕЖИМ ОБНОВЛЕНИЯ ---
|
||||
echo "--- Обнаружена существующая установка Telemt. Запускаю обновление... ---"
|
||||
REPO="${REPO:-telemt/telemt}"
|
||||
BIN_NAME="${BIN_NAME:-telemt}"
|
||||
VERSION="${1:-${VERSION:-latest}}"
|
||||
INSTALL_DIR="${INSTALL_DIR:-/usr/local/bin}"
|
||||
|
||||
echo "[*] Остановка службы telemt..."
|
||||
systemctl stop telemt || true # Игнорируем ошибку, если служба уже остановлена
|
||||
say() {
|
||||
printf '%s\n' "$*"
|
||||
}
|
||||
|
||||
echo "[1/2] Скачивание последней версии Telemt..."
|
||||
wget -qO- "https://github.com/telemt/telemt/releases/latest/download/telemt-$(uname -m)-linux-$(ldd --version 2>&1 | grep -iq musl && echo musl || echo gnu).tar.gz" | tar -xz
|
||||
die() {
|
||||
printf 'Error: %s\n' "$*" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
echo "[1/2] Замена исполняемого файла в /usr/local/bin..."
|
||||
mv telemt /usr/local/bin/telemt
|
||||
chmod +x /usr/local/bin/telemt
|
||||
need_cmd() {
|
||||
command -v "$1" >/dev/null 2>&1 || die "required command not found: $1"
|
||||
}
|
||||
|
||||
echo "[2/2] Запуск службы..."
|
||||
systemctl start telemt
|
||||
detect_os() {
|
||||
os="$(uname -s)"
|
||||
case "$os" in
|
||||
Linux) printf 'linux\n' ;;
|
||||
OpenBSD) printf 'openbsd\n' ;;
|
||||
*) printf '%s\n' "$os" ;;
|
||||
esac
|
||||
}
|
||||
|
||||
echo "--- Обновление Telemt успешно завершено! ---"
|
||||
echo
|
||||
echo "Для проверки статуса службы выполните:"
|
||||
echo " systemctl status telemt"
|
||||
detect_arch() {
|
||||
arch="$(uname -m)"
|
||||
case "$arch" in
|
||||
x86_64|amd64) printf 'x86_64\n' ;;
|
||||
aarch64|arm64) printf 'aarch64\n' ;;
|
||||
*) die "unsupported architecture: $arch" ;;
|
||||
esac
|
||||
}
|
||||
|
||||
else
|
||||
# --- РЕЖИМ НОВОЙ УСТАНОВКИ ---
|
||||
echo "--- Начало автоматической установки Telemt ---"
|
||||
detect_libc() {
|
||||
case "$(ldd --version 2>&1 || true)" in
|
||||
*musl*) printf 'musl\n' ;;
|
||||
*) printf 'gnu\n' ;;
|
||||
esac
|
||||
}
|
||||
|
||||
# Шаг 1: Скачивание и установка бинарного файла
|
||||
echo "[1/5] Скачивание последней версии Telemt..."
|
||||
wget -qO- "https://github.com/telemt/telemt/releases/latest/download/telemt-$(uname -m)-linux-$(ldd --version 2>&1 | grep -iq musl && echo musl || echo gnu).tar.gz" | tar -xz
|
||||
fetch_to_stdout() {
|
||||
url="$1"
|
||||
if command -v curl >/dev/null 2>&1; then
|
||||
curl -fsSL "$url"
|
||||
elif command -v wget >/dev/null 2>&1; then
|
||||
wget -qO- "$url"
|
||||
else
|
||||
die "neither curl nor wget is installed"
|
||||
fi
|
||||
}
|
||||
|
||||
echo "[1/5] Перемещение исполняемого файла в /usr/local/bin и установка прав..."
|
||||
mv telemt /usr/local/bin/telemt
|
||||
chmod +x /usr/local/bin/telemt
|
||||
install_binary() {
|
||||
src="$1"
|
||||
dst="$2"
|
||||
|
||||
# Шаг 2: Генерация секрета
|
||||
echo "[2/5] Генерация секретного ключа..."
|
||||
SECRET=$(openssl rand -hex 16)
|
||||
if [ -w "$INSTALL_DIR" ] || { [ ! -e "$INSTALL_DIR" ] && [ -w "$(dirname "$INSTALL_DIR")" ]; }; then
|
||||
mkdir -p "$INSTALL_DIR"
|
||||
install -m 0755 "$src" "$dst"
|
||||
elif command -v sudo >/dev/null 2>&1; then
|
||||
sudo mkdir -p "$INSTALL_DIR"
|
||||
sudo install -m 0755 "$src" "$dst"
|
||||
else
|
||||
die "cannot write to $INSTALL_DIR and sudo is not available"
|
||||
fi
|
||||
}
|
||||
|
||||
# Шаг 3: Создание файла конфигурации
|
||||
echo "[3/5] Создание файла конфигурации /etc/telemt.toml..."
|
||||
printf "# === General Settings ===\n[general]\n[general.modes]\nclassic = false\nsecure = false\ntls = true\n\n# === Anti-Censorship & Masking ===\n[censorship]\n# !!! ВАЖНО: Замените на ваш домен или домен, который вы хотите использовать для маскировки !!!\ntls_domain = \"petrovich.ru\"\n\n[access.users]\nhello = \"%s\"\n" "$SECRET" > /etc/telemt.toml
|
||||
need_cmd uname
|
||||
need_cmd tar
|
||||
need_cmd mktemp
|
||||
need_cmd grep
|
||||
need_cmd install
|
||||
|
||||
# Шаг 4: Создание службы Systemd
|
||||
echo "[4/5] Создание службы systemd..."
|
||||
printf "[Unit]\nDescription=Telemt Proxy\nAfter=network.target\n\n[Service]\nType=simple\nExecStart=/usr/local/bin/telemt /etc/telemt.toml\nRestart=on-failure\nRestartSec=5\nLimitNOFILE=65536\n\n[Install]\nWantedBy=multi-user.target\n" > /etc/systemd/system/telemt.service
|
||||
ARCH="$(detect_arch)"
|
||||
OS="$(detect_os)"
|
||||
|
||||
# Шаг 5: Запуск службы
|
||||
echo "[5/5] Перезагрузка systemd, запуск и включение службы telemt..."
|
||||
systemctl daemon-reload
|
||||
systemctl start telemt
|
||||
systemctl enable telemt
|
||||
|
||||
echo "--- Установка и запуск Telemt успешно завершены! ---"
|
||||
echo
|
||||
echo "ВАЖНАЯ ИНФОРМАЦИЯ:"
|
||||
echo "==================="
|
||||
echo "1. Вам НЕОБХОДИМО отредактировать файл /etc/telemt.toml и заменить '\''petrovich.ru'\'' на другой домен"
|
||||
echo " с помощью команды:"
|
||||
echo " nano /etc/telemt.toml"
|
||||
echo " После редактирования файла перезапустите службу командой:"
|
||||
echo " sudo systemctl restart telemt"
|
||||
echo
|
||||
echo "2. Для проверки статуса службы выполните команду:"
|
||||
echo " systemctl status telemt"
|
||||
echo
|
||||
echo "3. Для получения ссылок на подключение выполните команду:"
|
||||
echo " journalctl -u telemt -n -g '\''links'\'' --no-pager -o cat | tac"
|
||||
if [ "$OS" != "linux" ]; then
|
||||
case "$OS" in
|
||||
openbsd)
|
||||
die "install.sh installs only Linux release artifacts. On OpenBSD, build from source (see docs/OPENBSD.en.md)."
|
||||
;;
|
||||
*)
|
||||
die "unsupported operating system for install.sh: $OS"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
'
|
||||
|
||||
LIBC="$(detect_libc)"
|
||||
|
||||
case "$VERSION" in
|
||||
latest)
|
||||
URL="https://github.com/$REPO/releases/latest/download/${BIN_NAME}-${ARCH}-linux-${LIBC}.tar.gz"
|
||||
;;
|
||||
*)
|
||||
URL="https://github.com/$REPO/releases/download/${VERSION}/${BIN_NAME}-${ARCH}-linux-${LIBC}.tar.gz"
|
||||
;;
|
||||
esac
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
trap 'rm -rf "$TMPDIR"' EXIT INT TERM
|
||||
|
||||
say "Installing $BIN_NAME ($VERSION) for $ARCH-linux-$LIBC..."
|
||||
fetch_to_stdout "$URL" | tar -xzf - -C "$TMPDIR"
|
||||
|
||||
[ -f "$TMPDIR/$BIN_NAME" ] || die "archive did not contain $BIN_NAME"
|
||||
|
||||
install_binary "$TMPDIR/$BIN_NAME" "$INSTALL_DIR/$BIN_NAME"
|
||||
|
||||
say "Installed: $INSTALL_DIR/$BIN_NAME"
|
||||
"$INSTALL_DIR/$BIN_NAME" --version 2>/dev/null || true
|
||||
|
||||
269
src/api/config_store.rs
Normal file
269
src/api/config_store.rs
Normal file
@@ -0,0 +1,269 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use hyper::header::IF_MATCH;
|
||||
use serde::Serialize;
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
|
||||
use super::model::ApiFailure;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(super) enum AccessSection {
|
||||
Users,
|
||||
UserAdTags,
|
||||
UserMaxTcpConns,
|
||||
UserExpirations,
|
||||
UserDataQuota,
|
||||
UserMaxUniqueIps,
|
||||
}
|
||||
|
||||
impl AccessSection {
|
||||
fn table_name(self) -> &'static str {
|
||||
match self {
|
||||
Self::Users => "access.users",
|
||||
Self::UserAdTags => "access.user_ad_tags",
|
||||
Self::UserMaxTcpConns => "access.user_max_tcp_conns",
|
||||
Self::UserExpirations => "access.user_expirations",
|
||||
Self::UserDataQuota => "access.user_data_quota",
|
||||
Self::UserMaxUniqueIps => "access.user_max_unique_ips",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn parse_if_match(headers: &hyper::HeaderMap) -> Option<String> {
|
||||
headers
|
||||
.get(IF_MATCH)
|
||||
.and_then(|value| value.to_str().ok())
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
.map(|value| value.trim_matches('"').to_string())
|
||||
}
|
||||
|
||||
pub(super) async fn ensure_expected_revision(
|
||||
config_path: &Path,
|
||||
expected_revision: Option<&str>,
|
||||
) -> Result<(), ApiFailure> {
|
||||
let Some(expected) = expected_revision else {
|
||||
return Ok(());
|
||||
};
|
||||
let current = current_revision(config_path).await?;
|
||||
if current != expected {
|
||||
return Err(ApiFailure::new(
|
||||
hyper::StatusCode::CONFLICT,
|
||||
"revision_conflict",
|
||||
"Config revision mismatch",
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(super) async fn current_revision(config_path: &Path) -> Result<String, ApiFailure> {
|
||||
let content = tokio::fs::read_to_string(config_path)
|
||||
.await
|
||||
.map_err(|e| ApiFailure::internal(format!("failed to read config: {}", e)))?;
|
||||
Ok(compute_revision(&content))
|
||||
}
|
||||
|
||||
pub(super) fn compute_revision(content: &str) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(content.as_bytes());
|
||||
hex::encode(hasher.finalize())
|
||||
}
|
||||
|
||||
pub(super) async fn load_config_from_disk(config_path: &Path) -> Result<ProxyConfig, ApiFailure> {
|
||||
let config_path = config_path.to_path_buf();
|
||||
tokio::task::spawn_blocking(move || ProxyConfig::load(config_path))
|
||||
.await
|
||||
.map_err(|e| ApiFailure::internal(format!("failed to join config loader: {}", e)))?
|
||||
.map_err(|e| ApiFailure::internal(format!("failed to load config: {}", e)))
|
||||
}
|
||||
|
||||
pub(super) async fn save_config_to_disk(
|
||||
config_path: &Path,
|
||||
cfg: &ProxyConfig,
|
||||
) -> Result<String, ApiFailure> {
|
||||
let serialized = toml::to_string_pretty(cfg)
|
||||
.map_err(|e| ApiFailure::internal(format!("failed to serialize config: {}", e)))?;
|
||||
write_atomic(config_path.to_path_buf(), serialized.clone()).await?;
|
||||
Ok(compute_revision(&serialized))
|
||||
}
|
||||
|
||||
pub(super) async fn save_access_sections_to_disk(
|
||||
config_path: &Path,
|
||||
cfg: &ProxyConfig,
|
||||
sections: &[AccessSection],
|
||||
) -> Result<String, ApiFailure> {
|
||||
let mut content = tokio::fs::read_to_string(config_path)
|
||||
.await
|
||||
.map_err(|e| ApiFailure::internal(format!("failed to read config: {}", e)))?;
|
||||
|
||||
let mut applied = Vec::new();
|
||||
for section in sections {
|
||||
if applied.contains(section) {
|
||||
continue;
|
||||
}
|
||||
let rendered = render_access_section(cfg, *section)?;
|
||||
content = upsert_toml_table(&content, section.table_name(), &rendered);
|
||||
applied.push(*section);
|
||||
}
|
||||
|
||||
write_atomic(config_path.to_path_buf(), content.clone()).await?;
|
||||
Ok(compute_revision(&content))
|
||||
}
|
||||
|
||||
fn render_access_section(cfg: &ProxyConfig, section: AccessSection) -> Result<String, ApiFailure> {
|
||||
let body = match section {
|
||||
AccessSection::Users => {
|
||||
let rows: BTreeMap<String, String> = cfg
|
||||
.access
|
||||
.users
|
||||
.iter()
|
||||
.map(|(key, value)| (key.clone(), value.clone()))
|
||||
.collect();
|
||||
serialize_table_body(&rows)?
|
||||
}
|
||||
AccessSection::UserAdTags => {
|
||||
let rows: BTreeMap<String, String> = cfg
|
||||
.access
|
||||
.user_ad_tags
|
||||
.iter()
|
||||
.map(|(key, value)| (key.clone(), value.clone()))
|
||||
.collect();
|
||||
serialize_table_body(&rows)?
|
||||
}
|
||||
AccessSection::UserMaxTcpConns => {
|
||||
let rows: BTreeMap<String, usize> = cfg
|
||||
.access
|
||||
.user_max_tcp_conns
|
||||
.iter()
|
||||
.map(|(key, value)| (key.clone(), *value))
|
||||
.collect();
|
||||
serialize_table_body(&rows)?
|
||||
}
|
||||
AccessSection::UserExpirations => {
|
||||
let rows: BTreeMap<String, DateTime<Utc>> = cfg
|
||||
.access
|
||||
.user_expirations
|
||||
.iter()
|
||||
.map(|(key, value)| (key.clone(), *value))
|
||||
.collect();
|
||||
serialize_table_body(&rows)?
|
||||
}
|
||||
AccessSection::UserDataQuota => {
|
||||
let rows: BTreeMap<String, u64> = cfg
|
||||
.access
|
||||
.user_data_quota
|
||||
.iter()
|
||||
.map(|(key, value)| (key.clone(), *value))
|
||||
.collect();
|
||||
serialize_table_body(&rows)?
|
||||
}
|
||||
AccessSection::UserMaxUniqueIps => {
|
||||
let rows: BTreeMap<String, usize> = cfg
|
||||
.access
|
||||
.user_max_unique_ips
|
||||
.iter()
|
||||
.map(|(key, value)| (key.clone(), *value))
|
||||
.collect();
|
||||
serialize_table_body(&rows)?
|
||||
}
|
||||
};
|
||||
|
||||
let mut out = format!("[{}]\n", section.table_name());
|
||||
if !body.is_empty() {
|
||||
out.push_str(&body);
|
||||
}
|
||||
if !out.ends_with('\n') {
|
||||
out.push('\n');
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
fn serialize_table_body<T: Serialize>(value: &T) -> Result<String, ApiFailure> {
|
||||
toml::to_string(value)
|
||||
.map_err(|e| ApiFailure::internal(format!("failed to serialize access section: {}", e)))
|
||||
}
|
||||
|
||||
/// Replaces the table `[table_name]` inside `source` with `replacement`, or
/// appends `replacement` (separated by one blank line) when the table is not
/// present.
///
/// `replacement` is expected to be a complete table: header line plus rows,
/// ending with a newline. Comments written between the rows of the replaced
/// table are not preserved; blank lines and comments that follow its last row
/// are.
fn upsert_toml_table(source: &str, table_name: &str, replacement: &str) -> String {
    if let Some((start, end)) = find_toml_table_bounds(source, table_name) {
        return [&source[..start], replacement, &source[end..]].concat();
    }

    let mut out = String::with_capacity(source.len() + replacement.len() + 2);
    out.push_str(source);
    if !out.is_empty() {
        if !out.ends_with('\n') {
            out.push('\n');
        }
        // One blank line between the existing content and the new table.
        out.push('\n');
    }
    out.push_str(replacement);
    out
}

/// Returns the name inside a standard table header line such as
/// `[access.users]` or `[access.users]  # comment`.
///
/// Returns `None` for array-of-tables headers (`[[...]]`), for lines that are
/// not headers, and for lines with anything other than a comment after `]`.
fn toml_table_header_name(line: &str) -> Option<&str> {
    let rest = line.trim().strip_prefix('[')?;
    if rest.starts_with('[') {
        return None;
    }
    let (name, tail) = rest.split_once(']')?;
    let tail = tail.trim_start();
    if tail.is_empty() || tail.starts_with('#') {
        Some(name.trim())
    } else {
        None
    }
}

/// Locates the byte range `(start, end)` of table `[table_name]` in `source`.
///
/// `start` is the offset of the header line. `end` is the offset just past the
/// last non-blank, non-comment line of the table, so blank lines and comments
/// sitting between this table and the next header (or the end of the file)
/// stay outside the range and survive a replacement.
///
/// This is a line scanner, not a TOML parser: any later line whose first
/// non-blank character is `[` is treated as the next header. A multi-line
/// array value with a row starting with `[` would therefore end the table
/// early.
fn find_toml_table_bounds(source: &str, table_name: &str) -> Option<(usize, usize)> {
    let mut offset = 0usize;
    let mut start: Option<usize> = None;
    let mut end = 0usize;

    for line in source.split_inclusive('\n') {
        let line_end = offset.saturating_add(line.len());
        let trimmed = line.trim();

        if let Some(start_offset) = start {
            if trimmed.starts_with('[') {
                return Some((start_offset, end));
            }
            if !trimmed.is_empty() && !trimmed.starts_with('#') {
                end = line_end;
            }
        } else if toml_table_header_name(line) == Some(table_name) {
            // Tolerates an inline comment after the header; an exact string
            // match would miss it and cause a duplicate table to be appended.
            start = Some(offset);
            end = line_end;
        }

        offset = line_end;
    }

    start.map(|start_offset| (start_offset, end))
}
|
||||
|
||||
async fn write_atomic(path: PathBuf, contents: String) -> Result<(), ApiFailure> {
|
||||
tokio::task::spawn_blocking(move || write_atomic_sync(&path, &contents))
|
||||
.await
|
||||
.map_err(|e| ApiFailure::internal(format!("failed to join writer: {}", e)))?
|
||||
.map_err(|e| ApiFailure::internal(format!("failed to write config: {}", e)))
|
||||
}
|
||||
|
||||
fn write_atomic_sync(path: &Path, contents: &str) -> std::io::Result<()> {
|
||||
let parent = path.parent().unwrap_or_else(|| Path::new("."));
|
||||
std::fs::create_dir_all(parent)?;
|
||||
|
||||
let tmp_name = format!(
|
||||
".{}.tmp-{}",
|
||||
path.file_name()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("config.toml"),
|
||||
rand::random::<u64>()
|
||||
);
|
||||
let tmp_path = parent.join(tmp_name);
|
||||
|
||||
let write_result = (|| {
|
||||
let mut file = std::fs::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true)
|
||||
.open(&tmp_path)?;
|
||||
file.write_all(contents.as_bytes())?;
|
||||
file.sync_all()?;
|
||||
std::fs::rename(&tmp_path, path)?;
|
||||
if let Ok(dir) = std::fs::File::open(parent) {
|
||||
let _ = dir.sync_all();
|
||||
}
|
||||
Ok(())
|
||||
})();
|
||||
|
||||
if write_result.is_err() {
|
||||
let _ = std::fs::remove_file(&tmp_path);
|
||||
}
|
||||
write_result
|
||||
}
|
||||
90
src/api/events.rs
Normal file
90
src/api/events.rs
Normal file
@@ -0,0 +1,90 @@
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::Mutex;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Clone, Serialize)]
|
||||
pub(super) struct ApiEventRecord {
|
||||
pub(super) seq: u64,
|
||||
pub(super) ts_epoch_secs: u64,
|
||||
pub(super) event_type: String,
|
||||
pub(super) context: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize)]
|
||||
pub(super) struct ApiEventSnapshot {
|
||||
pub(super) capacity: usize,
|
||||
pub(super) dropped_total: u64,
|
||||
pub(super) events: Vec<ApiEventRecord>,
|
||||
}
|
||||
|
||||
struct ApiEventsInner {
|
||||
capacity: usize,
|
||||
dropped_total: u64,
|
||||
next_seq: u64,
|
||||
events: VecDeque<ApiEventRecord>,
|
||||
}
|
||||
|
||||
/// Bounded ring-buffer for control-plane API/runtime events.
|
||||
pub(crate) struct ApiEventStore {
|
||||
inner: Mutex<ApiEventsInner>,
|
||||
}
|
||||
|
||||
impl ApiEventStore {
|
||||
pub(super) fn new(capacity: usize) -> Self {
|
||||
let bounded = capacity.max(16);
|
||||
Self {
|
||||
inner: Mutex::new(ApiEventsInner {
|
||||
capacity: bounded,
|
||||
dropped_total: 0,
|
||||
next_seq: 1,
|
||||
events: VecDeque::with_capacity(bounded),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn record(&self, event_type: &str, context: impl Into<String>) {
|
||||
let now_epoch_secs = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs();
|
||||
let mut context = context.into();
|
||||
if context.len() > 256 {
|
||||
context.truncate(256);
|
||||
}
|
||||
|
||||
let mut guard = self.inner.lock().expect("api event store mutex poisoned");
|
||||
if guard.events.len() == guard.capacity {
|
||||
guard.events.pop_front();
|
||||
guard.dropped_total = guard.dropped_total.saturating_add(1);
|
||||
}
|
||||
let seq = guard.next_seq;
|
||||
guard.next_seq = guard.next_seq.saturating_add(1);
|
||||
guard.events.push_back(ApiEventRecord {
|
||||
seq,
|
||||
ts_epoch_secs: now_epoch_secs,
|
||||
event_type: event_type.to_string(),
|
||||
context,
|
||||
});
|
||||
}
|
||||
|
||||
pub(super) fn snapshot(&self, limit: usize) -> ApiEventSnapshot {
|
||||
let guard = self.inner.lock().expect("api event store mutex poisoned");
|
||||
let bounded_limit = limit.clamp(1, guard.capacity.max(1));
|
||||
let mut items: Vec<ApiEventRecord> = guard
|
||||
.events
|
||||
.iter()
|
||||
.rev()
|
||||
.take(bounded_limit)
|
||||
.cloned()
|
||||
.collect();
|
||||
items.reverse();
|
||||
|
||||
ApiEventSnapshot {
|
||||
capacity: guard.capacity,
|
||||
dropped_total: guard.dropped_total,
|
||||
events: items,
|
||||
}
|
||||
}
|
||||
}
|
||||
91
src/api/http_utils.rs
Normal file
91
src/api/http_utils.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
use http_body_util::{BodyExt, Full};
|
||||
use hyper::StatusCode;
|
||||
use hyper::body::{Bytes, Incoming};
|
||||
use serde::Serialize;
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
use super::model::{ApiFailure, ErrorBody, ErrorResponse, SuccessResponse};
|
||||
|
||||
pub(super) fn success_response<T: Serialize>(
|
||||
status: StatusCode,
|
||||
data: T,
|
||||
revision: String,
|
||||
) -> hyper::Response<Full<Bytes>> {
|
||||
let payload = SuccessResponse {
|
||||
ok: true,
|
||||
data,
|
||||
revision,
|
||||
};
|
||||
let body = serde_json::to_vec(&payload).unwrap_or_else(|_| b"{\"ok\":false}".to_vec());
|
||||
hyper::Response::builder()
|
||||
.status(status)
|
||||
.header("content-type", "application/json; charset=utf-8")
|
||||
.body(Full::new(Bytes::from(body)))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub(super) fn error_response(
|
||||
request_id: u64,
|
||||
failure: ApiFailure,
|
||||
) -> hyper::Response<Full<Bytes>> {
|
||||
let payload = ErrorResponse {
|
||||
ok: false,
|
||||
error: ErrorBody {
|
||||
code: failure.code,
|
||||
message: failure.message,
|
||||
},
|
||||
request_id,
|
||||
};
|
||||
let body = serde_json::to_vec(&payload).unwrap_or_else(|_| {
|
||||
format!(
|
||||
"{{\"ok\":false,\"error\":{{\"code\":\"internal_error\",\"message\":\"serialization failed\"}},\"request_id\":{}}}",
|
||||
request_id
|
||||
)
|
||||
.into_bytes()
|
||||
});
|
||||
hyper::Response::builder()
|
||||
.status(failure.status)
|
||||
.header("content-type", "application/json; charset=utf-8")
|
||||
.body(Full::new(Bytes::from(body)))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub(super) async fn read_json<T: DeserializeOwned>(
|
||||
body: Incoming,
|
||||
limit: usize,
|
||||
) -> Result<T, ApiFailure> {
|
||||
let bytes = read_body_with_limit(body, limit).await?;
|
||||
serde_json::from_slice(&bytes).map_err(|_| ApiFailure::bad_request("Invalid JSON body"))
|
||||
}
|
||||
|
||||
pub(super) async fn read_optional_json<T: DeserializeOwned>(
|
||||
body: Incoming,
|
||||
limit: usize,
|
||||
) -> Result<Option<T>, ApiFailure> {
|
||||
let bytes = read_body_with_limit(body, limit).await?;
|
||||
if bytes.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
serde_json::from_slice(&bytes)
|
||||
.map(Some)
|
||||
.map_err(|_| ApiFailure::bad_request("Invalid JSON body"))
|
||||
}
|
||||
|
||||
async fn read_body_with_limit(body: Incoming, limit: usize) -> Result<Vec<u8>, ApiFailure> {
|
||||
let mut collected = Vec::new();
|
||||
let mut body = body;
|
||||
while let Some(frame_result) = body.frame().await {
|
||||
let frame = frame_result.map_err(|_| ApiFailure::bad_request("Invalid request body"))?;
|
||||
if let Some(chunk) = frame.data_ref() {
|
||||
if collected.len().saturating_add(chunk.len()) > limit {
|
||||
return Err(ApiFailure::new(
|
||||
StatusCode::PAYLOAD_TOO_LARGE,
|
||||
"payload_too_large",
|
||||
format!("Body exceeds {} bytes", limit),
|
||||
));
|
||||
}
|
||||
collected.extend_from_slice(chunk);
|
||||
}
|
||||
}
|
||||
Ok(collected)
|
||||
}
|
||||
554
src/api/mod.rs
Normal file
554
src/api/mod.rs
Normal file
@@ -0,0 +1,554 @@
|
||||
use std::convert::Infallible;
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||
|
||||
use http_body_util::Full;
|
||||
use hyper::body::{Bytes, Incoming};
|
||||
use hyper::header::AUTHORIZATION;
|
||||
use hyper::server::conn::http1;
|
||||
use hyper::service::service_fn;
|
||||
use hyper::{Method, Request, Response, StatusCode};
|
||||
use tokio::net::TcpListener;
|
||||
use tokio::sync::{Mutex, RwLock, watch};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::ip_tracker::UserIpTracker;
|
||||
use crate::proxy::route_mode::RouteRuntimeController;
|
||||
use crate::startup::StartupTracker;
|
||||
use crate::stats::Stats;
|
||||
use crate::transport::middle_proxy::MePool;
|
||||
use crate::transport::UpstreamManager;
|
||||
|
||||
mod config_store;
|
||||
mod events;
|
||||
mod http_utils;
|
||||
mod model;
|
||||
mod runtime_edge;
|
||||
mod runtime_init;
|
||||
mod runtime_min;
|
||||
mod runtime_selftest;
|
||||
mod runtime_stats;
|
||||
mod runtime_watch;
|
||||
mod runtime_zero;
|
||||
mod users;
|
||||
|
||||
use config_store::{current_revision, parse_if_match};
|
||||
use http_utils::{error_response, read_json, read_optional_json, success_response};
|
||||
use events::ApiEventStore;
|
||||
use model::{
|
||||
ApiFailure, CreateUserRequest, HealthData, PatchUserRequest, RotateSecretRequest, SummaryData,
|
||||
};
|
||||
use runtime_edge::{
|
||||
EdgeConnectionsCacheEntry, build_runtime_connections_summary_data,
|
||||
build_runtime_events_recent_data,
|
||||
};
|
||||
use runtime_init::build_runtime_initialization_data;
|
||||
use runtime_min::{
|
||||
build_runtime_me_pool_state_data, build_runtime_me_quality_data, build_runtime_nat_stun_data,
|
||||
build_runtime_upstream_quality_data, build_security_whitelist_data,
|
||||
};
|
||||
use runtime_selftest::build_runtime_me_selftest_data;
|
||||
use runtime_stats::{
|
||||
MinimalCacheEntry, build_dcs_data, build_me_writers_data, build_minimal_all_data,
|
||||
build_upstreams_data, build_zero_all_data,
|
||||
};
|
||||
use runtime_zero::{
|
||||
build_limits_effective_data, build_runtime_gates_data, build_security_posture_data,
|
||||
build_system_info_data,
|
||||
};
|
||||
use runtime_watch::spawn_runtime_watchers;
|
||||
use users::{create_user, delete_user, patch_user, rotate_secret, users_from_config};
|
||||
|
||||
/// Process-level counters and flags shared between the API handlers and the
/// runtime watcher tasks.
pub(super) struct ApiRuntimeState {
    /// Process start time as passed to `serve` (epoch seconds).
    pub(super) process_started_at_epoch_secs: u64,
    /// Starts at 0 in `serve`; presumably bumped by the config watcher — confirm.
    pub(super) config_reload_count: AtomicU64,
    /// Starts at 0 in `serve`; presumably the time of the last reload — confirm.
    pub(super) last_config_reload_epoch_secs: AtomicU64,
    /// Seeded in `serve` from the current value of the admission watch channel.
    pub(super) admission_open: AtomicBool,
}
|
||||
|
||||
/// Handles and caches shared by every API request.
///
/// Built once in `serve` and passed to handlers behind an `Arc`.
#[derive(Clone)]
pub(super) struct ApiShared {
    pub(super) stats: Arc<Stats>,
    pub(super) ip_tracker: Arc<UserIpTracker>,
    /// `None` inside the lock means no middle-proxy pool is currently set.
    pub(super) me_pool: Arc<RwLock<Option<Arc<MePool>>>>,
    pub(super) upstream_manager: Arc<UpstreamManager>,
    /// Path handed to `current_revision` to derive the revision sent in responses.
    pub(super) config_path: PathBuf,
    /// Latest detected link addresses; handlers read it as an (IPv4, IPv6) pair.
    pub(super) detected_ips_rx: watch::Receiver<(Option<IpAddr>, Option<IpAddr>)>,
    /// NOTE(review): presumably serializes config-mutating requests — confirm in `users`.
    pub(super) mutation_lock: Arc<Mutex<()>>,
    pub(super) minimal_cache: Arc<Mutex<Option<MinimalCacheEntry>>>,
    pub(super) runtime_edge_connections_cache: Arc<Mutex<Option<EdgeConnectionsCacheEntry>>>,
    pub(super) runtime_edge_recompute_lock: Arc<Mutex<()>>,
    /// Bounded event log written by the user-mutation handlers.
    pub(super) runtime_events: Arc<ApiEventStore>,
    /// Source of per-request ids; starts at 1.
    pub(super) request_id: Arc<AtomicU64>,
    pub(super) runtime_state: Arc<ApiRuntimeState>,
    pub(super) startup_tracker: Arc<StartupTracker>,
    pub(super) route_runtime: Arc<RouteRuntimeController>,
}
|
||||
|
||||
impl ApiShared {
|
||||
fn next_request_id(&self) -> u64 {
|
||||
self.request_id.fetch_add(1, Ordering::Relaxed)
|
||||
}
|
||||
|
||||
fn detected_link_ips(&self) -> (Option<IpAddr>, Option<IpAddr>) {
|
||||
*self.detected_ips_rx.borrow()
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn serve(
|
||||
listen: SocketAddr,
|
||||
stats: Arc<Stats>,
|
||||
ip_tracker: Arc<UserIpTracker>,
|
||||
me_pool: Arc<RwLock<Option<Arc<MePool>>>>,
|
||||
route_runtime: Arc<RouteRuntimeController>,
|
||||
upstream_manager: Arc<UpstreamManager>,
|
||||
config_rx: watch::Receiver<Arc<ProxyConfig>>,
|
||||
admission_rx: watch::Receiver<bool>,
|
||||
config_path: PathBuf,
|
||||
detected_ips_rx: watch::Receiver<(Option<IpAddr>, Option<IpAddr>)>,
|
||||
process_started_at_epoch_secs: u64,
|
||||
startup_tracker: Arc<StartupTracker>,
|
||||
) {
|
||||
let listener = match TcpListener::bind(listen).await {
|
||||
Ok(listener) => listener,
|
||||
Err(error) => {
|
||||
warn!(
|
||||
error = %error,
|
||||
listen = %listen,
|
||||
"Failed to bind API listener"
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
info!("API endpoint: http://{}/v1/*", listen);
|
||||
|
||||
let runtime_state = Arc::new(ApiRuntimeState {
|
||||
process_started_at_epoch_secs,
|
||||
config_reload_count: AtomicU64::new(0),
|
||||
last_config_reload_epoch_secs: AtomicU64::new(0),
|
||||
admission_open: AtomicBool::new(*admission_rx.borrow()),
|
||||
});
|
||||
|
||||
let shared = Arc::new(ApiShared {
|
||||
stats,
|
||||
ip_tracker,
|
||||
me_pool,
|
||||
upstream_manager,
|
||||
config_path,
|
||||
detected_ips_rx,
|
||||
mutation_lock: Arc::new(Mutex::new(())),
|
||||
minimal_cache: Arc::new(Mutex::new(None)),
|
||||
runtime_edge_connections_cache: Arc::new(Mutex::new(None)),
|
||||
runtime_edge_recompute_lock: Arc::new(Mutex::new(())),
|
||||
runtime_events: Arc::new(ApiEventStore::new(
|
||||
config_rx.borrow().server.api.runtime_edge_events_capacity,
|
||||
)),
|
||||
request_id: Arc::new(AtomicU64::new(1)),
|
||||
runtime_state: runtime_state.clone(),
|
||||
startup_tracker,
|
||||
route_runtime,
|
||||
});
|
||||
|
||||
spawn_runtime_watchers(
|
||||
config_rx.clone(),
|
||||
admission_rx.clone(),
|
||||
runtime_state.clone(),
|
||||
shared.runtime_events.clone(),
|
||||
);
|
||||
|
||||
loop {
|
||||
let (stream, peer) = match listener.accept().await {
|
||||
Ok(v) => v,
|
||||
Err(error) => {
|
||||
warn!(error = %error, "API accept error");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let shared_conn = shared.clone();
|
||||
let config_rx_conn = config_rx.clone();
|
||||
tokio::spawn(async move {
|
||||
let svc = service_fn(move |req: Request<Incoming>| {
|
||||
let shared_req = shared_conn.clone();
|
||||
let config_rx_req = config_rx_conn.clone();
|
||||
async move { handle(req, peer, shared_req, config_rx_req).await }
|
||||
});
|
||||
if let Err(error) = http1::Builder::new()
|
||||
.serve_connection(hyper_util::rt::TokioIo::new(stream), svc)
|
||||
.await
|
||||
{
|
||||
debug!(error = %error, "API connection error");
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle(
|
||||
req: Request<Incoming>,
|
||||
peer: SocketAddr,
|
||||
shared: Arc<ApiShared>,
|
||||
config_rx: watch::Receiver<Arc<ProxyConfig>>,
|
||||
) -> Result<Response<Full<Bytes>>, Infallible> {
|
||||
let request_id = shared.next_request_id();
|
||||
let cfg = config_rx.borrow().clone();
|
||||
let api_cfg = &cfg.server.api;
|
||||
|
||||
if !api_cfg.enabled {
|
||||
return Ok(error_response(
|
||||
request_id,
|
||||
ApiFailure::new(
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
"api_disabled",
|
||||
"API is disabled",
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
if !api_cfg.whitelist.is_empty()
|
||||
&& !api_cfg
|
||||
.whitelist
|
||||
.iter()
|
||||
.any(|net| net.contains(peer.ip()))
|
||||
{
|
||||
return Ok(error_response(
|
||||
request_id,
|
||||
ApiFailure::new(StatusCode::FORBIDDEN, "forbidden", "Source IP is not allowed"),
|
||||
));
|
||||
}
|
||||
|
||||
if !api_cfg.auth_header.is_empty() {
|
||||
let auth_ok = req
|
||||
.headers()
|
||||
.get(AUTHORIZATION)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(|v| v == api_cfg.auth_header)
|
||||
.unwrap_or(false);
|
||||
if !auth_ok {
|
||||
return Ok(error_response(
|
||||
request_id,
|
||||
ApiFailure::new(
|
||||
StatusCode::UNAUTHORIZED,
|
||||
"unauthorized",
|
||||
"Missing or invalid Authorization header",
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let method = req.method().clone();
|
||||
let path = req.uri().path().to_string();
|
||||
let query = req.uri().query().map(str::to_string);
|
||||
let body_limit = api_cfg.request_body_limit_bytes;
|
||||
|
||||
let result: Result<Response<Full<Bytes>>, ApiFailure> = async {
|
||||
match (method.as_str(), path.as_str()) {
|
||||
("GET", "/v1/health") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = HealthData {
|
||||
status: "ok",
|
||||
read_only: api_cfg.read_only,
|
||||
};
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/system/info") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_system_info_data(shared.as_ref(), cfg.as_ref(), &revision);
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/runtime/gates") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_runtime_gates_data(shared.as_ref(), cfg.as_ref()).await;
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/runtime/initialization") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_runtime_initialization_data(shared.as_ref()).await;
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/limits/effective") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_limits_effective_data(cfg.as_ref());
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/security/posture") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_security_posture_data(cfg.as_ref());
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/security/whitelist") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_security_whitelist_data(cfg.as_ref());
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/stats/summary") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = SummaryData {
|
||||
uptime_seconds: shared.stats.uptime_secs(),
|
||||
connections_total: shared.stats.get_connects_all(),
|
||||
connections_bad_total: shared.stats.get_connects_bad(),
|
||||
handshake_timeouts_total: shared.stats.get_handshake_timeouts(),
|
||||
configured_users: cfg.access.users.len(),
|
||||
};
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/stats/zero/all") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_zero_all_data(&shared.stats, cfg.access.users.len());
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/stats/upstreams") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_upstreams_data(shared.as_ref(), api_cfg);
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/stats/minimal/all") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_minimal_all_data(shared.as_ref(), api_cfg).await;
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/stats/me-writers") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_me_writers_data(shared.as_ref(), api_cfg).await;
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/stats/dcs") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_dcs_data(shared.as_ref(), api_cfg).await;
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/runtime/me_pool_state") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_runtime_me_pool_state_data(shared.as_ref()).await;
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/runtime/me_quality") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_runtime_me_quality_data(shared.as_ref()).await;
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/runtime/upstream_quality") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_runtime_upstream_quality_data(shared.as_ref()).await;
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/runtime/nat_stun") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_runtime_nat_stun_data(shared.as_ref()).await;
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/runtime/me-selftest") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_runtime_me_selftest_data(shared.as_ref(), cfg.as_ref()).await;
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/runtime/connections/summary") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_runtime_connections_summary_data(shared.as_ref(), cfg.as_ref()).await;
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/runtime/events/recent") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let data = build_runtime_events_recent_data(
|
||||
shared.as_ref(),
|
||||
cfg.as_ref(),
|
||||
query.as_deref(),
|
||||
);
|
||||
Ok(success_response(StatusCode::OK, data, revision))
|
||||
}
|
||||
("GET", "/v1/stats/users") | ("GET", "/v1/users") => {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let (detected_ip_v4, detected_ip_v6) = shared.detected_link_ips();
|
||||
let users = users_from_config(
|
||||
&cfg,
|
||||
&shared.stats,
|
||||
&shared.ip_tracker,
|
||||
detected_ip_v4,
|
||||
detected_ip_v6,
|
||||
)
|
||||
.await;
|
||||
Ok(success_response(StatusCode::OK, users, revision))
|
||||
}
|
||||
("POST", "/v1/users") => {
|
||||
if api_cfg.read_only {
|
||||
return Ok(error_response(
|
||||
request_id,
|
||||
ApiFailure::new(
|
||||
StatusCode::FORBIDDEN,
|
||||
"read_only",
|
||||
"API runs in read-only mode",
|
||||
),
|
||||
));
|
||||
}
|
||||
let expected_revision = parse_if_match(req.headers());
|
||||
let body = read_json::<CreateUserRequest>(req.into_body(), body_limit).await?;
|
||||
let result = create_user(body, expected_revision, &shared).await;
|
||||
let (data, revision) = match result {
|
||||
Ok(ok) => ok,
|
||||
Err(error) => {
|
||||
shared.runtime_events.record("api.user.create.failed", error.code);
|
||||
return Err(error);
|
||||
}
|
||||
};
|
||||
shared
|
||||
.runtime_events
|
||||
.record("api.user.create.ok", format!("username={}", data.user.username));
|
||||
Ok(success_response(StatusCode::CREATED, data, revision))
|
||||
}
|
||||
_ => {
|
||||
if let Some(user) = path.strip_prefix("/v1/users/")
|
||||
&& !user.is_empty()
|
||||
&& !user.contains('/')
|
||||
{
|
||||
if method == Method::GET {
|
||||
let revision = current_revision(&shared.config_path).await?;
|
||||
let (detected_ip_v4, detected_ip_v6) = shared.detected_link_ips();
|
||||
let users = users_from_config(
|
||||
&cfg,
|
||||
&shared.stats,
|
||||
&shared.ip_tracker,
|
||||
detected_ip_v4,
|
||||
detected_ip_v6,
|
||||
)
|
||||
.await;
|
||||
if let Some(user_info) = users.into_iter().find(|entry| entry.username == user)
|
||||
{
|
||||
return Ok(success_response(StatusCode::OK, user_info, revision));
|
||||
}
|
||||
return Ok(error_response(
|
||||
request_id,
|
||||
ApiFailure::new(StatusCode::NOT_FOUND, "not_found", "User not found"),
|
||||
));
|
||||
}
|
||||
if method == Method::PATCH {
|
||||
if api_cfg.read_only {
|
||||
return Ok(error_response(
|
||||
request_id,
|
||||
ApiFailure::new(
|
||||
StatusCode::FORBIDDEN,
|
||||
"read_only",
|
||||
"API runs in read-only mode",
|
||||
),
|
||||
));
|
||||
}
|
||||
let expected_revision = parse_if_match(req.headers());
|
||||
let body = read_json::<PatchUserRequest>(req.into_body(), body_limit).await?;
|
||||
let result = patch_user(user, body, expected_revision, &shared).await;
|
||||
let (data, revision) = match result {
|
||||
Ok(ok) => ok,
|
||||
Err(error) => {
|
||||
shared.runtime_events.record(
|
||||
"api.user.patch.failed",
|
||||
format!("username={} code={}", user, error.code),
|
||||
);
|
||||
return Err(error);
|
||||
}
|
||||
};
|
||||
shared
|
||||
.runtime_events
|
||||
.record("api.user.patch.ok", format!("username={}", data.username));
|
||||
return Ok(success_response(StatusCode::OK, data, revision));
|
||||
}
|
||||
if method == Method::DELETE {
|
||||
if api_cfg.read_only {
|
||||
return Ok(error_response(
|
||||
request_id,
|
||||
ApiFailure::new(
|
||||
StatusCode::FORBIDDEN,
|
||||
"read_only",
|
||||
"API runs in read-only mode",
|
||||
),
|
||||
));
|
||||
}
|
||||
let expected_revision = parse_if_match(req.headers());
|
||||
let result = delete_user(user, expected_revision, &shared).await;
|
||||
let (deleted_user, revision) = match result {
|
||||
Ok(ok) => ok,
|
||||
Err(error) => {
|
||||
shared.runtime_events.record(
|
||||
"api.user.delete.failed",
|
||||
format!("username={} code={}", user, error.code),
|
||||
);
|
||||
return Err(error);
|
||||
}
|
||||
};
|
||||
shared.runtime_events.record(
|
||||
"api.user.delete.ok",
|
||||
format!("username={}", deleted_user),
|
||||
);
|
||||
return Ok(success_response(StatusCode::OK, deleted_user, revision));
|
||||
}
|
||||
if method == Method::POST
|
||||
&& let Some(base_user) = user.strip_suffix("/rotate-secret")
|
||||
&& !base_user.is_empty()
|
||||
&& !base_user.contains('/')
|
||||
{
|
||||
if api_cfg.read_only {
|
||||
return Ok(error_response(
|
||||
request_id,
|
||||
ApiFailure::new(
|
||||
StatusCode::FORBIDDEN,
|
||||
"read_only",
|
||||
"API runs in read-only mode",
|
||||
),
|
||||
));
|
||||
}
|
||||
let expected_revision = parse_if_match(req.headers());
|
||||
let body =
|
||||
read_optional_json::<RotateSecretRequest>(req.into_body(), body_limit)
|
||||
.await?;
|
||||
let result = rotate_secret(
|
||||
base_user,
|
||||
body.unwrap_or_default(),
|
||||
expected_revision,
|
||||
&shared,
|
||||
)
|
||||
.await;
|
||||
let (data, revision) = match result {
|
||||
Ok(ok) => ok,
|
||||
Err(error) => {
|
||||
shared.runtime_events.record(
|
||||
"api.user.rotate_secret.failed",
|
||||
format!("username={} code={}", base_user, error.code),
|
||||
);
|
||||
return Err(error);
|
||||
}
|
||||
};
|
||||
shared.runtime_events.record(
|
||||
"api.user.rotate_secret.ok",
|
||||
format!("username={}", base_user),
|
||||
);
|
||||
return Ok(success_response(StatusCode::OK, data, revision));
|
||||
}
|
||||
if method == Method::POST {
|
||||
return Ok(error_response(
|
||||
request_id,
|
||||
ApiFailure::new(StatusCode::NOT_FOUND, "not_found", "Route not found"),
|
||||
));
|
||||
}
|
||||
return Ok(error_response(
|
||||
request_id,
|
||||
ApiFailure::new(
|
||||
StatusCode::METHOD_NOT_ALLOWED,
|
||||
"method_not_allowed",
|
||||
"Unsupported HTTP method for this route",
|
||||
),
|
||||
));
|
||||
}
|
||||
Ok(error_response(
|
||||
request_id,
|
||||
ApiFailure::new(StatusCode::NOT_FOUND, "not_found", "Route not found"),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(resp) => Ok(resp),
|
||||
Err(error) => Ok(error_response(request_id, error)),
|
||||
}
|
||||
}
|
||||
477
src/api/model.rs
Normal file
477
src/api/model.rs
Normal file
@@ -0,0 +1,477 @@
|
||||
use std::net::IpAddr;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use hyper::StatusCode;
|
||||
use rand::Rng;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
const MAX_USERNAME_LEN: usize = 64;
|
||||
|
||||
/// An API error: the HTTP status to send plus a code and message for the
/// JSON error body.
#[derive(Debug)]
pub(super) struct ApiFailure {
    /// HTTP status used for the response.
    pub(super) status: StatusCode,
    /// Stable identifier such as `"bad_request"` or `"internal_error"`.
    pub(super) code: &'static str,
    /// Free-form description of the failure.
    pub(super) message: String,
}
|
||||
|
||||
impl ApiFailure {
|
||||
pub(super) fn new(status: StatusCode, code: &'static str, message: impl Into<String>) -> Self {
|
||||
Self {
|
||||
status,
|
||||
code,
|
||||
message: message.into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn internal(message: impl Into<String>) -> Self {
|
||||
Self::new(StatusCode::INTERNAL_SERVER_ERROR, "internal_error", message)
|
||||
}
|
||||
|
||||
pub(super) fn bad_request(message: impl Into<String>) -> Self {
|
||||
Self::new(StatusCode::BAD_REQUEST, "bad_request", message)
|
||||
}
|
||||
}
|
||||
|
||||
/// The `error` object nested inside an error response.
#[derive(Serialize)]
pub(super) struct ErrorBody {
    pub(super) code: &'static str,
    pub(super) message: String,
}
|
||||
|
||||
/// JSON envelope for failed requests.
#[derive(Serialize)]
pub(super) struct ErrorResponse {
    pub(super) ok: bool,
    pub(super) error: ErrorBody,
    /// Id of the request that produced this error.
    pub(super) request_id: u64,
}
|
||||
|
||||
/// JSON envelope for successful requests, generic over the payload type.
#[derive(Serialize)]
pub(super) struct SuccessResponse<T> {
    pub(super) ok: bool,
    pub(super) data: T,
    /// Config revision string attached to every success response.
    pub(super) revision: String,
}
|
||||
|
||||
/// Payload of `GET /v1/health`.
#[derive(Serialize)]
pub(super) struct HealthData {
    pub(super) status: &'static str,
    /// Mirrors the API's read-only configuration flag.
    pub(super) read_only: bool,
}
|
||||
|
||||
/// Payload of `GET /v1/stats/summary`.
#[derive(Serialize)]
pub(super) struct SummaryData {
    pub(super) uptime_seconds: f64,
    pub(super) connections_total: u64,
    pub(super) connections_bad_total: u64,
    pub(super) handshake_timeouts_total: u64,
    /// Number of users in the active configuration.
    pub(super) configured_users: usize,
}
|
||||
|
||||
/// A numeric code paired with how many times it was counted.
#[derive(Serialize, Clone)]
pub(super) struct ZeroCodeCount {
    pub(super) code: i32,
    pub(super) total: u64,
}
|
||||
|
||||
/// Core section of the "zero" statistics report: the summary counters plus
/// telemetry settings.
#[derive(Serialize, Clone)]
pub(super) struct ZeroCoreData {
    pub(super) uptime_seconds: f64,
    pub(super) connections_total: u64,
    pub(super) connections_bad_total: u64,
    pub(super) handshake_timeouts_total: u64,
    pub(super) configured_users: usize,
    pub(super) telemetry_core_enabled: bool,
    pub(super) telemetry_user_enabled: bool,
    pub(super) telemetry_me_level: String,
}
|
||||
|
||||
/// Upstream connect counters, including attempt-count and duration
/// histograms split by outcome.
#[derive(Serialize, Clone)]
pub(super) struct ZeroUpstreamData {
    pub(super) connect_attempt_total: u64,
    pub(super) connect_success_total: u64,
    pub(super) connect_fail_total: u64,
    pub(super) connect_failfast_hard_error_total: u64,
    // Histogram of attempts needed per connect (bucket bounds per field name).
    pub(super) connect_attempts_bucket_1: u64,
    pub(super) connect_attempts_bucket_2: u64,
    pub(super) connect_attempts_bucket_3_4: u64,
    pub(super) connect_attempts_bucket_gt_4: u64,
    // Duration histogram for successful connects.
    pub(super) connect_duration_success_bucket_le_100ms: u64,
    pub(super) connect_duration_success_bucket_101_500ms: u64,
    pub(super) connect_duration_success_bucket_501_1000ms: u64,
    pub(super) connect_duration_success_bucket_gt_1000ms: u64,
    // Duration histogram for failed connects.
    pub(super) connect_duration_fail_bucket_le_100ms: u64,
    pub(super) connect_duration_fail_bucket_101_500ms: u64,
    pub(super) connect_duration_fail_bucket_501_1000ms: u64,
    pub(super) connect_duration_fail_bucket_gt_1000ms: u64,
}
|
||||
|
||||
/// Per-DC entry nested inside an `UpstreamStatus`.
#[derive(Serialize, Clone)]
pub(super) struct UpstreamDcStatus {
    pub(super) dc: i16,
    /// `None` when no latency value is available.
    pub(super) latency_ema_ms: Option<f64>,
    pub(super) ip_preference: &'static str,
}
|
||||
|
||||
/// Status of one configured upstream, with its per-DC breakdown.
#[derive(Serialize, Clone)]
pub(super) struct UpstreamStatus {
    pub(super) upstream_id: usize,
    pub(super) route_kind: &'static str,
    pub(super) address: String,
    pub(super) weight: u16,
    pub(super) scopes: String,
    pub(super) healthy: bool,
    pub(super) fails: u32,
    pub(super) last_check_age_secs: u64,
    /// `None` when no latency value is available.
    pub(super) effective_latency_ms: Option<f64>,
    pub(super) dc: Vec<UpstreamDcStatus>,
}
|
||||
|
||||
/// Aggregate upstream counts by health and by route kind.
#[derive(Serialize, Clone)]
pub(super) struct UpstreamSummaryData {
    pub(super) configured_total: usize,
    pub(super) healthy_total: usize,
    pub(super) unhealthy_total: usize,
    pub(super) direct_total: usize,
    pub(super) socks4_total: usize,
    pub(super) socks5_total: usize,
}
|
||||
|
||||
/// Upstream report: always carries the `zero` counters; the optional
/// sections are left out of the JSON when they are `None`.
#[derive(Serialize, Clone)]
pub(super) struct UpstreamsData {
    pub(super) enabled: bool,
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) reason: Option<&'static str>,
    pub(super) generated_at_epoch_secs: u64,
    pub(super) zero: ZeroUpstreamData,
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) summary: Option<UpstreamSummaryData>,
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(super) upstreams: Option<Vec<UpstreamStatus>>,
}
|
||||
|
||||
/// Middle-proxy section of the "zero" statistics report.
///
/// All fields are monotonic-looking totals named after the event they count;
/// NOTE(review): exact semantics live with the producer in `runtime_stats` /
/// `Stats` — confirm there before relying on a field.
#[derive(Serialize, Clone)]
pub(super) struct ZeroMiddleProxyData {
    // Keepalive traffic.
    pub(super) keepalive_sent_total: u64,
    pub(super) keepalive_failed_total: u64,
    pub(super) keepalive_pong_total: u64,
    pub(super) keepalive_timeout_total: u64,
    // RPC proxy-request signalling.
    pub(super) rpc_proxy_req_signal_sent_total: u64,
    pub(super) rpc_proxy_req_signal_failed_total: u64,
    pub(super) rpc_proxy_req_signal_skipped_no_meta_total: u64,
    pub(super) rpc_proxy_req_signal_response_total: u64,
    pub(super) rpc_proxy_req_signal_close_sent_total: u64,
    // Reconnects and handshakes.
    pub(super) reconnect_attempt_total: u64,
    pub(super) reconnect_success_total: u64,
    pub(super) handshake_reject_total: u64,
    pub(super) handshake_error_codes: Vec<ZeroCodeCount>,
    pub(super) reader_eof_total: u64,
    pub(super) idle_close_by_peer_total: u64,
    // Routing drops.
    pub(super) route_drop_no_conn_total: u64,
    pub(super) route_drop_channel_closed_total: u64,
    pub(super) route_drop_queue_full_total: u64,
    pub(super) route_drop_queue_full_base_total: u64,
    pub(super) route_drop_queue_full_high_total: u64,
    // KDF, quarantine and hardswap counters.
    pub(super) socks_kdf_strict_reject_total: u64,
    pub(super) socks_kdf_compat_fallback_total: u64,
    pub(super) endpoint_quarantine_total: u64,
    pub(super) kdf_drift_total: u64,
    pub(super) kdf_port_only_drift_total: u64,
    pub(super) hardswap_pending_reuse_total: u64,
    pub(super) hardswap_pending_ttl_expired_total: u64,
    // Single-endpoint outage handling.
    pub(super) single_endpoint_outage_enter_total: u64,
    pub(super) single_endpoint_outage_exit_total: u64,
    pub(super) single_endpoint_outage_reconnect_attempt_total: u64,
    pub(super) single_endpoint_outage_reconnect_success_total: u64,
    pub(super) single_endpoint_quarantine_bypass_total: u64,
    pub(super) single_endpoint_shadow_rotate_total: u64,
    pub(super) single_endpoint_shadow_rotate_skipped_quarantine_total: u64,
    // Floor-mode switches.
    pub(super) floor_mode_switch_total: u64,
    pub(super) floor_mode_switch_static_to_adaptive_total: u64,
    pub(super) floor_mode_switch_adaptive_to_static_total: u64,
}
|
||||
|
||||
/// Pool section of the "zero" statistics report (swap, writer and refill
/// counters).
#[derive(Serialize, Clone)]
pub(super) struct ZeroPoolData {
    pub(super) pool_swap_total: u64,
    pub(super) pool_drain_active: u64,
    pub(super) pool_force_close_total: u64,
    pub(super) pool_stale_pick_total: u64,
    pub(super) writer_removed_total: u64,
    pub(super) writer_removed_unexpected_total: u64,
    pub(super) refill_triggered_total: u64,
    pub(super) refill_skipped_inflight_total: u64,
    pub(super) refill_failed_total: u64,
    pub(super) writer_restored_same_endpoint_total: u64,
    pub(super) writer_restored_fallback_total: u64,
}
|
||||
|
||||
/// Desync section of the "zero" statistics report, including a frame-count
/// histogram.
#[derive(Serialize, Clone)]
pub(super) struct ZeroDesyncData {
    pub(super) secure_padding_invalid_total: u64,
    pub(super) desync_total: u64,
    pub(super) desync_full_logged_total: u64,
    pub(super) desync_suppressed_total: u64,
    // Histogram buckets (bounds per field name).
    pub(super) desync_frames_bucket_0: u64,
    pub(super) desync_frames_bucket_1_2: u64,
    pub(super) desync_frames_bucket_3_10: u64,
    pub(super) desync_frames_bucket_gt_10: u64,
}
|
||||
|
||||
/// Full "zero" statistics report combining every section with a generation
/// timestamp.
#[derive(Serialize, Clone)]
pub(super) struct ZeroAllData {
    pub(super) generated_at_epoch_secs: u64,
    pub(super) core: ZeroCoreData,
    pub(super) upstream: ZeroUpstreamData,
    pub(super) middle_proxy: ZeroMiddleProxyData,
    pub(super) pool: ZeroPoolData,
    pub(super) desync: ZeroDesyncData,
}
|
||||
|
||||
/// Aggregate figures that accompany the per-writer list in `MeWritersData`.
#[derive(Serialize, Clone)]
pub(super) struct MeWritersSummary {
    pub(super) configured_dc_groups: usize,
    pub(super) configured_endpoints: usize,
    pub(super) available_endpoints: usize,
    pub(super) available_pct: f64,
    pub(super) required_writers: usize,
    pub(super) alive_writers: usize,
    pub(super) coverage_pct: f64,
}
|
||||
|
||||
/// Status of a single middle-proxy writer.
#[derive(Serialize, Clone)]
pub(super) struct MeWriterStatus {
    pub(super) writer_id: u64,
    /// `None` when no DC is associated with the writer.
    pub(super) dc: Option<i16>,
    pub(super) endpoint: String,
    pub(super) generation: u64,
    pub(super) state: &'static str,
    pub(super) draining: bool,
    pub(super) degraded: bool,
    pub(super) bound_clients: usize,
    pub(super) idle_for_secs: Option<u64>,
    pub(super) rtt_ema_ms: Option<f64>,
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub(super) struct MeWritersData {
|
||||
pub(super) middle_proxy_enabled: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) reason: Option<&'static str>,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
pub(super) summary: MeWritersSummary,
|
||||
pub(super) writers: Vec<MeWriterStatus>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub(super) struct DcStatus {
|
||||
pub(super) dc: i16,
|
||||
pub(super) endpoints: Vec<String>,
|
||||
pub(super) endpoint_writers: Vec<DcEndpointWriters>,
|
||||
pub(super) available_endpoints: usize,
|
||||
pub(super) available_pct: f64,
|
||||
pub(super) required_writers: usize,
|
||||
pub(super) floor_min: usize,
|
||||
pub(super) floor_target: usize,
|
||||
pub(super) floor_max: usize,
|
||||
pub(super) floor_capped: bool,
|
||||
pub(super) alive_writers: usize,
|
||||
pub(super) coverage_pct: f64,
|
||||
pub(super) rtt_ms: Option<f64>,
|
||||
pub(super) load: usize,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub(super) struct DcEndpointWriters {
|
||||
pub(super) endpoint: String,
|
||||
pub(super) active_writers: usize,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub(super) struct DcStatusData {
|
||||
pub(super) middle_proxy_enabled: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) reason: Option<&'static str>,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
pub(super) dcs: Vec<DcStatus>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub(super) struct MinimalQuarantineData {
|
||||
pub(super) endpoint: String,
|
||||
pub(super) remaining_ms: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub(super) struct MinimalDcPathData {
|
||||
pub(super) dc: i16,
|
||||
pub(super) ip_preference: Option<&'static str>,
|
||||
pub(super) selected_addr_v4: Option<String>,
|
||||
pub(super) selected_addr_v6: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub(super) struct MinimalMeRuntimeData {
|
||||
pub(super) active_generation: u64,
|
||||
pub(super) warm_generation: u64,
|
||||
pub(super) pending_hardswap_generation: u64,
|
||||
pub(super) pending_hardswap_age_secs: Option<u64>,
|
||||
pub(super) hardswap_enabled: bool,
|
||||
pub(super) floor_mode: &'static str,
|
||||
pub(super) adaptive_floor_idle_secs: u64,
|
||||
pub(super) adaptive_floor_min_writers_single_endpoint: u8,
|
||||
pub(super) adaptive_floor_min_writers_multi_endpoint: u8,
|
||||
pub(super) adaptive_floor_recover_grace_secs: u64,
|
||||
pub(super) adaptive_floor_writers_per_core_total: u16,
|
||||
pub(super) adaptive_floor_cpu_cores_override: u16,
|
||||
pub(super) adaptive_floor_max_extra_writers_single_per_core: u16,
|
||||
pub(super) adaptive_floor_max_extra_writers_multi_per_core: u16,
|
||||
pub(super) adaptive_floor_max_active_writers_per_core: u16,
|
||||
pub(super) adaptive_floor_max_warm_writers_per_core: u16,
|
||||
pub(super) adaptive_floor_max_active_writers_global: u32,
|
||||
pub(super) adaptive_floor_max_warm_writers_global: u32,
|
||||
pub(super) adaptive_floor_cpu_cores_detected: u32,
|
||||
pub(super) adaptive_floor_cpu_cores_effective: u32,
|
||||
pub(super) adaptive_floor_global_cap_raw: u64,
|
||||
pub(super) adaptive_floor_global_cap_effective: u64,
|
||||
pub(super) adaptive_floor_target_writers_total: u64,
|
||||
pub(super) adaptive_floor_active_cap_configured: u64,
|
||||
pub(super) adaptive_floor_active_cap_effective: u64,
|
||||
pub(super) adaptive_floor_warm_cap_configured: u64,
|
||||
pub(super) adaptive_floor_warm_cap_effective: u64,
|
||||
pub(super) adaptive_floor_active_writers_current: u64,
|
||||
pub(super) adaptive_floor_warm_writers_current: u64,
|
||||
pub(super) me_keepalive_enabled: bool,
|
||||
pub(super) me_keepalive_interval_secs: u64,
|
||||
pub(super) me_keepalive_jitter_secs: u64,
|
||||
pub(super) me_keepalive_payload_random: bool,
|
||||
pub(super) rpc_proxy_req_every_secs: u64,
|
||||
pub(super) me_reconnect_max_concurrent_per_dc: u32,
|
||||
pub(super) me_reconnect_backoff_base_ms: u64,
|
||||
pub(super) me_reconnect_backoff_cap_ms: u64,
|
||||
pub(super) me_reconnect_fast_retry_count: u32,
|
||||
pub(super) me_pool_drain_ttl_secs: u64,
|
||||
pub(super) me_pool_force_close_secs: u64,
|
||||
pub(super) me_pool_min_fresh_ratio: f32,
|
||||
pub(super) me_bind_stale_mode: &'static str,
|
||||
pub(super) me_bind_stale_ttl_secs: u64,
|
||||
pub(super) me_single_endpoint_shadow_writers: u8,
|
||||
pub(super) me_single_endpoint_outage_mode_enabled: bool,
|
||||
pub(super) me_single_endpoint_outage_disable_quarantine: bool,
|
||||
pub(super) me_single_endpoint_outage_backoff_min_ms: u64,
|
||||
pub(super) me_single_endpoint_outage_backoff_max_ms: u64,
|
||||
pub(super) me_single_endpoint_shadow_rotate_every_secs: u64,
|
||||
pub(super) me_deterministic_writer_sort: bool,
|
||||
pub(super) me_writer_pick_mode: &'static str,
|
||||
pub(super) me_writer_pick_sample_size: u8,
|
||||
pub(super) me_socks_kdf_policy: &'static str,
|
||||
pub(super) quarantined_endpoints_total: usize,
|
||||
pub(super) quarantined_endpoints: Vec<MinimalQuarantineData>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub(super) struct MinimalAllPayload {
|
||||
pub(super) me_writers: MeWritersData,
|
||||
pub(super) dcs: DcStatusData,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) me_runtime: Option<MinimalMeRuntimeData>,
|
||||
pub(super) network_path: Vec<MinimalDcPathData>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub(super) struct MinimalAllData {
|
||||
pub(super) enabled: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) reason: Option<&'static str>,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) data: Option<MinimalAllPayload>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct UserLinks {
|
||||
pub(super) classic: Vec<String>,
|
||||
pub(super) secure: Vec<String>,
|
||||
pub(super) tls: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct UserInfo {
|
||||
pub(super) username: String,
|
||||
pub(super) user_ad_tag: Option<String>,
|
||||
pub(super) max_tcp_conns: Option<usize>,
|
||||
pub(super) expiration_rfc3339: Option<String>,
|
||||
pub(super) data_quota_bytes: Option<u64>,
|
||||
pub(super) max_unique_ips: Option<usize>,
|
||||
pub(super) current_connections: u64,
|
||||
pub(super) active_unique_ips: usize,
|
||||
pub(super) active_unique_ips_list: Vec<IpAddr>,
|
||||
pub(super) recent_unique_ips: usize,
|
||||
pub(super) recent_unique_ips_list: Vec<IpAddr>,
|
||||
pub(super) total_octets: u64,
|
||||
pub(super) links: UserLinks,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct CreateUserResponse {
|
||||
pub(super) user: UserInfo,
|
||||
pub(super) secret: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub(super) struct CreateUserRequest {
|
||||
pub(super) username: String,
|
||||
pub(super) secret: Option<String>,
|
||||
pub(super) user_ad_tag: Option<String>,
|
||||
pub(super) max_tcp_conns: Option<usize>,
|
||||
pub(super) expiration_rfc3339: Option<String>,
|
||||
pub(super) data_quota_bytes: Option<u64>,
|
||||
pub(super) max_unique_ips: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub(super) struct PatchUserRequest {
|
||||
pub(super) secret: Option<String>,
|
||||
pub(super) user_ad_tag: Option<String>,
|
||||
pub(super) max_tcp_conns: Option<usize>,
|
||||
pub(super) expiration_rfc3339: Option<String>,
|
||||
pub(super) data_quota_bytes: Option<u64>,
|
||||
pub(super) max_unique_ips: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Default, Deserialize)]
|
||||
pub(super) struct RotateSecretRequest {
|
||||
pub(super) secret: Option<String>,
|
||||
}
|
||||
|
||||
pub(super) fn parse_optional_expiration(
|
||||
value: Option<&str>,
|
||||
) -> Result<Option<DateTime<Utc>>, ApiFailure> {
|
||||
let Some(raw) = value else {
|
||||
return Ok(None);
|
||||
};
|
||||
let parsed = DateTime::parse_from_rfc3339(raw)
|
||||
.map_err(|_| ApiFailure::bad_request("expiration_rfc3339 must be valid RFC3339"))?;
|
||||
Ok(Some(parsed.with_timezone(&Utc)))
|
||||
}
|
||||
|
||||
pub(super) fn is_valid_user_secret(secret: &str) -> bool {
|
||||
secret.len() == 32 && secret.chars().all(|c| c.is_ascii_hexdigit())
|
||||
}
|
||||
|
||||
pub(super) fn is_valid_ad_tag(tag: &str) -> bool {
|
||||
tag.len() == 32 && tag.chars().all(|c| c.is_ascii_hexdigit())
|
||||
}
|
||||
|
||||
pub(super) fn is_valid_username(user: &str) -> bool {
|
||||
!user.is_empty()
|
||||
&& user.len() <= MAX_USERNAME_LEN
|
||||
&& user
|
||||
.chars()
|
||||
.all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-' | '.'))
|
||||
}
|
||||
|
||||
pub(super) fn random_user_secret() -> String {
|
||||
let mut bytes = [0u8; 16];
|
||||
rand::rng().fill(&mut bytes);
|
||||
hex::encode(bytes)
|
||||
}
|
||||
294
src/api/runtime_edge.rs
Normal file
294
src/api/runtime_edge.rs
Normal file
@@ -0,0 +1,294 @@
|
||||
use std::cmp::Reverse;
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
|
||||
use super::ApiShared;
|
||||
use super::events::ApiEventRecord;
|
||||
|
||||
const FEATURE_DISABLED_REASON: &str = "feature_disabled";
|
||||
const SOURCE_UNAVAILABLE_REASON: &str = "source_unavailable";
|
||||
const EVENTS_DEFAULT_LIMIT: usize = 50;
|
||||
const EVENTS_MAX_LIMIT: usize = 1000;
|
||||
|
||||
#[derive(Clone, Serialize)]
|
||||
pub(super) struct RuntimeEdgeConnectionUserData {
|
||||
pub(super) username: String,
|
||||
pub(super) current_connections: u64,
|
||||
pub(super) total_octets: u64,
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize)]
|
||||
pub(super) struct RuntimeEdgeConnectionTotalsData {
|
||||
pub(super) current_connections: u64,
|
||||
pub(super) current_connections_me: u64,
|
||||
pub(super) current_connections_direct: u64,
|
||||
pub(super) active_users: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize)]
|
||||
pub(super) struct RuntimeEdgeConnectionTopData {
|
||||
pub(super) limit: usize,
|
||||
pub(super) by_connections: Vec<RuntimeEdgeConnectionUserData>,
|
||||
pub(super) by_throughput: Vec<RuntimeEdgeConnectionUserData>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize)]
|
||||
pub(super) struct RuntimeEdgeConnectionCacheData {
|
||||
pub(super) ttl_ms: u64,
|
||||
pub(super) served_from_cache: bool,
|
||||
pub(super) stale_cache_used: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize)]
|
||||
pub(super) struct RuntimeEdgeConnectionTelemetryData {
|
||||
pub(super) user_enabled: bool,
|
||||
pub(super) throughput_is_cumulative: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize)]
|
||||
pub(super) struct RuntimeEdgeConnectionsSummaryPayload {
|
||||
pub(super) cache: RuntimeEdgeConnectionCacheData,
|
||||
pub(super) totals: RuntimeEdgeConnectionTotalsData,
|
||||
pub(super) top: RuntimeEdgeConnectionTopData,
|
||||
pub(super) telemetry: RuntimeEdgeConnectionTelemetryData,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeEdgeConnectionsSummaryData {
|
||||
pub(super) enabled: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) reason: Option<&'static str>,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) data: Option<RuntimeEdgeConnectionsSummaryPayload>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct EdgeConnectionsCacheEntry {
|
||||
pub(super) expires_at: Instant,
|
||||
pub(super) payload: RuntimeEdgeConnectionsSummaryPayload,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeEdgeEventsPayload {
|
||||
pub(super) capacity: usize,
|
||||
pub(super) dropped_total: u64,
|
||||
pub(super) events: Vec<ApiEventRecord>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeEdgeEventsData {
|
||||
pub(super) enabled: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) reason: Option<&'static str>,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) data: Option<RuntimeEdgeEventsPayload>,
|
||||
}
|
||||
|
||||
pub(super) async fn build_runtime_connections_summary_data(
|
||||
shared: &ApiShared,
|
||||
cfg: &ProxyConfig,
|
||||
) -> RuntimeEdgeConnectionsSummaryData {
|
||||
let now_epoch_secs = now_epoch_secs();
|
||||
let api_cfg = &cfg.server.api;
|
||||
if !api_cfg.runtime_edge_enabled {
|
||||
return RuntimeEdgeConnectionsSummaryData {
|
||||
enabled: false,
|
||||
reason: Some(FEATURE_DISABLED_REASON),
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
data: None,
|
||||
};
|
||||
}
|
||||
|
||||
let (generated_at_epoch_secs, payload) = match get_connections_payload_cached(
|
||||
shared,
|
||||
api_cfg.runtime_edge_cache_ttl_ms,
|
||||
api_cfg.runtime_edge_top_n,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Some(v) => v,
|
||||
None => {
|
||||
return RuntimeEdgeConnectionsSummaryData {
|
||||
enabled: true,
|
||||
reason: Some(SOURCE_UNAVAILABLE_REASON),
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
data: None,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
RuntimeEdgeConnectionsSummaryData {
|
||||
enabled: true,
|
||||
reason: None,
|
||||
generated_at_epoch_secs,
|
||||
data: Some(payload),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn build_runtime_events_recent_data(
|
||||
shared: &ApiShared,
|
||||
cfg: &ProxyConfig,
|
||||
query: Option<&str>,
|
||||
) -> RuntimeEdgeEventsData {
|
||||
let now_epoch_secs = now_epoch_secs();
|
||||
let api_cfg = &cfg.server.api;
|
||||
if !api_cfg.runtime_edge_enabled {
|
||||
return RuntimeEdgeEventsData {
|
||||
enabled: false,
|
||||
reason: Some(FEATURE_DISABLED_REASON),
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
data: None,
|
||||
};
|
||||
}
|
||||
|
||||
let limit = parse_recent_events_limit(query, EVENTS_DEFAULT_LIMIT, EVENTS_MAX_LIMIT);
|
||||
let snapshot = shared.runtime_events.snapshot(limit);
|
||||
|
||||
RuntimeEdgeEventsData {
|
||||
enabled: true,
|
||||
reason: None,
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
data: Some(RuntimeEdgeEventsPayload {
|
||||
capacity: snapshot.capacity,
|
||||
dropped_total: snapshot.dropped_total,
|
||||
events: snapshot.events,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_connections_payload_cached(
|
||||
shared: &ApiShared,
|
||||
cache_ttl_ms: u64,
|
||||
top_n: usize,
|
||||
) -> Option<(u64, RuntimeEdgeConnectionsSummaryPayload)> {
|
||||
if cache_ttl_ms > 0 {
|
||||
let now = Instant::now();
|
||||
let cached = shared.runtime_edge_connections_cache.lock().await.clone();
|
||||
if let Some(entry) = cached
|
||||
&& now < entry.expires_at
|
||||
{
|
||||
let mut payload = entry.payload;
|
||||
payload.cache.served_from_cache = true;
|
||||
payload.cache.stale_cache_used = false;
|
||||
return Some((entry.generated_at_epoch_secs, payload));
|
||||
}
|
||||
}
|
||||
|
||||
let Ok(_guard) = shared.runtime_edge_recompute_lock.try_lock() else {
|
||||
let cached = shared.runtime_edge_connections_cache.lock().await.clone();
|
||||
if let Some(entry) = cached {
|
||||
let mut payload = entry.payload;
|
||||
payload.cache.served_from_cache = true;
|
||||
payload.cache.stale_cache_used = true;
|
||||
return Some((entry.generated_at_epoch_secs, payload));
|
||||
}
|
||||
return None;
|
||||
};
|
||||
|
||||
let generated_at_epoch_secs = now_epoch_secs();
|
||||
let payload = recompute_connections_payload(shared, cache_ttl_ms, top_n).await;
|
||||
|
||||
if cache_ttl_ms > 0 {
|
||||
let entry = EdgeConnectionsCacheEntry {
|
||||
expires_at: Instant::now() + Duration::from_millis(cache_ttl_ms),
|
||||
payload: payload.clone(),
|
||||
generated_at_epoch_secs,
|
||||
};
|
||||
*shared.runtime_edge_connections_cache.lock().await = Some(entry);
|
||||
}
|
||||
|
||||
Some((generated_at_epoch_secs, payload))
|
||||
}
|
||||
|
||||
async fn recompute_connections_payload(
|
||||
shared: &ApiShared,
|
||||
cache_ttl_ms: u64,
|
||||
top_n: usize,
|
||||
) -> RuntimeEdgeConnectionsSummaryPayload {
|
||||
let mut rows = Vec::<RuntimeEdgeConnectionUserData>::new();
|
||||
let mut active_users = 0usize;
|
||||
for entry in shared.stats.iter_user_stats() {
|
||||
let user_stats = entry.value();
|
||||
let current_connections = user_stats
|
||||
.curr_connects
|
||||
.load(std::sync::atomic::Ordering::Relaxed);
|
||||
let total_octets = user_stats
|
||||
.octets_from_client
|
||||
.load(std::sync::atomic::Ordering::Relaxed)
|
||||
.saturating_add(
|
||||
user_stats
|
||||
.octets_to_client
|
||||
.load(std::sync::atomic::Ordering::Relaxed),
|
||||
);
|
||||
if current_connections > 0 {
|
||||
active_users = active_users.saturating_add(1);
|
||||
}
|
||||
rows.push(RuntimeEdgeConnectionUserData {
|
||||
username: entry.key().clone(),
|
||||
current_connections,
|
||||
total_octets,
|
||||
});
|
||||
}
|
||||
|
||||
let limit = top_n.max(1);
|
||||
let mut by_connections = rows.clone();
|
||||
by_connections.sort_by_key(|row| (Reverse(row.current_connections), row.username.clone()));
|
||||
by_connections.truncate(limit);
|
||||
|
||||
let mut by_throughput = rows;
|
||||
by_throughput.sort_by_key(|row| (Reverse(row.total_octets), row.username.clone()));
|
||||
by_throughput.truncate(limit);
|
||||
|
||||
let telemetry = shared.stats.telemetry_policy();
|
||||
RuntimeEdgeConnectionsSummaryPayload {
|
||||
cache: RuntimeEdgeConnectionCacheData {
|
||||
ttl_ms: cache_ttl_ms,
|
||||
served_from_cache: false,
|
||||
stale_cache_used: false,
|
||||
},
|
||||
totals: RuntimeEdgeConnectionTotalsData {
|
||||
current_connections: shared.stats.get_current_connections_total(),
|
||||
current_connections_me: shared.stats.get_current_connections_me(),
|
||||
current_connections_direct: shared.stats.get_current_connections_direct(),
|
||||
active_users,
|
||||
},
|
||||
top: RuntimeEdgeConnectionTopData {
|
||||
limit,
|
||||
by_connections,
|
||||
by_throughput,
|
||||
},
|
||||
telemetry: RuntimeEdgeConnectionTelemetryData {
|
||||
user_enabled: telemetry.user_enabled,
|
||||
throughput_is_cumulative: true,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts the `limit` parameter from a raw `a=b&c=d` query string.
///
/// The first `limit=<n>` pair whose value parses as `usize` wins and is
/// bounded to `1..=max_limit`. Pairs with another key, without `=`, or with an
/// unparsable value are skipped. No percent-decoding is performed. When
/// `query` is `None` or holds no usable pair, `default_limit` is returned
/// unchanged.
///
/// A `max_limit` of 0 is treated as 1: `usize::clamp` panics when its lower
/// bound exceeds its upper bound, so the upper bound is normalised first.
fn parse_recent_events_limit(query: Option<&str>, default_limit: usize, max_limit: usize) -> usize {
    let Some(query) = query else {
        return default_limit;
    };
    let upper = max_limit.max(1);
    query
        .split('&')
        .filter_map(|pair| pair.split_once('='))
        .filter(|(key, _)| *key == "limit")
        .find_map(|(_, raw)| raw.parse::<usize>().ok())
        .map_or(default_limit, |parsed| parsed.clamp(1, upper))
}
|
||||
|
||||
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Yields 0 when the system clock reports a time before the epoch.
fn now_epoch_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
|
||||
186
src/api/runtime_init.rs
Normal file
186
src/api/runtime_init.rs
Normal file
@@ -0,0 +1,186 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::startup::{
|
||||
COMPONENT_ME_CONNECTIVITY_PING, COMPONENT_ME_POOL_CONSTRUCT, COMPONENT_ME_POOL_INIT_STAGE1,
|
||||
COMPONENT_ME_PROXY_CONFIG_V4, COMPONENT_ME_PROXY_CONFIG_V6, COMPONENT_ME_SECRET_FETCH,
|
||||
StartupComponentStatus, StartupMeStatus, compute_progress_pct,
|
||||
};
|
||||
|
||||
use super::ApiShared;
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeInitializationComponentData {
|
||||
pub(super) id: &'static str,
|
||||
pub(super) title: &'static str,
|
||||
pub(super) status: &'static str,
|
||||
pub(super) started_at_epoch_ms: Option<u64>,
|
||||
pub(super) finished_at_epoch_ms: Option<u64>,
|
||||
pub(super) duration_ms: Option<u64>,
|
||||
pub(super) attempts: u32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) details: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeInitializationMeData {
|
||||
pub(super) status: &'static str,
|
||||
pub(super) current_stage: String,
|
||||
pub(super) progress_pct: f64,
|
||||
pub(super) init_attempt: u32,
|
||||
pub(super) retry_limit: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) last_error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeInitializationData {
|
||||
pub(super) status: &'static str,
|
||||
pub(super) degraded: bool,
|
||||
pub(super) current_stage: String,
|
||||
pub(super) progress_pct: f64,
|
||||
pub(super) started_at_epoch_secs: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) ready_at_epoch_secs: Option<u64>,
|
||||
pub(super) total_elapsed_ms: u64,
|
||||
pub(super) transport_mode: String,
|
||||
pub(super) me: RuntimeInitializationMeData,
|
||||
pub(super) components: Vec<RuntimeInitializationComponentData>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(super) struct RuntimeStartupSummaryData {
|
||||
pub(super) status: &'static str,
|
||||
pub(super) stage: String,
|
||||
pub(super) progress_pct: f64,
|
||||
}
|
||||
|
||||
pub(super) async fn build_runtime_startup_summary(shared: &ApiShared) -> RuntimeStartupSummaryData {
|
||||
let snapshot = shared.startup_tracker.snapshot().await;
|
||||
let me_pool_progress = current_me_pool_stage_progress(shared).await;
|
||||
let progress_pct = compute_progress_pct(&snapshot, me_pool_progress);
|
||||
RuntimeStartupSummaryData {
|
||||
status: snapshot.status.as_str(),
|
||||
stage: snapshot.current_stage,
|
||||
progress_pct,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn build_runtime_initialization_data(
|
||||
shared: &ApiShared,
|
||||
) -> RuntimeInitializationData {
|
||||
let snapshot = shared.startup_tracker.snapshot().await;
|
||||
let me_pool_progress = current_me_pool_stage_progress(shared).await;
|
||||
let progress_pct = compute_progress_pct(&snapshot, me_pool_progress);
|
||||
let me_progress_pct = compute_me_progress_pct(&snapshot, me_pool_progress);
|
||||
|
||||
RuntimeInitializationData {
|
||||
status: snapshot.status.as_str(),
|
||||
degraded: snapshot.degraded,
|
||||
current_stage: snapshot.current_stage,
|
||||
progress_pct,
|
||||
started_at_epoch_secs: snapshot.started_at_epoch_secs,
|
||||
ready_at_epoch_secs: snapshot.ready_at_epoch_secs,
|
||||
total_elapsed_ms: snapshot.total_elapsed_ms,
|
||||
transport_mode: snapshot.transport_mode,
|
||||
me: RuntimeInitializationMeData {
|
||||
status: snapshot.me.status.as_str(),
|
||||
current_stage: snapshot.me.current_stage,
|
||||
progress_pct: me_progress_pct,
|
||||
init_attempt: snapshot.me.init_attempt,
|
||||
retry_limit: snapshot.me.retry_limit,
|
||||
last_error: snapshot.me.last_error,
|
||||
},
|
||||
components: snapshot
|
||||
.components
|
||||
.into_iter()
|
||||
.map(|component| RuntimeInitializationComponentData {
|
||||
id: component.id,
|
||||
title: component.title,
|
||||
status: component.status.as_str(),
|
||||
started_at_epoch_ms: component.started_at_epoch_ms,
|
||||
finished_at_epoch_ms: component.finished_at_epoch_ms,
|
||||
duration_ms: component.duration_ms,
|
||||
attempts: component.attempts,
|
||||
details: component.details,
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_me_progress_pct(
|
||||
snapshot: &crate::startup::StartupSnapshot,
|
||||
me_pool_progress: Option<f64>,
|
||||
) -> f64 {
|
||||
match snapshot.me.status {
|
||||
StartupMeStatus::Pending => 0.0,
|
||||
StartupMeStatus::Ready | StartupMeStatus::Failed | StartupMeStatus::Skipped => 100.0,
|
||||
StartupMeStatus::Initializing => {
|
||||
let mut total_weight = 0.0f64;
|
||||
let mut completed_weight = 0.0f64;
|
||||
for component in &snapshot.components {
|
||||
if !is_me_component(component.id) {
|
||||
continue;
|
||||
}
|
||||
total_weight += component.weight;
|
||||
let unit_progress = match component.status {
|
||||
StartupComponentStatus::Pending => 0.0,
|
||||
StartupComponentStatus::Running => {
|
||||
if component.id == COMPONENT_ME_POOL_INIT_STAGE1 {
|
||||
me_pool_progress.unwrap_or(0.0).clamp(0.0, 1.0)
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
StartupComponentStatus::Ready
|
||||
| StartupComponentStatus::Failed
|
||||
| StartupComponentStatus::Skipped => 1.0,
|
||||
};
|
||||
completed_weight += component.weight * unit_progress;
|
||||
}
|
||||
if total_weight <= f64::EPSILON {
|
||||
0.0
|
||||
} else {
|
||||
((completed_weight / total_weight) * 100.0).clamp(0.0, 100.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_me_component(component_id: &str) -> bool {
|
||||
matches!(
|
||||
component_id,
|
||||
COMPONENT_ME_SECRET_FETCH
|
||||
| COMPONENT_ME_PROXY_CONFIG_V4
|
||||
| COMPONENT_ME_PROXY_CONFIG_V6
|
||||
| COMPONENT_ME_POOL_CONSTRUCT
|
||||
| COMPONENT_ME_POOL_INIT_STAGE1
|
||||
| COMPONENT_ME_CONNECTIVITY_PING
|
||||
)
|
||||
}
|
||||
|
||||
async fn current_me_pool_stage_progress(shared: &ApiShared) -> Option<f64> {
|
||||
let snapshot = shared.startup_tracker.snapshot().await;
|
||||
if snapshot.me.status != StartupMeStatus::Initializing {
|
||||
return None;
|
||||
}
|
||||
|
||||
let pool = shared.me_pool.read().await.clone()?;
|
||||
let status = pool.api_status_snapshot().await;
|
||||
let configured_dc_groups = status.configured_dc_groups;
|
||||
let covered_dc_groups = status
|
||||
.dcs
|
||||
.iter()
|
||||
.filter(|dc| dc.alive_writers > 0)
|
||||
.count();
|
||||
|
||||
let dc_coverage = ratio_01(covered_dc_groups, configured_dc_groups);
|
||||
let writer_coverage = ratio_01(status.alive_writers, status.required_writers);
|
||||
Some((0.7 * dc_coverage + 0.3 * writer_coverage).clamp(0.0, 1.0))
|
||||
}
|
||||
|
||||
/// Returns `part / total` clamped to `0.0..=1.0`; a zero `total` yields 0.
fn ratio_01(part: usize, total: usize) -> f64 {
    match total {
        0 => 0.0,
        denom => (part as f64 / denom as f64).clamp(0.0, 1.0),
    }
}
|
||||
534
src/api/runtime_min.rs
Normal file
534
src/api/runtime_min.rs
Normal file
@@ -0,0 +1,534 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
|
||||
use super::ApiShared;
|
||||
|
||||
const SOURCE_UNAVAILABLE_REASON: &str = "source_unavailable";
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct SecurityWhitelistData {
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
pub(super) enabled: bool,
|
||||
pub(super) entries_total: usize,
|
||||
pub(super) entries: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMePoolStateGenerationData {
|
||||
pub(super) active_generation: u64,
|
||||
pub(super) warm_generation: u64,
|
||||
pub(super) pending_hardswap_generation: u64,
|
||||
pub(super) pending_hardswap_age_secs: Option<u64>,
|
||||
pub(super) draining_generations: Vec<u64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMePoolStateHardswapData {
|
||||
pub(super) enabled: bool,
|
||||
pub(super) pending: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMePoolStateWriterContourData {
|
||||
pub(super) warm: usize,
|
||||
pub(super) active: usize,
|
||||
pub(super) draining: usize,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMePoolStateWriterHealthData {
|
||||
pub(super) healthy: usize,
|
||||
pub(super) degraded: usize,
|
||||
pub(super) draining: usize,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMePoolStateWriterData {
|
||||
pub(super) total: usize,
|
||||
pub(super) alive_non_draining: usize,
|
||||
pub(super) draining: usize,
|
||||
pub(super) degraded: usize,
|
||||
pub(super) contour: RuntimeMePoolStateWriterContourData,
|
||||
pub(super) health: RuntimeMePoolStateWriterHealthData,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMePoolStateRefillDcData {
|
||||
pub(super) dc: i16,
|
||||
pub(super) family: &'static str,
|
||||
pub(super) inflight: usize,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMePoolStateRefillData {
|
||||
pub(super) inflight_endpoints_total: usize,
|
||||
pub(super) inflight_dc_total: usize,
|
||||
pub(super) by_dc: Vec<RuntimeMePoolStateRefillDcData>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMePoolStatePayload {
|
||||
pub(super) generations: RuntimeMePoolStateGenerationData,
|
||||
pub(super) hardswap: RuntimeMePoolStateHardswapData,
|
||||
pub(super) writers: RuntimeMePoolStateWriterData,
|
||||
pub(super) refill: RuntimeMePoolStateRefillData,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMePoolStateData {
|
||||
pub(super) enabled: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) reason: Option<&'static str>,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) data: Option<RuntimeMePoolStatePayload>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeQualityCountersData {
|
||||
pub(super) idle_close_by_peer_total: u64,
|
||||
pub(super) reader_eof_total: u64,
|
||||
pub(super) kdf_drift_total: u64,
|
||||
pub(super) kdf_port_only_drift_total: u64,
|
||||
pub(super) reconnect_attempt_total: u64,
|
||||
pub(super) reconnect_success_total: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeQualityRouteDropData {
|
||||
pub(super) no_conn_total: u64,
|
||||
pub(super) channel_closed_total: u64,
|
||||
pub(super) queue_full_total: u64,
|
||||
pub(super) queue_full_base_total: u64,
|
||||
pub(super) queue_full_high_total: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeQualityDcRttData {
|
||||
pub(super) dc: i16,
|
||||
pub(super) rtt_ema_ms: Option<f64>,
|
||||
pub(super) alive_writers: usize,
|
||||
pub(super) required_writers: usize,
|
||||
pub(super) coverage_pct: f64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeQualityPayload {
|
||||
pub(super) counters: RuntimeMeQualityCountersData,
|
||||
pub(super) route_drops: RuntimeMeQualityRouteDropData,
|
||||
pub(super) dc_rtt: Vec<RuntimeMeQualityDcRttData>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeQualityData {
|
||||
pub(super) enabled: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) reason: Option<&'static str>,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) data: Option<RuntimeMeQualityPayload>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeUpstreamQualityPolicyData {
|
||||
pub(super) connect_retry_attempts: u32,
|
||||
pub(super) connect_retry_backoff_ms: u64,
|
||||
pub(super) connect_budget_ms: u64,
|
||||
pub(super) unhealthy_fail_threshold: u32,
|
||||
pub(super) connect_failfast_hard_errors: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeUpstreamQualityCountersData {
|
||||
pub(super) connect_attempt_total: u64,
|
||||
pub(super) connect_success_total: u64,
|
||||
pub(super) connect_fail_total: u64,
|
||||
pub(super) connect_failfast_hard_error_total: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeUpstreamQualitySummaryData {
|
||||
pub(super) configured_total: usize,
|
||||
pub(super) healthy_total: usize,
|
||||
pub(super) unhealthy_total: usize,
|
||||
pub(super) direct_total: usize,
|
||||
pub(super) socks4_total: usize,
|
||||
pub(super) socks5_total: usize,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeUpstreamQualityDcData {
|
||||
pub(super) dc: i16,
|
||||
pub(super) latency_ema_ms: Option<f64>,
|
||||
pub(super) ip_preference: &'static str,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeUpstreamQualityUpstreamData {
|
||||
pub(super) upstream_id: usize,
|
||||
pub(super) route_kind: &'static str,
|
||||
pub(super) address: String,
|
||||
pub(super) weight: u16,
|
||||
pub(super) scopes: String,
|
||||
pub(super) healthy: bool,
|
||||
pub(super) fails: u32,
|
||||
pub(super) last_check_age_secs: u64,
|
||||
pub(super) effective_latency_ms: Option<f64>,
|
||||
pub(super) dc: Vec<RuntimeUpstreamQualityDcData>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeUpstreamQualityData {
|
||||
pub(super) enabled: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) reason: Option<&'static str>,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
pub(super) policy: RuntimeUpstreamQualityPolicyData,
|
||||
pub(super) counters: RuntimeUpstreamQualityCountersData,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) summary: Option<RuntimeUpstreamQualitySummaryData>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) upstreams: Option<Vec<RuntimeUpstreamQualityUpstreamData>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeNatStunReflectionData {
|
||||
pub(super) addr: String,
|
||||
pub(super) age_secs: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeNatStunFlagsData {
|
||||
pub(super) nat_probe_enabled: bool,
|
||||
pub(super) nat_probe_disabled_runtime: bool,
|
||||
pub(super) nat_probe_attempts: u8,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeNatStunServersData {
|
||||
pub(super) configured: Vec<String>,
|
||||
pub(super) live: Vec<String>,
|
||||
pub(super) live_total: usize,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeNatStunReflectionBlockData {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) v4: Option<RuntimeNatStunReflectionData>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) v6: Option<RuntimeNatStunReflectionData>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeNatStunPayload {
|
||||
pub(super) flags: RuntimeNatStunFlagsData,
|
||||
pub(super) servers: RuntimeNatStunServersData,
|
||||
pub(super) reflection: RuntimeNatStunReflectionBlockData,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) stun_backoff_remaining_ms: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeNatStunData {
|
||||
pub(super) enabled: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) reason: Option<&'static str>,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) data: Option<RuntimeNatStunPayload>,
|
||||
}
|
||||
|
||||
pub(super) fn build_security_whitelist_data(cfg: &ProxyConfig) -> SecurityWhitelistData {
|
||||
let entries = cfg
|
||||
.server
|
||||
.api
|
||||
.whitelist
|
||||
.iter()
|
||||
.map(ToString::to_string)
|
||||
.collect::<Vec<_>>();
|
||||
SecurityWhitelistData {
|
||||
generated_at_epoch_secs: now_epoch_secs(),
|
||||
enabled: !entries.is_empty(),
|
||||
entries_total: entries.len(),
|
||||
entries,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn build_runtime_me_pool_state_data(shared: &ApiShared) -> RuntimeMePoolStateData {
|
||||
let now_epoch_secs = now_epoch_secs();
|
||||
let Some(pool) = shared.me_pool.read().await.clone() else {
|
||||
return RuntimeMePoolStateData {
|
||||
enabled: false,
|
||||
reason: Some(SOURCE_UNAVAILABLE_REASON),
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
data: None,
|
||||
};
|
||||
};
|
||||
|
||||
let status = pool.api_status_snapshot().await;
|
||||
let runtime = pool.api_runtime_snapshot().await;
|
||||
let refill = pool.api_refill_snapshot().await;
|
||||
|
||||
let mut draining_generations = BTreeSet::<u64>::new();
|
||||
let mut contour_warm = 0usize;
|
||||
let mut contour_active = 0usize;
|
||||
let mut contour_draining = 0usize;
|
||||
let mut draining = 0usize;
|
||||
let mut degraded = 0usize;
|
||||
let mut healthy = 0usize;
|
||||
|
||||
for writer in &status.writers {
|
||||
if writer.draining {
|
||||
draining_generations.insert(writer.generation);
|
||||
draining += 1;
|
||||
}
|
||||
if writer.degraded && !writer.draining {
|
||||
degraded += 1;
|
||||
}
|
||||
if !writer.degraded && !writer.draining {
|
||||
healthy += 1;
|
||||
}
|
||||
match writer.state {
|
||||
"warm" => contour_warm += 1,
|
||||
"active" => contour_active += 1,
|
||||
_ => contour_draining += 1,
|
||||
}
|
||||
}
|
||||
|
||||
RuntimeMePoolStateData {
|
||||
enabled: true,
|
||||
reason: None,
|
||||
generated_at_epoch_secs: status.generated_at_epoch_secs,
|
||||
data: Some(RuntimeMePoolStatePayload {
|
||||
generations: RuntimeMePoolStateGenerationData {
|
||||
active_generation: runtime.active_generation,
|
||||
warm_generation: runtime.warm_generation,
|
||||
pending_hardswap_generation: runtime.pending_hardswap_generation,
|
||||
pending_hardswap_age_secs: runtime.pending_hardswap_age_secs,
|
||||
draining_generations: draining_generations.into_iter().collect(),
|
||||
},
|
||||
hardswap: RuntimeMePoolStateHardswapData {
|
||||
enabled: runtime.hardswap_enabled,
|
||||
pending: runtime.pending_hardswap_generation != 0,
|
||||
},
|
||||
writers: RuntimeMePoolStateWriterData {
|
||||
total: status.writers.len(),
|
||||
alive_non_draining: status.writers.len().saturating_sub(draining),
|
||||
draining,
|
||||
degraded,
|
||||
contour: RuntimeMePoolStateWriterContourData {
|
||||
warm: contour_warm,
|
||||
active: contour_active,
|
||||
draining: contour_draining,
|
||||
},
|
||||
health: RuntimeMePoolStateWriterHealthData {
|
||||
healthy,
|
||||
degraded,
|
||||
draining,
|
||||
},
|
||||
},
|
||||
refill: RuntimeMePoolStateRefillData {
|
||||
inflight_endpoints_total: refill.inflight_endpoints_total,
|
||||
inflight_dc_total: refill.inflight_dc_total,
|
||||
by_dc: refill
|
||||
.by_dc
|
||||
.into_iter()
|
||||
.map(|entry| RuntimeMePoolStateRefillDcData {
|
||||
dc: entry.dc,
|
||||
family: entry.family,
|
||||
inflight: entry.inflight,
|
||||
})
|
||||
.collect(),
|
||||
},
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn build_runtime_me_quality_data(shared: &ApiShared) -> RuntimeMeQualityData {
|
||||
let now_epoch_secs = now_epoch_secs();
|
||||
let Some(pool) = shared.me_pool.read().await.clone() else {
|
||||
return RuntimeMeQualityData {
|
||||
enabled: false,
|
||||
reason: Some(SOURCE_UNAVAILABLE_REASON),
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
data: None,
|
||||
};
|
||||
};
|
||||
|
||||
let status = pool.api_status_snapshot().await;
|
||||
RuntimeMeQualityData {
|
||||
enabled: true,
|
||||
reason: None,
|
||||
generated_at_epoch_secs: status.generated_at_epoch_secs,
|
||||
data: Some(RuntimeMeQualityPayload {
|
||||
counters: RuntimeMeQualityCountersData {
|
||||
idle_close_by_peer_total: shared.stats.get_me_idle_close_by_peer_total(),
|
||||
reader_eof_total: shared.stats.get_me_reader_eof_total(),
|
||||
kdf_drift_total: shared.stats.get_me_kdf_drift_total(),
|
||||
kdf_port_only_drift_total: shared.stats.get_me_kdf_port_only_drift_total(),
|
||||
reconnect_attempt_total: shared.stats.get_me_reconnect_attempts(),
|
||||
reconnect_success_total: shared.stats.get_me_reconnect_success(),
|
||||
},
|
||||
route_drops: RuntimeMeQualityRouteDropData {
|
||||
no_conn_total: shared.stats.get_me_route_drop_no_conn(),
|
||||
channel_closed_total: shared.stats.get_me_route_drop_channel_closed(),
|
||||
queue_full_total: shared.stats.get_me_route_drop_queue_full(),
|
||||
queue_full_base_total: shared.stats.get_me_route_drop_queue_full_base(),
|
||||
queue_full_high_total: shared.stats.get_me_route_drop_queue_full_high(),
|
||||
},
|
||||
dc_rtt: status
|
||||
.dcs
|
||||
.into_iter()
|
||||
.map(|dc| RuntimeMeQualityDcRttData {
|
||||
dc: dc.dc,
|
||||
rtt_ema_ms: dc.rtt_ms,
|
||||
alive_writers: dc.alive_writers,
|
||||
required_writers: dc.required_writers,
|
||||
coverage_pct: dc.coverage_pct,
|
||||
})
|
||||
.collect(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn build_runtime_upstream_quality_data(
|
||||
shared: &ApiShared,
|
||||
) -> RuntimeUpstreamQualityData {
|
||||
let generated_at_epoch_secs = now_epoch_secs();
|
||||
let policy = shared.upstream_manager.api_policy_snapshot();
|
||||
let counters = RuntimeUpstreamQualityCountersData {
|
||||
connect_attempt_total: shared.stats.get_upstream_connect_attempt_total(),
|
||||
connect_success_total: shared.stats.get_upstream_connect_success_total(),
|
||||
connect_fail_total: shared.stats.get_upstream_connect_fail_total(),
|
||||
connect_failfast_hard_error_total: shared.stats.get_upstream_connect_failfast_hard_error_total(),
|
||||
};
|
||||
|
||||
let Some(snapshot) = shared.upstream_manager.try_api_snapshot() else {
|
||||
return RuntimeUpstreamQualityData {
|
||||
enabled: false,
|
||||
reason: Some(SOURCE_UNAVAILABLE_REASON),
|
||||
generated_at_epoch_secs,
|
||||
policy: RuntimeUpstreamQualityPolicyData {
|
||||
connect_retry_attempts: policy.connect_retry_attempts,
|
||||
connect_retry_backoff_ms: policy.connect_retry_backoff_ms,
|
||||
connect_budget_ms: policy.connect_budget_ms,
|
||||
unhealthy_fail_threshold: policy.unhealthy_fail_threshold,
|
||||
connect_failfast_hard_errors: policy.connect_failfast_hard_errors,
|
||||
},
|
||||
counters,
|
||||
summary: None,
|
||||
upstreams: None,
|
||||
};
|
||||
};
|
||||
|
||||
RuntimeUpstreamQualityData {
|
||||
enabled: true,
|
||||
reason: None,
|
||||
generated_at_epoch_secs,
|
||||
policy: RuntimeUpstreamQualityPolicyData {
|
||||
connect_retry_attempts: policy.connect_retry_attempts,
|
||||
connect_retry_backoff_ms: policy.connect_retry_backoff_ms,
|
||||
connect_budget_ms: policy.connect_budget_ms,
|
||||
unhealthy_fail_threshold: policy.unhealthy_fail_threshold,
|
||||
connect_failfast_hard_errors: policy.connect_failfast_hard_errors,
|
||||
},
|
||||
counters,
|
||||
summary: Some(RuntimeUpstreamQualitySummaryData {
|
||||
configured_total: snapshot.summary.configured_total,
|
||||
healthy_total: snapshot.summary.healthy_total,
|
||||
unhealthy_total: snapshot.summary.unhealthy_total,
|
||||
direct_total: snapshot.summary.direct_total,
|
||||
socks4_total: snapshot.summary.socks4_total,
|
||||
socks5_total: snapshot.summary.socks5_total,
|
||||
}),
|
||||
upstreams: Some(
|
||||
snapshot
|
||||
.upstreams
|
||||
.into_iter()
|
||||
.map(|upstream| RuntimeUpstreamQualityUpstreamData {
|
||||
upstream_id: upstream.upstream_id,
|
||||
route_kind: match upstream.route_kind {
|
||||
crate::transport::UpstreamRouteKind::Direct => "direct",
|
||||
crate::transport::UpstreamRouteKind::Socks4 => "socks4",
|
||||
crate::transport::UpstreamRouteKind::Socks5 => "socks5",
|
||||
},
|
||||
address: upstream.address,
|
||||
weight: upstream.weight,
|
||||
scopes: upstream.scopes,
|
||||
healthy: upstream.healthy,
|
||||
fails: upstream.fails,
|
||||
last_check_age_secs: upstream.last_check_age_secs,
|
||||
effective_latency_ms: upstream.effective_latency_ms,
|
||||
dc: upstream
|
||||
.dc
|
||||
.into_iter()
|
||||
.map(|dc| RuntimeUpstreamQualityDcData {
|
||||
dc: dc.dc,
|
||||
latency_ema_ms: dc.latency_ema_ms,
|
||||
ip_preference: match dc.ip_preference {
|
||||
crate::transport::upstream::IpPreference::Unknown => "unknown",
|
||||
crate::transport::upstream::IpPreference::PreferV6 => "prefer_v6",
|
||||
crate::transport::upstream::IpPreference::PreferV4 => "prefer_v4",
|
||||
crate::transport::upstream::IpPreference::BothWork => "both_work",
|
||||
crate::transport::upstream::IpPreference::Unavailable => "unavailable",
|
||||
},
|
||||
})
|
||||
.collect(),
|
||||
})
|
||||
.collect(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn build_runtime_nat_stun_data(shared: &ApiShared) -> RuntimeNatStunData {
|
||||
let now_epoch_secs = now_epoch_secs();
|
||||
let Some(pool) = shared.me_pool.read().await.clone() else {
|
||||
return RuntimeNatStunData {
|
||||
enabled: false,
|
||||
reason: Some(SOURCE_UNAVAILABLE_REASON),
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
data: None,
|
||||
};
|
||||
};
|
||||
|
||||
let snapshot = pool.api_nat_stun_snapshot().await;
|
||||
RuntimeNatStunData {
|
||||
enabled: true,
|
||||
reason: None,
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
data: Some(RuntimeNatStunPayload {
|
||||
flags: RuntimeNatStunFlagsData {
|
||||
nat_probe_enabled: snapshot.nat_probe_enabled,
|
||||
nat_probe_disabled_runtime: snapshot.nat_probe_disabled_runtime,
|
||||
nat_probe_attempts: snapshot.nat_probe_attempts,
|
||||
},
|
||||
servers: RuntimeNatStunServersData {
|
||||
configured: snapshot.configured_servers,
|
||||
live: snapshot.live_servers.clone(),
|
||||
live_total: snapshot.live_servers.len(),
|
||||
},
|
||||
reflection: RuntimeNatStunReflectionBlockData {
|
||||
v4: snapshot.reflection_v4.map(|entry| RuntimeNatStunReflectionData {
|
||||
addr: entry.addr.to_string(),
|
||||
age_secs: entry.age_secs,
|
||||
}),
|
||||
v6: snapshot.reflection_v6.map(|entry| RuntimeNatStunReflectionData {
|
||||
addr: entry.addr.to_string(),
|
||||
age_secs: entry.age_secs,
|
||||
}),
|
||||
},
|
||||
stun_backoff_remaining_ms: snapshot.stun_backoff_remaining_ms,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// Yields `0` when the system clock reports a time before the epoch.
fn now_epoch_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
|
||||
299
src/api/runtime_selftest.rs
Normal file
299
src/api/runtime_selftest.rs
Normal file
@@ -0,0 +1,299 @@
|
||||
use std::net::IpAddr;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Mutex, OnceLock};
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::config::{ProxyConfig, UpstreamType};
|
||||
use crate::network::probe::{detect_interface_ipv4, detect_interface_ipv6, is_bogon};
|
||||
use crate::transport::middle_proxy::{bnd_snapshot, timeskew_snapshot, upstream_bnd_snapshots};
|
||||
use crate::transport::UpstreamRouteKind;
|
||||
|
||||
use super::ApiShared;
|
||||
|
||||
/// Reason string reported when the self-test has no pool to inspect.
const SOURCE_UNAVAILABLE_REASON: &str = "source_unavailable";
/// Time constant, in seconds, of the exponential moving average of KDF errors.
const KDF_EWMA_TAU_SECS: f64 = 600.0;
/// KDF error rate (errors per minute) at or above which the state is "error".
const KDF_EWMA_THRESHOLD_ERRORS_PER_MIN: f64 = 0.30;
/// Maximum skew, in seconds, above which the time-skew state is "error".
const TIMESKEW_THRESHOLD_SECS: u64 = 60;
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeSelftestKdfData {
|
||||
pub(super) state: &'static str,
|
||||
pub(super) ewma_errors_per_min: f64,
|
||||
pub(super) threshold_errors_per_min: f64,
|
||||
pub(super) errors_total: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeSelftestTimeskewData {
|
||||
pub(super) state: &'static str,
|
||||
pub(super) max_skew_secs_15m: Option<u64>,
|
||||
pub(super) samples_15m: usize,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) last_skew_secs: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) last_source: Option<&'static str>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) last_seen_age_secs: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeSelftestIpFamilyData {
|
||||
pub(super) addr: String,
|
||||
pub(super) state: &'static str,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeSelftestIpData {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) v4: Option<RuntimeMeSelftestIpFamilyData>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) v6: Option<RuntimeMeSelftestIpFamilyData>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeSelftestPidData {
|
||||
pub(super) pid: u32,
|
||||
pub(super) state: &'static str,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeSelftestBndData {
|
||||
pub(super) addr_state: &'static str,
|
||||
pub(super) port_state: &'static str,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) last_addr: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) last_seen_age_secs: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeSelftestUpstreamData {
|
||||
pub(super) upstream_id: usize,
|
||||
pub(super) route_kind: &'static str,
|
||||
pub(super) address: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) bnd: Option<RuntimeMeSelftestBndData>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) ip: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeSelftestPayload {
|
||||
pub(super) kdf: RuntimeMeSelftestKdfData,
|
||||
pub(super) timeskew: RuntimeMeSelftestTimeskewData,
|
||||
pub(super) ip: RuntimeMeSelftestIpData,
|
||||
pub(super) pid: RuntimeMeSelftestPidData,
|
||||
pub(super) bnd: Option<RuntimeMeSelftestBndData>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) upstreams: Option<Vec<RuntimeMeSelftestUpstreamData>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeMeSelftestData {
|
||||
pub(super) enabled: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) reason: Option<&'static str>,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) data: Option<RuntimeMeSelftestPayload>,
|
||||
}
|
||||
|
||||
/// Running state of the KDF error-rate moving average.
#[derive(Default)]
struct KdfEwmaState {
    /// `false` until the first sample has been recorded.
    initialized: bool,
    /// Timestamp (Unix seconds) of the last accepted sample.
    last_epoch_secs: u64,
    /// Cumulative error count seen at the last accepted sample.
    last_total_errors: u64,
    /// Current smoothed rate, in errors per minute.
    ewma_errors_per_min: f64,
}

/// Process-wide holder of the moving-average state, created on first use.
static KDF_EWMA_STATE: OnceLock<Mutex<KdfEwmaState>> = OnceLock::new();

/// Returns the shared moving-average state, initializing it with default
/// values on the first call.
fn kdf_ewma_state() -> &'static Mutex<KdfEwmaState> {
    KDF_EWMA_STATE.get_or_init(Mutex::default)
}
|
||||
|
||||
pub(super) async fn build_runtime_me_selftest_data(
|
||||
shared: &ApiShared,
|
||||
cfg: &ProxyConfig,
|
||||
) -> RuntimeMeSelftestData {
|
||||
let now_epoch_secs = now_epoch_secs();
|
||||
if shared.me_pool.read().await.is_none() {
|
||||
return RuntimeMeSelftestData {
|
||||
enabled: false,
|
||||
reason: Some(SOURCE_UNAVAILABLE_REASON),
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
data: None,
|
||||
};
|
||||
}
|
||||
|
||||
let kdf_errors_total = shared
|
||||
.stats
|
||||
.get_me_kdf_drift_total()
|
||||
.saturating_add(shared.stats.get_me_socks_kdf_strict_reject());
|
||||
let kdf_ewma = update_kdf_ewma(now_epoch_secs, kdf_errors_total);
|
||||
let kdf_state = if kdf_ewma >= KDF_EWMA_THRESHOLD_ERRORS_PER_MIN {
|
||||
"error"
|
||||
} else {
|
||||
"ok"
|
||||
};
|
||||
|
||||
let skew = timeskew_snapshot();
|
||||
let timeskew_state = if skew.max_skew_secs_15m.unwrap_or(0) > TIMESKEW_THRESHOLD_SECS {
|
||||
"error"
|
||||
} else {
|
||||
"ok"
|
||||
};
|
||||
|
||||
let ip_v4 = detect_interface_ipv4().map(|ip| RuntimeMeSelftestIpFamilyData {
|
||||
addr: ip.to_string(),
|
||||
state: classify_ip(IpAddr::V4(ip)),
|
||||
});
|
||||
let ip_v6 = detect_interface_ipv6().map(|ip| RuntimeMeSelftestIpFamilyData {
|
||||
addr: ip.to_string(),
|
||||
state: classify_ip(IpAddr::V6(ip)),
|
||||
});
|
||||
|
||||
let pid = std::process::id();
|
||||
let pid_state = if pid == 1 { "one" } else { "non-one" };
|
||||
|
||||
let has_socks_upstreams = cfg.upstreams.iter().any(|upstream| {
|
||||
upstream.enabled
|
||||
&& matches!(
|
||||
upstream.upstream_type,
|
||||
UpstreamType::Socks4 { .. } | UpstreamType::Socks5 { .. }
|
||||
)
|
||||
});
|
||||
|
||||
let bnd = if has_socks_upstreams {
|
||||
let snapshot = bnd_snapshot();
|
||||
Some(RuntimeMeSelftestBndData {
|
||||
addr_state: snapshot.addr_status,
|
||||
port_state: snapshot.port_status,
|
||||
last_addr: snapshot.last_addr.map(|value| value.to_string()),
|
||||
last_seen_age_secs: snapshot.last_seen_age_secs,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let upstreams = build_upstream_selftest_data(shared);
|
||||
|
||||
RuntimeMeSelftestData {
|
||||
enabled: true,
|
||||
reason: None,
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
data: Some(RuntimeMeSelftestPayload {
|
||||
kdf: RuntimeMeSelftestKdfData {
|
||||
state: kdf_state,
|
||||
ewma_errors_per_min: round3(kdf_ewma),
|
||||
threshold_errors_per_min: KDF_EWMA_THRESHOLD_ERRORS_PER_MIN,
|
||||
errors_total: kdf_errors_total,
|
||||
},
|
||||
timeskew: RuntimeMeSelftestTimeskewData {
|
||||
state: timeskew_state,
|
||||
max_skew_secs_15m: skew.max_skew_secs_15m,
|
||||
samples_15m: skew.samples_15m,
|
||||
last_skew_secs: skew.last_skew_secs,
|
||||
last_source: skew.last_source,
|
||||
last_seen_age_secs: skew.last_seen_age_secs,
|
||||
},
|
||||
ip: RuntimeMeSelftestIpData {
|
||||
v4: ip_v4,
|
||||
v6: ip_v6,
|
||||
},
|
||||
pid: RuntimeMeSelftestPidData {
|
||||
pid,
|
||||
state: pid_state,
|
||||
},
|
||||
bnd,
|
||||
upstreams,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_upstream_selftest_data(shared: &ApiShared) -> Option<Vec<RuntimeMeSelftestUpstreamData>> {
|
||||
let snapshot = shared.upstream_manager.try_api_snapshot()?;
|
||||
if snapshot.summary.configured_total <= 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut upstream_bnd_by_id: HashMap<usize, _> = upstream_bnd_snapshots()
|
||||
.into_iter()
|
||||
.map(|entry| (entry.upstream_id, entry))
|
||||
.collect();
|
||||
let mut rows = Vec::with_capacity(snapshot.upstreams.len());
|
||||
for upstream in snapshot.upstreams {
|
||||
let upstream_bnd = upstream_bnd_by_id.remove(&upstream.upstream_id);
|
||||
rows.push(RuntimeMeSelftestUpstreamData {
|
||||
upstream_id: upstream.upstream_id,
|
||||
route_kind: map_route_kind(upstream.route_kind),
|
||||
address: upstream.address,
|
||||
bnd: upstream_bnd.as_ref().map(|entry| RuntimeMeSelftestBndData {
|
||||
addr_state: entry.addr_status,
|
||||
port_state: entry.port_status,
|
||||
last_addr: entry.last_addr.map(|value| value.to_string()),
|
||||
last_seen_age_secs: entry.last_seen_age_secs,
|
||||
}),
|
||||
ip: upstream_bnd.and_then(|entry| entry.last_ip.map(|value| value.to_string())),
|
||||
});
|
||||
}
|
||||
Some(rows)
|
||||
}
|
||||
|
||||
fn update_kdf_ewma(now_epoch_secs: u64, total_errors: u64) -> f64 {
|
||||
let Ok(mut guard) = kdf_ewma_state().lock() else {
|
||||
return 0.0;
|
||||
};
|
||||
|
||||
if !guard.initialized {
|
||||
guard.initialized = true;
|
||||
guard.last_epoch_secs = now_epoch_secs;
|
||||
guard.last_total_errors = total_errors;
|
||||
guard.ewma_errors_per_min = 0.0;
|
||||
return guard.ewma_errors_per_min;
|
||||
}
|
||||
|
||||
let dt_secs = now_epoch_secs.saturating_sub(guard.last_epoch_secs);
|
||||
if dt_secs == 0 {
|
||||
return guard.ewma_errors_per_min;
|
||||
}
|
||||
|
||||
let delta_errors = total_errors.saturating_sub(guard.last_total_errors);
|
||||
let instant_rate_per_min = (delta_errors as f64) * 60.0 / (dt_secs as f64);
|
||||
let alpha = 1.0 - f64::exp(-(dt_secs as f64) / KDF_EWMA_TAU_SECS);
|
||||
guard.ewma_errors_per_min = guard.ewma_errors_per_min
|
||||
+ alpha * (instant_rate_per_min - guard.ewma_errors_per_min);
|
||||
guard.last_epoch_secs = now_epoch_secs;
|
||||
guard.last_total_errors = total_errors;
|
||||
guard.ewma_errors_per_min
|
||||
}
|
||||
|
||||
fn classify_ip(ip: IpAddr) -> &'static str {
|
||||
if ip.is_loopback() {
|
||||
return "loopback";
|
||||
}
|
||||
if is_bogon(ip) {
|
||||
return "bogon";
|
||||
}
|
||||
"good"
|
||||
}
|
||||
|
||||
fn map_route_kind(value: UpstreamRouteKind) -> &'static str {
|
||||
match value {
|
||||
UpstreamRouteKind::Direct => "direct",
|
||||
UpstreamRouteKind::Socks4 => "socks4",
|
||||
UpstreamRouteKind::Socks5 => "socks5",
|
||||
}
|
||||
}
|
||||
|
||||
/// Rounds `value` to three decimal places.
fn round3(value: f64) -> f64 {
    const SCALE: f64 = 1000.0;
    let scaled = value * SCALE;
    scaled.round() / SCALE
}
|
||||
|
||||
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// Yields `0` when the system clock reports a time before the epoch.
fn now_epoch_secs() -> u64 {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
    since_epoch.map_or(0, |elapsed| elapsed.as_secs())
}
|
||||
526
src/api/runtime_stats.rs
Normal file
526
src/api/runtime_stats.rs
Normal file
@@ -0,0 +1,526 @@
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use crate::config::ApiConfig;
|
||||
use crate::stats::Stats;
|
||||
use crate::transport::upstream::IpPreference;
|
||||
use crate::transport::UpstreamRouteKind;
|
||||
|
||||
use super::ApiShared;
|
||||
use super::model::{
|
||||
DcEndpointWriters, DcStatus, DcStatusData, MeWriterStatus, MeWritersData, MeWritersSummary,
|
||||
MinimalAllData, MinimalAllPayload, MinimalDcPathData, MinimalMeRuntimeData,
|
||||
MinimalQuarantineData, UpstreamDcStatus, UpstreamStatus, UpstreamSummaryData, UpstreamsData,
|
||||
ZeroAllData, ZeroCodeCount, ZeroCoreData, ZeroDesyncData, ZeroMiddleProxyData, ZeroPoolData,
|
||||
ZeroUpstreamData,
|
||||
};
|
||||
|
||||
/// Reason string reported when the minimal runtime API is switched off.
const FEATURE_DISABLED_REASON: &str = "feature_disabled";
/// Reason string reported when the underlying data source yields nothing.
const SOURCE_UNAVAILABLE_REASON: &str = "source_unavailable";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct MinimalCacheEntry {
|
||||
pub(super) expires_at: Instant,
|
||||
pub(super) payload: MinimalAllPayload,
|
||||
pub(super) generated_at_epoch_secs: u64,
|
||||
}
|
||||
|
||||
pub(super) fn build_zero_all_data(stats: &Stats, configured_users: usize) -> ZeroAllData {
|
||||
let telemetry = stats.telemetry_policy();
|
||||
let handshake_error_codes = stats
|
||||
.get_me_handshake_error_code_counts()
|
||||
.into_iter()
|
||||
.map(|(code, total)| ZeroCodeCount { code, total })
|
||||
.collect();
|
||||
|
||||
ZeroAllData {
|
||||
generated_at_epoch_secs: now_epoch_secs(),
|
||||
core: ZeroCoreData {
|
||||
uptime_seconds: stats.uptime_secs(),
|
||||
connections_total: stats.get_connects_all(),
|
||||
connections_bad_total: stats.get_connects_bad(),
|
||||
handshake_timeouts_total: stats.get_handshake_timeouts(),
|
||||
configured_users,
|
||||
telemetry_core_enabled: telemetry.core_enabled,
|
||||
telemetry_user_enabled: telemetry.user_enabled,
|
||||
telemetry_me_level: telemetry.me_level.to_string(),
|
||||
},
|
||||
upstream: build_zero_upstream_data(stats),
|
||||
middle_proxy: ZeroMiddleProxyData {
|
||||
keepalive_sent_total: stats.get_me_keepalive_sent(),
|
||||
keepalive_failed_total: stats.get_me_keepalive_failed(),
|
||||
keepalive_pong_total: stats.get_me_keepalive_pong(),
|
||||
keepalive_timeout_total: stats.get_me_keepalive_timeout(),
|
||||
rpc_proxy_req_signal_sent_total: stats.get_me_rpc_proxy_req_signal_sent_total(),
|
||||
rpc_proxy_req_signal_failed_total: stats.get_me_rpc_proxy_req_signal_failed_total(),
|
||||
rpc_proxy_req_signal_skipped_no_meta_total: stats
|
||||
.get_me_rpc_proxy_req_signal_skipped_no_meta_total(),
|
||||
rpc_proxy_req_signal_response_total: stats.get_me_rpc_proxy_req_signal_response_total(),
|
||||
rpc_proxy_req_signal_close_sent_total: stats
|
||||
.get_me_rpc_proxy_req_signal_close_sent_total(),
|
||||
reconnect_attempt_total: stats.get_me_reconnect_attempts(),
|
||||
reconnect_success_total: stats.get_me_reconnect_success(),
|
||||
handshake_reject_total: stats.get_me_handshake_reject_total(),
|
||||
handshake_error_codes,
|
||||
reader_eof_total: stats.get_me_reader_eof_total(),
|
||||
idle_close_by_peer_total: stats.get_me_idle_close_by_peer_total(),
|
||||
route_drop_no_conn_total: stats.get_me_route_drop_no_conn(),
|
||||
route_drop_channel_closed_total: stats.get_me_route_drop_channel_closed(),
|
||||
route_drop_queue_full_total: stats.get_me_route_drop_queue_full(),
|
||||
route_drop_queue_full_base_total: stats.get_me_route_drop_queue_full_base(),
|
||||
route_drop_queue_full_high_total: stats.get_me_route_drop_queue_full_high(),
|
||||
socks_kdf_strict_reject_total: stats.get_me_socks_kdf_strict_reject(),
|
||||
socks_kdf_compat_fallback_total: stats.get_me_socks_kdf_compat_fallback(),
|
||||
endpoint_quarantine_total: stats.get_me_endpoint_quarantine_total(),
|
||||
kdf_drift_total: stats.get_me_kdf_drift_total(),
|
||||
kdf_port_only_drift_total: stats.get_me_kdf_port_only_drift_total(),
|
||||
hardswap_pending_reuse_total: stats.get_me_hardswap_pending_reuse_total(),
|
||||
hardswap_pending_ttl_expired_total: stats.get_me_hardswap_pending_ttl_expired_total(),
|
||||
single_endpoint_outage_enter_total: stats.get_me_single_endpoint_outage_enter_total(),
|
||||
single_endpoint_outage_exit_total: stats.get_me_single_endpoint_outage_exit_total(),
|
||||
single_endpoint_outage_reconnect_attempt_total: stats
|
||||
.get_me_single_endpoint_outage_reconnect_attempt_total(),
|
||||
single_endpoint_outage_reconnect_success_total: stats
|
||||
.get_me_single_endpoint_outage_reconnect_success_total(),
|
||||
single_endpoint_quarantine_bypass_total: stats
|
||||
.get_me_single_endpoint_quarantine_bypass_total(),
|
||||
single_endpoint_shadow_rotate_total: stats.get_me_single_endpoint_shadow_rotate_total(),
|
||||
single_endpoint_shadow_rotate_skipped_quarantine_total: stats
|
||||
.get_me_single_endpoint_shadow_rotate_skipped_quarantine_total(),
|
||||
floor_mode_switch_total: stats.get_me_floor_mode_switch_total(),
|
||||
floor_mode_switch_static_to_adaptive_total: stats
|
||||
.get_me_floor_mode_switch_static_to_adaptive_total(),
|
||||
floor_mode_switch_adaptive_to_static_total: stats
|
||||
.get_me_floor_mode_switch_adaptive_to_static_total(),
|
||||
},
|
||||
pool: ZeroPoolData {
|
||||
pool_swap_total: stats.get_pool_swap_total(),
|
||||
pool_drain_active: stats.get_pool_drain_active(),
|
||||
pool_force_close_total: stats.get_pool_force_close_total(),
|
||||
pool_stale_pick_total: stats.get_pool_stale_pick_total(),
|
||||
writer_removed_total: stats.get_me_writer_removed_total(),
|
||||
writer_removed_unexpected_total: stats.get_me_writer_removed_unexpected_total(),
|
||||
refill_triggered_total: stats.get_me_refill_triggered_total(),
|
||||
refill_skipped_inflight_total: stats.get_me_refill_skipped_inflight_total(),
|
||||
refill_failed_total: stats.get_me_refill_failed_total(),
|
||||
writer_restored_same_endpoint_total: stats.get_me_writer_restored_same_endpoint_total(),
|
||||
writer_restored_fallback_total: stats.get_me_writer_restored_fallback_total(),
|
||||
},
|
||||
desync: ZeroDesyncData {
|
||||
secure_padding_invalid_total: stats.get_secure_padding_invalid(),
|
||||
desync_total: stats.get_desync_total(),
|
||||
desync_full_logged_total: stats.get_desync_full_logged(),
|
||||
desync_suppressed_total: stats.get_desync_suppressed(),
|
||||
desync_frames_bucket_0: stats.get_desync_frames_bucket_0(),
|
||||
desync_frames_bucket_1_2: stats.get_desync_frames_bucket_1_2(),
|
||||
desync_frames_bucket_3_10: stats.get_desync_frames_bucket_3_10(),
|
||||
desync_frames_bucket_gt_10: stats.get_desync_frames_bucket_gt_10(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn build_zero_upstream_data(stats: &Stats) -> ZeroUpstreamData {
|
||||
ZeroUpstreamData {
|
||||
connect_attempt_total: stats.get_upstream_connect_attempt_total(),
|
||||
connect_success_total: stats.get_upstream_connect_success_total(),
|
||||
connect_fail_total: stats.get_upstream_connect_fail_total(),
|
||||
connect_failfast_hard_error_total: stats.get_upstream_connect_failfast_hard_error_total(),
|
||||
connect_attempts_bucket_1: stats.get_upstream_connect_attempts_bucket_1(),
|
||||
connect_attempts_bucket_2: stats.get_upstream_connect_attempts_bucket_2(),
|
||||
connect_attempts_bucket_3_4: stats.get_upstream_connect_attempts_bucket_3_4(),
|
||||
connect_attempts_bucket_gt_4: stats.get_upstream_connect_attempts_bucket_gt_4(),
|
||||
connect_duration_success_bucket_le_100ms: stats
|
||||
.get_upstream_connect_duration_success_bucket_le_100ms(),
|
||||
connect_duration_success_bucket_101_500ms: stats
|
||||
.get_upstream_connect_duration_success_bucket_101_500ms(),
|
||||
connect_duration_success_bucket_501_1000ms: stats
|
||||
.get_upstream_connect_duration_success_bucket_501_1000ms(),
|
||||
connect_duration_success_bucket_gt_1000ms: stats
|
||||
.get_upstream_connect_duration_success_bucket_gt_1000ms(),
|
||||
connect_duration_fail_bucket_le_100ms: stats.get_upstream_connect_duration_fail_bucket_le_100ms(),
|
||||
connect_duration_fail_bucket_101_500ms: stats
|
||||
.get_upstream_connect_duration_fail_bucket_101_500ms(),
|
||||
connect_duration_fail_bucket_501_1000ms: stats
|
||||
.get_upstream_connect_duration_fail_bucket_501_1000ms(),
|
||||
connect_duration_fail_bucket_gt_1000ms: stats
|
||||
.get_upstream_connect_duration_fail_bucket_gt_1000ms(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn build_upstreams_data(shared: &ApiShared, api_cfg: &ApiConfig) -> UpstreamsData {
|
||||
let generated_at_epoch_secs = now_epoch_secs();
|
||||
let zero = build_zero_upstream_data(&shared.stats);
|
||||
if !api_cfg.minimal_runtime_enabled {
|
||||
return UpstreamsData {
|
||||
enabled: false,
|
||||
reason: Some(FEATURE_DISABLED_REASON),
|
||||
generated_at_epoch_secs,
|
||||
zero,
|
||||
summary: None,
|
||||
upstreams: None,
|
||||
};
|
||||
}
|
||||
|
||||
let Some(snapshot) = shared.upstream_manager.try_api_snapshot() else {
|
||||
return UpstreamsData {
|
||||
enabled: true,
|
||||
reason: Some(SOURCE_UNAVAILABLE_REASON),
|
||||
generated_at_epoch_secs,
|
||||
zero,
|
||||
summary: None,
|
||||
upstreams: None,
|
||||
};
|
||||
};
|
||||
|
||||
let summary = UpstreamSummaryData {
|
||||
configured_total: snapshot.summary.configured_total,
|
||||
healthy_total: snapshot.summary.healthy_total,
|
||||
unhealthy_total: snapshot.summary.unhealthy_total,
|
||||
direct_total: snapshot.summary.direct_total,
|
||||
socks4_total: snapshot.summary.socks4_total,
|
||||
socks5_total: snapshot.summary.socks5_total,
|
||||
};
|
||||
let upstreams = snapshot
|
||||
.upstreams
|
||||
.into_iter()
|
||||
.map(|upstream| UpstreamStatus {
|
||||
upstream_id: upstream.upstream_id,
|
||||
route_kind: map_route_kind(upstream.route_kind),
|
||||
address: upstream.address,
|
||||
weight: upstream.weight,
|
||||
scopes: upstream.scopes,
|
||||
healthy: upstream.healthy,
|
||||
fails: upstream.fails,
|
||||
last_check_age_secs: upstream.last_check_age_secs,
|
||||
effective_latency_ms: upstream.effective_latency_ms,
|
||||
dc: upstream
|
||||
.dc
|
||||
.into_iter()
|
||||
.map(|dc| UpstreamDcStatus {
|
||||
dc: dc.dc,
|
||||
latency_ema_ms: dc.latency_ema_ms,
|
||||
ip_preference: map_ip_preference(dc.ip_preference),
|
||||
})
|
||||
.collect(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
UpstreamsData {
|
||||
enabled: true,
|
||||
reason: None,
|
||||
generated_at_epoch_secs,
|
||||
zero,
|
||||
summary: Some(summary),
|
||||
upstreams: Some(upstreams),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn build_minimal_all_data(
|
||||
shared: &ApiShared,
|
||||
api_cfg: &ApiConfig,
|
||||
) -> MinimalAllData {
|
||||
let now = now_epoch_secs();
|
||||
if !api_cfg.minimal_runtime_enabled {
|
||||
return MinimalAllData {
|
||||
enabled: false,
|
||||
reason: Some(FEATURE_DISABLED_REASON),
|
||||
generated_at_epoch_secs: now,
|
||||
data: None,
|
||||
};
|
||||
}
|
||||
|
||||
let Some((generated_at_epoch_secs, payload)) =
|
||||
get_minimal_payload_cached(shared, api_cfg.minimal_runtime_cache_ttl_ms).await
|
||||
else {
|
||||
return MinimalAllData {
|
||||
enabled: true,
|
||||
reason: Some(SOURCE_UNAVAILABLE_REASON),
|
||||
generated_at_epoch_secs: now,
|
||||
data: Some(MinimalAllPayload {
|
||||
me_writers: disabled_me_writers(now, SOURCE_UNAVAILABLE_REASON),
|
||||
dcs: disabled_dcs(now, SOURCE_UNAVAILABLE_REASON),
|
||||
me_runtime: None,
|
||||
network_path: Vec::new(),
|
||||
}),
|
||||
};
|
||||
};
|
||||
|
||||
MinimalAllData {
|
||||
enabled: true,
|
||||
reason: None,
|
||||
generated_at_epoch_secs,
|
||||
data: Some(payload),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn build_me_writers_data(
|
||||
shared: &ApiShared,
|
||||
api_cfg: &ApiConfig,
|
||||
) -> MeWritersData {
|
||||
let now = now_epoch_secs();
|
||||
if !api_cfg.minimal_runtime_enabled {
|
||||
return disabled_me_writers(now, FEATURE_DISABLED_REASON);
|
||||
}
|
||||
|
||||
let Some((_, payload)) =
|
||||
get_minimal_payload_cached(shared, api_cfg.minimal_runtime_cache_ttl_ms).await
|
||||
else {
|
||||
return disabled_me_writers(now, SOURCE_UNAVAILABLE_REASON);
|
||||
};
|
||||
payload.me_writers
|
||||
}
|
||||
|
||||
pub(super) async fn build_dcs_data(shared: &ApiShared, api_cfg: &ApiConfig) -> DcStatusData {
|
||||
let now = now_epoch_secs();
|
||||
if !api_cfg.minimal_runtime_enabled {
|
||||
return disabled_dcs(now, FEATURE_DISABLED_REASON);
|
||||
}
|
||||
|
||||
let Some((_, payload)) =
|
||||
get_minimal_payload_cached(shared, api_cfg.minimal_runtime_cache_ttl_ms).await
|
||||
else {
|
||||
return disabled_dcs(now, SOURCE_UNAVAILABLE_REASON);
|
||||
};
|
||||
payload.dcs
|
||||
}
|
||||
|
||||
async fn get_minimal_payload_cached(
|
||||
shared: &ApiShared,
|
||||
cache_ttl_ms: u64,
|
||||
) -> Option<(u64, MinimalAllPayload)> {
|
||||
if cache_ttl_ms > 0 {
|
||||
let now = Instant::now();
|
||||
let cached = shared.minimal_cache.lock().await.clone();
|
||||
if let Some(entry) = cached
|
||||
&& now < entry.expires_at
|
||||
{
|
||||
return Some((entry.generated_at_epoch_secs, entry.payload));
|
||||
}
|
||||
}
|
||||
|
||||
let pool = shared.me_pool.read().await.clone()?;
|
||||
let status = pool.api_status_snapshot().await;
|
||||
let runtime = pool.api_runtime_snapshot().await;
|
||||
let generated_at_epoch_secs = status.generated_at_epoch_secs;
|
||||
|
||||
let me_writers = MeWritersData {
|
||||
middle_proxy_enabled: true,
|
||||
reason: None,
|
||||
generated_at_epoch_secs,
|
||||
summary: MeWritersSummary {
|
||||
configured_dc_groups: status.configured_dc_groups,
|
||||
configured_endpoints: status.configured_endpoints,
|
||||
available_endpoints: status.available_endpoints,
|
||||
available_pct: status.available_pct,
|
||||
required_writers: status.required_writers,
|
||||
alive_writers: status.alive_writers,
|
||||
coverage_pct: status.coverage_pct,
|
||||
},
|
||||
writers: status
|
||||
.writers
|
||||
.into_iter()
|
||||
.map(|entry| MeWriterStatus {
|
||||
writer_id: entry.writer_id,
|
||||
dc: entry.dc,
|
||||
endpoint: entry.endpoint.to_string(),
|
||||
generation: entry.generation,
|
||||
state: entry.state,
|
||||
draining: entry.draining,
|
||||
degraded: entry.degraded,
|
||||
bound_clients: entry.bound_clients,
|
||||
idle_for_secs: entry.idle_for_secs,
|
||||
rtt_ema_ms: entry.rtt_ema_ms,
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
let dcs = DcStatusData {
|
||||
middle_proxy_enabled: true,
|
||||
reason: None,
|
||||
generated_at_epoch_secs,
|
||||
dcs: status
|
||||
.dcs
|
||||
.into_iter()
|
||||
.map(|entry| DcStatus {
|
||||
dc: entry.dc,
|
||||
endpoints: entry
|
||||
.endpoints
|
||||
.into_iter()
|
||||
.map(|value| value.to_string())
|
||||
.collect(),
|
||||
endpoint_writers: entry
|
||||
.endpoint_writers
|
||||
.into_iter()
|
||||
.map(|coverage| DcEndpointWriters {
|
||||
endpoint: coverage.endpoint.to_string(),
|
||||
active_writers: coverage.active_writers,
|
||||
})
|
||||
.collect(),
|
||||
available_endpoints: entry.available_endpoints,
|
||||
available_pct: entry.available_pct,
|
||||
required_writers: entry.required_writers,
|
||||
floor_min: entry.floor_min,
|
||||
floor_target: entry.floor_target,
|
||||
floor_max: entry.floor_max,
|
||||
floor_capped: entry.floor_capped,
|
||||
alive_writers: entry.alive_writers,
|
||||
coverage_pct: entry.coverage_pct,
|
||||
rtt_ms: entry.rtt_ms,
|
||||
load: entry.load,
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
let me_runtime = MinimalMeRuntimeData {
|
||||
active_generation: runtime.active_generation,
|
||||
warm_generation: runtime.warm_generation,
|
||||
pending_hardswap_generation: runtime.pending_hardswap_generation,
|
||||
pending_hardswap_age_secs: runtime.pending_hardswap_age_secs,
|
||||
hardswap_enabled: runtime.hardswap_enabled,
|
||||
floor_mode: runtime.floor_mode,
|
||||
adaptive_floor_idle_secs: runtime.adaptive_floor_idle_secs,
|
||||
adaptive_floor_min_writers_single_endpoint: runtime
|
||||
.adaptive_floor_min_writers_single_endpoint,
|
||||
adaptive_floor_min_writers_multi_endpoint: runtime
|
||||
.adaptive_floor_min_writers_multi_endpoint,
|
||||
adaptive_floor_recover_grace_secs: runtime.adaptive_floor_recover_grace_secs,
|
||||
adaptive_floor_writers_per_core_total: runtime
|
||||
.adaptive_floor_writers_per_core_total,
|
||||
adaptive_floor_cpu_cores_override: runtime.adaptive_floor_cpu_cores_override,
|
||||
adaptive_floor_max_extra_writers_single_per_core: runtime
|
||||
.adaptive_floor_max_extra_writers_single_per_core,
|
||||
adaptive_floor_max_extra_writers_multi_per_core: runtime
|
||||
.adaptive_floor_max_extra_writers_multi_per_core,
|
||||
adaptive_floor_max_active_writers_per_core: runtime
|
||||
.adaptive_floor_max_active_writers_per_core,
|
||||
adaptive_floor_max_warm_writers_per_core: runtime
|
||||
.adaptive_floor_max_warm_writers_per_core,
|
||||
adaptive_floor_max_active_writers_global: runtime
|
||||
.adaptive_floor_max_active_writers_global,
|
||||
adaptive_floor_max_warm_writers_global: runtime
|
||||
.adaptive_floor_max_warm_writers_global,
|
||||
adaptive_floor_cpu_cores_detected: runtime.adaptive_floor_cpu_cores_detected,
|
||||
adaptive_floor_cpu_cores_effective: runtime.adaptive_floor_cpu_cores_effective,
|
||||
adaptive_floor_global_cap_raw: runtime.adaptive_floor_global_cap_raw,
|
||||
adaptive_floor_global_cap_effective: runtime.adaptive_floor_global_cap_effective,
|
||||
adaptive_floor_target_writers_total: runtime.adaptive_floor_target_writers_total,
|
||||
adaptive_floor_active_cap_configured: runtime.adaptive_floor_active_cap_configured,
|
||||
adaptive_floor_active_cap_effective: runtime.adaptive_floor_active_cap_effective,
|
||||
adaptive_floor_warm_cap_configured: runtime.adaptive_floor_warm_cap_configured,
|
||||
adaptive_floor_warm_cap_effective: runtime.adaptive_floor_warm_cap_effective,
|
||||
adaptive_floor_active_writers_current: runtime.adaptive_floor_active_writers_current,
|
||||
adaptive_floor_warm_writers_current: runtime.adaptive_floor_warm_writers_current,
|
||||
me_keepalive_enabled: runtime.me_keepalive_enabled,
|
||||
me_keepalive_interval_secs: runtime.me_keepalive_interval_secs,
|
||||
me_keepalive_jitter_secs: runtime.me_keepalive_jitter_secs,
|
||||
me_keepalive_payload_random: runtime.me_keepalive_payload_random,
|
||||
rpc_proxy_req_every_secs: runtime.rpc_proxy_req_every_secs,
|
||||
me_reconnect_max_concurrent_per_dc: runtime.me_reconnect_max_concurrent_per_dc,
|
||||
me_reconnect_backoff_base_ms: runtime.me_reconnect_backoff_base_ms,
|
||||
me_reconnect_backoff_cap_ms: runtime.me_reconnect_backoff_cap_ms,
|
||||
me_reconnect_fast_retry_count: runtime.me_reconnect_fast_retry_count,
|
||||
me_pool_drain_ttl_secs: runtime.me_pool_drain_ttl_secs,
|
||||
me_pool_force_close_secs: runtime.me_pool_force_close_secs,
|
||||
me_pool_min_fresh_ratio: runtime.me_pool_min_fresh_ratio,
|
||||
me_bind_stale_mode: runtime.me_bind_stale_mode,
|
||||
me_bind_stale_ttl_secs: runtime.me_bind_stale_ttl_secs,
|
||||
me_single_endpoint_shadow_writers: runtime.me_single_endpoint_shadow_writers,
|
||||
me_single_endpoint_outage_mode_enabled: runtime.me_single_endpoint_outage_mode_enabled,
|
||||
me_single_endpoint_outage_disable_quarantine: runtime
|
||||
.me_single_endpoint_outage_disable_quarantine,
|
||||
me_single_endpoint_outage_backoff_min_ms: runtime.me_single_endpoint_outage_backoff_min_ms,
|
||||
me_single_endpoint_outage_backoff_max_ms: runtime.me_single_endpoint_outage_backoff_max_ms,
|
||||
me_single_endpoint_shadow_rotate_every_secs: runtime
|
||||
.me_single_endpoint_shadow_rotate_every_secs,
|
||||
me_deterministic_writer_sort: runtime.me_deterministic_writer_sort,
|
||||
me_writer_pick_mode: runtime.me_writer_pick_mode,
|
||||
me_writer_pick_sample_size: runtime.me_writer_pick_sample_size,
|
||||
me_socks_kdf_policy: runtime.me_socks_kdf_policy,
|
||||
quarantined_endpoints_total: runtime.quarantined_endpoints.len(),
|
||||
quarantined_endpoints: runtime
|
||||
.quarantined_endpoints
|
||||
.into_iter()
|
||||
.map(|entry| MinimalQuarantineData {
|
||||
endpoint: entry.endpoint.to_string(),
|
||||
remaining_ms: entry.remaining_ms,
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
let network_path = runtime
|
||||
.network_path
|
||||
.into_iter()
|
||||
.map(|entry| MinimalDcPathData {
|
||||
dc: entry.dc,
|
||||
ip_preference: entry.ip_preference,
|
||||
selected_addr_v4: entry.selected_addr_v4.map(|value| value.to_string()),
|
||||
selected_addr_v6: entry.selected_addr_v6.map(|value| value.to_string()),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let payload = MinimalAllPayload {
|
||||
me_writers,
|
||||
dcs,
|
||||
me_runtime: Some(me_runtime),
|
||||
network_path,
|
||||
};
|
||||
|
||||
if cache_ttl_ms > 0 {
|
||||
let entry = MinimalCacheEntry {
|
||||
expires_at: Instant::now() + Duration::from_millis(cache_ttl_ms),
|
||||
payload: payload.clone(),
|
||||
generated_at_epoch_secs,
|
||||
};
|
||||
*shared.minimal_cache.lock().await = Some(entry);
|
||||
}
|
||||
|
||||
Some((generated_at_epoch_secs, payload))
|
||||
}
|
||||
|
||||
fn disabled_me_writers(now_epoch_secs: u64, reason: &'static str) -> MeWritersData {
|
||||
MeWritersData {
|
||||
middle_proxy_enabled: false,
|
||||
reason: Some(reason),
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
summary: MeWritersSummary {
|
||||
configured_dc_groups: 0,
|
||||
configured_endpoints: 0,
|
||||
available_endpoints: 0,
|
||||
available_pct: 0.0,
|
||||
required_writers: 0,
|
||||
alive_writers: 0,
|
||||
coverage_pct: 0.0,
|
||||
},
|
||||
writers: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn disabled_dcs(now_epoch_secs: u64, reason: &'static str) -> DcStatusData {
|
||||
DcStatusData {
|
||||
middle_proxy_enabled: false,
|
||||
reason: Some(reason),
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
dcs: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn map_route_kind(value: UpstreamRouteKind) -> &'static str {
|
||||
match value {
|
||||
UpstreamRouteKind::Direct => "direct",
|
||||
UpstreamRouteKind::Socks4 => "socks4",
|
||||
UpstreamRouteKind::Socks5 => "socks5",
|
||||
}
|
||||
}
|
||||
|
||||
fn map_ip_preference(value: IpPreference) -> &'static str {
|
||||
match value {
|
||||
IpPreference::Unknown => "unknown",
|
||||
IpPreference::PreferV6 => "prefer_v6",
|
||||
IpPreference::PreferV4 => "prefer_v4",
|
||||
IpPreference::BothWork => "both_work",
|
||||
IpPreference::Unavailable => "unavailable",
|
||||
}
|
||||
}
|
||||
|
||||
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Yields `0` if the system clock reports a time before the epoch.
fn now_epoch_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
|
||||
66
src/api/runtime_watch.rs
Normal file
66
src/api/runtime_watch.rs
Normal file
@@ -0,0 +1,66 @@
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use tokio::sync::watch;
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
|
||||
use super::ApiRuntimeState;
|
||||
use super::events::ApiEventStore;
|
||||
|
||||
pub(super) fn spawn_runtime_watchers(
|
||||
config_rx: watch::Receiver<Arc<ProxyConfig>>,
|
||||
admission_rx: watch::Receiver<bool>,
|
||||
runtime_state: Arc<ApiRuntimeState>,
|
||||
runtime_events: Arc<ApiEventStore>,
|
||||
) {
|
||||
let mut config_rx_reload = config_rx;
|
||||
let runtime_state_reload = runtime_state.clone();
|
||||
let runtime_events_reload = runtime_events.clone();
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
if config_rx_reload.changed().await.is_err() {
|
||||
break;
|
||||
}
|
||||
runtime_state_reload
|
||||
.config_reload_count
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
runtime_state_reload
|
||||
.last_config_reload_epoch_secs
|
||||
.store(now_epoch_secs(), Ordering::Relaxed);
|
||||
runtime_events_reload.record("config.reload.applied", "config receiver updated");
|
||||
}
|
||||
});
|
||||
|
||||
let mut admission_rx_watch = admission_rx;
|
||||
tokio::spawn(async move {
|
||||
runtime_state
|
||||
.admission_open
|
||||
.store(*admission_rx_watch.borrow(), Ordering::Relaxed);
|
||||
runtime_events.record(
|
||||
"admission.state",
|
||||
format!("accepting_new_connections={}", *admission_rx_watch.borrow()),
|
||||
);
|
||||
loop {
|
||||
if admission_rx_watch.changed().await.is_err() {
|
||||
break;
|
||||
}
|
||||
let admission_open = *admission_rx_watch.borrow();
|
||||
runtime_state
|
||||
.admission_open
|
||||
.store(admission_open, Ordering::Relaxed);
|
||||
runtime_events.record(
|
||||
"admission.state",
|
||||
format!("accepting_new_connections={}", admission_open),
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Yields `0` if the system clock reports a time before the epoch.
fn now_epoch_secs() -> u64 {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
    since_epoch.map_or(0, |elapsed| elapsed.as_secs())
}
|
||||
305
src/api/runtime_zero.rs
Normal file
305
src/api/runtime_zero.rs
Normal file
@@ -0,0 +1,305 @@
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::config::{MeFloorMode, MeWriterPickMode, ProxyConfig, UserMaxUniqueIpsMode};
|
||||
use crate::proxy::route_mode::RelayRouteMode;
|
||||
|
||||
use super::ApiShared;
|
||||
use super::runtime_init::build_runtime_startup_summary;
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct SystemInfoData {
|
||||
pub(super) version: String,
|
||||
pub(super) target_arch: String,
|
||||
pub(super) target_os: String,
|
||||
pub(super) build_profile: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) git_commit: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) build_time_utc: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) rustc_version: Option<String>,
|
||||
pub(super) process_started_at_epoch_secs: u64,
|
||||
pub(super) uptime_seconds: f64,
|
||||
pub(super) config_path: String,
|
||||
pub(super) config_hash: String,
|
||||
pub(super) config_reload_count: u64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) last_config_reload_epoch_secs: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct RuntimeGatesData {
|
||||
pub(super) accepting_new_connections: bool,
|
||||
pub(super) conditional_cast_enabled: bool,
|
||||
pub(super) me_runtime_ready: bool,
|
||||
pub(super) me2dc_fallback_enabled: bool,
|
||||
pub(super) use_middle_proxy: bool,
|
||||
pub(super) route_mode: &'static str,
|
||||
pub(super) reroute_active: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) reroute_to_direct_at_epoch_secs: Option<u64>,
|
||||
pub(super) startup_status: &'static str,
|
||||
pub(super) startup_stage: String,
|
||||
pub(super) startup_progress_pct: f64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct EffectiveTimeoutLimits {
|
||||
pub(super) client_handshake_secs: u64,
|
||||
pub(super) tg_connect_secs: u64,
|
||||
pub(super) client_keepalive_secs: u64,
|
||||
pub(super) client_ack_secs: u64,
|
||||
pub(super) me_one_retry: u8,
|
||||
pub(super) me_one_timeout_ms: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct EffectiveUpstreamLimits {
|
||||
pub(super) connect_retry_attempts: u32,
|
||||
pub(super) connect_retry_backoff_ms: u64,
|
||||
pub(super) connect_budget_ms: u64,
|
||||
pub(super) unhealthy_fail_threshold: u32,
|
||||
pub(super) connect_failfast_hard_errors: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct EffectiveMiddleProxyLimits {
|
||||
pub(super) floor_mode: &'static str,
|
||||
pub(super) adaptive_floor_idle_secs: u64,
|
||||
pub(super) adaptive_floor_min_writers_single_endpoint: u8,
|
||||
pub(super) adaptive_floor_min_writers_multi_endpoint: u8,
|
||||
pub(super) adaptive_floor_recover_grace_secs: u64,
|
||||
pub(super) adaptive_floor_writers_per_core_total: u16,
|
||||
pub(super) adaptive_floor_cpu_cores_override: u16,
|
||||
pub(super) adaptive_floor_max_extra_writers_single_per_core: u16,
|
||||
pub(super) adaptive_floor_max_extra_writers_multi_per_core: u16,
|
||||
pub(super) adaptive_floor_max_active_writers_per_core: u16,
|
||||
pub(super) adaptive_floor_max_warm_writers_per_core: u16,
|
||||
pub(super) adaptive_floor_max_active_writers_global: u32,
|
||||
pub(super) adaptive_floor_max_warm_writers_global: u32,
|
||||
pub(super) reconnect_max_concurrent_per_dc: u32,
|
||||
pub(super) reconnect_backoff_base_ms: u64,
|
||||
pub(super) reconnect_backoff_cap_ms: u64,
|
||||
pub(super) reconnect_fast_retry_count: u32,
|
||||
pub(super) writer_pick_mode: &'static str,
|
||||
pub(super) writer_pick_sample_size: u8,
|
||||
pub(super) me2dc_fallback: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct EffectiveUserIpPolicyLimits {
|
||||
pub(super) mode: &'static str,
|
||||
pub(super) window_secs: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct EffectiveLimitsData {
|
||||
pub(super) update_every_secs: u64,
|
||||
pub(super) me_reinit_every_secs: u64,
|
||||
pub(super) me_pool_force_close_secs: u64,
|
||||
pub(super) timeouts: EffectiveTimeoutLimits,
|
||||
pub(super) upstream: EffectiveUpstreamLimits,
|
||||
pub(super) middle_proxy: EffectiveMiddleProxyLimits,
|
||||
pub(super) user_ip_policy: EffectiveUserIpPolicyLimits,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub(super) struct SecurityPostureData {
|
||||
pub(super) api_read_only: bool,
|
||||
pub(super) api_whitelist_enabled: bool,
|
||||
pub(super) api_whitelist_entries: usize,
|
||||
pub(super) api_auth_header_enabled: bool,
|
||||
pub(super) proxy_protocol_enabled: bool,
|
||||
pub(super) log_level: String,
|
||||
pub(super) telemetry_core_enabled: bool,
|
||||
pub(super) telemetry_user_enabled: bool,
|
||||
pub(super) telemetry_me_level: String,
|
||||
}
|
||||
|
||||
pub(super) fn build_system_info_data(
|
||||
shared: &ApiShared,
|
||||
_cfg: &ProxyConfig,
|
||||
revision: &str,
|
||||
) -> SystemInfoData {
|
||||
let last_reload_epoch_secs = shared
|
||||
.runtime_state
|
||||
.last_config_reload_epoch_secs
|
||||
.load(Ordering::Relaxed);
|
||||
let last_config_reload_epoch_secs = (last_reload_epoch_secs > 0).then_some(last_reload_epoch_secs);
|
||||
|
||||
let git_commit = option_env!("TELEMT_GIT_COMMIT")
|
||||
.or(option_env!("VERGEN_GIT_SHA"))
|
||||
.or(option_env!("GIT_COMMIT"))
|
||||
.map(ToString::to_string);
|
||||
let build_time_utc = option_env!("BUILD_TIME_UTC")
|
||||
.or(option_env!("VERGEN_BUILD_TIMESTAMP"))
|
||||
.map(ToString::to_string);
|
||||
let rustc_version = option_env!("RUSTC_VERSION")
|
||||
.or(option_env!("VERGEN_RUSTC_SEMVER"))
|
||||
.map(ToString::to_string);
|
||||
|
||||
SystemInfoData {
|
||||
version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
target_arch: std::env::consts::ARCH.to_string(),
|
||||
target_os: std::env::consts::OS.to_string(),
|
||||
build_profile: option_env!("PROFILE").unwrap_or("unknown").to_string(),
|
||||
git_commit,
|
||||
build_time_utc,
|
||||
rustc_version,
|
||||
process_started_at_epoch_secs: shared.runtime_state.process_started_at_epoch_secs,
|
||||
uptime_seconds: shared.stats.uptime_secs(),
|
||||
config_path: shared.config_path.display().to_string(),
|
||||
config_hash: revision.to_string(),
|
||||
config_reload_count: shared.runtime_state.config_reload_count.load(Ordering::Relaxed),
|
||||
last_config_reload_epoch_secs,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn build_runtime_gates_data(
|
||||
shared: &ApiShared,
|
||||
cfg: &ProxyConfig,
|
||||
) -> RuntimeGatesData {
|
||||
let startup_summary = build_runtime_startup_summary(shared).await;
|
||||
let route_state = shared.route_runtime.snapshot();
|
||||
let route_mode = route_state.mode.as_str();
|
||||
let reroute_active = cfg.general.use_middle_proxy
|
||||
&& cfg.general.me2dc_fallback
|
||||
&& matches!(route_state.mode, RelayRouteMode::Direct);
|
||||
let reroute_to_direct_at_epoch_secs = if reroute_active {
|
||||
shared.route_runtime.direct_since_epoch_secs()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let me_runtime_ready = if !cfg.general.use_middle_proxy {
|
||||
true
|
||||
} else {
|
||||
shared
|
||||
.me_pool
|
||||
.read()
|
||||
.await
|
||||
.as_ref()
|
||||
.map(|pool| pool.is_runtime_ready())
|
||||
.unwrap_or(false)
|
||||
};
|
||||
|
||||
RuntimeGatesData {
|
||||
accepting_new_connections: shared.runtime_state.admission_open.load(Ordering::Relaxed),
|
||||
conditional_cast_enabled: cfg.general.use_middle_proxy,
|
||||
me_runtime_ready,
|
||||
me2dc_fallback_enabled: cfg.general.me2dc_fallback,
|
||||
use_middle_proxy: cfg.general.use_middle_proxy,
|
||||
route_mode,
|
||||
reroute_active,
|
||||
reroute_to_direct_at_epoch_secs,
|
||||
startup_status: startup_summary.status,
|
||||
startup_stage: startup_summary.stage,
|
||||
startup_progress_pct: startup_summary.progress_pct,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn build_limits_effective_data(cfg: &ProxyConfig) -> EffectiveLimitsData {
|
||||
EffectiveLimitsData {
|
||||
update_every_secs: cfg.general.effective_update_every_secs(),
|
||||
me_reinit_every_secs: cfg.general.effective_me_reinit_every_secs(),
|
||||
me_pool_force_close_secs: cfg.general.effective_me_pool_force_close_secs(),
|
||||
timeouts: EffectiveTimeoutLimits {
|
||||
client_handshake_secs: cfg.timeouts.client_handshake,
|
||||
tg_connect_secs: cfg.timeouts.tg_connect,
|
||||
client_keepalive_secs: cfg.timeouts.client_keepalive,
|
||||
client_ack_secs: cfg.timeouts.client_ack,
|
||||
me_one_retry: cfg.timeouts.me_one_retry,
|
||||
me_one_timeout_ms: cfg.timeouts.me_one_timeout_ms,
|
||||
},
|
||||
upstream: EffectiveUpstreamLimits {
|
||||
connect_retry_attempts: cfg.general.upstream_connect_retry_attempts,
|
||||
connect_retry_backoff_ms: cfg.general.upstream_connect_retry_backoff_ms,
|
||||
connect_budget_ms: cfg.general.upstream_connect_budget_ms,
|
||||
unhealthy_fail_threshold: cfg.general.upstream_unhealthy_fail_threshold,
|
||||
connect_failfast_hard_errors: cfg.general.upstream_connect_failfast_hard_errors,
|
||||
},
|
||||
middle_proxy: EffectiveMiddleProxyLimits {
|
||||
floor_mode: me_floor_mode_label(cfg.general.me_floor_mode),
|
||||
adaptive_floor_idle_secs: cfg.general.me_adaptive_floor_idle_secs,
|
||||
adaptive_floor_min_writers_single_endpoint: cfg
|
||||
.general
|
||||
.me_adaptive_floor_min_writers_single_endpoint,
|
||||
adaptive_floor_min_writers_multi_endpoint: cfg
|
||||
.general
|
||||
.me_adaptive_floor_min_writers_multi_endpoint,
|
||||
adaptive_floor_recover_grace_secs: cfg.general.me_adaptive_floor_recover_grace_secs,
|
||||
adaptive_floor_writers_per_core_total: cfg
|
||||
.general
|
||||
.me_adaptive_floor_writers_per_core_total,
|
||||
adaptive_floor_cpu_cores_override: cfg
|
||||
.general
|
||||
.me_adaptive_floor_cpu_cores_override,
|
||||
adaptive_floor_max_extra_writers_single_per_core: cfg
|
||||
.general
|
||||
.me_adaptive_floor_max_extra_writers_single_per_core,
|
||||
adaptive_floor_max_extra_writers_multi_per_core: cfg
|
||||
.general
|
||||
.me_adaptive_floor_max_extra_writers_multi_per_core,
|
||||
adaptive_floor_max_active_writers_per_core: cfg
|
||||
.general
|
||||
.me_adaptive_floor_max_active_writers_per_core,
|
||||
adaptive_floor_max_warm_writers_per_core: cfg
|
||||
.general
|
||||
.me_adaptive_floor_max_warm_writers_per_core,
|
||||
adaptive_floor_max_active_writers_global: cfg
|
||||
.general
|
||||
.me_adaptive_floor_max_active_writers_global,
|
||||
adaptive_floor_max_warm_writers_global: cfg
|
||||
.general
|
||||
.me_adaptive_floor_max_warm_writers_global,
|
||||
reconnect_max_concurrent_per_dc: cfg.general.me_reconnect_max_concurrent_per_dc,
|
||||
reconnect_backoff_base_ms: cfg.general.me_reconnect_backoff_base_ms,
|
||||
reconnect_backoff_cap_ms: cfg.general.me_reconnect_backoff_cap_ms,
|
||||
reconnect_fast_retry_count: cfg.general.me_reconnect_fast_retry_count,
|
||||
writer_pick_mode: me_writer_pick_mode_label(cfg.general.me_writer_pick_mode),
|
||||
writer_pick_sample_size: cfg.general.me_writer_pick_sample_size,
|
||||
me2dc_fallback: cfg.general.me2dc_fallback,
|
||||
},
|
||||
user_ip_policy: EffectiveUserIpPolicyLimits {
|
||||
mode: user_max_unique_ips_mode_label(cfg.access.user_max_unique_ips_mode),
|
||||
window_secs: cfg.access.user_max_unique_ips_window_secs,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn build_security_posture_data(cfg: &ProxyConfig) -> SecurityPostureData {
|
||||
SecurityPostureData {
|
||||
api_read_only: cfg.server.api.read_only,
|
||||
api_whitelist_enabled: !cfg.server.api.whitelist.is_empty(),
|
||||
api_whitelist_entries: cfg.server.api.whitelist.len(),
|
||||
api_auth_header_enabled: !cfg.server.api.auth_header.is_empty(),
|
||||
proxy_protocol_enabled: cfg.server.proxy_protocol,
|
||||
log_level: cfg.general.log_level.to_string(),
|
||||
telemetry_core_enabled: cfg.general.telemetry.core_enabled,
|
||||
telemetry_user_enabled: cfg.general.telemetry.user_enabled,
|
||||
telemetry_me_level: cfg.general.telemetry.me_level.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn user_max_unique_ips_mode_label(mode: UserMaxUniqueIpsMode) -> &'static str {
|
||||
match mode {
|
||||
UserMaxUniqueIpsMode::ActiveWindow => "active_window",
|
||||
UserMaxUniqueIpsMode::TimeWindow => "time_window",
|
||||
UserMaxUniqueIpsMode::Combined => "combined",
|
||||
}
|
||||
}
|
||||
|
||||
fn me_floor_mode_label(mode: MeFloorMode) -> &'static str {
|
||||
match mode {
|
||||
MeFloorMode::Static => "static",
|
||||
MeFloorMode::Adaptive => "adaptive",
|
||||
}
|
||||
}
|
||||
|
||||
fn me_writer_pick_mode_label(mode: MeWriterPickMode) -> &'static str {
|
||||
match mode {
|
||||
MeWriterPickMode::SortedRr => "sorted_rr",
|
||||
MeWriterPickMode::P2c => "p2c",
|
||||
}
|
||||
}
|
||||
551
src/api/users.rs
Normal file
551
src/api/users.rs
Normal file
@@ -0,0 +1,551 @@
|
||||
use std::net::IpAddr;
|
||||
|
||||
use hyper::StatusCode;
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::ip_tracker::UserIpTracker;
|
||||
use crate::stats::Stats;
|
||||
|
||||
use super::ApiShared;
|
||||
use super::config_store::{
|
||||
AccessSection, ensure_expected_revision, load_config_from_disk, save_access_sections_to_disk,
|
||||
save_config_to_disk,
|
||||
};
|
||||
use super::model::{
|
||||
ApiFailure, CreateUserRequest, CreateUserResponse, PatchUserRequest, RotateSecretRequest,
|
||||
UserInfo, UserLinks, is_valid_ad_tag, is_valid_user_secret, is_valid_username,
|
||||
parse_optional_expiration, random_user_secret,
|
||||
};
|
||||
|
||||
pub(super) async fn create_user(
|
||||
body: CreateUserRequest,
|
||||
expected_revision: Option<String>,
|
||||
shared: &ApiShared,
|
||||
) -> Result<(CreateUserResponse, String), ApiFailure> {
|
||||
let touches_user_ad_tags = body.user_ad_tag.is_some();
|
||||
let touches_user_max_tcp_conns = body.max_tcp_conns.is_some();
|
||||
let touches_user_expirations = body.expiration_rfc3339.is_some();
|
||||
let touches_user_data_quota = body.data_quota_bytes.is_some();
|
||||
let touches_user_max_unique_ips = body.max_unique_ips.is_some();
|
||||
|
||||
if !is_valid_username(&body.username) {
|
||||
return Err(ApiFailure::bad_request(
|
||||
"username must match [A-Za-z0-9_.-] and be 1..64 chars",
|
||||
));
|
||||
}
|
||||
|
||||
let secret = match body.secret {
|
||||
Some(secret) => {
|
||||
if !is_valid_user_secret(&secret) {
|
||||
return Err(ApiFailure::bad_request(
|
||||
"secret must be exactly 32 hex characters",
|
||||
));
|
||||
}
|
||||
secret
|
||||
}
|
||||
None => random_user_secret(),
|
||||
};
|
||||
|
||||
if let Some(ad_tag) = body.user_ad_tag.as_ref() && !is_valid_ad_tag(ad_tag) {
|
||||
return Err(ApiFailure::bad_request(
|
||||
"user_ad_tag must be exactly 32 hex characters",
|
||||
));
|
||||
}
|
||||
|
||||
let expiration = parse_optional_expiration(body.expiration_rfc3339.as_deref())?;
|
||||
let _guard = shared.mutation_lock.lock().await;
|
||||
let mut cfg = load_config_from_disk(&shared.config_path).await?;
|
||||
ensure_expected_revision(&shared.config_path, expected_revision.as_deref()).await?;
|
||||
|
||||
if cfg.access.users.contains_key(&body.username) {
|
||||
return Err(ApiFailure::new(
|
||||
StatusCode::CONFLICT,
|
||||
"user_exists",
|
||||
"User already exists",
|
||||
));
|
||||
}
|
||||
|
||||
cfg.access.users.insert(body.username.clone(), secret.clone());
|
||||
if let Some(ad_tag) = body.user_ad_tag {
|
||||
cfg.access.user_ad_tags.insert(body.username.clone(), ad_tag);
|
||||
}
|
||||
if let Some(limit) = body.max_tcp_conns {
|
||||
cfg.access.user_max_tcp_conns.insert(body.username.clone(), limit);
|
||||
}
|
||||
if let Some(expiration) = expiration {
|
||||
cfg.access
|
||||
.user_expirations
|
||||
.insert(body.username.clone(), expiration);
|
||||
}
|
||||
if let Some(quota) = body.data_quota_bytes {
|
||||
cfg.access.user_data_quota.insert(body.username.clone(), quota);
|
||||
}
|
||||
|
||||
let updated_limit = body.max_unique_ips;
|
||||
if let Some(limit) = updated_limit {
|
||||
cfg.access
|
||||
.user_max_unique_ips
|
||||
.insert(body.username.clone(), limit);
|
||||
}
|
||||
|
||||
cfg.validate()
|
||||
.map_err(|e| ApiFailure::bad_request(format!("config validation failed: {}", e)))?;
|
||||
|
||||
let mut touched_sections = vec![AccessSection::Users];
|
||||
if touches_user_ad_tags {
|
||||
touched_sections.push(AccessSection::UserAdTags);
|
||||
}
|
||||
if touches_user_max_tcp_conns {
|
||||
touched_sections.push(AccessSection::UserMaxTcpConns);
|
||||
}
|
||||
if touches_user_expirations {
|
||||
touched_sections.push(AccessSection::UserExpirations);
|
||||
}
|
||||
if touches_user_data_quota {
|
||||
touched_sections.push(AccessSection::UserDataQuota);
|
||||
}
|
||||
if touches_user_max_unique_ips {
|
||||
touched_sections.push(AccessSection::UserMaxUniqueIps);
|
||||
}
|
||||
|
||||
let revision = save_access_sections_to_disk(&shared.config_path, &cfg, &touched_sections).await?;
|
||||
drop(_guard);
|
||||
|
||||
if let Some(limit) = updated_limit {
|
||||
shared.ip_tracker.set_user_limit(&body.username, limit).await;
|
||||
}
|
||||
let (detected_ip_v4, detected_ip_v6) = shared.detected_link_ips();
|
||||
|
||||
let users = users_from_config(
|
||||
&cfg,
|
||||
&shared.stats,
|
||||
&shared.ip_tracker,
|
||||
detected_ip_v4,
|
||||
detected_ip_v6,
|
||||
)
|
||||
.await;
|
||||
let user = users
|
||||
.into_iter()
|
||||
.find(|entry| entry.username == body.username)
|
||||
.unwrap_or(UserInfo {
|
||||
username: body.username.clone(),
|
||||
user_ad_tag: None,
|
||||
max_tcp_conns: None,
|
||||
expiration_rfc3339: None,
|
||||
data_quota_bytes: None,
|
||||
max_unique_ips: updated_limit,
|
||||
current_connections: 0,
|
||||
active_unique_ips: 0,
|
||||
active_unique_ips_list: Vec::new(),
|
||||
recent_unique_ips: 0,
|
||||
recent_unique_ips_list: Vec::new(),
|
||||
total_octets: 0,
|
||||
links: build_user_links(
|
||||
&cfg,
|
||||
&secret,
|
||||
detected_ip_v4,
|
||||
detected_ip_v6,
|
||||
),
|
||||
});
|
||||
|
||||
Ok((CreateUserResponse { user, secret }, revision))
|
||||
}
|
||||
|
||||
pub(super) async fn patch_user(
|
||||
user: &str,
|
||||
body: PatchUserRequest,
|
||||
expected_revision: Option<String>,
|
||||
shared: &ApiShared,
|
||||
) -> Result<(UserInfo, String), ApiFailure> {
|
||||
if let Some(secret) = body.secret.as_ref() && !is_valid_user_secret(secret) {
|
||||
return Err(ApiFailure::bad_request(
|
||||
"secret must be exactly 32 hex characters",
|
||||
));
|
||||
}
|
||||
if let Some(ad_tag) = body.user_ad_tag.as_ref() && !is_valid_ad_tag(ad_tag) {
|
||||
return Err(ApiFailure::bad_request(
|
||||
"user_ad_tag must be exactly 32 hex characters",
|
||||
));
|
||||
}
|
||||
let expiration = parse_optional_expiration(body.expiration_rfc3339.as_deref())?;
|
||||
let _guard = shared.mutation_lock.lock().await;
|
||||
let mut cfg = load_config_from_disk(&shared.config_path).await?;
|
||||
ensure_expected_revision(&shared.config_path, expected_revision.as_deref()).await?;
|
||||
|
||||
if !cfg.access.users.contains_key(user) {
|
||||
return Err(ApiFailure::new(
|
||||
StatusCode::NOT_FOUND,
|
||||
"not_found",
|
||||
"User not found",
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(secret) = body.secret {
|
||||
cfg.access.users.insert(user.to_string(), secret);
|
||||
}
|
||||
if let Some(ad_tag) = body.user_ad_tag {
|
||||
cfg.access.user_ad_tags.insert(user.to_string(), ad_tag);
|
||||
}
|
||||
if let Some(limit) = body.max_tcp_conns {
|
||||
cfg.access.user_max_tcp_conns.insert(user.to_string(), limit);
|
||||
}
|
||||
if let Some(expiration) = expiration {
|
||||
cfg.access.user_expirations.insert(user.to_string(), expiration);
|
||||
}
|
||||
if let Some(quota) = body.data_quota_bytes {
|
||||
cfg.access.user_data_quota.insert(user.to_string(), quota);
|
||||
}
|
||||
|
||||
let mut updated_limit = None;
|
||||
if let Some(limit) = body.max_unique_ips {
|
||||
cfg.access.user_max_unique_ips.insert(user.to_string(), limit);
|
||||
updated_limit = Some(limit);
|
||||
}
|
||||
|
||||
cfg.validate()
|
||||
.map_err(|e| ApiFailure::bad_request(format!("config validation failed: {}", e)))?;
|
||||
|
||||
let revision = save_config_to_disk(&shared.config_path, &cfg).await?;
|
||||
drop(_guard);
|
||||
if let Some(limit) = updated_limit {
|
||||
shared.ip_tracker.set_user_limit(user, limit).await;
|
||||
}
|
||||
let (detected_ip_v4, detected_ip_v6) = shared.detected_link_ips();
|
||||
let users = users_from_config(
|
||||
&cfg,
|
||||
&shared.stats,
|
||||
&shared.ip_tracker,
|
||||
detected_ip_v4,
|
||||
detected_ip_v6,
|
||||
)
|
||||
.await;
|
||||
let user_info = users
|
||||
.into_iter()
|
||||
.find(|entry| entry.username == user)
|
||||
.ok_or_else(|| ApiFailure::internal("failed to build updated user view"))?;
|
||||
|
||||
Ok((user_info, revision))
|
||||
}
|
||||
|
||||
pub(super) async fn rotate_secret(
|
||||
user: &str,
|
||||
body: RotateSecretRequest,
|
||||
expected_revision: Option<String>,
|
||||
shared: &ApiShared,
|
||||
) -> Result<(CreateUserResponse, String), ApiFailure> {
|
||||
let secret = body.secret.unwrap_or_else(random_user_secret);
|
||||
if !is_valid_user_secret(&secret) {
|
||||
return Err(ApiFailure::bad_request(
|
||||
"secret must be exactly 32 hex characters",
|
||||
));
|
||||
}
|
||||
|
||||
let _guard = shared.mutation_lock.lock().await;
|
||||
let mut cfg = load_config_from_disk(&shared.config_path).await?;
|
||||
ensure_expected_revision(&shared.config_path, expected_revision.as_deref()).await?;
|
||||
|
||||
if !cfg.access.users.contains_key(user) {
|
||||
return Err(ApiFailure::new(
|
||||
StatusCode::NOT_FOUND,
|
||||
"not_found",
|
||||
"User not found",
|
||||
));
|
||||
}
|
||||
|
||||
cfg.access.users.insert(user.to_string(), secret.clone());
|
||||
cfg.validate()
|
||||
.map_err(|e| ApiFailure::bad_request(format!("config validation failed: {}", e)))?;
|
||||
let touched_sections = [
|
||||
AccessSection::Users,
|
||||
AccessSection::UserAdTags,
|
||||
AccessSection::UserMaxTcpConns,
|
||||
AccessSection::UserExpirations,
|
||||
AccessSection::UserDataQuota,
|
||||
AccessSection::UserMaxUniqueIps,
|
||||
];
|
||||
let revision = save_access_sections_to_disk(&shared.config_path, &cfg, &touched_sections).await?;
|
||||
drop(_guard);
|
||||
|
||||
let (detected_ip_v4, detected_ip_v6) = shared.detected_link_ips();
|
||||
let users = users_from_config(
|
||||
&cfg,
|
||||
&shared.stats,
|
||||
&shared.ip_tracker,
|
||||
detected_ip_v4,
|
||||
detected_ip_v6,
|
||||
)
|
||||
.await;
|
||||
let user_info = users
|
||||
.into_iter()
|
||||
.find(|entry| entry.username == user)
|
||||
.ok_or_else(|| ApiFailure::internal("failed to build updated user view"))?;
|
||||
|
||||
Ok((
|
||||
CreateUserResponse {
|
||||
user: user_info,
|
||||
secret,
|
||||
},
|
||||
revision,
|
||||
))
|
||||
}
|
||||
|
||||
pub(super) async fn delete_user(
|
||||
user: &str,
|
||||
expected_revision: Option<String>,
|
||||
shared: &ApiShared,
|
||||
) -> Result<(String, String), ApiFailure> {
|
||||
let _guard = shared.mutation_lock.lock().await;
|
||||
let mut cfg = load_config_from_disk(&shared.config_path).await?;
|
||||
ensure_expected_revision(&shared.config_path, expected_revision.as_deref()).await?;
|
||||
|
||||
if !cfg.access.users.contains_key(user) {
|
||||
return Err(ApiFailure::new(
|
||||
StatusCode::NOT_FOUND,
|
||||
"not_found",
|
||||
"User not found",
|
||||
));
|
||||
}
|
||||
if cfg.access.users.len() <= 1 {
|
||||
return Err(ApiFailure::new(
|
||||
StatusCode::CONFLICT,
|
||||
"last_user_forbidden",
|
||||
"Cannot delete the last configured user",
|
||||
));
|
||||
}
|
||||
|
||||
cfg.access.users.remove(user);
|
||||
cfg.access.user_ad_tags.remove(user);
|
||||
cfg.access.user_max_tcp_conns.remove(user);
|
||||
cfg.access.user_expirations.remove(user);
|
||||
cfg.access.user_data_quota.remove(user);
|
||||
cfg.access.user_max_unique_ips.remove(user);
|
||||
|
||||
cfg.validate()
|
||||
.map_err(|e| ApiFailure::bad_request(format!("config validation failed: {}", e)))?;
|
||||
let touched_sections = [
|
||||
AccessSection::Users,
|
||||
AccessSection::UserAdTags,
|
||||
AccessSection::UserMaxTcpConns,
|
||||
AccessSection::UserExpirations,
|
||||
AccessSection::UserDataQuota,
|
||||
AccessSection::UserMaxUniqueIps,
|
||||
];
|
||||
let revision = save_access_sections_to_disk(&shared.config_path, &cfg, &touched_sections).await?;
|
||||
drop(_guard);
|
||||
shared.ip_tracker.remove_user_limit(user).await;
|
||||
shared.ip_tracker.clear_user_ips(user).await;
|
||||
|
||||
Ok((user.to_string(), revision))
|
||||
}
|
||||
|
||||
pub(super) async fn users_from_config(
|
||||
cfg: &ProxyConfig,
|
||||
stats: &Stats,
|
||||
ip_tracker: &UserIpTracker,
|
||||
startup_detected_ip_v4: Option<IpAddr>,
|
||||
startup_detected_ip_v6: Option<IpAddr>,
|
||||
) -> Vec<UserInfo> {
|
||||
let mut names = cfg.access.users.keys().cloned().collect::<Vec<_>>();
|
||||
names.sort();
|
||||
let active_ip_lists = ip_tracker.get_active_ips_for_users(&names).await;
|
||||
let recent_ip_lists = ip_tracker.get_recent_ips_for_users(&names).await;
|
||||
|
||||
let mut users = Vec::with_capacity(names.len());
|
||||
for username in names {
|
||||
let active_ip_list = active_ip_lists
|
||||
.get(&username)
|
||||
.cloned()
|
||||
.unwrap_or_else(Vec::new);
|
||||
let recent_ip_list = recent_ip_lists
|
||||
.get(&username)
|
||||
.cloned()
|
||||
.unwrap_or_else(Vec::new);
|
||||
let links = cfg
|
||||
.access
|
||||
.users
|
||||
.get(&username)
|
||||
.map(|secret| {
|
||||
build_user_links(
|
||||
cfg,
|
||||
secret,
|
||||
startup_detected_ip_v4,
|
||||
startup_detected_ip_v6,
|
||||
)
|
||||
})
|
||||
.unwrap_or(UserLinks {
|
||||
classic: Vec::new(),
|
||||
secure: Vec::new(),
|
||||
tls: Vec::new(),
|
||||
});
|
||||
users.push(UserInfo {
|
||||
user_ad_tag: cfg.access.user_ad_tags.get(&username).cloned(),
|
||||
max_tcp_conns: cfg.access.user_max_tcp_conns.get(&username).copied(),
|
||||
expiration_rfc3339: cfg
|
||||
.access
|
||||
.user_expirations
|
||||
.get(&username)
|
||||
.map(chrono::DateTime::<chrono::Utc>::to_rfc3339),
|
||||
data_quota_bytes: cfg.access.user_data_quota.get(&username).copied(),
|
||||
max_unique_ips: cfg.access.user_max_unique_ips.get(&username).copied(),
|
||||
current_connections: stats.get_user_curr_connects(&username),
|
||||
active_unique_ips: active_ip_list.len(),
|
||||
active_unique_ips_list: active_ip_list,
|
||||
recent_unique_ips: recent_ip_list.len(),
|
||||
recent_unique_ips_list: recent_ip_list,
|
||||
total_octets: stats.get_user_total_octets(&username),
|
||||
links,
|
||||
username,
|
||||
});
|
||||
}
|
||||
users
|
||||
}
|
||||
|
||||
fn build_user_links(
|
||||
cfg: &ProxyConfig,
|
||||
secret: &str,
|
||||
startup_detected_ip_v4: Option<IpAddr>,
|
||||
startup_detected_ip_v6: Option<IpAddr>,
|
||||
) -> UserLinks {
|
||||
let hosts = resolve_link_hosts(cfg, startup_detected_ip_v4, startup_detected_ip_v6);
|
||||
let port = cfg.general.links.public_port.unwrap_or(cfg.server.port);
|
||||
let tls_domains = resolve_tls_domains(cfg);
|
||||
|
||||
let mut classic = Vec::new();
|
||||
let mut secure = Vec::new();
|
||||
let mut tls = Vec::new();
|
||||
|
||||
for host in &hosts {
|
||||
if cfg.general.modes.classic {
|
||||
classic.push(format!(
|
||||
"tg://proxy?server={}&port={}&secret={}",
|
||||
host, port, secret
|
||||
));
|
||||
}
|
||||
if cfg.general.modes.secure {
|
||||
secure.push(format!(
|
||||
"tg://proxy?server={}&port={}&secret=dd{}",
|
||||
host, port, secret
|
||||
));
|
||||
}
|
||||
if cfg.general.modes.tls {
|
||||
for domain in &tls_domains {
|
||||
let domain_hex = hex::encode(domain);
|
||||
tls.push(format!(
|
||||
"tg://proxy?server={}&port={}&secret=ee{}{}",
|
||||
host, port, secret, domain_hex
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
UserLinks {
|
||||
classic,
|
||||
secure,
|
||||
tls,
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_link_hosts(
|
||||
cfg: &ProxyConfig,
|
||||
startup_detected_ip_v4: Option<IpAddr>,
|
||||
startup_detected_ip_v6: Option<IpAddr>,
|
||||
) -> Vec<String> {
|
||||
if let Some(host) = cfg
|
||||
.general
|
||||
.links
|
||||
.public_host
|
||||
.as_deref()
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
{
|
||||
return vec![host.to_string()];
|
||||
}
|
||||
|
||||
let mut hosts = Vec::new();
|
||||
for listener in &cfg.server.listeners {
|
||||
if let Some(host) = listener
|
||||
.announce
|
||||
.as_deref()
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
{
|
||||
push_unique_host(&mut hosts, host);
|
||||
continue;
|
||||
}
|
||||
if let Some(ip) = listener.announce_ip {
|
||||
if !ip.is_unspecified() {
|
||||
push_unique_host(&mut hosts, &ip.to_string());
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if listener.ip.is_unspecified() {
|
||||
let detected_ip = if listener.ip.is_ipv4() {
|
||||
startup_detected_ip_v4
|
||||
} else {
|
||||
startup_detected_ip_v6
|
||||
};
|
||||
if let Some(ip) = detected_ip {
|
||||
push_unique_host(&mut hosts, &ip.to_string());
|
||||
} else {
|
||||
push_unique_host(&mut hosts, &listener.ip.to_string());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
push_unique_host(&mut hosts, &listener.ip.to_string());
|
||||
}
|
||||
|
||||
if !hosts.is_empty() {
|
||||
return hosts;
|
||||
}
|
||||
|
||||
if let Some(ip) = startup_detected_ip_v4.or(startup_detected_ip_v6) {
|
||||
return vec![ip.to_string()];
|
||||
}
|
||||
|
||||
if let Some(host) = cfg.server.listen_addr_ipv4.as_deref() {
|
||||
push_host_from_legacy_listen(&mut hosts, host);
|
||||
}
|
||||
if let Some(host) = cfg.server.listen_addr_ipv6.as_deref() {
|
||||
push_host_from_legacy_listen(&mut hosts, host);
|
||||
}
|
||||
if !hosts.is_empty() {
|
||||
return hosts;
|
||||
}
|
||||
|
||||
vec!["UNKNOWN".to_string()]
|
||||
}
|
||||
|
||||
fn push_host_from_legacy_listen(hosts: &mut Vec<String>, raw: &str) {
|
||||
let candidate = raw.trim();
|
||||
if candidate.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
match candidate.parse::<IpAddr>() {
|
||||
Ok(ip) if ip.is_unspecified() => {}
|
||||
Ok(ip) => push_unique_host(hosts, &ip.to_string()),
|
||||
Err(_) => push_unique_host(hosts, candidate),
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends `candidate` to `hosts` unless an identical string is already there.
///
/// Comparison is exact (case-sensitive, no normalisation); insertion order of
/// first occurrences is preserved.
fn push_unique_host(hosts: &mut Vec<String>, candidate: &str) {
    let already_listed = hosts.iter().map(String::as_str).any(|host| host == candidate);
    if already_listed {
        return;
    }
    hosts.push(candidate.to_owned());
}
|
||||
|
||||
fn resolve_tls_domains(cfg: &ProxyConfig) -> Vec<&str> {
|
||||
let mut domains = Vec::with_capacity(1 + cfg.censorship.tls_domains.len());
|
||||
let primary = cfg.censorship.tls_domain.as_str();
|
||||
if !primary.is_empty() {
|
||||
domains.push(primary);
|
||||
}
|
||||
for domain in &cfg.censorship.tls_domains {
|
||||
let value = domain.as_str();
|
||||
if value.is_empty() || domains.contains(&value) {
|
||||
continue;
|
||||
}
|
||||
domains.push(value);
|
||||
}
|
||||
domains
|
||||
}
|
||||
@@ -11,9 +11,35 @@ const DEFAULT_ME_RECONNECT_FAST_RETRY_COUNT: u32 = 16;
|
||||
const DEFAULT_ME_SINGLE_ENDPOINT_SHADOW_WRITERS: u8 = 2;
|
||||
const DEFAULT_ME_ADAPTIVE_FLOOR_IDLE_SECS: u64 = 90;
|
||||
const DEFAULT_ME_ADAPTIVE_FLOOR_MIN_WRITERS_SINGLE_ENDPOINT: u8 = 1;
|
||||
const DEFAULT_ME_ADAPTIVE_FLOOR_MIN_WRITERS_MULTI_ENDPOINT: u8 = 1;
|
||||
const DEFAULT_ME_ADAPTIVE_FLOOR_RECOVER_GRACE_SECS: u64 = 180;
|
||||
const DEFAULT_ME_ADAPTIVE_FLOOR_WRITERS_PER_CORE_TOTAL: u16 = 48;
|
||||
const DEFAULT_ME_ADAPTIVE_FLOOR_CPU_CORES_OVERRIDE: u16 = 0;
|
||||
const DEFAULT_ME_ADAPTIVE_FLOOR_MAX_EXTRA_WRITERS_SINGLE_PER_CORE: u16 = 1;
|
||||
const DEFAULT_ME_ADAPTIVE_FLOOR_MAX_EXTRA_WRITERS_MULTI_PER_CORE: u16 = 2;
|
||||
const DEFAULT_ME_ADAPTIVE_FLOOR_MAX_ACTIVE_WRITERS_PER_CORE: u16 = 64;
|
||||
const DEFAULT_ME_ADAPTIVE_FLOOR_MAX_WARM_WRITERS_PER_CORE: u16 = 64;
|
||||
const DEFAULT_ME_ADAPTIVE_FLOOR_MAX_ACTIVE_WRITERS_GLOBAL: u32 = 256;
|
||||
const DEFAULT_ME_ADAPTIVE_FLOOR_MAX_WARM_WRITERS_GLOBAL: u32 = 256;
|
||||
const DEFAULT_ME_WRITER_CMD_CHANNEL_CAPACITY: usize = 4096;
|
||||
const DEFAULT_ME_ROUTE_CHANNEL_CAPACITY: usize = 768;
|
||||
const DEFAULT_ME_C2ME_CHANNEL_CAPACITY: usize = 1024;
|
||||
const DEFAULT_ME_READER_ROUTE_DATA_WAIT_MS: u64 = 2;
|
||||
const DEFAULT_ME_D2C_FLUSH_BATCH_MAX_FRAMES: usize = 32;
|
||||
const DEFAULT_ME_D2C_FLUSH_BATCH_MAX_BYTES: usize = 128 * 1024;
|
||||
const DEFAULT_ME_D2C_FLUSH_BATCH_MAX_DELAY_US: u64 = 1500;
|
||||
const DEFAULT_ME_D2C_ACK_FLUSH_IMMEDIATE: bool = false;
|
||||
const DEFAULT_DIRECT_RELAY_COPY_BUF_C2S_BYTES: usize = 64 * 1024;
|
||||
const DEFAULT_DIRECT_RELAY_COPY_BUF_S2C_BYTES: usize = 256 * 1024;
|
||||
const DEFAULT_ME_WRITER_PICK_SAMPLE_SIZE: u8 = 3;
|
||||
const DEFAULT_ME_HEALTH_INTERVAL_MS_UNHEALTHY: u64 = 1000;
|
||||
const DEFAULT_ME_HEALTH_INTERVAL_MS_HEALTHY: u64 = 3000;
|
||||
const DEFAULT_ME_ADMISSION_POLL_MS: u64 = 1000;
|
||||
const DEFAULT_ME_WARN_RATE_LIMIT_MS: u64 = 5000;
|
||||
const DEFAULT_USER_MAX_UNIQUE_IPS_WINDOW_SECS: u64 = 30;
|
||||
const DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS: u32 = 2;
|
||||
const DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD: u32 = 5;
|
||||
const DEFAULT_UPSTREAM_CONNECT_BUDGET_MS: u64 = 3000;
|
||||
const DEFAULT_LISTEN_ADDR_IPV6: &str = "::";
|
||||
const DEFAULT_ACCESS_USER: &str = "default";
|
||||
const DEFAULT_ACCESS_SECRET: &str = "00000000000000000000000000000000";
|
||||
@@ -92,6 +118,35 @@ pub(crate) fn default_metrics_whitelist() -> Vec<IpNetwork> {
|
||||
]
|
||||
}
|
||||
|
||||
/// Built-in default API listen address: `0.0.0.0:9091`.
pub(crate) fn default_api_listen() -> String {
    String::from("0.0.0.0:9091")
}
|
||||
|
||||
/// Built-in default API whitelist: a single entry, the `127.0.0.0/8` network.
pub(crate) fn default_api_whitelist() -> Vec<IpNetwork> {
    // The literal is a valid CIDR, so the parse is not expected to fail.
    let loopback_v4: IpNetwork = "127.0.0.0/8".parse().unwrap();
    vec![loopback_v4]
}
|
||||
|
||||
/// Built-in default API request body limit: 64 KiB (65 536 bytes).
pub(crate) fn default_api_request_body_limit_bytes() -> usize {
    const KIB: usize = 1024;
    64 * KIB
}
|
||||
|
||||
/// Built-in default for the API "minimal runtime" switch: enabled.
pub(crate) fn default_api_minimal_runtime_enabled() -> bool {
    true
}
|
||||
|
||||
/// Built-in default cache TTL for the API minimal-runtime data: 1000 ms.
pub(crate) fn default_api_minimal_runtime_cache_ttl_ms() -> u64 {
    1_000
}
|
||||
|
||||
/// Built-in default for the API "runtime edge" switch: disabled.
pub(crate) fn default_api_runtime_edge_enabled() -> bool {
    false
}

/// Built-in default cache TTL for API runtime-edge data: 1000 ms.
pub(crate) fn default_api_runtime_edge_cache_ttl_ms() -> u64 {
    1_000
}

/// Built-in default "top N" size for API runtime-edge data: 10.
pub(crate) fn default_api_runtime_edge_top_n() -> usize {
    10
}

/// Built-in default event capacity for API runtime-edge data: 256.
pub(crate) fn default_api_runtime_edge_events_capacity() -> usize {
    256
}
|
||||
|
||||
/// Built-in default PROXY-protocol header timeout: 500 ms.
pub(crate) fn default_proxy_protocol_header_timeout_ms() -> u64 {
    const HALF_SECOND_MS: u64 = 500;
    HALF_SECOND_MS
}
|
||||
|
||||
/// Built-in default for the IP-family preference setting: `4`.
pub(crate) fn default_prefer_4() -> u8 {
    4u8
}
|
||||
@@ -108,6 +163,10 @@ pub(crate) fn default_unknown_dc_log_path() -> Option<String> {
|
||||
Some("unknown-dc.txt".to_string())
|
||||
}
|
||||
|
||||
/// Built-in default for file logging of unknown DCs: disabled.
pub(crate) fn default_unknown_dc_file_log_enabled() -> bool {
    false
}
|
||||
|
||||
/// Built-in default pool size: 8.
pub(crate) fn default_pool_size() -> usize {
    8usize
}
|
||||
@@ -116,6 +175,14 @@ pub(crate) fn default_proxy_secret_path() -> Option<String> {
|
||||
Some("proxy-secret".to_string())
|
||||
}
|
||||
|
||||
/// Built-in default cache file for the IPv4 proxy config:
/// `cache/proxy-config-v4.txt`.
pub(crate) fn default_proxy_config_v4_cache_path() -> Option<String> {
    let path = String::from("cache/proxy-config-v4.txt");
    Some(path)
}
|
||||
|
||||
/// Built-in default cache file for the IPv6 proxy config:
/// `cache/proxy-config-v6.txt`.
pub(crate) fn default_proxy_config_v6_cache_path() -> Option<String> {
    let path = String::from("cache/proxy-config-v6.txt");
    Some(path)
}
|
||||
|
||||
/// Built-in default for the middle-proxy NAT STUN setting: not set (`None`).
pub(crate) fn default_middle_proxy_nat_stun() -> Option<String> {
    Option::None
}
|
||||
@@ -132,6 +199,14 @@ pub(crate) fn default_middle_proxy_warm_standby() -> usize {
|
||||
DEFAULT_MIDDLE_PROXY_WARM_STANDBY
|
||||
}
|
||||
|
||||
/// Built-in default for `me_init_retry_attempts`: `0`.
///
/// NOTE(review): whether `0` means "no retries" or "unlimited" is decided by
/// the consumer of this value, which is not visible here.
pub(crate) fn default_me_init_retry_attempts() -> u32 {
    0u32
}
|
||||
|
||||
/// Built-in default for the `me2dc_fallback` switch: enabled.
pub(crate) fn default_me2dc_fallback() -> bool {
    true
}
|
||||
|
||||
/// Built-in default keepalive interval: `8`.
///
/// NOTE(review): the unit is presumably seconds — confirm against the config field.
pub(crate) fn default_keepalive_interval() -> u64 {
    8u64
}
|
||||
@@ -196,10 +271,106 @@ pub(crate) fn default_me_adaptive_floor_min_writers_single_endpoint() -> u8 {
|
||||
DEFAULT_ME_ADAPTIVE_FLOOR_MIN_WRITERS_SINGLE_ENDPOINT
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_adaptive_floor_min_writers_multi_endpoint() -> u8 {
|
||||
DEFAULT_ME_ADAPTIVE_FLOOR_MIN_WRITERS_MULTI_ENDPOINT
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_adaptive_floor_recover_grace_secs() -> u64 {
|
||||
DEFAULT_ME_ADAPTIVE_FLOOR_RECOVER_GRACE_SECS
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_adaptive_floor_writers_per_core_total() -> u16 {
|
||||
DEFAULT_ME_ADAPTIVE_FLOOR_WRITERS_PER_CORE_TOTAL
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_adaptive_floor_cpu_cores_override() -> u16 {
|
||||
DEFAULT_ME_ADAPTIVE_FLOOR_CPU_CORES_OVERRIDE
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_adaptive_floor_max_extra_writers_single_per_core() -> u16 {
|
||||
DEFAULT_ME_ADAPTIVE_FLOOR_MAX_EXTRA_WRITERS_SINGLE_PER_CORE
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_adaptive_floor_max_extra_writers_multi_per_core() -> u16 {
|
||||
DEFAULT_ME_ADAPTIVE_FLOOR_MAX_EXTRA_WRITERS_MULTI_PER_CORE
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_adaptive_floor_max_active_writers_per_core() -> u16 {
|
||||
DEFAULT_ME_ADAPTIVE_FLOOR_MAX_ACTIVE_WRITERS_PER_CORE
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_adaptive_floor_max_warm_writers_per_core() -> u16 {
|
||||
DEFAULT_ME_ADAPTIVE_FLOOR_MAX_WARM_WRITERS_PER_CORE
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_adaptive_floor_max_active_writers_global() -> u32 {
|
||||
DEFAULT_ME_ADAPTIVE_FLOOR_MAX_ACTIVE_WRITERS_GLOBAL
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_adaptive_floor_max_warm_writers_global() -> u32 {
|
||||
DEFAULT_ME_ADAPTIVE_FLOOR_MAX_WARM_WRITERS_GLOBAL
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_writer_cmd_channel_capacity() -> usize {
|
||||
DEFAULT_ME_WRITER_CMD_CHANNEL_CAPACITY
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_route_channel_capacity() -> usize {
|
||||
DEFAULT_ME_ROUTE_CHANNEL_CAPACITY
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_c2me_channel_capacity() -> usize {
|
||||
DEFAULT_ME_C2ME_CHANNEL_CAPACITY
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_reader_route_data_wait_ms() -> u64 {
|
||||
DEFAULT_ME_READER_ROUTE_DATA_WAIT_MS
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_d2c_flush_batch_max_frames() -> usize {
|
||||
DEFAULT_ME_D2C_FLUSH_BATCH_MAX_FRAMES
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_d2c_flush_batch_max_bytes() -> usize {
|
||||
DEFAULT_ME_D2C_FLUSH_BATCH_MAX_BYTES
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_d2c_flush_batch_max_delay_us() -> u64 {
|
||||
DEFAULT_ME_D2C_FLUSH_BATCH_MAX_DELAY_US
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_d2c_ack_flush_immediate() -> bool {
|
||||
DEFAULT_ME_D2C_ACK_FLUSH_IMMEDIATE
|
||||
}
|
||||
|
||||
pub(crate) fn default_direct_relay_copy_buf_c2s_bytes() -> usize {
|
||||
DEFAULT_DIRECT_RELAY_COPY_BUF_C2S_BYTES
|
||||
}
|
||||
|
||||
pub(crate) fn default_direct_relay_copy_buf_s2c_bytes() -> usize {
|
||||
DEFAULT_DIRECT_RELAY_COPY_BUF_S2C_BYTES
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_writer_pick_sample_size() -> u8 {
|
||||
DEFAULT_ME_WRITER_PICK_SAMPLE_SIZE
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_health_interval_ms_unhealthy() -> u64 {
|
||||
DEFAULT_ME_HEALTH_INTERVAL_MS_UNHEALTHY
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_health_interval_ms_healthy() -> u64 {
|
||||
DEFAULT_ME_HEALTH_INTERVAL_MS_HEALTHY
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_admission_poll_ms() -> u64 {
|
||||
DEFAULT_ME_ADMISSION_POLL_MS
|
||||
}
|
||||
|
||||
pub(crate) fn default_me_warn_rate_limit_ms() -> u64 {
|
||||
DEFAULT_ME_WARN_RATE_LIMIT_MS
|
||||
}
|
||||
|
||||
pub(crate) fn default_upstream_connect_retry_attempts() -> u32 {
|
||||
DEFAULT_UPSTREAM_CONNECT_RETRY_ATTEMPTS
|
||||
}
|
||||
@@ -212,6 +383,10 @@ pub(crate) fn default_upstream_unhealthy_fail_threshold() -> u32 {
|
||||
DEFAULT_UPSTREAM_UNHEALTHY_FAIL_THRESHOLD
|
||||
}
|
||||
|
||||
pub(crate) fn default_upstream_connect_budget_ms() -> u64 {
|
||||
DEFAULT_UPSTREAM_CONNECT_BUDGET_MS
|
||||
}
|
||||
|
||||
/// Built-in default for `upstream_connect_failfast_hard_errors`: disabled.
pub(crate) fn default_upstream_connect_failfast_hard_errors() -> bool {
    false
}
|
||||
@@ -244,6 +419,18 @@ pub(crate) fn default_me_route_backpressure_high_watermark_pct() -> u8 {
|
||||
80
|
||||
}
|
||||
|
||||
/// Built-in default for `me_route_no_writer_wait_ms`: 250 ms.
pub(crate) fn default_me_route_no_writer_wait_ms() -> u64 {
    250u64
}
|
||||
|
||||
/// Built-in default for `me_route_inline_recovery_attempts`: 3.
pub(crate) fn default_me_route_inline_recovery_attempts() -> u32 {
    3u32
}
|
||||
|
||||
/// Built-in default for `me_route_inline_recovery_wait_ms`: 3000 ms.
pub(crate) fn default_me_route_inline_recovery_wait_ms() -> u64 {
    3_000
}
|
||||
|
||||
/// Built-in default for `beobachten_minutes`: 10.
pub(crate) fn default_beobachten_minutes() -> u64 {
    10u64
}
|
||||
@@ -444,6 +631,10 @@ pub(crate) fn default_access_users() -> HashMap<String, String> {
|
||||
)])
|
||||
}
|
||||
|
||||
pub(crate) fn default_user_max_unique_ips_window_secs() -> u64 {
|
||||
DEFAULT_USER_MAX_UNIQUE_IPS_WINDOW_SECS
|
||||
}
|
||||
|
||||
// Custom deserializer helpers
|
||||
|
||||
#[derive(Deserialize)]
|
||||
|
||||
@@ -9,20 +9,17 @@
|
||||
//! | `general` | `log_level` | Filter updated via `log_level_tx` |
|
||||
//! | `access` | `user_ad_tags` | Passed on next connection |
|
||||
//! | `general` | `ad_tag` | Passed on next connection (fallback per-user) |
|
||||
//! | `general` | `middle_proxy_pool_size` | Passed on next connection |
|
||||
//! | `general` | `me_keepalive_*` | Passed on next connection |
|
||||
//! | `general` | `desync_all_full` | Applied immediately |
|
||||
//! | `general` | `update_every` | Applied to ME updater immediately |
|
||||
//! | `general` | `hardswap` | Applied on next ME map update |
|
||||
//! | `general` | `me_pool_drain_ttl_secs` | Applied on next ME map update |
|
||||
//! | `general` | `me_pool_min_fresh_ratio` | Applied on next ME map update |
|
||||
//! | `general` | `me_reinit_drain_timeout_secs` | Applied on next ME map update |
|
||||
//! | `general` | `me_reinit_*` | Applied to ME reinit scheduler immediately |
|
||||
//! | `general` | `hardswap` / `me_*_reinit` | Applied on next ME map update |
|
||||
//! | `general` | `telemetry` / `me_*_policy` | Applied immediately |
|
||||
//! | `network` | `dns_overrides` | Applied immediately |
|
||||
//! | `access` | All user/quota fields | Effective immediately |
|
||||
//!
|
||||
//! Fields that require re-binding sockets (`server.port`, `censorship.*`,
|
||||
//! `network.*`, `use_middle_proxy`) are **not** applied; a warning is emitted.
|
||||
//! Non-hot changes are never mixed into the runtime config snapshot.
|
||||
|
||||
use std::net::IpAddr;
|
||||
use std::path::PathBuf;
|
||||
@@ -32,7 +29,10 @@ use notify::{EventKind, RecursiveMode, Watcher, recommended_watcher};
|
||||
use tokio::sync::{mpsc, watch};
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
use crate::config::{LogLevel, MeFloorMode, MeSocksKdfPolicy, MeTelemetryLevel};
|
||||
use crate::config::{
|
||||
LogLevel, MeBindStaleMode, MeFloorMode, MeSocksKdfPolicy, MeTelemetryLevel,
|
||||
MeWriterPickMode,
|
||||
};
|
||||
use super::load::ProxyConfig;
|
||||
|
||||
// ── Hot fields ────────────────────────────────────────────────────────────────
|
||||
@@ -43,17 +43,39 @@ pub struct HotFields {
|
||||
pub log_level: LogLevel,
|
||||
pub ad_tag: Option<String>,
|
||||
pub dns_overrides: Vec<String>,
|
||||
pub middle_proxy_pool_size: usize,
|
||||
pub desync_all_full: bool,
|
||||
pub update_every_secs: u64,
|
||||
pub me_reinit_every_secs: u64,
|
||||
pub me_reinit_singleflight: bool,
|
||||
pub me_reinit_coalesce_window_ms: u64,
|
||||
pub hardswap: bool,
|
||||
pub me_pool_drain_ttl_secs: u64,
|
||||
pub me_pool_min_fresh_ratio: f32,
|
||||
pub me_reinit_drain_timeout_secs: u64,
|
||||
pub me_keepalive_enabled: bool,
|
||||
pub me_keepalive_interval_secs: u64,
|
||||
pub me_keepalive_jitter_secs: u64,
|
||||
pub me_keepalive_payload_random: bool,
|
||||
pub me_hardswap_warmup_delay_min_ms: u64,
|
||||
pub me_hardswap_warmup_delay_max_ms: u64,
|
||||
pub me_hardswap_warmup_extra_passes: u8,
|
||||
pub me_hardswap_warmup_pass_backoff_base_ms: u64,
|
||||
pub me_bind_stale_mode: MeBindStaleMode,
|
||||
pub me_bind_stale_ttl_secs: u64,
|
||||
pub me_secret_atomic_snapshot: bool,
|
||||
pub me_deterministic_writer_sort: bool,
|
||||
pub me_writer_pick_mode: MeWriterPickMode,
|
||||
pub me_writer_pick_sample_size: u8,
|
||||
pub me_single_endpoint_shadow_writers: u8,
|
||||
pub me_single_endpoint_outage_mode_enabled: bool,
|
||||
pub me_single_endpoint_outage_disable_quarantine: bool,
|
||||
pub me_single_endpoint_outage_backoff_min_ms: u64,
|
||||
pub me_single_endpoint_outage_backoff_max_ms: u64,
|
||||
pub me_single_endpoint_shadow_rotate_every_secs: u64,
|
||||
pub me_config_stable_snapshots: u8,
|
||||
pub me_config_apply_cooldown_secs: u64,
|
||||
pub me_snapshot_require_http_2xx: bool,
|
||||
pub me_snapshot_reject_empty_map: bool,
|
||||
pub me_snapshot_min_proxy_for_lines: u32,
|
||||
pub proxy_secret_stable_snapshots: u8,
|
||||
pub proxy_secret_rotate_runtime: bool,
|
||||
pub proxy_secret_len_max: usize,
|
||||
pub telemetry_core_enabled: bool,
|
||||
pub telemetry_user_enabled: bool,
|
||||
pub telemetry_me_level: MeTelemetryLevel,
|
||||
@@ -61,11 +83,38 @@ pub struct HotFields {
|
||||
pub me_floor_mode: MeFloorMode,
|
||||
pub me_adaptive_floor_idle_secs: u64,
|
||||
pub me_adaptive_floor_min_writers_single_endpoint: u8,
|
||||
pub me_adaptive_floor_min_writers_multi_endpoint: u8,
|
||||
pub me_adaptive_floor_recover_grace_secs: u64,
|
||||
pub me_adaptive_floor_writers_per_core_total: u16,
|
||||
pub me_adaptive_floor_cpu_cores_override: u16,
|
||||
pub me_adaptive_floor_max_extra_writers_single_per_core: u16,
|
||||
pub me_adaptive_floor_max_extra_writers_multi_per_core: u16,
|
||||
pub me_adaptive_floor_max_active_writers_per_core: u16,
|
||||
pub me_adaptive_floor_max_warm_writers_per_core: u16,
|
||||
pub me_adaptive_floor_max_active_writers_global: u32,
|
||||
pub me_adaptive_floor_max_warm_writers_global: u32,
|
||||
pub me_route_backpressure_base_timeout_ms: u64,
|
||||
pub me_route_backpressure_high_timeout_ms: u64,
|
||||
pub me_route_backpressure_high_watermark_pct: u8,
|
||||
pub access: crate::config::AccessConfig,
|
||||
pub me_reader_route_data_wait_ms: u64,
|
||||
pub me_d2c_flush_batch_max_frames: usize,
|
||||
pub me_d2c_flush_batch_max_bytes: usize,
|
||||
pub me_d2c_flush_batch_max_delay_us: u64,
|
||||
pub me_d2c_ack_flush_immediate: bool,
|
||||
pub direct_relay_copy_buf_c2s_bytes: usize,
|
||||
pub direct_relay_copy_buf_s2c_bytes: usize,
|
||||
pub me_health_interval_ms_unhealthy: u64,
|
||||
pub me_health_interval_ms_healthy: u64,
|
||||
pub me_admission_poll_ms: u64,
|
||||
pub me_warn_rate_limit_ms: u64,
|
||||
pub users: std::collections::HashMap<String, String>,
|
||||
pub user_ad_tags: std::collections::HashMap<String, String>,
|
||||
pub user_max_tcp_conns: std::collections::HashMap<String, usize>,
|
||||
pub user_expirations: std::collections::HashMap<String, chrono::DateTime<chrono::Utc>>,
|
||||
pub user_data_quota: std::collections::HashMap<String, u64>,
|
||||
pub user_max_unique_ips: std::collections::HashMap<String, usize>,
|
||||
pub user_max_unique_ips_mode: crate::config::UserMaxUniqueIpsMode,
|
||||
pub user_max_unique_ips_window_secs: u64,
|
||||
}
|
||||
|
||||
impl HotFields {
|
||||
@@ -74,17 +123,51 @@ impl HotFields {
|
||||
log_level: cfg.general.log_level.clone(),
|
||||
ad_tag: cfg.general.ad_tag.clone(),
|
||||
dns_overrides: cfg.network.dns_overrides.clone(),
|
||||
middle_proxy_pool_size: cfg.general.middle_proxy_pool_size,
|
||||
desync_all_full: cfg.general.desync_all_full,
|
||||
update_every_secs: cfg.general.effective_update_every_secs(),
|
||||
me_reinit_every_secs: cfg.general.me_reinit_every_secs,
|
||||
me_reinit_singleflight: cfg.general.me_reinit_singleflight,
|
||||
me_reinit_coalesce_window_ms: cfg.general.me_reinit_coalesce_window_ms,
|
||||
hardswap: cfg.general.hardswap,
|
||||
me_pool_drain_ttl_secs: cfg.general.me_pool_drain_ttl_secs,
|
||||
me_pool_min_fresh_ratio: cfg.general.me_pool_min_fresh_ratio,
|
||||
me_reinit_drain_timeout_secs: cfg.general.me_reinit_drain_timeout_secs,
|
||||
me_keepalive_enabled: cfg.general.me_keepalive_enabled,
|
||||
me_keepalive_interval_secs: cfg.general.me_keepalive_interval_secs,
|
||||
me_keepalive_jitter_secs: cfg.general.me_keepalive_jitter_secs,
|
||||
me_keepalive_payload_random: cfg.general.me_keepalive_payload_random,
|
||||
me_hardswap_warmup_delay_min_ms: cfg.general.me_hardswap_warmup_delay_min_ms,
|
||||
me_hardswap_warmup_delay_max_ms: cfg.general.me_hardswap_warmup_delay_max_ms,
|
||||
me_hardswap_warmup_extra_passes: cfg.general.me_hardswap_warmup_extra_passes,
|
||||
me_hardswap_warmup_pass_backoff_base_ms: cfg
|
||||
.general
|
||||
.me_hardswap_warmup_pass_backoff_base_ms,
|
||||
me_bind_stale_mode: cfg.general.me_bind_stale_mode,
|
||||
me_bind_stale_ttl_secs: cfg.general.me_bind_stale_ttl_secs,
|
||||
me_secret_atomic_snapshot: cfg.general.me_secret_atomic_snapshot,
|
||||
me_deterministic_writer_sort: cfg.general.me_deterministic_writer_sort,
|
||||
me_writer_pick_mode: cfg.general.me_writer_pick_mode,
|
||||
me_writer_pick_sample_size: cfg.general.me_writer_pick_sample_size,
|
||||
me_single_endpoint_shadow_writers: cfg.general.me_single_endpoint_shadow_writers,
|
||||
me_single_endpoint_outage_mode_enabled: cfg
|
||||
.general
|
||||
.me_single_endpoint_outage_mode_enabled,
|
||||
me_single_endpoint_outage_disable_quarantine: cfg
|
||||
.general
|
||||
.me_single_endpoint_outage_disable_quarantine,
|
||||
me_single_endpoint_outage_backoff_min_ms: cfg
|
||||
.general
|
||||
.me_single_endpoint_outage_backoff_min_ms,
|
||||
me_single_endpoint_outage_backoff_max_ms: cfg
|
||||
.general
|
||||
.me_single_endpoint_outage_backoff_max_ms,
|
||||
me_single_endpoint_shadow_rotate_every_secs: cfg
|
||||
.general
|
||||
.me_single_endpoint_shadow_rotate_every_secs,
|
||||
me_config_stable_snapshots: cfg.general.me_config_stable_snapshots,
|
||||
me_config_apply_cooldown_secs: cfg.general.me_config_apply_cooldown_secs,
|
||||
me_snapshot_require_http_2xx: cfg.general.me_snapshot_require_http_2xx,
|
||||
me_snapshot_reject_empty_map: cfg.general.me_snapshot_reject_empty_map,
|
||||
me_snapshot_min_proxy_for_lines: cfg.general.me_snapshot_min_proxy_for_lines,
|
||||
proxy_secret_stable_snapshots: cfg.general.proxy_secret_stable_snapshots,
|
||||
proxy_secret_rotate_runtime: cfg.general.proxy_secret_rotate_runtime,
|
||||
proxy_secret_len_max: cfg.general.proxy_secret_len_max,
|
||||
telemetry_core_enabled: cfg.general.telemetry.core_enabled,
|
||||
telemetry_user_enabled: cfg.general.telemetry.user_enabled,
|
||||
telemetry_me_level: cfg.general.telemetry.me_level,
|
||||
@@ -94,42 +177,350 @@ impl HotFields {
|
||||
me_adaptive_floor_min_writers_single_endpoint: cfg
|
||||
.general
|
||||
.me_adaptive_floor_min_writers_single_endpoint,
|
||||
me_adaptive_floor_min_writers_multi_endpoint: cfg
|
||||
.general
|
||||
.me_adaptive_floor_min_writers_multi_endpoint,
|
||||
me_adaptive_floor_recover_grace_secs: cfg
|
||||
.general
|
||||
.me_adaptive_floor_recover_grace_secs,
|
||||
me_adaptive_floor_writers_per_core_total: cfg
|
||||
.general
|
||||
.me_adaptive_floor_writers_per_core_total,
|
||||
me_adaptive_floor_cpu_cores_override: cfg
|
||||
.general
|
||||
.me_adaptive_floor_cpu_cores_override,
|
||||
me_adaptive_floor_max_extra_writers_single_per_core: cfg
|
||||
.general
|
||||
.me_adaptive_floor_max_extra_writers_single_per_core,
|
||||
me_adaptive_floor_max_extra_writers_multi_per_core: cfg
|
||||
.general
|
||||
.me_adaptive_floor_max_extra_writers_multi_per_core,
|
||||
me_adaptive_floor_max_active_writers_per_core: cfg
|
||||
.general
|
||||
.me_adaptive_floor_max_active_writers_per_core,
|
||||
me_adaptive_floor_max_warm_writers_per_core: cfg
|
||||
.general
|
||||
.me_adaptive_floor_max_warm_writers_per_core,
|
||||
me_adaptive_floor_max_active_writers_global: cfg
|
||||
.general
|
||||
.me_adaptive_floor_max_active_writers_global,
|
||||
me_adaptive_floor_max_warm_writers_global: cfg
|
||||
.general
|
||||
.me_adaptive_floor_max_warm_writers_global,
|
||||
me_route_backpressure_base_timeout_ms: cfg.general.me_route_backpressure_base_timeout_ms,
|
||||
me_route_backpressure_high_timeout_ms: cfg.general.me_route_backpressure_high_timeout_ms,
|
||||
me_route_backpressure_high_watermark_pct: cfg.general.me_route_backpressure_high_watermark_pct,
|
||||
access: cfg.access.clone(),
|
||||
me_reader_route_data_wait_ms: cfg.general.me_reader_route_data_wait_ms,
|
||||
me_d2c_flush_batch_max_frames: cfg.general.me_d2c_flush_batch_max_frames,
|
||||
me_d2c_flush_batch_max_bytes: cfg.general.me_d2c_flush_batch_max_bytes,
|
||||
me_d2c_flush_batch_max_delay_us: cfg.general.me_d2c_flush_batch_max_delay_us,
|
||||
me_d2c_ack_flush_immediate: cfg.general.me_d2c_ack_flush_immediate,
|
||||
direct_relay_copy_buf_c2s_bytes: cfg.general.direct_relay_copy_buf_c2s_bytes,
|
||||
direct_relay_copy_buf_s2c_bytes: cfg.general.direct_relay_copy_buf_s2c_bytes,
|
||||
me_health_interval_ms_unhealthy: cfg.general.me_health_interval_ms_unhealthy,
|
||||
me_health_interval_ms_healthy: cfg.general.me_health_interval_ms_healthy,
|
||||
me_admission_poll_ms: cfg.general.me_admission_poll_ms,
|
||||
me_warn_rate_limit_ms: cfg.general.me_warn_rate_limit_ms,
|
||||
users: cfg.access.users.clone(),
|
||||
user_ad_tags: cfg.access.user_ad_tags.clone(),
|
||||
user_max_tcp_conns: cfg.access.user_max_tcp_conns.clone(),
|
||||
user_expirations: cfg.access.user_expirations.clone(),
|
||||
user_data_quota: cfg.access.user_data_quota.clone(),
|
||||
user_max_unique_ips: cfg.access.user_max_unique_ips.clone(),
|
||||
user_max_unique_ips_mode: cfg.access.user_max_unique_ips_mode,
|
||||
user_max_unique_ips_window_secs: cfg.access.user_max_unique_ips_window_secs,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Recursively rewrites a JSON value so that every object has its entries
/// re-inserted in ascending key order.
///
/// Arrays keep their element order; each element is canonicalized in place.
/// Scalars (null, bool, number, string) are left untouched.
fn canonicalize_json(value: &mut serde_json::Value) {
    match value {
        serde_json::Value::Array(elements) => {
            elements.iter_mut().for_each(canonicalize_json);
        }
        serde_json::Value::Object(object) => {
            // Drain the map, leaving an empty one behind to refill below.
            let mut entries: Vec<(String, serde_json::Value)> =
                std::mem::take(object).into_iter().collect();
            entries.sort_by(|(left_key, _), (right_key, _)| left_key.cmp(right_key));
            for (key, mut child) in entries {
                canonicalize_json(&mut child);
                object.insert(key, child);
            }
        }
        _ => {}
    }
}
|
||||
|
||||
fn config_equal(lhs: &ProxyConfig, rhs: &ProxyConfig) -> bool {
|
||||
let mut left = match serde_json::to_value(lhs) {
|
||||
Ok(value) => value,
|
||||
Err(_) => return false,
|
||||
};
|
||||
let mut right = match serde_json::to_value(rhs) {
|
||||
Ok(value) => value,
|
||||
Err(_) => return false,
|
||||
};
|
||||
canonicalize_json(&mut left);
|
||||
canonicalize_json(&mut right);
|
||||
left == right
|
||||
}
|
||||
|
||||
fn listeners_equal(
|
||||
lhs: &[crate::config::ListenerConfig],
|
||||
rhs: &[crate::config::ListenerConfig],
|
||||
) -> bool {
|
||||
if lhs.len() != rhs.len() {
|
||||
return false;
|
||||
}
|
||||
lhs.iter().zip(rhs.iter()).all(|(a, b)| {
|
||||
a.ip == b.ip
|
||||
&& a.announce == b.announce
|
||||
&& a.announce_ip == b.announce_ip
|
||||
&& a.proxy_protocol == b.proxy_protocol
|
||||
&& a.reuse_allow == b.reuse_allow
|
||||
})
|
||||
}
|
||||
|
||||
/// Builds a config that starts as a clone of `old` and then takes every field
/// listed below from `new`.
///
/// All fields not assigned here keep their value from `old`. The copied set
/// covers selected `general` fields (including the whole `telemetry` value),
/// `network.dns_overrides`, and the per-user `access` fields.
fn overlay_hot_fields(old: &ProxyConfig, new: &ProxyConfig) -> ProxyConfig {
    let mut merged = old.clone();

    // ── general ──────────────────────────────────────────────────────────────
    {
        let src = &new.general;
        let dst = &mut merged.general;

        dst.log_level = src.log_level.clone();
        dst.ad_tag = src.ad_tag.clone();
        dst.desync_all_full = src.desync_all_full;
        dst.update_every = src.update_every;
        dst.proxy_secret_auto_reload_secs = src.proxy_secret_auto_reload_secs;
        dst.proxy_config_auto_reload_secs = src.proxy_config_auto_reload_secs;

        dst.me_reinit_every_secs = src.me_reinit_every_secs;
        dst.me_reinit_singleflight = src.me_reinit_singleflight;
        dst.me_reinit_coalesce_window_ms = src.me_reinit_coalesce_window_ms;
        dst.hardswap = src.hardswap;
        dst.me_pool_drain_ttl_secs = src.me_pool_drain_ttl_secs;
        dst.me_pool_min_fresh_ratio = src.me_pool_min_fresh_ratio;
        dst.me_reinit_drain_timeout_secs = src.me_reinit_drain_timeout_secs;

        dst.me_hardswap_warmup_delay_min_ms = src.me_hardswap_warmup_delay_min_ms;
        dst.me_hardswap_warmup_delay_max_ms = src.me_hardswap_warmup_delay_max_ms;
        dst.me_hardswap_warmup_extra_passes = src.me_hardswap_warmup_extra_passes;
        dst.me_hardswap_warmup_pass_backoff_base_ms = src.me_hardswap_warmup_pass_backoff_base_ms;

        dst.me_bind_stale_mode = src.me_bind_stale_mode;
        dst.me_bind_stale_ttl_secs = src.me_bind_stale_ttl_secs;
        dst.me_secret_atomic_snapshot = src.me_secret_atomic_snapshot;
        dst.me_deterministic_writer_sort = src.me_deterministic_writer_sort;
        dst.me_writer_pick_mode = src.me_writer_pick_mode;
        dst.me_writer_pick_sample_size = src.me_writer_pick_sample_size;

        dst.me_single_endpoint_shadow_writers = src.me_single_endpoint_shadow_writers;
        dst.me_single_endpoint_outage_mode_enabled = src.me_single_endpoint_outage_mode_enabled;
        dst.me_single_endpoint_outage_disable_quarantine =
            src.me_single_endpoint_outage_disable_quarantine;
        dst.me_single_endpoint_outage_backoff_min_ms =
            src.me_single_endpoint_outage_backoff_min_ms;
        dst.me_single_endpoint_outage_backoff_max_ms =
            src.me_single_endpoint_outage_backoff_max_ms;
        dst.me_single_endpoint_shadow_rotate_every_secs =
            src.me_single_endpoint_shadow_rotate_every_secs;

        dst.me_config_stable_snapshots = src.me_config_stable_snapshots;
        dst.me_config_apply_cooldown_secs = src.me_config_apply_cooldown_secs;
        dst.me_snapshot_require_http_2xx = src.me_snapshot_require_http_2xx;
        dst.me_snapshot_reject_empty_map = src.me_snapshot_reject_empty_map;
        dst.me_snapshot_min_proxy_for_lines = src.me_snapshot_min_proxy_for_lines;

        dst.proxy_secret_stable_snapshots = src.proxy_secret_stable_snapshots;
        dst.proxy_secret_rotate_runtime = src.proxy_secret_rotate_runtime;
        dst.proxy_secret_len_max = src.proxy_secret_len_max;

        // Telemetry is replaced as a whole rather than field by field.
        dst.telemetry = src.telemetry.clone();
        dst.me_socks_kdf_policy = src.me_socks_kdf_policy;

        dst.me_floor_mode = src.me_floor_mode;
        dst.me_adaptive_floor_idle_secs = src.me_adaptive_floor_idle_secs;
        dst.me_adaptive_floor_min_writers_single_endpoint =
            src.me_adaptive_floor_min_writers_single_endpoint;
        dst.me_adaptive_floor_min_writers_multi_endpoint =
            src.me_adaptive_floor_min_writers_multi_endpoint;
        dst.me_adaptive_floor_recover_grace_secs = src.me_adaptive_floor_recover_grace_secs;
        dst.me_adaptive_floor_writers_per_core_total =
            src.me_adaptive_floor_writers_per_core_total;
        dst.me_adaptive_floor_cpu_cores_override = src.me_adaptive_floor_cpu_cores_override;
        dst.me_adaptive_floor_max_extra_writers_single_per_core =
            src.me_adaptive_floor_max_extra_writers_single_per_core;
        dst.me_adaptive_floor_max_extra_writers_multi_per_core =
            src.me_adaptive_floor_max_extra_writers_multi_per_core;
        dst.me_adaptive_floor_max_active_writers_per_core =
            src.me_adaptive_floor_max_active_writers_per_core;
        dst.me_adaptive_floor_max_warm_writers_per_core =
            src.me_adaptive_floor_max_warm_writers_per_core;
        dst.me_adaptive_floor_max_active_writers_global =
            src.me_adaptive_floor_max_active_writers_global;
        dst.me_adaptive_floor_max_warm_writers_global =
            src.me_adaptive_floor_max_warm_writers_global;

        dst.me_route_backpressure_base_timeout_ms = src.me_route_backpressure_base_timeout_ms;
        dst.me_route_backpressure_high_timeout_ms = src.me_route_backpressure_high_timeout_ms;
        dst.me_route_backpressure_high_watermark_pct =
            src.me_route_backpressure_high_watermark_pct;
        dst.me_reader_route_data_wait_ms = src.me_reader_route_data_wait_ms;

        dst.me_d2c_flush_batch_max_frames = src.me_d2c_flush_batch_max_frames;
        dst.me_d2c_flush_batch_max_bytes = src.me_d2c_flush_batch_max_bytes;
        dst.me_d2c_flush_batch_max_delay_us = src.me_d2c_flush_batch_max_delay_us;
        dst.me_d2c_ack_flush_immediate = src.me_d2c_ack_flush_immediate;
        dst.direct_relay_copy_buf_c2s_bytes = src.direct_relay_copy_buf_c2s_bytes;
        dst.direct_relay_copy_buf_s2c_bytes = src.direct_relay_copy_buf_s2c_bytes;

        dst.me_health_interval_ms_unhealthy = src.me_health_interval_ms_unhealthy;
        dst.me_health_interval_ms_healthy = src.me_health_interval_ms_healthy;
        dst.me_admission_poll_ms = src.me_admission_poll_ms;
        dst.me_warn_rate_limit_ms = src.me_warn_rate_limit_ms;
    }

    // ── network ──────────────────────────────────────────────────────────────
    merged.network.dns_overrides = new.network.dns_overrides.clone();

    // ── access ───────────────────────────────────────────────────────────────
    {
        let src = &new.access;
        let dst = &mut merged.access;

        dst.users = src.users.clone();
        dst.user_ad_tags = src.user_ad_tags.clone();
        dst.user_max_tcp_conns = src.user_max_tcp_conns.clone();
        dst.user_expirations = src.user_expirations.clone();
        dst.user_data_quota = src.user_data_quota.clone();
        dst.user_max_unique_ips = src.user_max_unique_ips.clone();
        dst.user_max_unique_ips_mode = src.user_max_unique_ips_mode;
        dst.user_max_unique_ips_window_secs = src.user_max_unique_ips_window_secs;
    }

    merged
}
|
||||
|
||||
/// Warn if any non-hot fields changed (require restart).
|
||||
fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig) {
|
||||
fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: bool) {
|
||||
let mut warned = false;
|
||||
if old.server.port != new.server.port {
|
||||
warned = true;
|
||||
warn!(
|
||||
"config reload: server.port changed ({} → {}); restart required",
|
||||
old.server.port, new.server.port
|
||||
);
|
||||
}
|
||||
if old.server.api.enabled != new.server.api.enabled
|
||||
|| old.server.api.listen != new.server.api.listen
|
||||
|| old.server.api.whitelist != new.server.api.whitelist
|
||||
|| old.server.api.auth_header != new.server.api.auth_header
|
||||
|| old.server.api.request_body_limit_bytes != new.server.api.request_body_limit_bytes
|
||||
|| old.server.api.minimal_runtime_enabled != new.server.api.minimal_runtime_enabled
|
||||
|| old.server.api.minimal_runtime_cache_ttl_ms
|
||||
!= new.server.api.minimal_runtime_cache_ttl_ms
|
||||
|| old.server.api.runtime_edge_enabled != new.server.api.runtime_edge_enabled
|
||||
|| old.server.api.runtime_edge_cache_ttl_ms
|
||||
!= new.server.api.runtime_edge_cache_ttl_ms
|
||||
|| old.server.api.runtime_edge_top_n != new.server.api.runtime_edge_top_n
|
||||
|| old.server.api.runtime_edge_events_capacity
|
||||
!= new.server.api.runtime_edge_events_capacity
|
||||
|| old.server.api.read_only != new.server.api.read_only
|
||||
{
|
||||
warned = true;
|
||||
warn!("config reload: server.api changed; restart required");
|
||||
}
|
||||
if old.server.proxy_protocol != new.server.proxy_protocol
|
||||
|| !listeners_equal(&old.server.listeners, &new.server.listeners)
|
||||
|| old.server.listen_addr_ipv4 != new.server.listen_addr_ipv4
|
||||
|| old.server.listen_addr_ipv6 != new.server.listen_addr_ipv6
|
||||
|| old.server.listen_tcp != new.server.listen_tcp
|
||||
|| old.server.listen_unix_sock != new.server.listen_unix_sock
|
||||
|| old.server.listen_unix_sock_perm != new.server.listen_unix_sock_perm
|
||||
{
|
||||
warned = true;
|
||||
warn!("config reload: server listener settings changed; restart required");
|
||||
}
|
||||
if old.censorship.tls_domain != new.censorship.tls_domain
|
||||
|| old.censorship.tls_domains != new.censorship.tls_domains
|
||||
|| old.censorship.mask != new.censorship.mask
|
||||
|| old.censorship.mask_host != new.censorship.mask_host
|
||||
|| old.censorship.mask_port != new.censorship.mask_port
|
||||
|| old.censorship.mask_unix_sock != new.censorship.mask_unix_sock
|
||||
|| old.censorship.fake_cert_len != new.censorship.fake_cert_len
|
||||
|| old.censorship.tls_emulation != new.censorship.tls_emulation
|
||||
|| old.censorship.tls_front_dir != new.censorship.tls_front_dir
|
||||
|| old.censorship.server_hello_delay_min_ms != new.censorship.server_hello_delay_min_ms
|
||||
|| old.censorship.server_hello_delay_max_ms != new.censorship.server_hello_delay_max_ms
|
||||
|| old.censorship.tls_new_session_tickets != new.censorship.tls_new_session_tickets
|
||||
|| old.censorship.tls_full_cert_ttl_secs != new.censorship.tls_full_cert_ttl_secs
|
||||
|| old.censorship.alpn_enforce != new.censorship.alpn_enforce
|
||||
|| old.censorship.mask_proxy_protocol != new.censorship.mask_proxy_protocol
|
||||
{
|
||||
warned = true;
|
||||
warn!("config reload: censorship settings changed; restart required");
|
||||
}
|
||||
if old.censorship.tls_domain != new.censorship.tls_domain {
|
||||
warned = true;
|
||||
warn!(
|
||||
"config reload: censorship.tls_domain changed ('{}' → '{}'); restart required",
|
||||
old.censorship.tls_domain, new.censorship.tls_domain
|
||||
);
|
||||
}
|
||||
if old.network.ipv4 != new.network.ipv4 || old.network.ipv6 != new.network.ipv6 {
|
||||
warned = true;
|
||||
warn!("config reload: network.ipv4/ipv6 changed; restart required");
|
||||
}
|
||||
if old.network.prefer != new.network.prefer
|
||||
|| old.network.multipath != new.network.multipath
|
||||
|| old.network.stun_use != new.network.stun_use
|
||||
|| old.network.stun_servers != new.network.stun_servers
|
||||
|| old.network.stun_tcp_fallback != new.network.stun_tcp_fallback
|
||||
|| old.network.http_ip_detect_urls != new.network.http_ip_detect_urls
|
||||
|| old.network.cache_public_ip_path != new.network.cache_public_ip_path
|
||||
{
|
||||
warned = true;
|
||||
warn!("config reload: non-hot network settings changed; restart required");
|
||||
}
|
||||
if old.general.use_middle_proxy != new.general.use_middle_proxy {
|
||||
warned = true;
|
||||
warn!("config reload: use_middle_proxy changed; restart required");
|
||||
}
|
||||
if old.general.stun_nat_probe_concurrency != new.general.stun_nat_probe_concurrency {
|
||||
warned = true;
|
||||
warn!("config reload: general.stun_nat_probe_concurrency changed; restart required");
|
||||
}
|
||||
if old.general.middle_proxy_pool_size != new.general.middle_proxy_pool_size {
|
||||
warned = true;
|
||||
warn!("config reload: general.middle_proxy_pool_size changed; restart required");
|
||||
}
|
||||
if old.general.me_route_no_writer_mode != new.general.me_route_no_writer_mode
|
||||
|| old.general.me_route_no_writer_wait_ms != new.general.me_route_no_writer_wait_ms
|
||||
|| old.general.me_route_inline_recovery_attempts
|
||||
!= new.general.me_route_inline_recovery_attempts
|
||||
|| old.general.me_route_inline_recovery_wait_ms
|
||||
!= new.general.me_route_inline_recovery_wait_ms
|
||||
{
|
||||
warned = true;
|
||||
warn!("config reload: general.me_route_no_writer_* changed; restart required");
|
||||
}
|
||||
if old.general.unknown_dc_log_path != new.general.unknown_dc_log_path
|
||||
|| old.general.unknown_dc_file_log_enabled != new.general.unknown_dc_file_log_enabled
|
||||
{
|
||||
warned = true;
|
||||
warn!("config reload: general.unknown_dc_* changed; restart required");
|
||||
}
|
||||
if old.general.me_init_retry_attempts != new.general.me_init_retry_attempts {
|
||||
warned = true;
|
||||
warn!("config reload: general.me_init_retry_attempts changed; restart required");
|
||||
}
|
||||
if old.general.me2dc_fallback != new.general.me2dc_fallback {
|
||||
warned = true;
|
||||
warn!("config reload: general.me2dc_fallback changed; restart required");
|
||||
}
|
||||
if old.general.proxy_config_v4_cache_path != new.general.proxy_config_v4_cache_path
|
||||
|| old.general.proxy_config_v6_cache_path != new.general.proxy_config_v6_cache_path
|
||||
{
|
||||
warned = true;
|
||||
warn!("config reload: general.proxy_config_*_cache_path changed; restart required");
|
||||
}
|
||||
if old.general.me_keepalive_enabled != new.general.me_keepalive_enabled
|
||||
|| old.general.me_keepalive_interval_secs != new.general.me_keepalive_interval_secs
|
||||
|| old.general.me_keepalive_jitter_secs != new.general.me_keepalive_jitter_secs
|
||||
|| old.general.me_keepalive_payload_random != new.general.me_keepalive_payload_random
|
||||
{
|
||||
warned = true;
|
||||
warn!("config reload: general.me_keepalive_* changed; restart required");
|
||||
}
|
||||
if old.general.upstream_connect_retry_attempts != new.general.upstream_connect_retry_attempts
|
||||
|| old.general.upstream_connect_retry_backoff_ms
|
||||
!= new.general.upstream_connect_retry_backoff_ms
|
||||
@@ -139,8 +530,12 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig) {
|
||||
!= new.general.upstream_connect_failfast_hard_errors
|
||||
|| old.general.rpc_proxy_req_every != new.general.rpc_proxy_req_every
|
||||
{
|
||||
warned = true;
|
||||
warn!("config reload: general.upstream_* changed; restart required");
|
||||
}
|
||||
if non_hot_changed && !warned {
|
||||
warn!("config reload: one or more non-hot fields changed; restart required");
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve the public host for link generation — mirrors the logic in main.rs.
|
||||
@@ -223,10 +618,10 @@ fn log_changes(
|
||||
log_tx.send(new_hot.log_level.clone()).ok();
|
||||
}
|
||||
|
||||
if old_hot.access.user_ad_tags != new_hot.access.user_ad_tags {
|
||||
if old_hot.user_ad_tags != new_hot.user_ad_tags {
|
||||
info!(
|
||||
"config reload: user_ad_tags updated ({} entries)",
|
||||
new_hot.access.user_ad_tags.len(),
|
||||
new_hot.user_ad_tags.len(),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -241,13 +636,6 @@ fn log_changes(
|
||||
);
|
||||
}
|
||||
|
||||
if old_hot.middle_proxy_pool_size != new_hot.middle_proxy_pool_size {
|
||||
info!(
|
||||
"config reload: middle_proxy_pool_size: {} → {}",
|
||||
old_hot.middle_proxy_pool_size, new_hot.middle_proxy_pool_size,
|
||||
);
|
||||
}
|
||||
|
||||
if old_hot.desync_all_full != new_hot.desync_all_full {
|
||||
info!(
|
||||
"config reload: desync_all_full: {} → {}",
|
||||
@@ -261,6 +649,17 @@ fn log_changes(
|
||||
old_hot.update_every_secs, new_hot.update_every_secs,
|
||||
);
|
||||
}
|
||||
if old_hot.me_reinit_every_secs != new_hot.me_reinit_every_secs
|
||||
|| old_hot.me_reinit_singleflight != new_hot.me_reinit_singleflight
|
||||
|| old_hot.me_reinit_coalesce_window_ms != new_hot.me_reinit_coalesce_window_ms
|
||||
{
|
||||
info!(
|
||||
"config reload: me_reinit: interval={}s singleflight={} coalesce={}ms",
|
||||
new_hot.me_reinit_every_secs,
|
||||
new_hot.me_reinit_singleflight,
|
||||
new_hot.me_reinit_coalesce_window_ms
|
||||
);
|
||||
}
|
||||
|
||||
if old_hot.hardswap != new_hot.hardswap {
|
||||
info!(
|
||||
@@ -289,18 +688,88 @@ fn log_changes(
|
||||
old_hot.me_reinit_drain_timeout_secs, new_hot.me_reinit_drain_timeout_secs,
|
||||
);
|
||||
}
|
||||
|
||||
if old_hot.me_keepalive_enabled != new_hot.me_keepalive_enabled
|
||||
|| old_hot.me_keepalive_interval_secs != new_hot.me_keepalive_interval_secs
|
||||
|| old_hot.me_keepalive_jitter_secs != new_hot.me_keepalive_jitter_secs
|
||||
|| old_hot.me_keepalive_payload_random != new_hot.me_keepalive_payload_random
|
||||
if old_hot.me_hardswap_warmup_delay_min_ms != new_hot.me_hardswap_warmup_delay_min_ms
|
||||
|| old_hot.me_hardswap_warmup_delay_max_ms != new_hot.me_hardswap_warmup_delay_max_ms
|
||||
|| old_hot.me_hardswap_warmup_extra_passes != new_hot.me_hardswap_warmup_extra_passes
|
||||
|| old_hot.me_hardswap_warmup_pass_backoff_base_ms
|
||||
!= new_hot.me_hardswap_warmup_pass_backoff_base_ms
|
||||
{
|
||||
info!(
|
||||
"config reload: me_keepalive: enabled={} interval={}s jitter={}s random_payload={}",
|
||||
new_hot.me_keepalive_enabled,
|
||||
new_hot.me_keepalive_interval_secs,
|
||||
new_hot.me_keepalive_jitter_secs,
|
||||
new_hot.me_keepalive_payload_random,
|
||||
"config reload: me_hardswap_warmup: min={}ms max={}ms extra_passes={} pass_backoff={}ms",
|
||||
new_hot.me_hardswap_warmup_delay_min_ms,
|
||||
new_hot.me_hardswap_warmup_delay_max_ms,
|
||||
new_hot.me_hardswap_warmup_extra_passes,
|
||||
new_hot.me_hardswap_warmup_pass_backoff_base_ms
|
||||
);
|
||||
}
|
||||
if old_hot.me_bind_stale_mode != new_hot.me_bind_stale_mode
|
||||
|| old_hot.me_bind_stale_ttl_secs != new_hot.me_bind_stale_ttl_secs
|
||||
{
|
||||
info!(
|
||||
"config reload: me_bind_stale: mode={:?} ttl={}s",
|
||||
new_hot.me_bind_stale_mode,
|
||||
new_hot.me_bind_stale_ttl_secs
|
||||
);
|
||||
}
|
||||
if old_hot.me_secret_atomic_snapshot != new_hot.me_secret_atomic_snapshot
|
||||
|| old_hot.me_deterministic_writer_sort != new_hot.me_deterministic_writer_sort
|
||||
|| old_hot.me_writer_pick_mode != new_hot.me_writer_pick_mode
|
||||
|| old_hot.me_writer_pick_sample_size != new_hot.me_writer_pick_sample_size
|
||||
{
|
||||
info!(
|
||||
"config reload: me_runtime_flags: secret_atomic_snapshot={} deterministic_sort={} writer_pick_mode={:?} writer_pick_sample_size={}",
|
||||
new_hot.me_secret_atomic_snapshot,
|
||||
new_hot.me_deterministic_writer_sort,
|
||||
new_hot.me_writer_pick_mode,
|
||||
new_hot.me_writer_pick_sample_size,
|
||||
);
|
||||
}
|
||||
if old_hot.me_single_endpoint_shadow_writers != new_hot.me_single_endpoint_shadow_writers
|
||||
|| old_hot.me_single_endpoint_outage_mode_enabled
|
||||
!= new_hot.me_single_endpoint_outage_mode_enabled
|
||||
|| old_hot.me_single_endpoint_outage_disable_quarantine
|
||||
!= new_hot.me_single_endpoint_outage_disable_quarantine
|
||||
|| old_hot.me_single_endpoint_outage_backoff_min_ms
|
||||
!= new_hot.me_single_endpoint_outage_backoff_min_ms
|
||||
|| old_hot.me_single_endpoint_outage_backoff_max_ms
|
||||
!= new_hot.me_single_endpoint_outage_backoff_max_ms
|
||||
|| old_hot.me_single_endpoint_shadow_rotate_every_secs
|
||||
!= new_hot.me_single_endpoint_shadow_rotate_every_secs
|
||||
{
|
||||
info!(
|
||||
"config reload: me_single_endpoint: shadow={} outage_enabled={} disable_quarantine={} backoff=[{}..{}]ms rotate={}s",
|
||||
new_hot.me_single_endpoint_shadow_writers,
|
||||
new_hot.me_single_endpoint_outage_mode_enabled,
|
||||
new_hot.me_single_endpoint_outage_disable_quarantine,
|
||||
new_hot.me_single_endpoint_outage_backoff_min_ms,
|
||||
new_hot.me_single_endpoint_outage_backoff_max_ms,
|
||||
new_hot.me_single_endpoint_shadow_rotate_every_secs
|
||||
);
|
||||
}
|
||||
if old_hot.me_config_stable_snapshots != new_hot.me_config_stable_snapshots
|
||||
|| old_hot.me_config_apply_cooldown_secs != new_hot.me_config_apply_cooldown_secs
|
||||
|| old_hot.me_snapshot_require_http_2xx != new_hot.me_snapshot_require_http_2xx
|
||||
|| old_hot.me_snapshot_reject_empty_map != new_hot.me_snapshot_reject_empty_map
|
||||
|| old_hot.me_snapshot_min_proxy_for_lines != new_hot.me_snapshot_min_proxy_for_lines
|
||||
{
|
||||
info!(
|
||||
"config reload: me_snapshot_guard: stable={} cooldown={}s require_2xx={} reject_empty={} min_proxy_for={}",
|
||||
new_hot.me_config_stable_snapshots,
|
||||
new_hot.me_config_apply_cooldown_secs,
|
||||
new_hot.me_snapshot_require_http_2xx,
|
||||
new_hot.me_snapshot_reject_empty_map,
|
||||
new_hot.me_snapshot_min_proxy_for_lines
|
||||
);
|
||||
}
|
||||
if old_hot.proxy_secret_stable_snapshots != new_hot.proxy_secret_stable_snapshots
|
||||
|| old_hot.proxy_secret_rotate_runtime != new_hot.proxy_secret_rotate_runtime
|
||||
|| old_hot.proxy_secret_len_max != new_hot.proxy_secret_len_max
|
||||
{
|
||||
info!(
|
||||
"config reload: proxy_secret_runtime: stable={} rotate={} len_max={}",
|
||||
new_hot.proxy_secret_stable_snapshots,
|
||||
new_hot.proxy_secret_rotate_runtime,
|
||||
new_hot.proxy_secret_len_max
|
||||
);
|
||||
}
|
||||
|
||||
@@ -328,15 +797,42 @@ fn log_changes(
|
||||
|| old_hot.me_adaptive_floor_idle_secs != new_hot.me_adaptive_floor_idle_secs
|
||||
|| old_hot.me_adaptive_floor_min_writers_single_endpoint
|
||||
!= new_hot.me_adaptive_floor_min_writers_single_endpoint
|
||||
|| old_hot.me_adaptive_floor_min_writers_multi_endpoint
|
||||
!= new_hot.me_adaptive_floor_min_writers_multi_endpoint
|
||||
|| old_hot.me_adaptive_floor_recover_grace_secs
|
||||
!= new_hot.me_adaptive_floor_recover_grace_secs
|
||||
|| old_hot.me_adaptive_floor_writers_per_core_total
|
||||
!= new_hot.me_adaptive_floor_writers_per_core_total
|
||||
|| old_hot.me_adaptive_floor_cpu_cores_override
|
||||
!= new_hot.me_adaptive_floor_cpu_cores_override
|
||||
|| old_hot.me_adaptive_floor_max_extra_writers_single_per_core
|
||||
!= new_hot.me_adaptive_floor_max_extra_writers_single_per_core
|
||||
|| old_hot.me_adaptive_floor_max_extra_writers_multi_per_core
|
||||
!= new_hot.me_adaptive_floor_max_extra_writers_multi_per_core
|
||||
|| old_hot.me_adaptive_floor_max_active_writers_per_core
|
||||
!= new_hot.me_adaptive_floor_max_active_writers_per_core
|
||||
|| old_hot.me_adaptive_floor_max_warm_writers_per_core
|
||||
!= new_hot.me_adaptive_floor_max_warm_writers_per_core
|
||||
|| old_hot.me_adaptive_floor_max_active_writers_global
|
||||
!= new_hot.me_adaptive_floor_max_active_writers_global
|
||||
|| old_hot.me_adaptive_floor_max_warm_writers_global
|
||||
!= new_hot.me_adaptive_floor_max_warm_writers_global
|
||||
{
|
||||
info!(
|
||||
"config reload: me_floor: mode={:?} idle={}s min_single={} recover_grace={}s",
|
||||
"config reload: me_floor: mode={:?} idle={}s min_single={} min_multi={} recover_grace={}s per_core_total={} cores_override={} extra_single_per_core={} extra_multi_per_core={} max_active_per_core={} max_warm_per_core={} max_active_global={} max_warm_global={}",
|
||||
new_hot.me_floor_mode,
|
||||
new_hot.me_adaptive_floor_idle_secs,
|
||||
new_hot.me_adaptive_floor_min_writers_single_endpoint,
|
||||
new_hot.me_adaptive_floor_min_writers_multi_endpoint,
|
||||
new_hot.me_adaptive_floor_recover_grace_secs,
|
||||
new_hot.me_adaptive_floor_writers_per_core_total,
|
||||
new_hot.me_adaptive_floor_cpu_cores_override,
|
||||
new_hot.me_adaptive_floor_max_extra_writers_single_per_core,
|
||||
new_hot.me_adaptive_floor_max_extra_writers_multi_per_core,
|
||||
new_hot.me_adaptive_floor_max_active_writers_per_core,
|
||||
new_hot.me_adaptive_floor_max_warm_writers_per_core,
|
||||
new_hot.me_adaptive_floor_max_active_writers_global,
|
||||
new_hot.me_adaptive_floor_max_warm_writers_global,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -346,30 +842,59 @@ fn log_changes(
|
||||
!= new_hot.me_route_backpressure_high_timeout_ms
|
||||
|| old_hot.me_route_backpressure_high_watermark_pct
|
||||
!= new_hot.me_route_backpressure_high_watermark_pct
|
||||
|| old_hot.me_reader_route_data_wait_ms != new_hot.me_reader_route_data_wait_ms
|
||||
|| old_hot.me_health_interval_ms_unhealthy
|
||||
!= new_hot.me_health_interval_ms_unhealthy
|
||||
|| old_hot.me_health_interval_ms_healthy != new_hot.me_health_interval_ms_healthy
|
||||
|| old_hot.me_admission_poll_ms != new_hot.me_admission_poll_ms
|
||||
|| old_hot.me_warn_rate_limit_ms != new_hot.me_warn_rate_limit_ms
|
||||
{
|
||||
info!(
|
||||
"config reload: me_route_backpressure: base={}ms high={}ms watermark={}%",
|
||||
"config reload: me_route_backpressure: base={}ms high={}ms watermark={}%; me_reader_route_data_wait_ms={}; me_health_interval: unhealthy={}ms healthy={}ms; me_admission_poll={}ms; me_warn_rate_limit={}ms",
|
||||
new_hot.me_route_backpressure_base_timeout_ms,
|
||||
new_hot.me_route_backpressure_high_timeout_ms,
|
||||
new_hot.me_route_backpressure_high_watermark_pct,
|
||||
new_hot.me_reader_route_data_wait_ms,
|
||||
new_hot.me_health_interval_ms_unhealthy,
|
||||
new_hot.me_health_interval_ms_healthy,
|
||||
new_hot.me_admission_poll_ms,
|
||||
new_hot.me_warn_rate_limit_ms,
|
||||
);
|
||||
}
|
||||
|
||||
if old_hot.access.users != new_hot.access.users {
|
||||
let mut added: Vec<&String> = new_hot.access.users.keys()
|
||||
.filter(|u| !old_hot.access.users.contains_key(*u))
|
||||
if old_hot.me_d2c_flush_batch_max_frames != new_hot.me_d2c_flush_batch_max_frames
|
||||
|| old_hot.me_d2c_flush_batch_max_bytes != new_hot.me_d2c_flush_batch_max_bytes
|
||||
|| old_hot.me_d2c_flush_batch_max_delay_us != new_hot.me_d2c_flush_batch_max_delay_us
|
||||
|| old_hot.me_d2c_ack_flush_immediate != new_hot.me_d2c_ack_flush_immediate
|
||||
|| old_hot.direct_relay_copy_buf_c2s_bytes != new_hot.direct_relay_copy_buf_c2s_bytes
|
||||
|| old_hot.direct_relay_copy_buf_s2c_bytes != new_hot.direct_relay_copy_buf_s2c_bytes
|
||||
{
|
||||
info!(
|
||||
"config reload: relay_tuning: me_d2c_frames={} me_d2c_bytes={} me_d2c_delay_us={} me_ack_flush_immediate={} direct_buf_c2s={} direct_buf_s2c={}",
|
||||
new_hot.me_d2c_flush_batch_max_frames,
|
||||
new_hot.me_d2c_flush_batch_max_bytes,
|
||||
new_hot.me_d2c_flush_batch_max_delay_us,
|
||||
new_hot.me_d2c_ack_flush_immediate,
|
||||
new_hot.direct_relay_copy_buf_c2s_bytes,
|
||||
new_hot.direct_relay_copy_buf_s2c_bytes,
|
||||
);
|
||||
}
|
||||
|
||||
if old_hot.users != new_hot.users {
|
||||
let mut added: Vec<&String> = new_hot.users.keys()
|
||||
.filter(|u| !old_hot.users.contains_key(*u))
|
||||
.collect();
|
||||
added.sort();
|
||||
|
||||
let mut removed: Vec<&String> = old_hot.access.users.keys()
|
||||
.filter(|u| !new_hot.access.users.contains_key(*u))
|
||||
let mut removed: Vec<&String> = old_hot.users.keys()
|
||||
.filter(|u| !new_hot.users.contains_key(*u))
|
||||
.collect();
|
||||
removed.sort();
|
||||
|
||||
let mut changed: Vec<&String> = new_hot.access.users.keys()
|
||||
let mut changed: Vec<&String> = new_hot.users.keys()
|
||||
.filter(|u| {
|
||||
old_hot.access.users.get(*u)
|
||||
.map(|s| s != &new_hot.access.users[*u])
|
||||
old_hot.users.get(*u)
|
||||
.map(|s| s != &new_hot.users[*u])
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.collect();
|
||||
@@ -383,7 +908,7 @@ fn log_changes(
|
||||
let host = resolve_link_host(new_cfg, detected_ip_v4, detected_ip_v6);
|
||||
let port = new_cfg.general.links.public_port.unwrap_or(new_cfg.server.port);
|
||||
for user in &added {
|
||||
if let Some(secret) = new_hot.access.users.get(*user) {
|
||||
if let Some(secret) = new_hot.users.get(*user) {
|
||||
print_user_links(user, secret, &host, port, new_cfg);
|
||||
}
|
||||
}
|
||||
@@ -402,28 +927,38 @@ fn log_changes(
|
||||
}
|
||||
}
|
||||
|
||||
if old_hot.access.user_max_tcp_conns != new_hot.access.user_max_tcp_conns {
|
||||
if old_hot.user_max_tcp_conns != new_hot.user_max_tcp_conns {
|
||||
info!(
|
||||
"config reload: user_max_tcp_conns updated ({} entries)",
|
||||
new_hot.access.user_max_tcp_conns.len()
|
||||
new_hot.user_max_tcp_conns.len()
|
||||
);
|
||||
}
|
||||
if old_hot.access.user_expirations != new_hot.access.user_expirations {
|
||||
if old_hot.user_expirations != new_hot.user_expirations {
|
||||
info!(
|
||||
"config reload: user_expirations updated ({} entries)",
|
||||
new_hot.access.user_expirations.len()
|
||||
new_hot.user_expirations.len()
|
||||
);
|
||||
}
|
||||
if old_hot.access.user_data_quota != new_hot.access.user_data_quota {
|
||||
if old_hot.user_data_quota != new_hot.user_data_quota {
|
||||
info!(
|
||||
"config reload: user_data_quota updated ({} entries)",
|
||||
new_hot.access.user_data_quota.len()
|
||||
new_hot.user_data_quota.len()
|
||||
);
|
||||
}
|
||||
if old_hot.access.user_max_unique_ips != new_hot.access.user_max_unique_ips {
|
||||
if old_hot.user_max_unique_ips != new_hot.user_max_unique_ips {
|
||||
info!(
|
||||
"config reload: user_max_unique_ips updated ({} entries)",
|
||||
new_hot.access.user_max_unique_ips.len()
|
||||
new_hot.user_max_unique_ips.len()
|
||||
);
|
||||
}
|
||||
if old_hot.user_max_unique_ips_mode != new_hot.user_max_unique_ips_mode
|
||||
|| old_hot.user_max_unique_ips_window_secs
|
||||
!= new_hot.user_max_unique_ips_window_secs
|
||||
{
|
||||
info!(
|
||||
"config reload: user_max_unique_ips policy mode={:?} window={}s",
|
||||
new_hot.user_max_unique_ips_mode,
|
||||
new_hot.user_max_unique_ips_window_secs
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -450,15 +985,22 @@ fn reload_config(
|
||||
}
|
||||
|
||||
let old_cfg = config_tx.borrow().clone();
|
||||
let applied_cfg = overlay_hot_fields(&old_cfg, &new_cfg);
|
||||
let old_hot = HotFields::from_config(&old_cfg);
|
||||
let new_hot = HotFields::from_config(&new_cfg);
|
||||
let applied_hot = HotFields::from_config(&applied_cfg);
|
||||
let non_hot_changed = !config_equal(&applied_cfg, &new_cfg);
|
||||
let hot_changed = old_hot != applied_hot;
|
||||
|
||||
if old_hot == new_hot {
|
||||
if non_hot_changed {
|
||||
warn_non_hot_changes(&old_cfg, &new_cfg, non_hot_changed);
|
||||
}
|
||||
|
||||
if !hot_changed {
|
||||
return;
|
||||
}
|
||||
|
||||
if old_hot.dns_overrides != new_hot.dns_overrides
|
||||
&& let Err(e) = crate::network::dns_overrides::install_entries(&new_hot.dns_overrides)
|
||||
if old_hot.dns_overrides != applied_hot.dns_overrides
|
||||
&& let Err(e) = crate::network::dns_overrides::install_entries(&applied_hot.dns_overrides)
|
||||
{
|
||||
error!(
|
||||
"config reload: invalid network.dns_overrides: {}; keeping old config",
|
||||
@@ -467,9 +1009,15 @@ fn reload_config(
|
||||
return;
|
||||
}
|
||||
|
||||
warn_non_hot_changes(&old_cfg, &new_cfg);
|
||||
log_changes(&old_hot, &new_hot, &new_cfg, log_tx, detected_ip_v4, detected_ip_v6);
|
||||
config_tx.send(Arc::new(new_cfg)).ok();
|
||||
log_changes(
|
||||
&old_hot,
|
||||
&applied_hot,
|
||||
&applied_cfg,
|
||||
log_tx,
|
||||
detected_ip_v4,
|
||||
detected_ip_v6,
|
||||
);
|
||||
config_tx.send(Arc::new(applied_cfg)).ok();
|
||||
}
|
||||
|
||||
// ── Public API ────────────────────────────────────────────────────────────────
|
||||
@@ -595,3 +1143,80 @@ pub fn spawn_config_watcher(
|
||||
|
||||
(config_rx, log_rx)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline configuration every test starts from.
    fn sample_config() -> ProxyConfig {
        ProxyConfig::default()
    }

    /// Shorthand for taking the hot-field snapshot of a configuration.
    fn hot_of(cfg: &ProxyConfig) -> HotFields {
        HotFields::from_config(cfg)
    }

    /// `general.hardswap` is taken from the new config while `server.port`
    /// keeps the old value after the overlay.
    #[test]
    fn overlay_applies_hot_and_preserves_non_hot() {
        let base = sample_config();
        let mut edited = base.clone();
        edited.general.hardswap = !base.general.hardswap;
        edited.server.port = base.server.port.saturating_add(1);

        let merged = overlay_hot_fields(&base, &edited);

        assert_eq!(merged.general.hardswap, edited.general.hardswap);
        assert_eq!(merged.server.port, base.server.port);
    }

    /// Changing only `server.port` leaves the hot snapshot untouched.
    #[test]
    fn non_hot_only_change_does_not_change_hot_snapshot() {
        let base = sample_config();
        let mut edited = base.clone();
        edited.server.port = base.server.port.saturating_add(1);

        let merged = overlay_hot_fields(&base, &edited);

        assert_eq!(hot_of(&base), hot_of(&merged));
        assert_eq!(merged.server.port, base.server.port);
    }

    /// `general.me_bind_stale_mode` is carried over by the overlay and shows up
    /// as a difference in the hot snapshot.
    #[test]
    fn bind_stale_mode_is_hot() {
        let base = sample_config();
        let mut edited = base.clone();
        // Rotate to the next variant so the value always differs from the baseline.
        let rotated = match base.general.me_bind_stale_mode {
            MeBindStaleMode::Never => MeBindStaleMode::Ttl,
            MeBindStaleMode::Ttl => MeBindStaleMode::Always,
            MeBindStaleMode::Always => MeBindStaleMode::Never,
        };
        edited.general.me_bind_stale_mode = rotated;

        let merged = overlay_hot_fields(&base, &edited);

        assert_eq!(
            merged.general.me_bind_stale_mode,
            edited.general.me_bind_stale_mode
        );
        assert_ne!(hot_of(&base), hot_of(&merged));
    }

    /// `general.me_keepalive_interval_secs` is not overlaid and does not affect
    /// the hot snapshot.
    #[test]
    fn keepalive_is_not_hot() {
        let base = sample_config();
        let mut edited = base.clone();
        edited.general.me_keepalive_interval_secs = base.general.me_keepalive_interval_secs + 5;

        let merged = overlay_hot_fields(&base, &edited);

        assert_eq!(
            merged.general.me_keepalive_interval_secs,
            base.general.me_keepalive_interval_secs
        );
        assert_eq!(hot_of(&base), hot_of(&merged));
    }

    /// With one hot and one non-hot field changed together, only the hot one is
    /// applied, so the overlaid config still differs from the new config.
    #[test]
    fn mixed_hot_and_non_hot_change_applies_only_hot_subset() {
        let base = sample_config();
        let mut edited = base.clone();
        edited.general.hardswap = !base.general.hardswap;
        edited.general.use_middle_proxy = !base.general.use_middle_proxy;

        let merged = overlay_hot_fields(&base, &edited);

        assert_eq!(merged.general.hardswap, edited.general.hardswap);
        assert_eq!(merged.general.use_middle_proxy, base.general.use_middle_proxy);
        assert!(!config_equal(&merged, &edited));
    }
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#![allow(deprecated)]
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::net::IpAddr;
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::path::Path;
|
||||
|
||||
use rand::Rng;
|
||||
@@ -203,6 +203,22 @@ impl ProxyConfig {
|
||||
|
||||
sanitize_ad_tag(&mut config.general.ad_tag);
|
||||
|
||||
if let Some(path) = &config.general.proxy_config_v4_cache_path
|
||||
&& path.trim().is_empty()
|
||||
{
|
||||
return Err(ProxyError::Config(
|
||||
"general.proxy_config_v4_cache_path cannot be empty when provided".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(path) = &config.general.proxy_config_v6_cache_path
|
||||
&& path.trim().is_empty()
|
||||
{
|
||||
return Err(ProxyError::Config(
|
||||
"general.proxy_config_v6_cache_path cannot be empty when provided".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(update_every) = config.general.update_every {
|
||||
if update_every == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
@@ -237,12 +253,24 @@ impl ProxyConfig {
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_init_retry_attempts > 1_000_000 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_init_retry_attempts must be within [0, 1000000]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.upstream_connect_retry_attempts == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.upstream_connect_retry_attempts must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.upstream_connect_budget_ms == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.upstream_connect_budget_ms must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.upstream_unhealthy_fail_threshold == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.upstream_unhealthy_fail_threshold must be > 0".to_string(),
|
||||
@@ -257,6 +285,90 @@ impl ProxyConfig {
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_writer_cmd_channel_capacity == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_writer_cmd_channel_capacity must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_route_channel_capacity == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_route_channel_capacity must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_c2me_channel_capacity == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_c2me_channel_capacity must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_reader_route_data_wait_ms > 20 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_reader_route_data_wait_ms must be within [0, 20]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if !(1..=512).contains(&config.general.me_d2c_flush_batch_max_frames) {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_d2c_flush_batch_max_frames must be within [1, 512]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if !(4096..=2 * 1024 * 1024).contains(&config.general.me_d2c_flush_batch_max_bytes) {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_d2c_flush_batch_max_bytes must be within [4096, 2097152]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_d2c_flush_batch_max_delay_us > 5000 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_d2c_flush_batch_max_delay_us must be within [0, 5000]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if !(4096..=1024 * 1024).contains(&config.general.direct_relay_copy_buf_c2s_bytes) {
|
||||
return Err(ProxyError::Config(
|
||||
"general.direct_relay_copy_buf_c2s_bytes must be within [4096, 1048576]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if !(8192..=2 * 1024 * 1024).contains(&config.general.direct_relay_copy_buf_s2c_bytes) {
|
||||
return Err(ProxyError::Config(
|
||||
"general.direct_relay_copy_buf_s2c_bytes must be within [8192, 2097152]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_health_interval_ms_unhealthy == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_health_interval_ms_unhealthy must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_health_interval_ms_healthy == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_health_interval_ms_healthy must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_admission_poll_ms == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_admission_poll_ms must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_warn_rate_limit_ms == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_warn_rate_limit_ms must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.access.user_max_unique_ips_window_secs == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"access.user_max_unique_ips_window_secs must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_reinit_every_secs == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_reinit_every_secs must be > 0".to_string(),
|
||||
@@ -278,6 +390,45 @@ impl ProxyConfig {
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_adaptive_floor_min_writers_multi_endpoint == 0
|
||||
|| config.general.me_adaptive_floor_min_writers_multi_endpoint > 32
|
||||
{
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_adaptive_floor_min_writers_multi_endpoint must be within [1, 32]"
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_adaptive_floor_writers_per_core_total == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_adaptive_floor_writers_per_core_total must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_adaptive_floor_max_active_writers_per_core == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_adaptive_floor_max_active_writers_per_core must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_adaptive_floor_max_warm_writers_per_core == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_adaptive_floor_max_warm_writers_per_core must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_adaptive_floor_max_active_writers_global == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_adaptive_floor_max_active_writers_global must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_adaptive_floor_max_warm_writers_global == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_adaptive_floor_max_warm_writers_global must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_single_endpoint_outage_backoff_min_ms == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_single_endpoint_outage_backoff_min_ms must be > 0".to_string(),
|
||||
@@ -398,6 +549,72 @@ impl ProxyConfig {
|
||||
));
|
||||
}
|
||||
|
||||
if !(10..=5000).contains(&config.general.me_route_no_writer_wait_ms) {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_route_no_writer_wait_ms must be within [10, 5000]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if !(2..=4).contains(&config.general.me_writer_pick_sample_size) {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_writer_pick_sample_size must be within [2, 4]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.me_route_inline_recovery_attempts == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_route_inline_recovery_attempts must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if !(10..=30000).contains(&config.general.me_route_inline_recovery_wait_ms) {
|
||||
return Err(ProxyError::Config(
|
||||
"general.me_route_inline_recovery_wait_ms must be within [10, 30000]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.server.api.request_body_limit_bytes == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"server.api.request_body_limit_bytes must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.server.api.minimal_runtime_cache_ttl_ms > 60_000 {
|
||||
return Err(ProxyError::Config(
|
||||
"server.api.minimal_runtime_cache_ttl_ms must be within [0, 60000]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.server.api.runtime_edge_cache_ttl_ms > 60_000 {
|
||||
return Err(ProxyError::Config(
|
||||
"server.api.runtime_edge_cache_ttl_ms must be within [0, 60000]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if !(1..=1000).contains(&config.server.api.runtime_edge_top_n) {
|
||||
return Err(ProxyError::Config(
|
||||
"server.api.runtime_edge_top_n must be within [1, 1000]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if !(16..=4096).contains(&config.server.api.runtime_edge_events_capacity) {
|
||||
return Err(ProxyError::Config(
|
||||
"server.api.runtime_edge_events_capacity must be within [16, 4096]".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.server.api.listen.parse::<SocketAddr>().is_err() {
|
||||
return Err(ProxyError::Config(
|
||||
"server.api.listen must be in IP:PORT format".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.server.proxy_protocol_header_timeout_ms == 0 {
|
||||
return Err(ProxyError::Config(
|
||||
"server.proxy_protocol_header_timeout_ms must be > 0".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if config.general.effective_me_pool_force_close_secs() > 0
|
||||
&& config.general.effective_me_pool_force_close_secs()
|
||||
< config.general.me_pool_drain_ttl_secs
|
||||
@@ -479,10 +696,11 @@ impl ProxyConfig {
|
||||
warn!("prefer_ipv6 is deprecated, use [network].prefer = 6");
|
||||
}
|
||||
|
||||
// Auto-enable NAT probe when Middle Proxy is requested.
|
||||
if config.general.use_middle_proxy && !config.general.middle_proxy_nat_probe {
|
||||
config.general.middle_proxy_nat_probe = true;
|
||||
warn!("Auto-enabled middle_proxy_nat_probe for middle proxy mode");
|
||||
if config.general.use_middle_proxy && !config.general.me_secret_atomic_snapshot {
|
||||
config.general.me_secret_atomic_snapshot = true;
|
||||
warn!(
|
||||
"Auto-enabled me_secret_atomic_snapshot for middle proxy mode to keep KDF key_selector/secret coherent"
|
||||
);
|
||||
}
|
||||
|
||||
validate_network_cfg(&mut config.network)?;
|
||||
@@ -635,6 +853,22 @@ mod tests {
|
||||
cfg.general.me_reconnect_fast_retry_count,
|
||||
default_me_reconnect_fast_retry_count()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.general.me_init_retry_attempts,
|
||||
default_me_init_retry_attempts()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.general.me2dc_fallback,
|
||||
default_me2dc_fallback()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.general.proxy_config_v4_cache_path,
|
||||
default_proxy_config_v4_cache_path()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.general.proxy_config_v6_cache_path,
|
||||
default_proxy_config_v6_cache_path()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.general.me_single_endpoint_shadow_writers,
|
||||
default_me_single_endpoint_shadow_writers()
|
||||
@@ -695,7 +929,45 @@ mod tests {
|
||||
assert_eq!(cfg.general.update_every, default_update_every());
|
||||
assert_eq!(cfg.server.listen_addr_ipv4, default_listen_addr_ipv4());
|
||||
assert_eq!(cfg.server.listen_addr_ipv6, default_listen_addr_ipv6_opt());
|
||||
assert_eq!(cfg.server.api.listen, default_api_listen());
|
||||
assert_eq!(cfg.server.api.whitelist, default_api_whitelist());
|
||||
assert_eq!(
|
||||
cfg.server.api.request_body_limit_bytes,
|
||||
default_api_request_body_limit_bytes()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.server.api.minimal_runtime_enabled,
|
||||
default_api_minimal_runtime_enabled()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.server.api.minimal_runtime_cache_ttl_ms,
|
||||
default_api_minimal_runtime_cache_ttl_ms()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.server.api.runtime_edge_enabled,
|
||||
default_api_runtime_edge_enabled()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.server.api.runtime_edge_cache_ttl_ms,
|
||||
default_api_runtime_edge_cache_ttl_ms()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.server.api.runtime_edge_top_n,
|
||||
default_api_runtime_edge_top_n()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.server.api.runtime_edge_events_capacity,
|
||||
default_api_runtime_edge_events_capacity()
|
||||
);
|
||||
assert_eq!(cfg.access.users, default_access_users());
|
||||
assert_eq!(
|
||||
cfg.access.user_max_unique_ips_mode,
|
||||
UserMaxUniqueIpsMode::default()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg.access.user_max_unique_ips_window_secs,
|
||||
default_user_max_unique_ips_window_secs()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -718,6 +990,19 @@ mod tests {
|
||||
general.me_reconnect_fast_retry_count,
|
||||
default_me_reconnect_fast_retry_count()
|
||||
);
|
||||
assert_eq!(
|
||||
general.me_init_retry_attempts,
|
||||
default_me_init_retry_attempts()
|
||||
);
|
||||
assert_eq!(general.me2dc_fallback, default_me2dc_fallback());
|
||||
assert_eq!(
|
||||
general.proxy_config_v4_cache_path,
|
||||
default_proxy_config_v4_cache_path()
|
||||
);
|
||||
assert_eq!(
|
||||
general.proxy_config_v6_cache_path,
|
||||
default_proxy_config_v6_cache_path()
|
||||
);
|
||||
assert_eq!(
|
||||
general.me_single_endpoint_shadow_writers,
|
||||
default_me_single_endpoint_shadow_writers()
|
||||
@@ -776,6 +1061,36 @@ mod tests {
|
||||
|
||||
let server = ServerConfig::default();
|
||||
assert_eq!(server.listen_addr_ipv6, Some(default_listen_addr_ipv6()));
|
||||
assert_eq!(server.api.listen, default_api_listen());
|
||||
assert_eq!(server.api.whitelist, default_api_whitelist());
|
||||
assert_eq!(
|
||||
server.api.request_body_limit_bytes,
|
||||
default_api_request_body_limit_bytes()
|
||||
);
|
||||
assert_eq!(
|
||||
server.api.minimal_runtime_enabled,
|
||||
default_api_minimal_runtime_enabled()
|
||||
);
|
||||
assert_eq!(
|
||||
server.api.minimal_runtime_cache_ttl_ms,
|
||||
default_api_minimal_runtime_cache_ttl_ms()
|
||||
);
|
||||
assert_eq!(
|
||||
server.api.runtime_edge_enabled,
|
||||
default_api_runtime_edge_enabled()
|
||||
);
|
||||
assert_eq!(
|
||||
server.api.runtime_edge_cache_ttl_ms,
|
||||
default_api_runtime_edge_cache_ttl_ms()
|
||||
);
|
||||
assert_eq!(
|
||||
server.api.runtime_edge_top_n,
|
||||
default_api_runtime_edge_top_n()
|
||||
);
|
||||
assert_eq!(
|
||||
server.api.runtime_edge_events_capacity,
|
||||
default_api_runtime_edge_events_capacity()
|
||||
);
|
||||
|
||||
let access = AccessConfig::default();
|
||||
assert_eq!(access.users, default_access_users());
|
||||
@@ -1031,6 +1346,46 @@ mod tests {
|
||||
let _ = std::fs::remove_file(path);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn me_adaptive_floor_max_active_writers_per_core_zero_is_rejected() {
|
||||
let toml = r#"
|
||||
[general]
|
||||
me_adaptive_floor_max_active_writers_per_core = 0
|
||||
|
||||
[censorship]
|
||||
tls_domain = "example.com"
|
||||
|
||||
[access.users]
|
||||
user = "00000000000000000000000000000000"
|
||||
"#;
|
||||
let dir = std::env::temp_dir();
|
||||
let path = dir.join("telemt_me_adaptive_floor_max_active_per_core_zero_test.toml");
|
||||
std::fs::write(&path, toml).unwrap();
|
||||
let err = ProxyConfig::load(&path).unwrap_err().to_string();
|
||||
assert!(err.contains("general.me_adaptive_floor_max_active_writers_per_core must be > 0"));
|
||||
let _ = std::fs::remove_file(path);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn me_adaptive_floor_max_warm_writers_global_zero_is_rejected() {
|
||||
let toml = r#"
|
||||
[general]
|
||||
me_adaptive_floor_max_warm_writers_global = 0
|
||||
|
||||
[censorship]
|
||||
tls_domain = "example.com"
|
||||
|
||||
[access.users]
|
||||
user = "00000000000000000000000000000000"
|
||||
"#;
|
||||
let dir = std::env::temp_dir();
|
||||
let path = dir.join("telemt_me_adaptive_floor_max_warm_global_zero_test.toml");
|
||||
std::fs::write(&path, toml).unwrap();
|
||||
let err = ProxyConfig::load(&path).unwrap_err().to_string();
|
||||
assert!(err.contains("general.me_adaptive_floor_max_warm_writers_global must be > 0"));
|
||||
let _ = std::fs::remove_file(path);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn upstream_connect_retry_attempts_zero_is_rejected() {
|
||||
let toml = r#"
|
||||
@@ -1127,6 +1482,85 @@ mod tests {
|
||||
let _ = std::fs::remove_file(path_valid);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn me_route_no_writer_wait_ms_out_of_range_is_rejected() {
|
||||
let toml = r#"
|
||||
[general]
|
||||
me_route_no_writer_wait_ms = 5
|
||||
|
||||
[censorship]
|
||||
tls_domain = "example.com"
|
||||
|
||||
[access.users]
|
||||
user = "00000000000000000000000000000000"
|
||||
"#;
|
||||
let dir = std::env::temp_dir();
|
||||
let path = dir.join("telemt_me_route_no_writer_wait_ms_out_of_range_test.toml");
|
||||
std::fs::write(&path, toml).unwrap();
|
||||
let err = ProxyConfig::load(&path).unwrap_err().to_string();
|
||||
assert!(err.contains("general.me_route_no_writer_wait_ms must be within [10, 5000]"));
|
||||
let _ = std::fs::remove_file(path);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn me_route_no_writer_mode_is_parsed() {
|
||||
let toml = r#"
|
||||
[general]
|
||||
me_route_no_writer_mode = "inline_recovery_legacy"
|
||||
|
||||
[censorship]
|
||||
tls_domain = "example.com"
|
||||
|
||||
[access.users]
|
||||
user = "00000000000000000000000000000000"
|
||||
"#;
|
||||
let dir = std::env::temp_dir();
|
||||
let path = dir.join("telemt_me_route_no_writer_mode_parse_test.toml");
|
||||
std::fs::write(&path, toml).unwrap();
|
||||
let cfg = ProxyConfig::load(&path).unwrap();
|
||||
assert_eq!(
|
||||
cfg.general.me_route_no_writer_mode,
|
||||
crate::config::MeRouteNoWriterMode::InlineRecoveryLegacy
|
||||
);
|
||||
let _ = std::fs::remove_file(path);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn proxy_config_cache_paths_empty_are_rejected() {
|
||||
let toml = r#"
|
||||
[general]
|
||||
proxy_config_v4_cache_path = " "
|
||||
|
||||
[censorship]
|
||||
tls_domain = "example.com"
|
||||
|
||||
[access.users]
|
||||
user = "00000000000000000000000000000000"
|
||||
"#;
|
||||
let dir = std::env::temp_dir();
|
||||
let path = dir.join("telemt_proxy_config_v4_cache_path_empty_test.toml");
|
||||
std::fs::write(&path, toml).unwrap();
|
||||
let err = ProxyConfig::load(&path).unwrap_err().to_string();
|
||||
assert!(err.contains("general.proxy_config_v4_cache_path cannot be empty"));
|
||||
let _ = std::fs::remove_file(path);
|
||||
|
||||
let toml_v6 = r#"
|
||||
[general]
|
||||
proxy_config_v6_cache_path = ""
|
||||
|
||||
[censorship]
|
||||
tls_domain = "example.com"
|
||||
|
||||
[access.users]
|
||||
user = "00000000000000000000000000000000"
|
||||
"#;
|
||||
let path_v6 = dir.join("telemt_proxy_config_v6_cache_path_empty_test.toml");
|
||||
std::fs::write(&path_v6, toml_v6).unwrap();
|
||||
let err_v6 = ProxyConfig::load(&path_v6).unwrap_err().to_string();
|
||||
assert!(err_v6.contains("general.proxy_config_v6_cache_path cannot be empty"));
|
||||
let _ = std::fs::remove_file(path_v6);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn me_hardswap_warmup_defaults_are_set() {
|
||||
let toml = r#"
|
||||
@@ -1322,6 +1756,94 @@ mod tests {
|
||||
let _ = std::fs::remove_file(path);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn api_minimal_runtime_cache_ttl_out_of_range_is_rejected() {
|
||||
let toml = r#"
|
||||
[server.api]
|
||||
enabled = true
|
||||
listen = "127.0.0.1:9091"
|
||||
minimal_runtime_cache_ttl_ms = 70000
|
||||
|
||||
[censorship]
|
||||
tls_domain = "example.com"
|
||||
|
||||
[access.users]
|
||||
user = "00000000000000000000000000000000"
|
||||
"#;
|
||||
let dir = std::env::temp_dir();
|
||||
let path = dir.join("telemt_api_minimal_runtime_cache_ttl_invalid_test.toml");
|
||||
std::fs::write(&path, toml).unwrap();
|
||||
let err = ProxyConfig::load(&path).unwrap_err().to_string();
|
||||
assert!(err.contains("server.api.minimal_runtime_cache_ttl_ms must be within [0, 60000]"));
|
||||
let _ = std::fs::remove_file(path);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn api_runtime_edge_cache_ttl_out_of_range_is_rejected() {
|
||||
let toml = r#"
|
||||
[server.api]
|
||||
enabled = true
|
||||
listen = "127.0.0.1:9091"
|
||||
runtime_edge_cache_ttl_ms = 70000
|
||||
|
||||
[censorship]
|
||||
tls_domain = "example.com"
|
||||
|
||||
[access.users]
|
||||
user = "00000000000000000000000000000000"
|
||||
"#;
|
||||
let dir = std::env::temp_dir();
|
||||
let path = dir.join("telemt_api_runtime_edge_cache_ttl_invalid_test.toml");
|
||||
std::fs::write(&path, toml).unwrap();
|
||||
let err = ProxyConfig::load(&path).unwrap_err().to_string();
|
||||
assert!(err.contains("server.api.runtime_edge_cache_ttl_ms must be within [0, 60000]"));
|
||||
let _ = std::fs::remove_file(path);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn api_runtime_edge_top_n_out_of_range_is_rejected() {
|
||||
let toml = r#"
|
||||
[server.api]
|
||||
enabled = true
|
||||
listen = "127.0.0.1:9091"
|
||||
runtime_edge_top_n = 0
|
||||
|
||||
[censorship]
|
||||
tls_domain = "example.com"
|
||||
|
||||
[access.users]
|
||||
user = "00000000000000000000000000000000"
|
||||
"#;
|
||||
let dir = std::env::temp_dir();
|
||||
let path = dir.join("telemt_api_runtime_edge_top_n_invalid_test.toml");
|
||||
std::fs::write(&path, toml).unwrap();
|
||||
let err = ProxyConfig::load(&path).unwrap_err().to_string();
|
||||
assert!(err.contains("server.api.runtime_edge_top_n must be within [1, 1000]"));
|
||||
let _ = std::fs::remove_file(path);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn api_runtime_edge_events_capacity_out_of_range_is_rejected() {
|
||||
let toml = r#"
|
||||
[server.api]
|
||||
enabled = true
|
||||
listen = "127.0.0.1:9091"
|
||||
runtime_edge_events_capacity = 8
|
||||
|
||||
[censorship]
|
||||
tls_domain = "example.com"
|
||||
|
||||
[access.users]
|
||||
user = "00000000000000000000000000000000"
|
||||
"#;
|
||||
let dir = std::env::temp_dir();
|
||||
let path = dir.join("telemt_api_runtime_edge_events_capacity_invalid_test.toml");
|
||||
std::fs::write(&path, toml).unwrap();
|
||||
let err = ProxyConfig::load(&path).unwrap_err().to_string();
|
||||
assert!(err.contains("server.api.runtime_edge_events_capacity must be within [16, 4096]"));
|
||||
let _ = std::fs::remove_file(path);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn force_close_bumped_when_below_drain_ttl() {
|
||||
let toml = r#"
|
||||
|
||||
@@ -183,6 +183,74 @@ impl MeFloorMode {
|
||||
}
|
||||
}
|
||||
|
||||
/// Middle-End route behavior when no writer is immediately available.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum MeRouteNoWriterMode {
|
||||
AsyncRecoveryFailfast,
|
||||
InlineRecoveryLegacy,
|
||||
#[default]
|
||||
HybridAsyncPersistent,
|
||||
}
|
||||
|
||||
impl MeRouteNoWriterMode {
|
||||
pub fn as_u8(self) -> u8 {
|
||||
match self {
|
||||
MeRouteNoWriterMode::AsyncRecoveryFailfast => 0,
|
||||
MeRouteNoWriterMode::InlineRecoveryLegacy => 1,
|
||||
MeRouteNoWriterMode::HybridAsyncPersistent => 2,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_u8(raw: u8) -> Self {
|
||||
match raw {
|
||||
0 => MeRouteNoWriterMode::AsyncRecoveryFailfast,
|
||||
1 => MeRouteNoWriterMode::InlineRecoveryLegacy,
|
||||
2 => MeRouteNoWriterMode::HybridAsyncPersistent,
|
||||
_ => MeRouteNoWriterMode::HybridAsyncPersistent,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Middle-End writer selection mode for new client bindings.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum MeWriterPickMode {
|
||||
SortedRr,
|
||||
#[default]
|
||||
P2c,
|
||||
}
|
||||
|
||||
impl MeWriterPickMode {
|
||||
pub fn as_u8(self) -> u8 {
|
||||
match self {
|
||||
MeWriterPickMode::SortedRr => 0,
|
||||
MeWriterPickMode::P2c => 1,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_u8(raw: u8) -> Self {
|
||||
match raw {
|
||||
0 => MeWriterPickMode::SortedRr,
|
||||
1 => MeWriterPickMode::P2c,
|
||||
_ => MeWriterPickMode::P2c,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-user unique source IP limit mode.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum UserMaxUniqueIpsMode {
|
||||
/// Count only currently active source IPs.
|
||||
#[default]
|
||||
ActiveWindow,
|
||||
/// Count source IPs seen within the recent time window.
|
||||
TimeWindow,
|
||||
/// Enforce both active and recent-window limits at the same time.
|
||||
Combined,
|
||||
}
|
||||
|
||||
/// Telemetry controls for hot-path counters and ME diagnostics.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct TelemetryConfig {
|
||||
@@ -305,6 +373,14 @@ pub struct GeneralConfig {
|
||||
#[serde(default = "default_proxy_secret_path")]
|
||||
pub proxy_secret_path: Option<String>,
|
||||
|
||||
/// Optional path to cache raw getProxyConfig (IPv4) snapshot for startup fallback.
|
||||
#[serde(default = "default_proxy_config_v4_cache_path")]
|
||||
pub proxy_config_v4_cache_path: Option<String>,
|
||||
|
||||
/// Optional path to cache raw getProxyConfigV6 snapshot for startup fallback.
|
||||
#[serde(default = "default_proxy_config_v6_cache_path")]
|
||||
pub proxy_config_v6_cache_path: Option<String>,
|
||||
|
||||
/// Global ad_tag (32 hex chars from @MTProxybot). Fallback when user has no per-user tag in access.user_ad_tags.
|
||||
#[serde(default)]
|
||||
pub ad_tag: Option<String>,
|
||||
@@ -340,6 +416,15 @@ pub struct GeneralConfig {
|
||||
#[serde(default = "default_middle_proxy_warm_standby")]
|
||||
pub middle_proxy_warm_standby: usize,
|
||||
|
||||
/// Startup retries for Middle-End pool initialization before ME→Direct fallback.
|
||||
/// 0 means unlimited retries.
|
||||
#[serde(default = "default_me_init_retry_attempts")]
|
||||
pub me_init_retry_attempts: u32,
|
||||
|
||||
/// Allow fallback from Middle-End mode to direct DC when ME startup cannot be initialized.
|
||||
#[serde(default = "default_me2dc_fallback")]
|
||||
pub me2dc_fallback: bool,
|
||||
|
||||
/// Enable ME keepalive padding frames.
|
||||
#[serde(default = "default_true")]
|
||||
pub me_keepalive_enabled: bool,
|
||||
@@ -361,6 +446,48 @@ pub struct GeneralConfig {
|
||||
#[serde(default = "default_rpc_proxy_req_every")]
|
||||
pub rpc_proxy_req_every: u64,
|
||||
|
||||
/// Capacity of per-ME writer command channel.
|
||||
#[serde(default = "default_me_writer_cmd_channel_capacity")]
|
||||
pub me_writer_cmd_channel_capacity: usize,
|
||||
|
||||
/// Capacity of per-connection ME response route channel.
|
||||
#[serde(default = "default_me_route_channel_capacity")]
|
||||
pub me_route_channel_capacity: usize,
|
||||
|
||||
/// Capacity of per-client command queue from client reader to ME sender task.
|
||||
#[serde(default = "default_me_c2me_channel_capacity")]
|
||||
pub me_c2me_channel_capacity: usize,
|
||||
|
||||
/// Bounded wait in milliseconds for routing ME DATA to per-connection queue.
|
||||
/// `0` keeps legacy no-wait behavior.
|
||||
#[serde(default = "default_me_reader_route_data_wait_ms")]
|
||||
pub me_reader_route_data_wait_ms: u64,
|
||||
|
||||
/// Maximum number of ME->Client responses coalesced before flush.
|
||||
#[serde(default = "default_me_d2c_flush_batch_max_frames")]
|
||||
pub me_d2c_flush_batch_max_frames: usize,
|
||||
|
||||
/// Maximum total payload bytes coalesced before flush.
|
||||
#[serde(default = "default_me_d2c_flush_batch_max_bytes")]
|
||||
pub me_d2c_flush_batch_max_bytes: usize,
|
||||
|
||||
/// Maximum wait in microseconds to coalesce additional ME->Client responses.
|
||||
/// `0` disables timed coalescing.
|
||||
#[serde(default = "default_me_d2c_flush_batch_max_delay_us")]
|
||||
pub me_d2c_flush_batch_max_delay_us: u64,
|
||||
|
||||
/// Flush client writer immediately after quick-ack write.
|
||||
#[serde(default = "default_me_d2c_ack_flush_immediate")]
|
||||
pub me_d2c_ack_flush_immediate: bool,
|
||||
|
||||
/// Copy buffer size for client->DC direction in direct relay.
|
||||
#[serde(default = "default_direct_relay_copy_buf_c2s_bytes")]
|
||||
pub direct_relay_copy_buf_c2s_bytes: usize,
|
||||
|
||||
/// Copy buffer size for DC->client direction in direct relay.
|
||||
#[serde(default = "default_direct_relay_copy_buf_s2c_bytes")]
|
||||
pub direct_relay_copy_buf_s2c_bytes: usize,
|
||||
|
||||
/// Max pending ciphertext buffer per client writer (bytes).
|
||||
/// Controls FakeTLS backpressure vs throughput.
|
||||
#[serde(default = "default_crypto_pending_buffer")]
|
||||
@@ -461,10 +588,47 @@ pub struct GeneralConfig {
|
||||
#[serde(default = "default_me_adaptive_floor_min_writers_single_endpoint")]
|
||||
pub me_adaptive_floor_min_writers_single_endpoint: u8,
|
||||
|
||||
/// Minimum writer target for multi-endpoint DC groups in adaptive floor mode.
|
||||
#[serde(default = "default_me_adaptive_floor_min_writers_multi_endpoint")]
|
||||
pub me_adaptive_floor_min_writers_multi_endpoint: u8,
|
||||
|
||||
/// Grace period in seconds to hold static floor after activity in adaptive mode.
|
||||
#[serde(default = "default_me_adaptive_floor_recover_grace_secs")]
|
||||
pub me_adaptive_floor_recover_grace_secs: u64,
|
||||
|
||||
/// Global ME writer budget per logical CPU core in adaptive mode.
|
||||
#[serde(default = "default_me_adaptive_floor_writers_per_core_total")]
|
||||
pub me_adaptive_floor_writers_per_core_total: u16,
|
||||
|
||||
/// Override logical CPU core count for adaptive floor calculations.
|
||||
/// Set to 0 to use runtime auto-detection.
|
||||
#[serde(default = "default_me_adaptive_floor_cpu_cores_override")]
|
||||
pub me_adaptive_floor_cpu_cores_override: u16,
|
||||
|
||||
/// Per-core max extra writers above base required floor for single-endpoint DC groups.
|
||||
#[serde(default = "default_me_adaptive_floor_max_extra_writers_single_per_core")]
|
||||
pub me_adaptive_floor_max_extra_writers_single_per_core: u16,
|
||||
|
||||
/// Per-core max extra writers above base required floor for multi-endpoint DC groups.
|
||||
#[serde(default = "default_me_adaptive_floor_max_extra_writers_multi_per_core")]
|
||||
pub me_adaptive_floor_max_extra_writers_multi_per_core: u16,
|
||||
|
||||
/// Hard cap for active ME writers per logical CPU core.
|
||||
#[serde(default = "default_me_adaptive_floor_max_active_writers_per_core")]
|
||||
pub me_adaptive_floor_max_active_writers_per_core: u16,
|
||||
|
||||
/// Hard cap for warm ME writers per logical CPU core.
|
||||
#[serde(default = "default_me_adaptive_floor_max_warm_writers_per_core")]
|
||||
pub me_adaptive_floor_max_warm_writers_per_core: u16,
|
||||
|
||||
/// Hard global cap for active ME writers.
|
||||
#[serde(default = "default_me_adaptive_floor_max_active_writers_global")]
|
||||
pub me_adaptive_floor_max_active_writers_global: u32,
|
||||
|
||||
/// Hard global cap for warm ME writers.
|
||||
#[serde(default = "default_me_adaptive_floor_max_warm_writers_global")]
|
||||
pub me_adaptive_floor_max_warm_writers_global: u32,
|
||||
|
||||
/// Connect attempts for the selected upstream before returning error/fallback.
|
||||
#[serde(default = "default_upstream_connect_retry_attempts")]
|
||||
pub upstream_connect_retry_attempts: u32,
|
||||
@@ -473,6 +637,10 @@ pub struct GeneralConfig {
|
||||
#[serde(default = "default_upstream_connect_retry_backoff_ms")]
|
||||
pub upstream_connect_retry_backoff_ms: u64,
|
||||
|
||||
/// Total wall-clock budget in milliseconds for one upstream connect request across retries.
|
||||
#[serde(default = "default_upstream_connect_budget_ms")]
|
||||
pub upstream_connect_budget_ms: u64,
|
||||
|
||||
/// Consecutive failed requests before upstream is marked unhealthy.
|
||||
#[serde(default = "default_upstream_unhealthy_fail_threshold")]
|
||||
pub upstream_unhealthy_fail_threshold: u32,
|
||||
@@ -489,6 +657,10 @@ pub struct GeneralConfig {
|
||||
#[serde(default = "default_unknown_dc_log_path")]
|
||||
pub unknown_dc_log_path: Option<String>,
|
||||
|
||||
/// Enable unknown-DC file logging.
|
||||
#[serde(default = "default_unknown_dc_file_log_enabled")]
|
||||
pub unknown_dc_file_log_enabled: bool,
|
||||
|
||||
#[serde(default)]
|
||||
pub log_level: LogLevel,
|
||||
|
||||
@@ -516,6 +688,38 @@ pub struct GeneralConfig {
|
||||
#[serde(default = "default_me_route_backpressure_high_watermark_pct")]
|
||||
pub me_route_backpressure_high_watermark_pct: u8,
|
||||
|
||||
/// Health monitor interval in milliseconds while writer coverage is degraded.
|
||||
#[serde(default = "default_me_health_interval_ms_unhealthy")]
|
||||
pub me_health_interval_ms_unhealthy: u64,
|
||||
|
||||
/// Health monitor interval in milliseconds while writer coverage is stable.
|
||||
#[serde(default = "default_me_health_interval_ms_healthy")]
|
||||
pub me_health_interval_ms_healthy: u64,
|
||||
|
||||
/// Poll interval in milliseconds for conditional-admission state checks.
|
||||
#[serde(default = "default_me_admission_poll_ms")]
|
||||
pub me_admission_poll_ms: u64,
|
||||
|
||||
/// Cooldown for repetitive ME warning logs in milliseconds.
|
||||
#[serde(default = "default_me_warn_rate_limit_ms")]
|
||||
pub me_warn_rate_limit_ms: u64,
|
||||
|
||||
/// ME route behavior when no writer is immediately available.
|
||||
#[serde(default)]
|
||||
pub me_route_no_writer_mode: MeRouteNoWriterMode,
|
||||
|
||||
/// Maximum wait time in milliseconds for async-recovery failfast mode.
|
||||
#[serde(default = "default_me_route_no_writer_wait_ms")]
|
||||
pub me_route_no_writer_wait_ms: u64,
|
||||
|
||||
/// Number of inline recovery attempts in legacy mode.
|
||||
#[serde(default = "default_me_route_inline_recovery_attempts")]
|
||||
pub me_route_inline_recovery_attempts: u32,
|
||||
|
||||
/// Maximum wait time in milliseconds for inline recovery in legacy mode.
|
||||
#[serde(default = "default_me_route_inline_recovery_wait_ms")]
|
||||
pub me_route_inline_recovery_wait_ms: u64,
|
||||
|
||||
/// [general.links] — proxy link generation overrides.
|
||||
#[serde(default)]
|
||||
pub links: LinksConfig,
|
||||
@@ -634,6 +838,14 @@ pub struct GeneralConfig {
|
||||
#[serde(default = "default_me_deterministic_writer_sort")]
|
||||
pub me_deterministic_writer_sort: bool,
|
||||
|
||||
/// Writer selection mode for ME route bind path.
|
||||
#[serde(default)]
|
||||
pub me_writer_pick_mode: MeWriterPickMode,
|
||||
|
||||
/// Number of candidates sampled by writer picker in `p2c` mode.
|
||||
#[serde(default = "default_me_writer_pick_sample_size")]
|
||||
pub me_writer_pick_sample_size: u8,
|
||||
|
||||
/// Enable NTP drift check at startup.
|
||||
#[serde(default = "default_ntp_check")]
|
||||
pub ntp_check: bool,
|
||||
@@ -660,6 +872,8 @@ impl Default for GeneralConfig {
|
||||
use_middle_proxy: default_true(),
|
||||
ad_tag: None,
|
||||
proxy_secret_path: default_proxy_secret_path(),
|
||||
proxy_config_v4_cache_path: default_proxy_config_v4_cache_path(),
|
||||
proxy_config_v6_cache_path: default_proxy_config_v6_cache_path(),
|
||||
middle_proxy_nat_ip: None,
|
||||
middle_proxy_nat_probe: default_true(),
|
||||
middle_proxy_nat_stun: default_middle_proxy_nat_stun(),
|
||||
@@ -667,11 +881,23 @@ impl Default for GeneralConfig {
|
||||
stun_nat_probe_concurrency: default_stun_nat_probe_concurrency(),
|
||||
middle_proxy_pool_size: default_pool_size(),
|
||||
middle_proxy_warm_standby: default_middle_proxy_warm_standby(),
|
||||
me_init_retry_attempts: default_me_init_retry_attempts(),
|
||||
me2dc_fallback: default_me2dc_fallback(),
|
||||
me_keepalive_enabled: default_true(),
|
||||
me_keepalive_interval_secs: default_keepalive_interval(),
|
||||
me_keepalive_jitter_secs: default_keepalive_jitter(),
|
||||
me_keepalive_payload_random: default_true(),
|
||||
rpc_proxy_req_every: default_rpc_proxy_req_every(),
|
||||
me_writer_cmd_channel_capacity: default_me_writer_cmd_channel_capacity(),
|
||||
me_route_channel_capacity: default_me_route_channel_capacity(),
|
||||
me_c2me_channel_capacity: default_me_c2me_channel_capacity(),
|
||||
me_reader_route_data_wait_ms: default_me_reader_route_data_wait_ms(),
|
||||
me_d2c_flush_batch_max_frames: default_me_d2c_flush_batch_max_frames(),
|
||||
me_d2c_flush_batch_max_bytes: default_me_d2c_flush_batch_max_bytes(),
|
||||
me_d2c_flush_batch_max_delay_us: default_me_d2c_flush_batch_max_delay_us(),
|
||||
me_d2c_ack_flush_immediate: default_me_d2c_ack_flush_immediate(),
|
||||
direct_relay_copy_buf_c2s_bytes: default_direct_relay_copy_buf_c2s_bytes(),
|
||||
direct_relay_copy_buf_s2c_bytes: default_direct_relay_copy_buf_s2c_bytes(),
|
||||
me_warmup_stagger_enabled: default_true(),
|
||||
me_warmup_step_delay_ms: default_warmup_step_delay_ms(),
|
||||
me_warmup_step_jitter_ms: default_warmup_step_jitter_ms(),
|
||||
@@ -688,13 +914,24 @@ impl Default for GeneralConfig {
|
||||
me_floor_mode: MeFloorMode::default(),
|
||||
me_adaptive_floor_idle_secs: default_me_adaptive_floor_idle_secs(),
|
||||
me_adaptive_floor_min_writers_single_endpoint: default_me_adaptive_floor_min_writers_single_endpoint(),
|
||||
me_adaptive_floor_min_writers_multi_endpoint: default_me_adaptive_floor_min_writers_multi_endpoint(),
|
||||
me_adaptive_floor_recover_grace_secs: default_me_adaptive_floor_recover_grace_secs(),
|
||||
me_adaptive_floor_writers_per_core_total: default_me_adaptive_floor_writers_per_core_total(),
|
||||
me_adaptive_floor_cpu_cores_override: default_me_adaptive_floor_cpu_cores_override(),
|
||||
me_adaptive_floor_max_extra_writers_single_per_core: default_me_adaptive_floor_max_extra_writers_single_per_core(),
|
||||
me_adaptive_floor_max_extra_writers_multi_per_core: default_me_adaptive_floor_max_extra_writers_multi_per_core(),
|
||||
me_adaptive_floor_max_active_writers_per_core: default_me_adaptive_floor_max_active_writers_per_core(),
|
||||
me_adaptive_floor_max_warm_writers_per_core: default_me_adaptive_floor_max_warm_writers_per_core(),
|
||||
me_adaptive_floor_max_active_writers_global: default_me_adaptive_floor_max_active_writers_global(),
|
||||
me_adaptive_floor_max_warm_writers_global: default_me_adaptive_floor_max_warm_writers_global(),
|
||||
upstream_connect_retry_attempts: default_upstream_connect_retry_attempts(),
|
||||
upstream_connect_retry_backoff_ms: default_upstream_connect_retry_backoff_ms(),
|
||||
upstream_connect_budget_ms: default_upstream_connect_budget_ms(),
|
||||
upstream_unhealthy_fail_threshold: default_upstream_unhealthy_fail_threshold(),
|
||||
upstream_connect_failfast_hard_errors: default_upstream_connect_failfast_hard_errors(),
|
||||
stun_iface_mismatch_ignore: false,
|
||||
unknown_dc_log_path: default_unknown_dc_log_path(),
|
||||
unknown_dc_file_log_enabled: default_unknown_dc_file_log_enabled(),
|
||||
log_level: LogLevel::Normal,
|
||||
disable_colors: false,
|
||||
telemetry: TelemetryConfig::default(),
|
||||
@@ -702,6 +939,14 @@ impl Default for GeneralConfig {
|
||||
me_route_backpressure_base_timeout_ms: default_me_route_backpressure_base_timeout_ms(),
|
||||
me_route_backpressure_high_timeout_ms: default_me_route_backpressure_high_timeout_ms(),
|
||||
me_route_backpressure_high_watermark_pct: default_me_route_backpressure_high_watermark_pct(),
|
||||
me_health_interval_ms_unhealthy: default_me_health_interval_ms_unhealthy(),
|
||||
me_health_interval_ms_healthy: default_me_health_interval_ms_healthy(),
|
||||
me_admission_poll_ms: default_me_admission_poll_ms(),
|
||||
me_warn_rate_limit_ms: default_me_warn_rate_limit_ms(),
|
||||
me_route_no_writer_mode: MeRouteNoWriterMode::default(),
|
||||
me_route_no_writer_wait_ms: default_me_route_no_writer_wait_ms(),
|
||||
me_route_inline_recovery_attempts: default_me_route_inline_recovery_attempts(),
|
||||
me_route_inline_recovery_wait_ms: default_me_route_inline_recovery_wait_ms(),
|
||||
links: LinksConfig::default(),
|
||||
crypto_pending_buffer: default_crypto_pending_buffer(),
|
||||
max_client_frame: default_max_client_frame(),
|
||||
@@ -738,6 +983,8 @@ impl Default for GeneralConfig {
|
||||
me_reinit_trigger_channel: default_me_reinit_trigger_channel(),
|
||||
me_reinit_coalesce_window_ms: default_me_reinit_coalesce_window_ms(),
|
||||
me_deterministic_writer_sort: default_me_deterministic_writer_sort(),
|
||||
me_writer_pick_mode: MeWriterPickMode::default(),
|
||||
me_writer_pick_sample_size: default_me_writer_pick_sample_size(),
|
||||
ntp_check: default_ntp_check(),
|
||||
ntp_servers: default_ntp_servers(),
|
||||
auto_degradation_enabled: default_true(),
|
||||
@@ -793,6 +1040,78 @@ impl Default for LinksConfig {
|
||||
}
|
||||
}
|
||||
|
||||
/// API settings for control-plane endpoints.
///
/// Every field carries a serde default, so an omitted field deserializes to
/// the same value that `ApiConfig::default()` assigns to it.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct ApiConfig {
|
||||
/// Enable or disable REST API.
|
||||
#[serde(default = "default_true")]
|
||||
pub enabled: bool,
|
||||
|
||||
/// Listen address for API in `IP:PORT` format.
|
||||
#[serde(default = "default_api_listen")]
|
||||
pub listen: String,
|
||||
|
||||
/// CIDR whitelist allowed to access API.
|
||||
#[serde(default = "default_api_whitelist")]
|
||||
pub whitelist: Vec<IpNetwork>,
|
||||
|
||||
/// Optional static value for `Authorization` header validation.
|
||||
/// Empty string disables header auth.
/// An omitted field deserializes to the empty string.
|
||||
#[serde(default)]
|
||||
pub auth_header: String,
|
||||
|
||||
/// Maximum accepted HTTP request body size in bytes.
|
||||
#[serde(default = "default_api_request_body_limit_bytes")]
|
||||
pub request_body_limit_bytes: usize,
|
||||
|
||||
/// Enable runtime snapshots that require read-lock aggregation on API request path.
|
||||
#[serde(default = "default_api_minimal_runtime_enabled")]
|
||||
pub minimal_runtime_enabled: bool,
|
||||
|
||||
/// Cache TTL for minimal runtime snapshots in milliseconds (0 disables caching).
|
||||
#[serde(default = "default_api_minimal_runtime_cache_ttl_ms")]
|
||||
pub minimal_runtime_cache_ttl_ms: u64,
|
||||
|
||||
/// Enables runtime edge endpoints with optional cached aggregation.
|
||||
#[serde(default = "default_api_runtime_edge_enabled")]
|
||||
pub runtime_edge_enabled: bool,
|
||||
|
||||
/// Cache TTL for runtime edge aggregation payloads in milliseconds.
|
||||
#[serde(default = "default_api_runtime_edge_cache_ttl_ms")]
|
||||
pub runtime_edge_cache_ttl_ms: u64,
|
||||
|
||||
/// Top-N limit for edge connection leaderboard payloads.
|
||||
#[serde(default = "default_api_runtime_edge_top_n")]
|
||||
pub runtime_edge_top_n: usize,
|
||||
|
||||
/// Ring-buffer capacity for runtime edge control-plane events.
|
||||
#[serde(default = "default_api_runtime_edge_events_capacity")]
|
||||
pub runtime_edge_events_capacity: usize,
|
||||
|
||||
/// Read-only mode: mutating endpoints are rejected.
/// An omitted field deserializes to `false`.
|
||||
#[serde(default)]
|
||||
pub read_only: bool,
|
||||
}
|
||||
|
||||
impl Default for ApiConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
enabled: default_true(),
|
||||
listen: default_api_listen(),
|
||||
whitelist: default_api_whitelist(),
|
||||
auth_header: String::new(),
|
||||
request_body_limit_bytes: default_api_request_body_limit_bytes(),
|
||||
minimal_runtime_enabled: default_api_minimal_runtime_enabled(),
|
||||
minimal_runtime_cache_ttl_ms: default_api_minimal_runtime_cache_ttl_ms(),
|
||||
runtime_edge_enabled: default_api_runtime_edge_enabled(),
|
||||
runtime_edge_cache_ttl_ms: default_api_runtime_edge_cache_ttl_ms(),
|
||||
runtime_edge_top_n: default_api_runtime_edge_top_n(),
|
||||
runtime_edge_events_capacity: default_api_runtime_edge_events_capacity(),
|
||||
read_only: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ServerConfig {
|
||||
#[serde(default = "default_port")]
|
||||
@@ -822,12 +1141,19 @@ pub struct ServerConfig {
|
||||
#[serde(default)]
|
||||
pub proxy_protocol: bool,
|
||||
|
||||
/// Timeout in milliseconds for reading and parsing PROXY protocol headers.
|
||||
#[serde(default = "default_proxy_protocol_header_timeout_ms")]
|
||||
pub proxy_protocol_header_timeout_ms: u64,
|
||||
|
||||
#[serde(default)]
|
||||
pub metrics_port: Option<u16>,
|
||||
|
||||
#[serde(default = "default_metrics_whitelist")]
|
||||
pub metrics_whitelist: Vec<IpNetwork>,
|
||||
|
||||
#[serde(default, alias = "admin_api")]
|
||||
pub api: ApiConfig,
|
||||
|
||||
#[serde(default)]
|
||||
pub listeners: Vec<ListenerConfig>,
|
||||
}
|
||||
@@ -842,8 +1168,10 @@ impl Default for ServerConfig {
|
||||
listen_unix_sock_perm: None,
|
||||
listen_tcp: None,
|
||||
proxy_protocol: false,
|
||||
proxy_protocol_header_timeout_ms: default_proxy_protocol_header_timeout_ms(),
|
||||
metrics_port: None,
|
||||
metrics_whitelist: default_metrics_whitelist(),
|
||||
api: ApiConfig::default(),
|
||||
listeners: Vec::new(),
|
||||
}
|
||||
}
|
||||
@@ -989,6 +1317,12 @@ pub struct AccessConfig {
|
||||
#[serde(default)]
|
||||
pub user_max_unique_ips: HashMap<String, usize>,
|
||||
|
||||
#[serde(default)]
|
||||
pub user_max_unique_ips_mode: UserMaxUniqueIpsMode,
|
||||
|
||||
#[serde(default = "default_user_max_unique_ips_window_secs")]
|
||||
pub user_max_unique_ips_window_secs: u64,
|
||||
|
||||
#[serde(default = "default_replay_check_len")]
|
||||
pub replay_check_len: usize,
|
||||
|
||||
@@ -1008,6 +1342,8 @@ impl Default for AccessConfig {
|
||||
user_expirations: HashMap::new(),
|
||||
user_data_quota: HashMap::new(),
|
||||
user_max_unique_ips: HashMap::new(),
|
||||
user_max_unique_ips_mode: UserMaxUniqueIpsMode::default(),
|
||||
user_max_unique_ips_window_secs: default_user_max_unique_ips_window_secs(),
|
||||
replay_check_len: default_replay_check_len(),
|
||||
replay_window_secs: default_replay_window_secs(),
|
||||
ignore_time_skew: false,
|
||||
|
||||
@@ -21,6 +21,7 @@ struct SecureRandomInner {
|
||||
rng: StdRng,
|
||||
cipher: AesCtr,
|
||||
buffer: Vec<u8>,
|
||||
buffer_start: usize,
|
||||
}
|
||||
|
||||
impl Drop for SecureRandomInner {
|
||||
@@ -48,6 +49,7 @@ impl SecureRandom {
|
||||
rng,
|
||||
cipher,
|
||||
buffer: Vec::with_capacity(1024),
|
||||
buffer_start: 0,
|
||||
}),
|
||||
}
|
||||
}
|
||||
@@ -59,16 +61,29 @@ impl SecureRandom {
|
||||
|
||||
let mut written = 0usize;
|
||||
while written < out.len() {
|
||||
if inner.buffer_start >= inner.buffer.len() {
|
||||
inner.buffer.clear();
|
||||
inner.buffer_start = 0;
|
||||
}
|
||||
|
||||
if inner.buffer.is_empty() {
|
||||
let mut chunk = vec![0u8; CHUNK_SIZE];
|
||||
inner.rng.fill_bytes(&mut chunk);
|
||||
inner.cipher.apply(&mut chunk);
|
||||
inner.buffer.extend_from_slice(&chunk);
|
||||
inner.buffer_start = 0;
|
||||
}
|
||||
|
||||
let take = (out.len() - written).min(inner.buffer.len());
|
||||
out[written..written + take].copy_from_slice(&inner.buffer[..take]);
|
||||
inner.buffer.drain(..take);
|
||||
let available = inner.buffer.len().saturating_sub(inner.buffer_start);
|
||||
let take = (out.len() - written).min(available);
|
||||
let start = inner.buffer_start;
|
||||
let end = start + take;
|
||||
out[written..written + take].copy_from_slice(&inner.buffer[start..end]);
|
||||
inner.buffer_start = end;
|
||||
if inner.buffer_start >= inner.buffer.len() {
|
||||
inner.buffer.clear();
|
||||
inner.buffer_start = 0;
|
||||
}
|
||||
written += take;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,252 +1,330 @@
|
||||
// src/ip_tracker.rs
|
||||
// IP address tracking and limiting for users
|
||||
// IP address tracking and per-user unique IP limiting.
|
||||
|
||||
#![allow(dead_code)]
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::HashMap;
|
||||
use std::net::IpAddr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
/// Трекер уникальных IP-адресов для каждого пользователя MTProxy
|
||||
///
|
||||
/// Предоставляет thread-safe механизм для:
|
||||
/// - Отслеживания активных IP-адресов каждого пользователя
|
||||
/// - Ограничения количества уникальных IP на пользователя
|
||||
/// - Автоматической очистки при отключении клиентов
|
||||
use crate::config::UserMaxUniqueIpsMode;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct UserIpTracker {
|
||||
/// Маппинг: Имя пользователя -> Множество активных IP-адресов
|
||||
active_ips: Arc<RwLock<HashMap<String, HashSet<IpAddr>>>>,
|
||||
|
||||
/// Маппинг: Имя пользователя -> Максимально разрешенное количество уникальных IP
|
||||
active_ips: Arc<RwLock<HashMap<String, HashMap<IpAddr, usize>>>>,
|
||||
recent_ips: Arc<RwLock<HashMap<String, HashMap<IpAddr, Instant>>>>,
|
||||
max_ips: Arc<RwLock<HashMap<String, usize>>>,
|
||||
limit_mode: Arc<RwLock<UserMaxUniqueIpsMode>>,
|
||||
limit_window: Arc<RwLock<Duration>>,
|
||||
last_compact_epoch_secs: Arc<AtomicU64>,
|
||||
}
|
||||
|
||||
impl UserIpTracker {
|
||||
/// Создать новый пустой трекер
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
active_ips: Arc::new(RwLock::new(HashMap::new())),
|
||||
recent_ips: Arc::new(RwLock::new(HashMap::new())),
|
||||
max_ips: Arc::new(RwLock::new(HashMap::new())),
|
||||
limit_mode: Arc::new(RwLock::new(UserMaxUniqueIpsMode::ActiveWindow)),
|
||||
limit_window: Arc::new(RwLock::new(Duration::from_secs(30))),
|
||||
last_compact_epoch_secs: Arc::new(AtomicU64::new(0)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Yields 0 when the system clock reports a time before the epoch.
fn now_epoch_secs() -> u64 {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    match since_epoch {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
|
||||
|
||||
async fn maybe_compact_empty_users(&self) {
|
||||
const COMPACT_INTERVAL_SECS: u64 = 60;
|
||||
let now_epoch_secs = Self::now_epoch_secs();
|
||||
let last_compact_epoch_secs = self.last_compact_epoch_secs.load(Ordering::Relaxed);
|
||||
if now_epoch_secs.saturating_sub(last_compact_epoch_secs) < COMPACT_INTERVAL_SECS {
|
||||
return;
|
||||
}
|
||||
if self
|
||||
.last_compact_epoch_secs
|
||||
.compare_exchange(
|
||||
last_compact_epoch_secs,
|
||||
now_epoch_secs,
|
||||
Ordering::AcqRel,
|
||||
Ordering::Relaxed,
|
||||
)
|
||||
.is_err()
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
let mut active_ips = self.active_ips.write().await;
|
||||
let mut recent_ips = self.recent_ips.write().await;
|
||||
let mut users = Vec::<String>::with_capacity(active_ips.len().saturating_add(recent_ips.len()));
|
||||
users.extend(active_ips.keys().cloned());
|
||||
for user in recent_ips.keys() {
|
||||
if !active_ips.contains_key(user) {
|
||||
users.push(user.clone());
|
||||
}
|
||||
}
|
||||
|
||||
for user in users {
|
||||
let active_empty = active_ips.get(&user).map(|ips| ips.is_empty()).unwrap_or(true);
|
||||
let recent_empty = recent_ips.get(&user).map(|ips| ips.is_empty()).unwrap_or(true);
|
||||
if active_empty && recent_empty {
|
||||
active_ips.remove(&user);
|
||||
recent_ips.remove(&user);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn set_limit_policy(&self, mode: UserMaxUniqueIpsMode, window_secs: u64) {
|
||||
{
|
||||
let mut current_mode = self.limit_mode.write().await;
|
||||
*current_mode = mode;
|
||||
}
|
||||
let mut current_window = self.limit_window.write().await;
|
||||
*current_window = Duration::from_secs(window_secs.max(1));
|
||||
}
|
||||
|
||||
pub async fn set_user_limit(&self, username: &str, max_ips: usize) {
|
||||
let mut limits = self.max_ips.write().await;
|
||||
limits.insert(username.to_string(), max_ips);
|
||||
}
|
||||
|
||||
/// Загрузить лимиты из конфигурации
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `limits` - HashMap с лимитами из config.toml
|
||||
pub async fn load_limits(&self, limits: &HashMap<String, usize>) {
|
||||
let mut max_ips = self.max_ips.write().await;
|
||||
for (user, limit) in limits {
|
||||
max_ips.insert(user.clone(), *limit);
|
||||
}
|
||||
pub async fn remove_user_limit(&self, username: &str) {
|
||||
let mut limits = self.max_ips.write().await;
|
||||
limits.remove(username);
|
||||
}
|
||||
|
||||
pub async fn load_limits(&self, limits: &HashMap<String, usize>) {
|
||||
let mut max_ips = self.max_ips.write().await;
|
||||
max_ips.clone_from(limits);
|
||||
}
|
||||
|
||||
/// Drop every entry of `user_recent` whose age at `now` exceeds `window`.
///
/// An entry aged exactly `window` is kept.
fn prune_recent(user_recent: &mut HashMap<IpAddr, Instant>, now: Instant, window: Duration) {
    if !user_recent.is_empty() {
        user_recent.retain(|_, last_seen| {
            let age = now.duration_since(*last_seen);
            age <= window
        });
    }
}
|
||||
|
||||
/// Проверить, может ли пользователь подключиться с данного IP-адреса
|
||||
/// и добавить IP в список активных, если проверка успешна
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `username` - Имя пользователя
|
||||
/// * `ip` - IP-адрес клиента
|
||||
///
|
||||
/// # Returns
|
||||
/// * `Ok(())` - Подключение разрешено, IP добавлен в активные
|
||||
/// * `Err(String)` - Подключение отклонено с описанием причины
|
||||
pub async fn check_and_add(&self, username: &str, ip: IpAddr) -> Result<(), String> {
|
||||
// Получаем лимит для пользователя
|
||||
let max_ips = self.max_ips.read().await;
|
||||
let limit = match max_ips.get(username) {
|
||||
Some(limit) => *limit,
|
||||
None => {
|
||||
// Если лимит не задан - разрешаем безлимитный доступ
|
||||
drop(max_ips);
|
||||
let mut active_ips = self.active_ips.write().await;
|
||||
let user_ips = active_ips
|
||||
.entry(username.to_string())
|
||||
.or_insert_with(HashSet::new);
|
||||
user_ips.insert(ip);
|
||||
return Ok(());
|
||||
}
|
||||
self.maybe_compact_empty_users().await;
|
||||
let limit = {
|
||||
let max_ips = self.max_ips.read().await;
|
||||
max_ips.get(username).copied()
|
||||
};
|
||||
drop(max_ips);
|
||||
let mode = *self.limit_mode.read().await;
|
||||
let window = *self.limit_window.read().await;
|
||||
let now = Instant::now();
|
||||
|
||||
// Проверяем и обновляем активные IP
|
||||
let mut active_ips = self.active_ips.write().await;
|
||||
let user_ips = active_ips
|
||||
let user_active = active_ips
|
||||
.entry(username.to_string())
|
||||
.or_insert_with(HashSet::new);
|
||||
.or_insert_with(HashMap::new);
|
||||
|
||||
// Если IP уже есть в списке - это повторное подключение, разрешаем
|
||||
if user_ips.contains(&ip) {
|
||||
let mut recent_ips = self.recent_ips.write().await;
|
||||
let user_recent = recent_ips
|
||||
.entry(username.to_string())
|
||||
.or_insert_with(HashMap::new);
|
||||
Self::prune_recent(user_recent, now, window);
|
||||
|
||||
if let Some(count) = user_active.get_mut(&ip) {
|
||||
*count = count.saturating_add(1);
|
||||
user_recent.insert(ip, now);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Проверяем, не превышен ли лимит
|
||||
if user_ips.len() >= limit {
|
||||
return Err(format!(
|
||||
"IP limit reached for user '{}': {}/{} unique IPs already connected",
|
||||
username,
|
||||
user_ips.len(),
|
||||
limit
|
||||
));
|
||||
if let Some(limit) = limit {
|
||||
let active_limit_reached = user_active.len() >= limit;
|
||||
let recent_limit_reached = user_recent.len() >= limit;
|
||||
let deny = match mode {
|
||||
UserMaxUniqueIpsMode::ActiveWindow => active_limit_reached,
|
||||
UserMaxUniqueIpsMode::TimeWindow => recent_limit_reached,
|
||||
UserMaxUniqueIpsMode::Combined => active_limit_reached || recent_limit_reached,
|
||||
};
|
||||
|
||||
if deny {
|
||||
return Err(format!(
|
||||
"IP limit reached for user '{}': active={}/{} recent={}/{} mode={:?}",
|
||||
username,
|
||||
user_active.len(),
|
||||
limit,
|
||||
user_recent.len(),
|
||||
limit,
|
||||
mode
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Лимит не превышен - добавляем новый IP
|
||||
user_ips.insert(ip);
|
||||
user_active.insert(ip, 1);
|
||||
user_recent.insert(ip, now);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Удалить IP-адрес из списка активных при отключении клиента
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `username` - Имя пользователя
|
||||
/// * `ip` - IP-адрес отключившегося клиента
|
||||
pub async fn remove_ip(&self, username: &str, ip: IpAddr) {
|
||||
self.maybe_compact_empty_users().await;
|
||||
let mut active_ips = self.active_ips.write().await;
|
||||
|
||||
if let Some(user_ips) = active_ips.get_mut(username) {
|
||||
user_ips.remove(&ip);
|
||||
|
||||
// Если у пользователя не осталось активных IP - удаляем запись
|
||||
// для экономии памяти
|
||||
if let Some(count) = user_ips.get_mut(&ip) {
|
||||
if *count > 1 {
|
||||
*count -= 1;
|
||||
} else {
|
||||
user_ips.remove(&ip);
|
||||
}
|
||||
}
|
||||
if user_ips.is_empty() {
|
||||
active_ips.remove(username);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Получить текущее количество активных IP-адресов для пользователя
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `username` - Имя пользователя
|
||||
///
|
||||
/// # Returns
|
||||
/// Количество уникальных активных IP-адресов
|
||||
pub async fn get_active_ip_count(&self, username: &str) -> usize {
|
||||
let active_ips = self.active_ips.read().await;
|
||||
active_ips
|
||||
.get(username)
|
||||
.map(|ips| ips.len())
|
||||
.unwrap_or(0)
|
||||
pub async fn get_recent_counts_for_users(&self, users: &[String]) -> HashMap<String, usize> {
|
||||
let window = *self.limit_window.read().await;
|
||||
let now = Instant::now();
|
||||
let recent_ips = self.recent_ips.read().await;
|
||||
|
||||
let mut counts = HashMap::with_capacity(users.len());
|
||||
for user in users {
|
||||
let count = if let Some(user_recent) = recent_ips.get(user) {
|
||||
user_recent
|
||||
.values()
|
||||
.filter(|seen_at| now.duration_since(**seen_at) <= window)
|
||||
.count()
|
||||
} else {
|
||||
0
|
||||
};
|
||||
counts.insert(user.clone(), count);
|
||||
}
|
||||
counts
|
||||
}
|
||||
|
||||
pub async fn get_active_ips_for_users(&self, users: &[String]) -> HashMap<String, Vec<IpAddr>> {
|
||||
let active_ips = self.active_ips.read().await;
|
||||
let mut out = HashMap::with_capacity(users.len());
|
||||
for user in users {
|
||||
let mut ips = active_ips
|
||||
.get(user)
|
||||
.map(|per_ip| per_ip.keys().copied().collect::<Vec<_>>())
|
||||
.unwrap_or_else(Vec::new);
|
||||
ips.sort();
|
||||
out.insert(user.clone(), ips);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
pub async fn get_recent_ips_for_users(&self, users: &[String]) -> HashMap<String, Vec<IpAddr>> {
|
||||
let window = *self.limit_window.read().await;
|
||||
let now = Instant::now();
|
||||
let recent_ips = self.recent_ips.read().await;
|
||||
|
||||
let mut out = HashMap::with_capacity(users.len());
|
||||
for user in users {
|
||||
let mut ips = if let Some(user_recent) = recent_ips.get(user) {
|
||||
user_recent
|
||||
.iter()
|
||||
.filter(|(_, seen_at)| now.duration_since(**seen_at) <= window)
|
||||
.map(|(ip, _)| *ip)
|
||||
.collect::<Vec<_>>()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
ips.sort();
|
||||
out.insert(user.clone(), ips);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
pub async fn get_active_ip_count(&self, username: &str) -> usize {
|
||||
let active_ips = self.active_ips.read().await;
|
||||
active_ips.get(username).map(|ips| ips.len()).unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Получить список всех активных IP-адресов для пользователя
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `username` - Имя пользователя
|
||||
///
|
||||
/// # Returns
|
||||
/// Вектор с активными IP-адресами
|
||||
pub async fn get_active_ips(&self, username: &str) -> Vec<IpAddr> {
|
||||
let active_ips = self.active_ips.read().await;
|
||||
active_ips
|
||||
.get(username)
|
||||
.map(|ips| ips.iter().copied().collect())
|
||||
.map(|ips| ips.keys().copied().collect())
|
||||
.unwrap_or_else(Vec::new)
|
||||
}
|
||||
|
||||
/// Получить статистику по всем пользователям
|
||||
///
|
||||
/// # Returns
|
||||
/// Вектор кортежей: (имя_пользователя, количество_активных_IP, лимит)
|
||||
pub async fn get_stats(&self) -> Vec<(String, usize, usize)> {
|
||||
let active_ips = self.active_ips.read().await;
|
||||
let max_ips = self.max_ips.read().await;
|
||||
|
||||
let mut stats = Vec::new();
|
||||
|
||||
// Собираем статистику по пользователям с активными подключениями
|
||||
for (username, user_ips) in active_ips.iter() {
|
||||
let limit = max_ips.get(username).copied().unwrap_or(0);
|
||||
stats.push((username.clone(), user_ips.len(), limit));
|
||||
}
|
||||
|
||||
stats.sort_by(|a, b| a.0.cmp(&b.0)); // Сортируем по имени пользователя
|
||||
|
||||
stats.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
stats
|
||||
}
|
||||
|
||||
/// Очистить все активные IP для пользователя (при необходимости)
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `username` - Имя пользователя
|
||||
pub async fn clear_user_ips(&self, username: &str) {
|
||||
let mut active_ips = self.active_ips.write().await;
|
||||
active_ips.remove(username);
|
||||
drop(active_ips);
|
||||
|
||||
let mut recent_ips = self.recent_ips.write().await;
|
||||
recent_ips.remove(username);
|
||||
}
|
||||
|
||||
/// Очистить всю статистику (использовать с осторожностью!)
|
||||
pub async fn clear_all(&self) {
|
||||
let mut active_ips = self.active_ips.write().await;
|
||||
active_ips.clear();
|
||||
drop(active_ips);
|
||||
|
||||
let mut recent_ips = self.recent_ips.write().await;
|
||||
recent_ips.clear();
|
||||
}
|
||||
|
||||
/// Проверить, подключен ли пользователь с данного IP
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `username` - Имя пользователя
|
||||
/// * `ip` - IP-адрес для проверки
|
||||
///
|
||||
/// # Returns
|
||||
/// `true` если IP активен, `false` если нет
|
||||
pub async fn is_ip_active(&self, username: &str, ip: IpAddr) -> bool {
|
||||
let active_ips = self.active_ips.read().await;
|
||||
active_ips
|
||||
.get(username)
|
||||
.map(|ips| ips.contains(&ip))
|
||||
.map(|ips| ips.contains_key(&ip))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Получить лимит для пользователя
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `username` - Имя пользователя
|
||||
///
|
||||
/// # Returns
|
||||
/// Лимит IP-адресов или None, если лимит не установлен
|
||||
pub async fn get_user_limit(&self, username: &str) -> Option<usize> {
|
||||
let max_ips = self.max_ips.read().await;
|
||||
max_ips.get(username).copied()
|
||||
}
|
||||
|
||||
/// Форматировать статистику в читаемый текст
|
||||
///
|
||||
/// # Returns
|
||||
/// Строка со статистикой для логов или мониторинга
|
||||
pub async fn format_stats(&self) -> String {
|
||||
let stats = self.get_stats().await;
|
||||
|
||||
|
||||
if stats.is_empty() {
|
||||
return String::from("No active users");
|
||||
}
|
||||
|
||||
|
||||
let mut output = String::from("User IP Statistics:\n");
|
||||
output.push_str("==================\n");
|
||||
|
||||
|
||||
for (username, active_count, limit) in stats {
|
||||
output.push_str(&format!(
|
||||
"User: {:<20} Active IPs: {}/{}\n",
|
||||
username,
|
||||
active_count,
|
||||
if limit > 0 { limit.to_string() } else { "unlimited".to_string() }
|
||||
if limit > 0 {
|
||||
limit.to_string()
|
||||
} else {
|
||||
"unlimited".to_string()
|
||||
}
|
||||
));
|
||||
|
||||
|
||||
let ips = self.get_active_ips(&username).await;
|
||||
for ip in ips {
|
||||
output.push_str(&format!(" └─ {}\n", ip));
|
||||
output.push_str(&format!(" - {}\n", ip));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
output
|
||||
}
|
||||
}
|
||||
@@ -257,10 +335,6 @@ impl Default for UserIpTracker {
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// TESTS
|
||||
// ============================================================================
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -283,17 +357,33 @@ mod tests {
|
||||
let ip2 = test_ipv4(192, 168, 1, 2);
|
||||
let ip3 = test_ipv4(192, 168, 1, 3);
|
||||
|
||||
// Первые два IP должны быть приняты
|
||||
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
|
||||
assert!(tracker.check_and_add("test_user", ip2).await.is_ok());
|
||||
|
||||
// Третий IP должен быть отклонен
|
||||
assert!(tracker.check_and_add("test_user", ip3).await.is_err());
|
||||
|
||||
// Проверяем счетчик
|
||||
assert_eq!(tracker.get_active_ip_count("test_user").await, 2);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_active_window_rejects_new_ip_and_keeps_existing_session() {
|
||||
let tracker = UserIpTracker::new();
|
||||
tracker.set_user_limit("test_user", 1).await;
|
||||
tracker
|
||||
.set_limit_policy(UserMaxUniqueIpsMode::ActiveWindow, 30)
|
||||
.await;
|
||||
|
||||
let ip1 = test_ipv4(10, 10, 10, 1);
|
||||
let ip2 = test_ipv4(10, 10, 10, 2);
|
||||
|
||||
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
|
||||
assert!(tracker.is_ip_active("test_user", ip1).await);
|
||||
assert!(tracker.check_and_add("test_user", ip2).await.is_err());
|
||||
|
||||
// Existing session remains active; only new unique IP is denied.
|
||||
assert!(tracker.is_ip_active("test_user", ip1).await);
|
||||
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_reconnection_from_same_ip() {
|
||||
let tracker = UserIpTracker::new();
|
||||
@@ -301,16 +391,29 @@ mod tests {
|
||||
|
||||
let ip1 = test_ipv4(192, 168, 1, 1);
|
||||
|
||||
// Первое подключение
|
||||
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
|
||||
|
||||
// Повторное подключение с того же IP должно пройти
|
||||
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
|
||||
|
||||
// Счетчик не должен увеличиться
|
||||
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_same_ip_disconnect_keeps_active_while_other_session_alive() {
|
||||
let tracker = UserIpTracker::new();
|
||||
tracker.set_user_limit("test_user", 2).await;
|
||||
|
||||
let ip1 = test_ipv4(192, 168, 1, 1);
|
||||
|
||||
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
|
||||
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
|
||||
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
|
||||
|
||||
tracker.remove_ip("test_user", ip1).await;
|
||||
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
|
||||
|
||||
tracker.remove_ip("test_user", ip1).await;
|
||||
assert_eq!(tracker.get_active_ip_count("test_user").await, 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_ip_removal() {
|
||||
let tracker = UserIpTracker::new();
|
||||
@@ -320,36 +423,28 @@ mod tests {
|
||||
let ip2 = test_ipv4(192, 168, 1, 2);
|
||||
let ip3 = test_ipv4(192, 168, 1, 3);
|
||||
|
||||
// Добавляем два IP
|
||||
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
|
||||
assert!(tracker.check_and_add("test_user", ip2).await.is_ok());
|
||||
|
||||
// Третий не должен пройти
|
||||
assert!(tracker.check_and_add("test_user", ip3).await.is_err());
|
||||
|
||||
// Удаляем первый IP
|
||||
tracker.remove_ip("test_user", ip1).await;
|
||||
|
||||
// Теперь третий должен пройти
|
||||
|
||||
assert!(tracker.check_and_add("test_user", ip3).await.is_ok());
|
||||
|
||||
assert_eq!(tracker.get_active_ip_count("test_user").await, 2);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_no_limit() {
|
||||
let tracker = UserIpTracker::new();
|
||||
// Не устанавливаем лимит для test_user
|
||||
|
||||
let ip1 = test_ipv4(192, 168, 1, 1);
|
||||
let ip2 = test_ipv4(192, 168, 1, 2);
|
||||
let ip3 = test_ipv4(192, 168, 1, 3);
|
||||
|
||||
// Без лимита все IP должны проходить
|
||||
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
|
||||
assert!(tracker.check_and_add("test_user", ip2).await.is_ok());
|
||||
assert!(tracker.check_and_add("test_user", ip3).await.is_ok());
|
||||
|
||||
|
||||
assert_eq!(tracker.get_active_ip_count("test_user").await, 3);
|
||||
}
|
||||
|
||||
@@ -362,11 +457,9 @@ mod tests {
|
||||
let ip1 = test_ipv4(192, 168, 1, 1);
|
||||
let ip2 = test_ipv4(192, 168, 1, 2);
|
||||
|
||||
// user1 может использовать 2 IP
|
||||
assert!(tracker.check_and_add("user1", ip1).await.is_ok());
|
||||
assert!(tracker.check_and_add("user1", ip2).await.is_ok());
|
||||
|
||||
// user2 может использовать только 1 IP
|
||||
assert!(tracker.check_and_add("user2", ip1).await.is_ok());
|
||||
assert!(tracker.check_and_add("user2", ip2).await.is_err());
|
||||
}
|
||||
@@ -379,10 +472,9 @@ mod tests {
|
||||
let ipv4 = test_ipv4(192, 168, 1, 1);
|
||||
let ipv6 = test_ipv6();
|
||||
|
||||
// Должны работать оба типа адресов
|
||||
assert!(tracker.check_and_add("test_user", ipv4).await.is_ok());
|
||||
assert!(tracker.check_and_add("test_user", ipv6).await.is_ok());
|
||||
|
||||
|
||||
assert_eq!(tracker.get_active_ip_count("test_user").await, 2);
|
||||
}
|
||||
|
||||
@@ -417,8 +509,7 @@ mod tests {
|
||||
|
||||
let stats = tracker.get_stats().await;
|
||||
assert_eq!(stats.len(), 2);
|
||||
|
||||
// Проверяем наличие обоих пользователей в статистике
|
||||
|
||||
assert!(stats.iter().any(|(name, _, _)| name == "user1"));
|
||||
assert!(stats.iter().any(|(name, _, _)| name == "user2"));
|
||||
}
|
||||
@@ -427,10 +518,10 @@ mod tests {
|
||||
async fn test_clear_user_ips() {
|
||||
let tracker = UserIpTracker::new();
|
||||
let ip1 = test_ipv4(192, 168, 1, 1);
|
||||
|
||||
|
||||
tracker.check_and_add("test_user", ip1).await.unwrap();
|
||||
assert_eq!(tracker.get_active_ip_count("test_user").await, 1);
|
||||
|
||||
|
||||
tracker.clear_user_ips("test_user").await;
|
||||
assert_eq!(tracker.get_active_ip_count("test_user").await, 0);
|
||||
}
|
||||
@@ -440,9 +531,9 @@ mod tests {
|
||||
let tracker = UserIpTracker::new();
|
||||
let ip1 = test_ipv4(192, 168, 1, 1);
|
||||
let ip2 = test_ipv4(192, 168, 1, 2);
|
||||
|
||||
|
||||
tracker.check_and_add("test_user", ip1).await.unwrap();
|
||||
|
||||
|
||||
assert!(tracker.is_ip_active("test_user", ip1).await);
|
||||
assert!(!tracker.is_ip_active("test_user", ip2).await);
|
||||
}
|
||||
@@ -450,15 +541,85 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_load_limits_from_config() {
|
||||
let tracker = UserIpTracker::new();
|
||||
|
||||
|
||||
let mut config_limits = HashMap::new();
|
||||
config_limits.insert("user1".to_string(), 5);
|
||||
config_limits.insert("user2".to_string(), 3);
|
||||
|
||||
|
||||
tracker.load_limits(&config_limits).await;
|
||||
|
||||
|
||||
assert_eq!(tracker.get_user_limit("user1").await, Some(5));
|
||||
assert_eq!(tracker.get_user_limit("user2").await, Some(3));
|
||||
assert_eq!(tracker.get_user_limit("user3").await, None);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_load_limits_replaces_previous_map() {
|
||||
let tracker = UserIpTracker::new();
|
||||
|
||||
let mut first = HashMap::new();
|
||||
first.insert("user1".to_string(), 2);
|
||||
first.insert("user2".to_string(), 3);
|
||||
tracker.load_limits(&first).await;
|
||||
|
||||
let mut second = HashMap::new();
|
||||
second.insert("user2".to_string(), 5);
|
||||
tracker.load_limits(&second).await;
|
||||
|
||||
assert_eq!(tracker.get_user_limit("user1").await, None);
|
||||
assert_eq!(tracker.get_user_limit("user2").await, Some(5));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_time_window_mode_blocks_recent_ip_churn() {
|
||||
let tracker = UserIpTracker::new();
|
||||
tracker.set_user_limit("test_user", 1).await;
|
||||
tracker
|
||||
.set_limit_policy(UserMaxUniqueIpsMode::TimeWindow, 30)
|
||||
.await;
|
||||
|
||||
let ip1 = test_ipv4(10, 0, 0, 1);
|
||||
let ip2 = test_ipv4(10, 0, 0, 2);
|
||||
|
||||
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
|
||||
tracker.remove_ip("test_user", ip1).await;
|
||||
assert!(tracker.check_and_add("test_user", ip2).await.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_combined_mode_enforces_active_and_recent_limits() {
|
||||
let tracker = UserIpTracker::new();
|
||||
tracker.set_user_limit("test_user", 1).await;
|
||||
tracker
|
||||
.set_limit_policy(UserMaxUniqueIpsMode::Combined, 30)
|
||||
.await;
|
||||
|
||||
let ip1 = test_ipv4(10, 0, 1, 1);
|
||||
let ip2 = test_ipv4(10, 0, 1, 2);
|
||||
|
||||
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
|
||||
assert!(tracker.check_and_add("test_user", ip2).await.is_err());
|
||||
|
||||
tracker.remove_ip("test_user", ip1).await;
|
||||
assert!(tracker.check_and_add("test_user", ip2).await.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_time_window_expires() {
|
||||
let tracker = UserIpTracker::new();
|
||||
tracker.set_user_limit("test_user", 1).await;
|
||||
tracker
|
||||
.set_limit_policy(UserMaxUniqueIpsMode::TimeWindow, 1)
|
||||
.await;
|
||||
|
||||
let ip1 = test_ipv4(10, 1, 0, 1);
|
||||
let ip2 = test_ipv4(10, 1, 0, 2);
|
||||
|
||||
assert!(tracker.check_and_add("test_user", ip1).await.is_ok());
|
||||
tracker.remove_ip("test_user", ip1).await;
|
||||
assert!(tracker.check_and_add("test_user", ip2).await.is_err());
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(1100)).await;
|
||||
assert!(tracker.check_and_add("test_user", ip2).await.is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
130
src/maestro/admission.rs
Normal file
130
src/maestro/admission.rs
Normal file
@@ -0,0 +1,130 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use tokio::sync::watch;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::proxy::route_mode::{RelayRouteMode, RouteRuntimeController};
|
||||
use crate::transport::middle_proxy::MePool;
|
||||
|
||||
const STARTUP_FALLBACK_AFTER: Duration = Duration::from_secs(80);
|
||||
const RUNTIME_FALLBACK_AFTER: Duration = Duration::from_secs(6);
|
||||
|
||||
pub(crate) async fn configure_admission_gate(
|
||||
config: &Arc<ProxyConfig>,
|
||||
me_pool: Option<Arc<MePool>>,
|
||||
route_runtime: Arc<RouteRuntimeController>,
|
||||
admission_tx: &watch::Sender<bool>,
|
||||
config_rx: watch::Receiver<Arc<ProxyConfig>>,
|
||||
) {
|
||||
if config.general.use_middle_proxy {
|
||||
if let Some(pool) = me_pool.as_ref() {
|
||||
let initial_ready = pool.admission_ready_conditional_cast().await;
|
||||
admission_tx.send_replace(initial_ready);
|
||||
let _ = route_runtime.set_mode(RelayRouteMode::Middle);
|
||||
if initial_ready {
|
||||
info!("Conditional-admission gate: open / ME pool READY");
|
||||
} else {
|
||||
warn!("Conditional-admission gate: closed / ME pool is NOT ready)");
|
||||
}
|
||||
|
||||
let pool_for_gate = pool.clone();
|
||||
let admission_tx_gate = admission_tx.clone();
|
||||
let route_runtime_gate = route_runtime.clone();
|
||||
let mut config_rx_gate = config_rx.clone();
|
||||
let mut admission_poll_ms = config.general.me_admission_poll_ms.max(1);
|
||||
let mut fallback_enabled = config.general.me2dc_fallback;
|
||||
tokio::spawn(async move {
|
||||
let mut gate_open = initial_ready;
|
||||
let mut route_mode = RelayRouteMode::Middle;
|
||||
let mut ready_observed = initial_ready;
|
||||
let mut not_ready_since = if initial_ready {
|
||||
None
|
||||
} else {
|
||||
Some(Instant::now())
|
||||
};
|
||||
loop {
|
||||
tokio::select! {
|
||||
changed = config_rx_gate.changed() => {
|
||||
if changed.is_err() {
|
||||
break;
|
||||
}
|
||||
let cfg = config_rx_gate.borrow_and_update().clone();
|
||||
admission_poll_ms = cfg.general.me_admission_poll_ms.max(1);
|
||||
fallback_enabled = cfg.general.me2dc_fallback;
|
||||
continue;
|
||||
}
|
||||
_ = tokio::time::sleep(Duration::from_millis(admission_poll_ms)) => {}
|
||||
}
|
||||
let ready = pool_for_gate.admission_ready_conditional_cast().await;
|
||||
let now = Instant::now();
|
||||
let (next_gate_open, next_route_mode, next_fallback_active) = if ready {
|
||||
ready_observed = true;
|
||||
not_ready_since = None;
|
||||
(true, RelayRouteMode::Middle, false)
|
||||
} else {
|
||||
let not_ready_started_at = *not_ready_since.get_or_insert(now);
|
||||
let not_ready_for = now.saturating_duration_since(not_ready_started_at);
|
||||
let fallback_after = if ready_observed {
|
||||
RUNTIME_FALLBACK_AFTER
|
||||
} else {
|
||||
STARTUP_FALLBACK_AFTER
|
||||
};
|
||||
if fallback_enabled && not_ready_for > fallback_after {
|
||||
(true, RelayRouteMode::Direct, true)
|
||||
} else {
|
||||
(false, RelayRouteMode::Middle, false)
|
||||
}
|
||||
};
|
||||
|
||||
if next_route_mode != route_mode {
|
||||
route_mode = next_route_mode;
|
||||
if let Some(snapshot) = route_runtime_gate.set_mode(route_mode) {
|
||||
if matches!(route_mode, RelayRouteMode::Middle) {
|
||||
info!(
|
||||
target_mode = route_mode.as_str(),
|
||||
cutover_generation = snapshot.generation,
|
||||
"Middle-End routing restored for new sessions"
|
||||
);
|
||||
} else {
|
||||
let fallback_after = if ready_observed {
|
||||
RUNTIME_FALLBACK_AFTER
|
||||
} else {
|
||||
STARTUP_FALLBACK_AFTER
|
||||
};
|
||||
warn!(
|
||||
target_mode = route_mode.as_str(),
|
||||
cutover_generation = snapshot.generation,
|
||||
grace_secs = fallback_after.as_secs(),
|
||||
"ME pool stayed not-ready beyond grace; routing new sessions via Direct-DC"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if next_gate_open != gate_open {
|
||||
gate_open = next_gate_open;
|
||||
admission_tx_gate.send_replace(gate_open);
|
||||
if gate_open {
|
||||
if next_fallback_active {
|
||||
warn!("Conditional-admission gate opened in ME fallback mode");
|
||||
} else {
|
||||
info!("Conditional-admission gate opened / ME pool READY");
|
||||
}
|
||||
} else {
|
||||
warn!("Conditional-admission gate closed / ME pool is NOT ready");
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
} else {
|
||||
admission_tx.send_replace(false);
|
||||
let _ = route_runtime.set_mode(RelayRouteMode::Direct);
|
||||
warn!("Conditional-admission gate: closed / ME pool is UNAVAILABLE");
|
||||
}
|
||||
} else {
|
||||
admission_tx.send_replace(true);
|
||||
let _ = route_runtime.set_mode(RelayRouteMode::Direct);
|
||||
}
|
||||
}
|
||||
220
src/maestro/connectivity.rs
Normal file
220
src/maestro/connectivity.rs
Normal file
@@ -0,0 +1,220 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::info;
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::network::probe::NetworkDecision;
|
||||
use crate::startup::{
|
||||
COMPONENT_DC_CONNECTIVITY_PING, COMPONENT_ME_CONNECTIVITY_PING, COMPONENT_RUNTIME_READY,
|
||||
StartupTracker,
|
||||
};
|
||||
use crate::transport::middle_proxy::{
|
||||
MePingFamily, MePingSample, MePool, format_me_route, format_sample_line, run_me_ping,
|
||||
};
|
||||
use crate::transport::UpstreamManager;
|
||||
|
||||
pub(crate) async fn run_startup_connectivity(
|
||||
config: &Arc<ProxyConfig>,
|
||||
me_pool: &Option<Arc<MePool>>,
|
||||
rng: Arc<SecureRandom>,
|
||||
startup_tracker: &Arc<StartupTracker>,
|
||||
upstream_manager: Arc<UpstreamManager>,
|
||||
prefer_ipv6: bool,
|
||||
decision: &NetworkDecision,
|
||||
process_started_at: Instant,
|
||||
api_me_pool: Arc<RwLock<Option<Arc<MePool>>>>,
|
||||
) {
|
||||
if me_pool.is_some() {
|
||||
startup_tracker
|
||||
.start_component(
|
||||
COMPONENT_ME_CONNECTIVITY_PING,
|
||||
Some("run startup ME connectivity check".to_string()),
|
||||
)
|
||||
.await;
|
||||
} else {
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_ME_CONNECTIVITY_PING,
|
||||
Some("ME pool is not available".to_string()),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
if let Some(pool) = me_pool {
|
||||
let me_results = run_me_ping(pool, &rng).await;
|
||||
|
||||
let v4_ok = me_results.iter().any(|r| {
|
||||
matches!(r.family, MePingFamily::V4)
|
||||
&& r.samples.iter().any(|s| s.error.is_none() && s.handshake_ms.is_some())
|
||||
});
|
||||
let v6_ok = me_results.iter().any(|r| {
|
||||
matches!(r.family, MePingFamily::V6)
|
||||
&& r.samples.iter().any(|s| s.error.is_none() && s.handshake_ms.is_some())
|
||||
});
|
||||
|
||||
info!("================= Telegram ME Connectivity =================");
|
||||
if v4_ok && v6_ok {
|
||||
info!(" IPv4 and IPv6 available");
|
||||
} else if v4_ok {
|
||||
info!(" IPv4 only / IPv6 unavailable");
|
||||
} else if v6_ok {
|
||||
info!(" IPv6 only / IPv4 unavailable");
|
||||
} else {
|
||||
info!(" No ME connectivity");
|
||||
}
|
||||
let me_route =
|
||||
format_me_route(&config.upstreams, &me_results, prefer_ipv6, v4_ok, v6_ok).await;
|
||||
info!(" via {}", me_route);
|
||||
info!("============================================================");
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
let mut grouped: BTreeMap<i32, Vec<MePingSample>> = BTreeMap::new();
|
||||
for report in me_results {
|
||||
for s in report.samples {
|
||||
grouped.entry(s.dc).or_default().push(s);
|
||||
}
|
||||
}
|
||||
|
||||
let family_order = if prefer_ipv6 {
|
||||
vec![MePingFamily::V6, MePingFamily::V4]
|
||||
} else {
|
||||
vec![MePingFamily::V4, MePingFamily::V6]
|
||||
};
|
||||
|
||||
for (dc, samples) in grouped {
|
||||
for family in &family_order {
|
||||
let fam_samples: Vec<&MePingSample> = samples
|
||||
.iter()
|
||||
.filter(|s| matches!(s.family, f if &f == family))
|
||||
.collect();
|
||||
if fam_samples.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let fam_label = match family {
|
||||
MePingFamily::V4 => "IPv4",
|
||||
MePingFamily::V6 => "IPv6",
|
||||
};
|
||||
info!(" DC{} [{}]", dc, fam_label);
|
||||
for sample in fam_samples {
|
||||
let line = format_sample_line(sample);
|
||||
info!("{}", line);
|
||||
}
|
||||
}
|
||||
}
|
||||
info!("============================================================");
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_ME_CONNECTIVITY_PING,
|
||||
Some("startup ME connectivity check completed".to_string()),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
info!("================= Telegram DC Connectivity =================");
|
||||
startup_tracker
|
||||
.start_component(
|
||||
COMPONENT_DC_CONNECTIVITY_PING,
|
||||
Some("run startup DC connectivity check".to_string()),
|
||||
)
|
||||
.await;
|
||||
|
||||
let ping_results = upstream_manager
|
||||
.ping_all_dcs(
|
||||
prefer_ipv6,
|
||||
&config.dc_overrides,
|
||||
decision.ipv4_dc,
|
||||
decision.ipv6_dc,
|
||||
)
|
||||
.await;
|
||||
|
||||
for upstream_result in &ping_results {
|
||||
let v6_works = upstream_result.v6_results.iter().any(|r| r.rtt_ms.is_some());
|
||||
let v4_works = upstream_result.v4_results.iter().any(|r| r.rtt_ms.is_some());
|
||||
|
||||
if upstream_result.both_available {
|
||||
if prefer_ipv6 {
|
||||
info!(" IPv6 in use / IPv4 is fallback");
|
||||
} else {
|
||||
info!(" IPv4 in use / IPv6 is fallback");
|
||||
}
|
||||
} else if v6_works && !v4_works {
|
||||
info!(" IPv6 only / IPv4 unavailable");
|
||||
} else if v4_works && !v6_works {
|
||||
info!(" IPv4 only / IPv6 unavailable");
|
||||
} else if !v6_works && !v4_works {
|
||||
info!(" No DC connectivity");
|
||||
}
|
||||
|
||||
info!(" via {}", upstream_result.upstream_name);
|
||||
info!("============================================================");
|
||||
|
||||
if v6_works {
|
||||
for dc in &upstream_result.v6_results {
|
||||
let addr_str = format!("{}:{}", dc.dc_addr.ip(), dc.dc_addr.port());
|
||||
match &dc.rtt_ms {
|
||||
Some(rtt) => {
|
||||
info!(" DC{} [IPv6] {} - {:.0} ms", dc.dc_idx, addr_str, rtt);
|
||||
}
|
||||
None => {
|
||||
let err = dc.error.as_deref().unwrap_or("fail");
|
||||
info!(" DC{} [IPv6] {} - FAIL ({})", dc.dc_idx, addr_str, err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!("============================================================");
|
||||
}
|
||||
|
||||
if v4_works {
|
||||
for dc in &upstream_result.v4_results {
|
||||
let addr_str = format!("{}:{}", dc.dc_addr.ip(), dc.dc_addr.port());
|
||||
match &dc.rtt_ms {
|
||||
Some(rtt) => {
|
||||
info!(
|
||||
" DC{} [IPv4] {}\t\t\t\t{:.0} ms",
|
||||
dc.dc_idx, addr_str, rtt
|
||||
);
|
||||
}
|
||||
None => {
|
||||
let err = dc.error.as_deref().unwrap_or("fail");
|
||||
info!(
|
||||
" DC{} [IPv4] {}:\t\t\t\tFAIL ({})",
|
||||
dc.dc_idx, addr_str, err
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!("============================================================");
|
||||
}
|
||||
}
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_DC_CONNECTIVITY_PING,
|
||||
Some("startup DC connectivity check completed".to_string()),
|
||||
)
|
||||
.await;
|
||||
|
||||
let initialized_secs = process_started_at.elapsed().as_secs();
|
||||
let second_suffix = if initialized_secs == 1 { "" } else { "s" };
|
||||
startup_tracker
|
||||
.start_component(
|
||||
COMPONENT_RUNTIME_READY,
|
||||
Some("finalize startup runtime state".to_string()),
|
||||
)
|
||||
.await;
|
||||
info!("===================== Telegram Startup =====================");
|
||||
info!(
|
||||
" DC/ME Initialized in {} second{}",
|
||||
initialized_secs, second_suffix
|
||||
);
|
||||
info!("============================================================");
|
||||
|
||||
if let Some(pool) = me_pool {
|
||||
pool.set_runtime_ready(true);
|
||||
}
|
||||
*api_me_pool.write().await = me_pool.clone();
|
||||
}
|
||||
320
src/maestro/helpers.rs
Normal file
320
src/maestro/helpers.rs
Normal file
@@ -0,0 +1,320 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use tokio::sync::watch;
|
||||
use tracing::{debug, error, info, warn};
|
||||
|
||||
use crate::cli;
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::transport::middle_proxy::{
|
||||
ProxyConfigData, fetch_proxy_config_with_raw, load_proxy_config_cache, save_proxy_config_cache,
|
||||
};
|
||||
|
||||
pub(crate) fn parse_cli() -> (String, bool, Option<String>) {
|
||||
let mut config_path = "config.toml".to_string();
|
||||
let mut silent = false;
|
||||
let mut log_level: Option<String> = None;
|
||||
|
||||
let args: Vec<String> = std::env::args().skip(1).collect();
|
||||
|
||||
// Check for --init first (handled before tokio)
|
||||
if let Some(init_opts) = cli::parse_init_args(&args) {
|
||||
if let Err(e) = cli::run_init(init_opts) {
|
||||
eprintln!("[telemt] Init failed: {}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
std::process::exit(0);
|
||||
}
|
||||
|
||||
let mut i = 0;
|
||||
while i < args.len() {
|
||||
match args[i].as_str() {
|
||||
"--silent" | "-s" => {
|
||||
silent = true;
|
||||
}
|
||||
"--log-level" => {
|
||||
i += 1;
|
||||
if i < args.len() {
|
||||
log_level = Some(args[i].clone());
|
||||
}
|
||||
}
|
||||
s if s.starts_with("--log-level=") => {
|
||||
log_level = Some(s.trim_start_matches("--log-level=").to_string());
|
||||
}
|
||||
"--help" | "-h" => {
|
||||
eprintln!("Usage: telemt [config.toml] [OPTIONS]");
|
||||
eprintln!();
|
||||
eprintln!("Options:");
|
||||
eprintln!(" --silent, -s Suppress info logs");
|
||||
eprintln!(" --log-level <LEVEL> debug|verbose|normal|silent");
|
||||
eprintln!(" --help, -h Show this help");
|
||||
eprintln!();
|
||||
eprintln!("Setup (fire-and-forget):");
|
||||
eprintln!(
|
||||
" --init Generate config, install systemd service, start"
|
||||
);
|
||||
eprintln!(" --port <PORT> Listen port (default: 443)");
|
||||
eprintln!(
|
||||
" --domain <DOMAIN> TLS domain for masking (default: www.google.com)"
|
||||
);
|
||||
eprintln!(
|
||||
" --secret <HEX> 32-char hex secret (auto-generated if omitted)"
|
||||
);
|
||||
eprintln!(" --user <NAME> Username (default: user)");
|
||||
eprintln!(" --config-dir <DIR> Config directory (default: /etc/telemt)");
|
||||
eprintln!(" --no-start Don't start the service after install");
|
||||
std::process::exit(0);
|
||||
}
|
||||
"--version" | "-V" => {
|
||||
println!("telemt {}", env!("CARGO_PKG_VERSION"));
|
||||
std::process::exit(0);
|
||||
}
|
||||
s if !s.starts_with('-') => {
|
||||
config_path = s.to_string();
|
||||
}
|
||||
other => {
|
||||
eprintln!("Unknown option: {}", other);
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
(config_path, silent, log_level)
|
||||
}
|
||||
|
||||
pub(crate) fn print_proxy_links(host: &str, port: u16, config: &ProxyConfig) {
|
||||
info!(target: "telemt::links", "--- Proxy Links ({}) ---", host);
|
||||
for user_name in config.general.links.show.resolve_users(&config.access.users) {
|
||||
if let Some(secret) = config.access.users.get(user_name) {
|
||||
info!(target: "telemt::links", "User: {}", user_name);
|
||||
if config.general.modes.classic {
|
||||
info!(
|
||||
target: "telemt::links",
|
||||
" Classic: tg://proxy?server={}&port={}&secret={}",
|
||||
host, port, secret
|
||||
);
|
||||
}
|
||||
if config.general.modes.secure {
|
||||
info!(
|
||||
target: "telemt::links",
|
||||
" DD: tg://proxy?server={}&port={}&secret=dd{}",
|
||||
host, port, secret
|
||||
);
|
||||
}
|
||||
if config.general.modes.tls {
|
||||
let mut domains = Vec::with_capacity(1 + config.censorship.tls_domains.len());
|
||||
domains.push(config.censorship.tls_domain.clone());
|
||||
for d in &config.censorship.tls_domains {
|
||||
if !domains.contains(d) {
|
||||
domains.push(d.clone());
|
||||
}
|
||||
}
|
||||
|
||||
for domain in domains {
|
||||
let domain_hex = hex::encode(&domain);
|
||||
info!(
|
||||
target: "telemt::links",
|
||||
" EE-TLS: tg://proxy?server={}&port={}&secret=ee{}{}",
|
||||
host, port, secret, domain_hex
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warn!(target: "telemt::links", "User '{}' in show_link not found", user_name);
|
||||
}
|
||||
}
|
||||
info!(target: "telemt::links", "------------------------");
|
||||
}
|
||||
|
||||
pub(crate) async fn write_beobachten_snapshot(path: &str, payload: &str) -> std::io::Result<()> {
|
||||
if let Some(parent) = std::path::Path::new(path).parent()
|
||||
&& !parent.as_os_str().is_empty()
|
||||
{
|
||||
tokio::fs::create_dir_all(parent).await?;
|
||||
}
|
||||
tokio::fs::write(path, payload).await
|
||||
}
|
||||
|
||||
/// Picks the singular label when `value` is exactly 1, the plural label otherwise.
pub(crate) fn unit_label(value: u64, singular: &'static str, plural: &'static str) -> &'static str {
    if value == 1 { singular } else { plural }
}

/// Renders an uptime in seconds as a human-readable breakdown followed by the
/// raw second count, e.g. `"1 hour, 0 minutes, 5 seconds / 3605 seconds"`.
///
/// A month is counted as 30 days and a year as 12 such months. A unit is
/// included once the total uptime has reached at least one whole unit of that
/// size; smaller units are then always listed (possibly as zero). Seconds are
/// always present.
pub(crate) fn format_uptime(total_secs: u64) -> String {
    const SECS_PER_MINUTE: u64 = 60;
    const SECS_PER_HOUR: u64 = 60 * SECS_PER_MINUTE;
    const SECS_PER_DAY: u64 = 24 * SECS_PER_HOUR;
    const SECS_PER_MONTH: u64 = 30 * SECS_PER_DAY;
    const SECS_PER_YEAR: u64 = 12 * SECS_PER_MONTH;

    // Largest unit first: (unit size in seconds, singular label, plural label).
    const UNITS: [(u64, &str, &str); 5] = [
        (SECS_PER_YEAR, "year", "years"),
        (SECS_PER_MONTH, "month", "months"),
        (SECS_PER_DAY, "day", "days"),
        (SECS_PER_HOUR, "hour", "hours"),
        (SECS_PER_MINUTE, "minute", "minutes"),
    ];

    let mut remaining = total_secs;
    let mut parts = Vec::with_capacity(UNITS.len() + 1);

    for (unit_secs, singular, plural) in UNITS {
        let count = remaining / unit_secs;
        remaining %= unit_secs;
        // `>=` (not `>`): an uptime of exactly one unit must show that unit,
        // otherwise e.g. 60 seconds would be rendered as "0 seconds".
        if total_secs >= unit_secs {
            parts.push(format!("{} {}", count, unit_label(count, singular, plural)));
        }
    }

    // Whatever is left after the minute division is the seconds component.
    parts.push(format!(
        "{} {}",
        remaining,
        unit_label(remaining, "second", "seconds")
    ));

    format!("{} / {} seconds", parts.join(", "), total_secs)
}
|
||||
|
||||
pub(crate) async fn wait_until_admission_open(admission_rx: &mut watch::Receiver<bool>) -> bool {
|
||||
loop {
|
||||
if *admission_rx.borrow() {
|
||||
return true;
|
||||
}
|
||||
if admission_rx.changed().await.is_err() {
|
||||
return *admission_rx.borrow();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_expected_handshake_eof(err: &crate::error::ProxyError) -> bool {
|
||||
err.to_string().contains("expected 64 bytes, got 0")
|
||||
}
|
||||
|
||||
pub(crate) async fn load_startup_proxy_config_snapshot(
|
||||
url: &str,
|
||||
cache_path: Option<&str>,
|
||||
me2dc_fallback: bool,
|
||||
label: &'static str,
|
||||
) -> Option<ProxyConfigData> {
|
||||
loop {
|
||||
match fetch_proxy_config_with_raw(url).await {
|
||||
Ok((cfg, raw)) => {
|
||||
if !cfg.map.is_empty() {
|
||||
if let Some(path) = cache_path
|
||||
&& let Err(e) = save_proxy_config_cache(path, &raw).await
|
||||
{
|
||||
warn!(error = %e, path, snapshot = label, "Failed to store startup proxy-config cache");
|
||||
}
|
||||
return Some(cfg);
|
||||
}
|
||||
|
||||
warn!(snapshot = label, url, "Startup proxy-config is empty; trying disk cache");
|
||||
if let Some(path) = cache_path {
|
||||
match load_proxy_config_cache(path).await {
|
||||
Ok(cached) if !cached.map.is_empty() => {
|
||||
info!(
|
||||
snapshot = label,
|
||||
path,
|
||||
proxy_for_lines = cached.proxy_for_lines,
|
||||
"Loaded startup proxy-config from disk cache"
|
||||
);
|
||||
return Some(cached);
|
||||
}
|
||||
Ok(_) => {
|
||||
warn!(
|
||||
snapshot = label,
|
||||
path,
|
||||
"Startup proxy-config cache is empty; ignoring cache file"
|
||||
);
|
||||
}
|
||||
Err(cache_err) => {
|
||||
debug!(
|
||||
snapshot = label,
|
||||
path,
|
||||
error = %cache_err,
|
||||
"Startup proxy-config cache unavailable"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if me2dc_fallback {
|
||||
error!(
|
||||
snapshot = label,
|
||||
"Startup proxy-config unavailable and no saved config found; falling back to direct mode"
|
||||
);
|
||||
return None;
|
||||
}
|
||||
|
||||
warn!(
|
||||
snapshot = label,
|
||||
retry_in_secs = 2,
|
||||
"Startup proxy-config unavailable and no saved config found; retrying because me2dc_fallback=false"
|
||||
);
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
}
|
||||
Err(fetch_err) => {
|
||||
if let Some(path) = cache_path {
|
||||
match load_proxy_config_cache(path).await {
|
||||
Ok(cached) if !cached.map.is_empty() => {
|
||||
info!(
|
||||
snapshot = label,
|
||||
path,
|
||||
proxy_for_lines = cached.proxy_for_lines,
|
||||
"Loaded startup proxy-config from disk cache"
|
||||
);
|
||||
return Some(cached);
|
||||
}
|
||||
Ok(_) => {
|
||||
warn!(
|
||||
snapshot = label,
|
||||
path,
|
||||
"Startup proxy-config cache is empty; ignoring cache file"
|
||||
);
|
||||
}
|
||||
Err(cache_err) => {
|
||||
debug!(
|
||||
snapshot = label,
|
||||
path,
|
||||
error = %cache_err,
|
||||
"Startup proxy-config cache unavailable"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if me2dc_fallback {
|
||||
error!(
|
||||
snapshot = label,
|
||||
error = %fetch_err,
|
||||
"Startup proxy-config unavailable and no cached data; falling back to direct mode"
|
||||
);
|
||||
return None;
|
||||
}
|
||||
|
||||
warn!(
|
||||
snapshot = label,
|
||||
error = %fetch_err,
|
||||
retry_in_secs = 2,
|
||||
"Startup proxy-config unavailable; retrying because me2dc_fallback=false"
|
||||
);
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
465
src/maestro/listeners.rs
Normal file
465
src/maestro/listeners.rs
Normal file
@@ -0,0 +1,465 @@
|
||||
use std::error::Error;
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use tokio::net::TcpListener;
|
||||
#[cfg(unix)]
|
||||
use tokio::net::UnixListener;
|
||||
use tokio::sync::{Semaphore, watch};
|
||||
use tracing::{debug, error, info, warn};
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::ip_tracker::UserIpTracker;
|
||||
use crate::proxy::route_mode::{ROUTE_SWITCH_ERROR_MSG, RouteRuntimeController};
|
||||
use crate::proxy::ClientHandler;
|
||||
use crate::startup::{COMPONENT_LISTENERS_BIND, StartupTracker};
|
||||
use crate::stats::beobachten::BeobachtenStore;
|
||||
use crate::stats::{ReplayChecker, Stats};
|
||||
use crate::stream::BufferPool;
|
||||
use crate::tls_front::TlsFrontCache;
|
||||
use crate::transport::middle_proxy::MePool;
|
||||
use crate::transport::{
|
||||
ListenOptions, UpstreamManager, create_listener, find_listener_processes,
|
||||
};
|
||||
|
||||
use super::helpers::{is_expected_handshake_eof, print_proxy_links, wait_until_admission_open};
|
||||
|
||||
/// Outcome of `bind_listeners`: the TCP listeners that were bound plus a flag
/// for the optional Unix-socket listener.
pub(crate) struct BoundListeners {
|
||||
/// Bound TCP listeners, each paired with the PROXY-protocol setting resolved for it.
pub(crate) listeners: Vec<(TcpListener, bool)>,
|
||||
/// Set to `true` by `bind_listeners` once the Unix socket listener has been bound.
pub(crate) has_unix_listener: bool,
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) async fn bind_listeners(
|
||||
config: &Arc<ProxyConfig>,
|
||||
decision_ipv4_dc: bool,
|
||||
decision_ipv6_dc: bool,
|
||||
detected_ip_v4: Option<IpAddr>,
|
||||
detected_ip_v6: Option<IpAddr>,
|
||||
startup_tracker: &Arc<StartupTracker>,
|
||||
config_rx: watch::Receiver<Arc<ProxyConfig>>,
|
||||
admission_rx: watch::Receiver<bool>,
|
||||
stats: Arc<Stats>,
|
||||
upstream_manager: Arc<UpstreamManager>,
|
||||
replay_checker: Arc<ReplayChecker>,
|
||||
buffer_pool: Arc<BufferPool>,
|
||||
rng: Arc<SecureRandom>,
|
||||
me_pool: Option<Arc<MePool>>,
|
||||
route_runtime: Arc<RouteRuntimeController>,
|
||||
tls_cache: Option<Arc<TlsFrontCache>>,
|
||||
ip_tracker: Arc<UserIpTracker>,
|
||||
beobachten: Arc<BeobachtenStore>,
|
||||
max_connections: Arc<Semaphore>,
|
||||
) -> Result<BoundListeners, Box<dyn Error>> {
|
||||
startup_tracker
|
||||
.start_component(
|
||||
COMPONENT_LISTENERS_BIND,
|
||||
Some("bind TCP/Unix listeners".to_string()),
|
||||
)
|
||||
.await;
|
||||
let mut listeners = Vec::new();
|
||||
|
||||
for listener_conf in &config.server.listeners {
|
||||
let addr = SocketAddr::new(listener_conf.ip, config.server.port);
|
||||
if addr.is_ipv4() && !decision_ipv4_dc {
|
||||
warn!(%addr, "Skipping IPv4 listener: IPv4 disabled by [network]");
|
||||
continue;
|
||||
}
|
||||
if addr.is_ipv6() && !decision_ipv6_dc {
|
||||
warn!(%addr, "Skipping IPv6 listener: IPv6 disabled by [network]");
|
||||
continue;
|
||||
}
|
||||
let options = ListenOptions {
|
||||
reuse_port: listener_conf.reuse_allow,
|
||||
ipv6_only: listener_conf.ip.is_ipv6(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
match create_listener(addr, &options) {
|
||||
Ok(socket) => {
|
||||
let listener = TcpListener::from_std(socket.into())?;
|
||||
info!("Listening on {}", addr);
|
||||
let listener_proxy_protocol =
|
||||
listener_conf.proxy_protocol.unwrap_or(config.server.proxy_protocol);
|
||||
|
||||
let public_host = if let Some(ref announce) = listener_conf.announce {
|
||||
announce.clone()
|
||||
} else if listener_conf.ip.is_unspecified() {
|
||||
if listener_conf.ip.is_ipv4() {
|
||||
detected_ip_v4
|
||||
.map(|ip| ip.to_string())
|
||||
.unwrap_or_else(|| listener_conf.ip.to_string())
|
||||
} else {
|
||||
detected_ip_v6
|
||||
.map(|ip| ip.to_string())
|
||||
.unwrap_or_else(|| listener_conf.ip.to_string())
|
||||
}
|
||||
} else {
|
||||
listener_conf.ip.to_string()
|
||||
};
|
||||
|
||||
if config.general.links.public_host.is_none() && !config.general.links.show.is_empty() {
|
||||
let link_port = config.general.links.public_port.unwrap_or(config.server.port);
|
||||
print_proxy_links(&public_host, link_port, config);
|
||||
}
|
||||
|
||||
listeners.push((listener, listener_proxy_protocol));
|
||||
}
|
||||
Err(e) => {
|
||||
if e.kind() == std::io::ErrorKind::AddrInUse {
|
||||
let owners = find_listener_processes(addr);
|
||||
if owners.is_empty() {
|
||||
error!(
|
||||
%addr,
|
||||
"Failed to bind: address already in use (owner process unresolved)"
|
||||
);
|
||||
} else {
|
||||
for owner in owners {
|
||||
error!(
|
||||
%addr,
|
||||
pid = owner.pid,
|
||||
process = %owner.process,
|
||||
"Failed to bind: address already in use"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if !listener_conf.reuse_allow {
|
||||
error!(
|
||||
%addr,
|
||||
"reuse_allow=false; set [[server.listeners]].reuse_allow=true to allow multi-instance listening"
|
||||
);
|
||||
}
|
||||
} else {
|
||||
error!("Failed to bind to {}: {}", addr, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !config.general.links.show.is_empty()
|
||||
&& (config.general.links.public_host.is_some() || listeners.is_empty())
|
||||
{
|
||||
let (host, port) = if let Some(ref h) = config.general.links.public_host {
|
||||
(
|
||||
h.clone(),
|
||||
config.general.links.public_port.unwrap_or(config.server.port),
|
||||
)
|
||||
} else {
|
||||
let ip = detected_ip_v4
|
||||
.or(detected_ip_v6)
|
||||
.map(|ip| ip.to_string());
|
||||
if ip.is_none() {
|
||||
warn!(
|
||||
"show_link is configured but public IP could not be detected. Set public_host in config."
|
||||
);
|
||||
}
|
||||
(
|
||||
ip.unwrap_or_else(|| "UNKNOWN".to_string()),
|
||||
config.general.links.public_port.unwrap_or(config.server.port),
|
||||
)
|
||||
};
|
||||
|
||||
print_proxy_links(&host, port, config);
|
||||
}
|
||||
|
||||
let mut has_unix_listener = false;
|
||||
#[cfg(unix)]
|
||||
if let Some(ref unix_path) = config.server.listen_unix_sock {
|
||||
let _ = tokio::fs::remove_file(unix_path).await;
|
||||
|
||||
let unix_listener = UnixListener::bind(unix_path)?;
|
||||
|
||||
if let Some(ref perm_str) = config.server.listen_unix_sock_perm {
|
||||
match u32::from_str_radix(perm_str.trim_start_matches('0'), 8) {
|
||||
Ok(mode) => {
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
let perms = std::fs::Permissions::from_mode(mode);
|
||||
if let Err(e) = std::fs::set_permissions(unix_path, perms) {
|
||||
error!("Failed to set unix socket permissions to {}: {}", perm_str, e);
|
||||
} else {
|
||||
info!("Listening on unix:{} (mode {})", unix_path, perm_str);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Invalid listen_unix_sock_perm '{}': {}. Ignoring.", perm_str, e);
|
||||
info!("Listening on unix:{}", unix_path);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
info!("Listening on unix:{}", unix_path);
|
||||
}
|
||||
|
||||
has_unix_listener = true;
|
||||
|
||||
let mut config_rx_unix: watch::Receiver<Arc<ProxyConfig>> = config_rx.clone();
|
||||
let mut admission_rx_unix = admission_rx.clone();
|
||||
let stats = stats.clone();
|
||||
let upstream_manager = upstream_manager.clone();
|
||||
let replay_checker = replay_checker.clone();
|
||||
let buffer_pool = buffer_pool.clone();
|
||||
let rng = rng.clone();
|
||||
let me_pool = me_pool.clone();
|
||||
let route_runtime = route_runtime.clone();
|
||||
let tls_cache = tls_cache.clone();
|
||||
let ip_tracker = ip_tracker.clone();
|
||||
let beobachten = beobachten.clone();
|
||||
let max_connections_unix = max_connections.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let unix_conn_counter = Arc::new(std::sync::atomic::AtomicU64::new(1));
|
||||
|
||||
loop {
|
||||
if !wait_until_admission_open(&mut admission_rx_unix).await {
|
||||
warn!("Conditional-admission gate channel closed for unix listener");
|
||||
break;
|
||||
}
|
||||
match unix_listener.accept().await {
|
||||
Ok((stream, _)) => {
|
||||
let permit = match max_connections_unix.clone().acquire_owned().await {
|
||||
Ok(permit) => permit,
|
||||
Err(_) => {
|
||||
error!("Connection limiter is closed");
|
||||
break;
|
||||
}
|
||||
};
|
||||
let conn_id =
|
||||
unix_conn_counter.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
let fake_peer =
|
||||
SocketAddr::from(([127, 0, 0, 1], (conn_id % 65535) as u16));
|
||||
|
||||
let config = config_rx_unix.borrow_and_update().clone();
|
||||
let stats = stats.clone();
|
||||
let upstream_manager = upstream_manager.clone();
|
||||
let replay_checker = replay_checker.clone();
|
||||
let buffer_pool = buffer_pool.clone();
|
||||
let rng = rng.clone();
|
||||
let me_pool = me_pool.clone();
|
||||
let route_runtime = route_runtime.clone();
|
||||
let tls_cache = tls_cache.clone();
|
||||
let ip_tracker = ip_tracker.clone();
|
||||
let beobachten = beobachten.clone();
|
||||
let proxy_protocol_enabled = config.server.proxy_protocol;
|
||||
|
||||
tokio::spawn(async move {
|
||||
let _permit = permit;
|
||||
if let Err(e) = crate::proxy::client::handle_client_stream(
|
||||
stream,
|
||||
fake_peer,
|
||||
config,
|
||||
stats,
|
||||
upstream_manager,
|
||||
replay_checker,
|
||||
buffer_pool,
|
||||
rng,
|
||||
me_pool,
|
||||
route_runtime,
|
||||
tls_cache,
|
||||
ip_tracker,
|
||||
beobachten,
|
||||
proxy_protocol_enabled,
|
||||
)
|
||||
.await
|
||||
{
|
||||
debug!(error = %e, "Unix socket connection error");
|
||||
}
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Unix socket accept error: {}", e);
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_LISTENERS_BIND,
|
||||
Some(format!(
|
||||
"listeners configured tcp={} unix={}",
|
||||
listeners.len(),
|
||||
has_unix_listener
|
||||
)),
|
||||
)
|
||||
.await;
|
||||
|
||||
Ok(BoundListeners {
|
||||
listeners,
|
||||
has_unix_listener,
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn spawn_tcp_accept_loops(
|
||||
listeners: Vec<(TcpListener, bool)>,
|
||||
config_rx: watch::Receiver<Arc<ProxyConfig>>,
|
||||
admission_rx: watch::Receiver<bool>,
|
||||
stats: Arc<Stats>,
|
||||
upstream_manager: Arc<UpstreamManager>,
|
||||
replay_checker: Arc<ReplayChecker>,
|
||||
buffer_pool: Arc<BufferPool>,
|
||||
rng: Arc<SecureRandom>,
|
||||
me_pool: Option<Arc<MePool>>,
|
||||
route_runtime: Arc<RouteRuntimeController>,
|
||||
tls_cache: Option<Arc<TlsFrontCache>>,
|
||||
ip_tracker: Arc<UserIpTracker>,
|
||||
beobachten: Arc<BeobachtenStore>,
|
||||
max_connections: Arc<Semaphore>,
|
||||
) {
|
||||
for (listener, listener_proxy_protocol) in listeners {
|
||||
let mut config_rx: watch::Receiver<Arc<ProxyConfig>> = config_rx.clone();
|
||||
let mut admission_rx_tcp = admission_rx.clone();
|
||||
let stats = stats.clone();
|
||||
let upstream_manager = upstream_manager.clone();
|
||||
let replay_checker = replay_checker.clone();
|
||||
let buffer_pool = buffer_pool.clone();
|
||||
let rng = rng.clone();
|
||||
let me_pool = me_pool.clone();
|
||||
let route_runtime = route_runtime.clone();
|
||||
let tls_cache = tls_cache.clone();
|
||||
let ip_tracker = ip_tracker.clone();
|
||||
let beobachten = beobachten.clone();
|
||||
let max_connections_tcp = max_connections.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
if !wait_until_admission_open(&mut admission_rx_tcp).await {
|
||||
warn!("Conditional-admission gate channel closed for tcp listener");
|
||||
break;
|
||||
}
|
||||
match listener.accept().await {
|
||||
Ok((stream, peer_addr)) => {
|
||||
let permit = match max_connections_tcp.clone().acquire_owned().await {
|
||||
Ok(permit) => permit,
|
||||
Err(_) => {
|
||||
error!("Connection limiter is closed");
|
||||
break;
|
||||
}
|
||||
};
|
||||
let config = config_rx.borrow_and_update().clone();
|
||||
let stats = stats.clone();
|
||||
let upstream_manager = upstream_manager.clone();
|
||||
let replay_checker = replay_checker.clone();
|
||||
let buffer_pool = buffer_pool.clone();
|
||||
let rng = rng.clone();
|
||||
let me_pool = me_pool.clone();
|
||||
let route_runtime = route_runtime.clone();
|
||||
let tls_cache = tls_cache.clone();
|
||||
let ip_tracker = ip_tracker.clone();
|
||||
let beobachten = beobachten.clone();
|
||||
let proxy_protocol_enabled = listener_proxy_protocol;
|
||||
let real_peer_report = Arc::new(std::sync::Mutex::new(None));
|
||||
let real_peer_report_for_handler = real_peer_report.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let _permit = permit;
|
||||
if let Err(e) = ClientHandler::new(
|
||||
stream,
|
||||
peer_addr,
|
||||
config,
|
||||
stats,
|
||||
upstream_manager,
|
||||
replay_checker,
|
||||
buffer_pool,
|
||||
rng,
|
||||
me_pool,
|
||||
route_runtime,
|
||||
tls_cache,
|
||||
ip_tracker,
|
||||
beobachten,
|
||||
proxy_protocol_enabled,
|
||||
real_peer_report_for_handler,
|
||||
)
|
||||
.run()
|
||||
.await
|
||||
{
|
||||
let real_peer = match real_peer_report.lock() {
|
||||
Ok(guard) => *guard,
|
||||
Err(_) => None,
|
||||
};
|
||||
let peer_closed = matches!(
|
||||
&e,
|
||||
crate::error::ProxyError::Io(ioe)
|
||||
if matches!(
|
||||
ioe.kind(),
|
||||
std::io::ErrorKind::ConnectionReset
|
||||
| std::io::ErrorKind::ConnectionAborted
|
||||
| std::io::ErrorKind::BrokenPipe
|
||||
| std::io::ErrorKind::NotConnected
|
||||
)
|
||||
) || matches!(
|
||||
&e,
|
||||
crate::error::ProxyError::Stream(
|
||||
crate::error::StreamError::Io(ioe)
|
||||
)
|
||||
if matches!(
|
||||
ioe.kind(),
|
||||
std::io::ErrorKind::ConnectionReset
|
||||
| std::io::ErrorKind::ConnectionAborted
|
||||
| std::io::ErrorKind::BrokenPipe
|
||||
| std::io::ErrorKind::NotConnected
|
||||
)
|
||||
);
|
||||
|
||||
let me_closed = matches!(
|
||||
&e,
|
||||
crate::error::ProxyError::Proxy(msg) if msg == "ME connection lost"
|
||||
);
|
||||
let route_switched = matches!(
|
||||
&e,
|
||||
crate::error::ProxyError::Proxy(msg) if msg == ROUTE_SWITCH_ERROR_MSG
|
||||
);
|
||||
|
||||
match (peer_closed, me_closed) {
|
||||
(true, _) => {
|
||||
if let Some(real_peer) = real_peer {
|
||||
debug!(peer = %peer_addr, real_peer = %real_peer, error = %e, "Connection closed by client");
|
||||
} else {
|
||||
debug!(peer = %peer_addr, error = %e, "Connection closed by client");
|
||||
}
|
||||
}
|
||||
(_, true) => {
|
||||
if let Some(real_peer) = real_peer {
|
||||
warn!(peer = %peer_addr, real_peer = %real_peer, error = %e, "Connection closed: Middle-End dropped session");
|
||||
} else {
|
||||
warn!(peer = %peer_addr, error = %e, "Connection closed: Middle-End dropped session");
|
||||
}
|
||||
}
|
||||
_ if route_switched => {
|
||||
if let Some(real_peer) = real_peer {
|
||||
info!(peer = %peer_addr, real_peer = %real_peer, error = %e, "Connection closed by controlled route cutover");
|
||||
} else {
|
||||
info!(peer = %peer_addr, error = %e, "Connection closed by controlled route cutover");
|
||||
}
|
||||
}
|
||||
_ if is_expected_handshake_eof(&e) => {
|
||||
if let Some(real_peer) = real_peer {
|
||||
info!(peer = %peer_addr, real_peer = %real_peer, error = %e, "Connection closed during initial handshake");
|
||||
} else {
|
||||
info!(peer = %peer_addr, error = %e, "Connection closed during initial handshake");
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if let Some(real_peer) = real_peer {
|
||||
warn!(peer = %peer_addr, real_peer = %real_peer, error = %e, "Connection closed with error");
|
||||
} else {
|
||||
warn!(peer = %peer_addr, error = %e, "Connection closed with error");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Accept error: {}", e);
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
515
src/maestro/me_startup.rs
Normal file
515
src/maestro/me_startup.rs
Normal file
@@ -0,0 +1,515 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::network::probe::{NetworkDecision, NetworkProbe};
|
||||
use crate::startup::{
|
||||
COMPONENT_ME_POOL_CONSTRUCT, COMPONENT_ME_POOL_INIT_STAGE1, COMPONENT_ME_PROXY_CONFIG_V4,
|
||||
COMPONENT_ME_PROXY_CONFIG_V6, COMPONENT_ME_SECRET_FETCH, StartupMeStatus, StartupTracker,
|
||||
};
|
||||
use crate::stats::Stats;
|
||||
use crate::transport::middle_proxy::MePool;
|
||||
use crate::transport::UpstreamManager;
|
||||
|
||||
use super::helpers::load_startup_proxy_config_snapshot;
|
||||
|
||||
pub(crate) async fn initialize_me_pool(
|
||||
use_middle_proxy: bool,
|
||||
config: &ProxyConfig,
|
||||
decision: &NetworkDecision,
|
||||
probe: &NetworkProbe,
|
||||
startup_tracker: &Arc<StartupTracker>,
|
||||
upstream_manager: Arc<UpstreamManager>,
|
||||
rng: Arc<SecureRandom>,
|
||||
stats: Arc<Stats>,
|
||||
api_me_pool: Arc<RwLock<Option<Arc<MePool>>>>,
|
||||
) -> Option<Arc<MePool>> {
|
||||
if !use_middle_proxy {
|
||||
return None;
|
||||
}
|
||||
|
||||
info!("=== Middle Proxy Mode ===");
|
||||
let me_nat_probe = config.general.middle_proxy_nat_probe && config.network.stun_use;
|
||||
if config.general.middle_proxy_nat_probe && !config.network.stun_use {
|
||||
info!("Middle-proxy STUN probing disabled by network.stun_use=false");
|
||||
}
|
||||
|
||||
let me2dc_fallback = config.general.me2dc_fallback;
|
||||
let me_init_retry_attempts = config.general.me_init_retry_attempts;
|
||||
let me_init_warn_after_attempts: u32 = 3;
|
||||
|
||||
// Global ad_tag (pool default). Used when user has no per-user tag in access.user_ad_tags.
|
||||
let proxy_tag = config
|
||||
.general
|
||||
.ad_tag
|
||||
.as_ref()
|
||||
.map(|tag| hex::decode(tag).expect("general.ad_tag must be validated before startup"));
|
||||
|
||||
// =============================================================
|
||||
// CRITICAL: Download Telegram proxy-secret (NOT user secret!)
|
||||
//
|
||||
// C MTProxy uses TWO separate secrets:
|
||||
// -S flag = 16-byte user secret for client obfuscation
|
||||
// --aes-pwd = 32-512 byte binary file for ME RPC auth
|
||||
//
|
||||
// proxy-secret is from: https://core.telegram.org/getProxySecret
|
||||
// =============================================================
|
||||
let proxy_secret_path = config.general.proxy_secret_path.as_deref();
|
||||
let pool_size = config.general.middle_proxy_pool_size.max(1);
|
||||
let proxy_secret = loop {
|
||||
match crate::transport::middle_proxy::fetch_proxy_secret(
|
||||
proxy_secret_path,
|
||||
config.general.proxy_secret_len_max,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(proxy_secret) => break Some(proxy_secret),
|
||||
Err(e) => {
|
||||
startup_tracker.set_me_last_error(Some(e.to_string())).await;
|
||||
if me2dc_fallback {
|
||||
error!(
|
||||
error = %e,
|
||||
"ME startup failed: proxy-secret is unavailable and no saved secret found; falling back to direct mode"
|
||||
);
|
||||
break None;
|
||||
}
|
||||
|
||||
warn!(
|
||||
error = %e,
|
||||
retry_in_secs = 2,
|
||||
"ME startup failed: proxy-secret is unavailable and no saved secret found; retrying because me2dc_fallback=false"
|
||||
);
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
}
|
||||
}
|
||||
};
|
||||
match proxy_secret {
|
||||
Some(proxy_secret) => {
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_ME_SECRET_FETCH,
|
||||
Some("proxy-secret loaded".to_string()),
|
||||
)
|
||||
.await;
|
||||
info!(
|
||||
secret_len = proxy_secret.len(),
|
||||
key_sig = format_args!(
|
||||
"0x{:08x}",
|
||||
if proxy_secret.len() >= 4 {
|
||||
u32::from_le_bytes([
|
||||
proxy_secret[0],
|
||||
proxy_secret[1],
|
||||
proxy_secret[2],
|
||||
proxy_secret[3],
|
||||
])
|
||||
} else {
|
||||
0
|
||||
}
|
||||
),
|
||||
"Proxy-secret loaded"
|
||||
);
|
||||
|
||||
startup_tracker
|
||||
.start_component(
|
||||
COMPONENT_ME_PROXY_CONFIG_V4,
|
||||
Some("load startup proxy-config v4".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Initializing, COMPONENT_ME_PROXY_CONFIG_V4)
|
||||
.await;
|
||||
let cfg_v4 = load_startup_proxy_config_snapshot(
|
||||
"https://core.telegram.org/getProxyConfig",
|
||||
config.general.proxy_config_v4_cache_path.as_deref(),
|
||||
me2dc_fallback,
|
||||
"getProxyConfig",
|
||||
)
|
||||
.await;
|
||||
if cfg_v4.is_some() {
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_ME_PROXY_CONFIG_V4,
|
||||
Some("proxy-config v4 loaded".to_string()),
|
||||
)
|
||||
.await;
|
||||
} else {
|
||||
startup_tracker
|
||||
.fail_component(
|
||||
COMPONENT_ME_PROXY_CONFIG_V4,
|
||||
Some("proxy-config v4 unavailable".to_string()),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
startup_tracker
|
||||
.start_component(
|
||||
COMPONENT_ME_PROXY_CONFIG_V6,
|
||||
Some("load startup proxy-config v6".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Initializing, COMPONENT_ME_PROXY_CONFIG_V6)
|
||||
.await;
|
||||
let cfg_v6 = load_startup_proxy_config_snapshot(
|
||||
"https://core.telegram.org/getProxyConfigV6",
|
||||
config.general.proxy_config_v6_cache_path.as_deref(),
|
||||
me2dc_fallback,
|
||||
"getProxyConfigV6",
|
||||
)
|
||||
.await;
|
||||
if cfg_v6.is_some() {
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_ME_PROXY_CONFIG_V6,
|
||||
Some("proxy-config v6 loaded".to_string()),
|
||||
)
|
||||
.await;
|
||||
} else {
|
||||
startup_tracker
|
||||
.fail_component(
|
||||
COMPONENT_ME_PROXY_CONFIG_V6,
|
||||
Some("proxy-config v6 unavailable".to_string()),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
if let (Some(cfg_v4), Some(cfg_v6)) = (cfg_v4, cfg_v6) {
|
||||
startup_tracker
|
||||
.start_component(
|
||||
COMPONENT_ME_POOL_CONSTRUCT,
|
||||
Some("construct ME pool".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Initializing, COMPONENT_ME_POOL_CONSTRUCT)
|
||||
.await;
|
||||
let pool = MePool::new(
|
||||
proxy_tag.clone(),
|
||||
proxy_secret,
|
||||
config.general.middle_proxy_nat_ip,
|
||||
me_nat_probe,
|
||||
None,
|
||||
config.network.stun_servers.clone(),
|
||||
config.general.stun_nat_probe_concurrency,
|
||||
probe.detected_ipv6,
|
||||
config.timeouts.me_one_retry,
|
||||
config.timeouts.me_one_timeout_ms,
|
||||
cfg_v4.map.clone(),
|
||||
cfg_v6.map.clone(),
|
||||
cfg_v4.default_dc.or(cfg_v6.default_dc),
|
||||
decision.clone(),
|
||||
Some(upstream_manager.clone()),
|
||||
rng.clone(),
|
||||
stats.clone(),
|
||||
config.general.me_keepalive_enabled,
|
||||
config.general.me_keepalive_interval_secs,
|
||||
config.general.me_keepalive_jitter_secs,
|
||||
config.general.me_keepalive_payload_random,
|
||||
config.general.rpc_proxy_req_every,
|
||||
config.general.me_warmup_stagger_enabled,
|
||||
config.general.me_warmup_step_delay_ms,
|
||||
config.general.me_warmup_step_jitter_ms,
|
||||
config.general.me_reconnect_max_concurrent_per_dc,
|
||||
config.general.me_reconnect_backoff_base_ms,
|
||||
config.general.me_reconnect_backoff_cap_ms,
|
||||
config.general.me_reconnect_fast_retry_count,
|
||||
config.general.me_single_endpoint_shadow_writers,
|
||||
config.general.me_single_endpoint_outage_mode_enabled,
|
||||
config.general.me_single_endpoint_outage_disable_quarantine,
|
||||
config.general.me_single_endpoint_outage_backoff_min_ms,
|
||||
config.general.me_single_endpoint_outage_backoff_max_ms,
|
||||
config.general.me_single_endpoint_shadow_rotate_every_secs,
|
||||
config.general.me_floor_mode,
|
||||
config.general.me_adaptive_floor_idle_secs,
|
||||
config.general.me_adaptive_floor_min_writers_single_endpoint,
|
||||
config.general.me_adaptive_floor_min_writers_multi_endpoint,
|
||||
config.general.me_adaptive_floor_recover_grace_secs,
|
||||
config.general.me_adaptive_floor_writers_per_core_total,
|
||||
config.general.me_adaptive_floor_cpu_cores_override,
|
||||
config.general.me_adaptive_floor_max_extra_writers_single_per_core,
|
||||
config.general.me_adaptive_floor_max_extra_writers_multi_per_core,
|
||||
config.general.me_adaptive_floor_max_active_writers_per_core,
|
||||
config.general.me_adaptive_floor_max_warm_writers_per_core,
|
||||
config.general.me_adaptive_floor_max_active_writers_global,
|
||||
config.general.me_adaptive_floor_max_warm_writers_global,
|
||||
config.general.hardswap,
|
||||
config.general.me_pool_drain_ttl_secs,
|
||||
config.general.effective_me_pool_force_close_secs(),
|
||||
config.general.me_pool_min_fresh_ratio,
|
||||
config.general.me_hardswap_warmup_delay_min_ms,
|
||||
config.general.me_hardswap_warmup_delay_max_ms,
|
||||
config.general.me_hardswap_warmup_extra_passes,
|
||||
config.general.me_hardswap_warmup_pass_backoff_base_ms,
|
||||
config.general.me_bind_stale_mode,
|
||||
config.general.me_bind_stale_ttl_secs,
|
||||
config.general.me_secret_atomic_snapshot,
|
||||
config.general.me_deterministic_writer_sort,
|
||||
config.general.me_writer_pick_mode,
|
||||
config.general.me_writer_pick_sample_size,
|
||||
config.general.me_socks_kdf_policy,
|
||||
config.general.me_writer_cmd_channel_capacity,
|
||||
config.general.me_route_channel_capacity,
|
||||
config.general.me_route_backpressure_base_timeout_ms,
|
||||
config.general.me_route_backpressure_high_timeout_ms,
|
||||
config.general.me_route_backpressure_high_watermark_pct,
|
||||
config.general.me_reader_route_data_wait_ms,
|
||||
config.general.me_health_interval_ms_unhealthy,
|
||||
config.general.me_health_interval_ms_healthy,
|
||||
config.general.me_warn_rate_limit_ms,
|
||||
config.general.me_route_no_writer_mode,
|
||||
config.general.me_route_no_writer_wait_ms,
|
||||
config.general.me_route_inline_recovery_attempts,
|
||||
config.general.me_route_inline_recovery_wait_ms,
|
||||
);
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_ME_POOL_CONSTRUCT,
|
||||
Some("ME pool object created".to_string()),
|
||||
)
|
||||
.await;
|
||||
*api_me_pool.write().await = Some(pool.clone());
|
||||
startup_tracker
|
||||
.start_component(
|
||||
COMPONENT_ME_POOL_INIT_STAGE1,
|
||||
Some("initialize ME pool writers".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Initializing, COMPONENT_ME_POOL_INIT_STAGE1)
|
||||
.await;
|
||||
|
||||
if me2dc_fallback {
|
||||
let pool_bg = pool.clone();
|
||||
let rng_bg = rng.clone();
|
||||
let startup_tracker_bg = startup_tracker.clone();
|
||||
let retry_limit = if me_init_retry_attempts == 0 {
|
||||
String::from("unlimited")
|
||||
} else {
|
||||
me_init_retry_attempts.to_string()
|
||||
};
|
||||
std::thread::spawn(move || {
|
||||
let runtime = match tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
{
|
||||
Ok(runtime) => runtime,
|
||||
Err(error) => {
|
||||
error!(error = %error, "Failed to build background runtime for ME initialization");
|
||||
return;
|
||||
}
|
||||
};
|
||||
runtime.block_on(async move {
|
||||
let mut init_attempt: u32 = 0;
|
||||
loop {
|
||||
init_attempt = init_attempt.saturating_add(1);
|
||||
startup_tracker_bg.set_me_init_attempt(init_attempt).await;
|
||||
match pool_bg.init(pool_size, &rng_bg).await {
|
||||
Ok(()) => {
|
||||
startup_tracker_bg.set_me_last_error(None).await;
|
||||
startup_tracker_bg
|
||||
.complete_component(
|
||||
COMPONENT_ME_POOL_INIT_STAGE1,
|
||||
Some("ME pool initialized".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker_bg
|
||||
.set_me_status(StartupMeStatus::Ready, "ready")
|
||||
.await;
|
||||
info!(
|
||||
attempt = init_attempt,
|
||||
"Middle-End pool initialized successfully"
|
||||
);
|
||||
|
||||
let pool_health = pool_bg.clone();
|
||||
let rng_health = rng_bg.clone();
|
||||
let min_conns = pool_size;
|
||||
tokio::spawn(async move {
|
||||
crate::transport::middle_proxy::me_health_monitor(
|
||||
pool_health,
|
||||
rng_health,
|
||||
min_conns,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
startup_tracker_bg.set_me_last_error(Some(e.to_string())).await;
|
||||
if init_attempt >= me_init_warn_after_attempts {
|
||||
warn!(
|
||||
error = %e,
|
||||
attempt = init_attempt,
|
||||
retry_limit = %retry_limit,
|
||||
retry_in_secs = 2,
|
||||
"ME pool is not ready yet; retrying background initialization"
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
error = %e,
|
||||
attempt = init_attempt,
|
||||
retry_limit = %retry_limit,
|
||||
retry_in_secs = 2,
|
||||
"ME pool startup warmup: retrying background initialization"
|
||||
);
|
||||
}
|
||||
pool_bg.reset_stun_state();
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Initializing, "background_init")
|
||||
.await;
|
||||
info!(
|
||||
startup_grace_secs = 80,
|
||||
"ME pool initialization continues in background; startup continues with conditional Direct fallback"
|
||||
);
|
||||
Some(pool)
|
||||
} else {
|
||||
let mut init_attempt: u32 = 0;
|
||||
loop {
|
||||
init_attempt = init_attempt.saturating_add(1);
|
||||
startup_tracker.set_me_init_attempt(init_attempt).await;
|
||||
match pool.init(pool_size, &rng).await {
|
||||
Ok(()) => {
|
||||
startup_tracker.set_me_last_error(None).await;
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_ME_POOL_INIT_STAGE1,
|
||||
Some("ME pool initialized".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Ready, "ready")
|
||||
.await;
|
||||
info!(
|
||||
attempt = init_attempt,
|
||||
"Middle-End pool initialized successfully"
|
||||
);
|
||||
|
||||
let pool_clone = pool.clone();
|
||||
let rng_clone = rng.clone();
|
||||
let min_conns = pool_size;
|
||||
tokio::spawn(async move {
|
||||
crate::transport::middle_proxy::me_health_monitor(
|
||||
pool_clone, rng_clone, min_conns,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
|
||||
break Some(pool);
|
||||
}
|
||||
Err(e) => {
|
||||
startup_tracker.set_me_last_error(Some(e.to_string())).await;
|
||||
let retries_limited = me_init_retry_attempts > 0;
|
||||
if retries_limited && init_attempt >= me_init_retry_attempts {
|
||||
startup_tracker
|
||||
.fail_component(
|
||||
COMPONENT_ME_POOL_INIT_STAGE1,
|
||||
Some("ME init retry budget exhausted".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Failed, "failed")
|
||||
.await;
|
||||
error!(
|
||||
error = %e,
|
||||
attempt = init_attempt,
|
||||
retry_limit = me_init_retry_attempts,
|
||||
"ME pool init retries exhausted; startup cannot continue in middle-proxy mode"
|
||||
);
|
||||
break None;
|
||||
}
|
||||
|
||||
let retry_limit = if me_init_retry_attempts == 0 {
|
||||
String::from("unlimited")
|
||||
} else {
|
||||
me_init_retry_attempts.to_string()
|
||||
};
|
||||
if init_attempt >= me_init_warn_after_attempts {
|
||||
warn!(
|
||||
error = %e,
|
||||
attempt = init_attempt,
|
||||
retry_limit = retry_limit,
|
||||
me2dc_fallback = me2dc_fallback,
|
||||
retry_in_secs = 2,
|
||||
"ME pool is not ready yet; retrying startup initialization"
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
error = %e,
|
||||
attempt = init_attempt,
|
||||
retry_limit = retry_limit,
|
||||
me2dc_fallback = me2dc_fallback,
|
||||
retry_in_secs = 2,
|
||||
"ME pool startup warmup: retrying initialization"
|
||||
);
|
||||
}
|
||||
pool.reset_stun_state();
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_ME_POOL_CONSTRUCT,
|
||||
Some("ME configs are incomplete".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.fail_component(
|
||||
COMPONENT_ME_POOL_INIT_STAGE1,
|
||||
Some("ME configs are incomplete".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Failed, "failed")
|
||||
.await;
|
||||
None
|
||||
}
|
||||
}
|
||||
None => {
|
||||
startup_tracker
|
||||
.fail_component(
|
||||
COMPONENT_ME_SECRET_FETCH,
|
||||
Some("proxy-secret unavailable".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_ME_PROXY_CONFIG_V4,
|
||||
Some("proxy-secret unavailable".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_ME_PROXY_CONFIG_V6,
|
||||
Some("proxy-secret unavailable".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_ME_POOL_CONSTRUCT,
|
||||
Some("proxy-secret unavailable".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.fail_component(
|
||||
COMPONENT_ME_POOL_INIT_STAGE1,
|
||||
Some("proxy-secret unavailable".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Failed, "failed")
|
||||
.await;
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
553
src/maestro/mod.rs
Normal file
553
src/maestro/mod.rs
Normal file
@@ -0,0 +1,553 @@
|
||||
//! telemt — Telegram MTProto Proxy
|
||||
|
||||
#![allow(unused_assignments)]
|
||||
|
||||
// Runtime orchestration modules.
|
||||
// - helpers: CLI and shared startup/runtime helper routines.
|
||||
// - tls_bootstrap: TLS front cache bootstrap and refresh tasks.
|
||||
// - me_startup: Middle-End secret/config fetch and pool initialization.
|
||||
// - connectivity: startup ME/DC connectivity diagnostics.
|
||||
// - runtime_tasks: hot-reload and background task orchestration.
|
||||
// - admission: conditional-cast gate and route mode switching.
|
||||
// - listeners: TCP/Unix listener bind and accept-loop orchestration.
|
||||
// - shutdown: graceful shutdown sequence and uptime logging.
|
||||
mod helpers;
|
||||
mod admission;
|
||||
mod connectivity;
|
||||
mod listeners;
|
||||
mod me_startup;
|
||||
mod runtime_tasks;
|
||||
mod shutdown;
|
||||
mod tls_bootstrap;
|
||||
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
use tokio::sync::{RwLock, Semaphore, watch};
|
||||
use tracing::{error, info, warn};
|
||||
use tracing_subscriber::{EnvFilter, fmt, prelude::*, reload};
|
||||
|
||||
use crate::api;
|
||||
use crate::config::{LogLevel, ProxyConfig};
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::ip_tracker::UserIpTracker;
|
||||
use crate::network::probe::{decide_network_capabilities, log_probe_result, run_probe};
|
||||
use crate::proxy::route_mode::{RelayRouteMode, RouteRuntimeController};
|
||||
use crate::stats::beobachten::BeobachtenStore;
|
||||
use crate::stats::telemetry::TelemetryPolicy;
|
||||
use crate::stats::{ReplayChecker, Stats};
|
||||
use crate::startup::{
|
||||
COMPONENT_API_BOOTSTRAP, COMPONENT_CONFIG_LOAD,
|
||||
COMPONENT_ME_POOL_CONSTRUCT, COMPONENT_ME_POOL_INIT_STAGE1,
|
||||
COMPONENT_ME_PROXY_CONFIG_V4, COMPONENT_ME_PROXY_CONFIG_V6, COMPONENT_ME_SECRET_FETCH,
|
||||
COMPONENT_NETWORK_PROBE, COMPONENT_TRACING_INIT, StartupMeStatus, StartupTracker,
|
||||
};
|
||||
use crate::stream::BufferPool;
|
||||
use crate::transport::middle_proxy::MePool;
|
||||
use crate::transport::UpstreamManager;
|
||||
use helpers::parse_cli;
|
||||
|
||||
/// Runs the full telemt runtime startup pipeline and blocks until shutdown.
|
||||
pub async fn run() -> std::result::Result<(), Box<dyn std::error::Error>> {
|
||||
let process_started_at = Instant::now();
|
||||
let process_started_at_epoch_secs = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs();
|
||||
let startup_tracker = Arc::new(StartupTracker::new(process_started_at_epoch_secs));
|
||||
startup_tracker
|
||||
.start_component(COMPONENT_CONFIG_LOAD, Some("load and validate config".to_string()))
|
||||
.await;
|
||||
let (config_path, cli_silent, cli_log_level) = parse_cli();
|
||||
|
||||
let mut config = match ProxyConfig::load(&config_path) {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
if std::path::Path::new(&config_path).exists() {
|
||||
eprintln!("[telemt] Error: {}", e);
|
||||
std::process::exit(1);
|
||||
} else {
|
||||
let default = ProxyConfig::default();
|
||||
std::fs::write(&config_path, toml::to_string_pretty(&default).unwrap()).unwrap();
|
||||
eprintln!("[telemt] Created default config at {}", config_path);
|
||||
default
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = config.validate() {
|
||||
eprintln!("[telemt] Invalid config: {}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
if let Err(e) = crate::network::dns_overrides::install_entries(&config.network.dns_overrides) {
|
||||
eprintln!("[telemt] Invalid network.dns_overrides: {}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
startup_tracker
|
||||
.complete_component(COMPONENT_CONFIG_LOAD, Some("config is ready".to_string()))
|
||||
.await;
|
||||
|
||||
let has_rust_log = std::env::var("RUST_LOG").is_ok();
|
||||
let effective_log_level = if cli_silent {
|
||||
LogLevel::Silent
|
||||
} else if let Some(ref s) = cli_log_level {
|
||||
LogLevel::from_str_loose(s)
|
||||
} else {
|
||||
config.general.log_level.clone()
|
||||
};
|
||||
|
||||
let (filter_layer, filter_handle) = reload::Layer::new(EnvFilter::new("info"));
|
||||
startup_tracker
|
||||
.start_component(COMPONENT_TRACING_INIT, Some("initialize tracing subscriber".to_string()))
|
||||
.await;
|
||||
|
||||
// Configure color output based on config
|
||||
let fmt_layer = if config.general.disable_colors {
|
||||
fmt::Layer::default().with_ansi(false)
|
||||
} else {
|
||||
fmt::Layer::default().with_ansi(true)
|
||||
};
|
||||
|
||||
tracing_subscriber::registry()
|
||||
.with(filter_layer)
|
||||
.with(fmt_layer)
|
||||
.init();
|
||||
startup_tracker
|
||||
.complete_component(COMPONENT_TRACING_INIT, Some("tracing initialized".to_string()))
|
||||
.await;
|
||||
|
||||
info!("Telemt MTProxy v{}", env!("CARGO_PKG_VERSION"));
|
||||
info!("Log level: {}", effective_log_level);
|
||||
if config.general.disable_colors {
|
||||
info!("Colors: disabled");
|
||||
}
|
||||
info!(
|
||||
"Modes: classic={} secure={} tls={}",
|
||||
config.general.modes.classic, config.general.modes.secure, config.general.modes.tls
|
||||
);
|
||||
if config.general.modes.classic {
|
||||
warn!("Classic mode is vulnerable to DPI detection; enable only for legacy clients");
|
||||
}
|
||||
info!("TLS domain: {}", config.censorship.tls_domain);
|
||||
if let Some(ref sock) = config.censorship.mask_unix_sock {
|
||||
info!("Mask: {} -> unix:{}", config.censorship.mask, sock);
|
||||
if !std::path::Path::new(sock).exists() {
|
||||
warn!(
|
||||
"Unix socket '{}' does not exist yet. Masking will fail until it appears.",
|
||||
sock
|
||||
);
|
||||
}
|
||||
} else {
|
||||
info!(
|
||||
"Mask: {} -> {}:{}",
|
||||
config.censorship.mask,
|
||||
config
|
||||
.censorship
|
||||
.mask_host
|
||||
.as_deref()
|
||||
.unwrap_or(&config.censorship.tls_domain),
|
||||
config.censorship.mask_port
|
||||
);
|
||||
}
|
||||
|
||||
if config.censorship.tls_domain == "www.google.com" {
|
||||
warn!("Using default tls_domain. Consider setting a custom domain.");
|
||||
}
|
||||
|
||||
let stats = Arc::new(Stats::new());
|
||||
stats.apply_telemetry_policy(TelemetryPolicy::from_config(&config.general.telemetry));
|
||||
|
||||
let upstream_manager = Arc::new(UpstreamManager::new(
|
||||
config.upstreams.clone(),
|
||||
config.general.upstream_connect_retry_attempts,
|
||||
config.general.upstream_connect_retry_backoff_ms,
|
||||
config.general.upstream_connect_budget_ms,
|
||||
config.general.upstream_unhealthy_fail_threshold,
|
||||
config.general.upstream_connect_failfast_hard_errors,
|
||||
stats.clone(),
|
||||
));
|
||||
let ip_tracker = Arc::new(UserIpTracker::new());
|
||||
ip_tracker.load_limits(&config.access.user_max_unique_ips).await;
|
||||
ip_tracker
|
||||
.set_limit_policy(
|
||||
config.access.user_max_unique_ips_mode,
|
||||
config.access.user_max_unique_ips_window_secs,
|
||||
)
|
||||
.await;
|
||||
if !config.access.user_max_unique_ips.is_empty() {
|
||||
info!(
|
||||
"IP limits configured for {} users",
|
||||
config.access.user_max_unique_ips.len()
|
||||
);
|
||||
}
|
||||
if !config.network.dns_overrides.is_empty() {
|
||||
info!(
|
||||
"Runtime DNS overrides configured: {} entries",
|
||||
config.network.dns_overrides.len()
|
||||
);
|
||||
}
|
||||
|
||||
let (api_config_tx, api_config_rx) = watch::channel(Arc::new(config.clone()));
|
||||
let (detected_ips_tx, detected_ips_rx) = watch::channel((None::<IpAddr>, None::<IpAddr>));
|
||||
let initial_admission_open = !config.general.use_middle_proxy;
|
||||
let (admission_tx, admission_rx) = watch::channel(initial_admission_open);
|
||||
let initial_route_mode = if config.general.use_middle_proxy {
|
||||
RelayRouteMode::Middle
|
||||
} else {
|
||||
RelayRouteMode::Direct
|
||||
};
|
||||
let route_runtime = Arc::new(RouteRuntimeController::new(initial_route_mode));
|
||||
let api_me_pool = Arc::new(RwLock::new(None::<Arc<MePool>>));
|
||||
startup_tracker
|
||||
.start_component(COMPONENT_API_BOOTSTRAP, Some("spawn API listener task".to_string()))
|
||||
.await;
|
||||
|
||||
if config.server.api.enabled {
|
||||
let listen = match config.server.api.listen.parse::<SocketAddr>() {
|
||||
Ok(listen) => listen,
|
||||
Err(error) => {
|
||||
warn!(
|
||||
error = %error,
|
||||
listen = %config.server.api.listen,
|
||||
"Invalid server.api.listen; API is disabled"
|
||||
);
|
||||
SocketAddr::from(([127, 0, 0, 1], 0))
|
||||
}
|
||||
};
|
||||
if listen.port() != 0 {
|
||||
let stats_api = stats.clone();
|
||||
let ip_tracker_api = ip_tracker.clone();
|
||||
let me_pool_api = api_me_pool.clone();
|
||||
let upstream_manager_api = upstream_manager.clone();
|
||||
let route_runtime_api = route_runtime.clone();
|
||||
let config_rx_api = api_config_rx.clone();
|
||||
let admission_rx_api = admission_rx.clone();
|
||||
let config_path_api = std::path::PathBuf::from(&config_path);
|
||||
let startup_tracker_api = startup_tracker.clone();
|
||||
let detected_ips_rx_api = detected_ips_rx.clone();
|
||||
tokio::spawn(async move {
|
||||
api::serve(
|
||||
listen,
|
||||
stats_api,
|
||||
ip_tracker_api,
|
||||
me_pool_api,
|
||||
route_runtime_api,
|
||||
upstream_manager_api,
|
||||
config_rx_api,
|
||||
admission_rx_api,
|
||||
config_path_api,
|
||||
detected_ips_rx_api,
|
||||
process_started_at_epoch_secs,
|
||||
startup_tracker_api,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_API_BOOTSTRAP,
|
||||
Some(format!("api task spawned on {}", listen)),
|
||||
)
|
||||
.await;
|
||||
} else {
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_API_BOOTSTRAP,
|
||||
Some("server.api.listen has zero port".to_string()),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
} else {
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_API_BOOTSTRAP,
|
||||
Some("server.api.enabled is false".to_string()),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
let mut tls_domains = Vec::with_capacity(1 + config.censorship.tls_domains.len());
|
||||
tls_domains.push(config.censorship.tls_domain.clone());
|
||||
for d in &config.censorship.tls_domains {
|
||||
if !tls_domains.contains(d) {
|
||||
tls_domains.push(d.clone());
|
||||
}
|
||||
}
|
||||
|
||||
let tls_cache = tls_bootstrap::bootstrap_tls_front(
|
||||
&config,
|
||||
&tls_domains,
|
||||
upstream_manager.clone(),
|
||||
&startup_tracker,
|
||||
)
|
||||
.await;
|
||||
|
||||
startup_tracker
|
||||
.start_component(COMPONENT_NETWORK_PROBE, Some("probe network capabilities".to_string()))
|
||||
.await;
|
||||
let probe = run_probe(
|
||||
&config.network,
|
||||
config.general.middle_proxy_nat_probe,
|
||||
config.general.stun_nat_probe_concurrency,
|
||||
)
|
||||
.await?;
|
||||
detected_ips_tx.send_replace((
|
||||
probe.detected_ipv4.map(IpAddr::V4),
|
||||
probe.detected_ipv6.map(IpAddr::V6),
|
||||
));
|
||||
let decision = decide_network_capabilities(&config.network, &probe);
|
||||
log_probe_result(&probe, &decision);
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_NETWORK_PROBE,
|
||||
Some("network capabilities determined".to_string()),
|
||||
)
|
||||
.await;
|
||||
|
||||
let prefer_ipv6 = decision.prefer_ipv6();
|
||||
let mut use_middle_proxy = config.general.use_middle_proxy;
|
||||
let beobachten = Arc::new(BeobachtenStore::new());
|
||||
let rng = Arc::new(SecureRandom::new());
|
||||
|
||||
// Connection concurrency limit
|
||||
let max_connections = Arc::new(Semaphore::new(10_000));
|
||||
|
||||
let me2dc_fallback = config.general.me2dc_fallback;
|
||||
let me_init_retry_attempts = config.general.me_init_retry_attempts;
|
||||
if use_middle_proxy && !decision.ipv4_me && !decision.ipv6_me {
|
||||
if me2dc_fallback {
|
||||
warn!("No usable IP family for Middle Proxy detected; falling back to direct DC");
|
||||
use_middle_proxy = false;
|
||||
} else {
|
||||
warn!(
|
||||
"No usable IP family for Middle Proxy detected; me2dc_fallback=false, ME init retries stay active"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if use_middle_proxy {
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Initializing, COMPONENT_ME_SECRET_FETCH)
|
||||
.await;
|
||||
startup_tracker
|
||||
.start_component(
|
||||
COMPONENT_ME_SECRET_FETCH,
|
||||
Some("fetch proxy-secret from source/cache".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.set_me_retry_limit(if !me2dc_fallback || me_init_retry_attempts == 0 {
|
||||
"unlimited".to_string()
|
||||
} else {
|
||||
me_init_retry_attempts.to_string()
|
||||
})
|
||||
.await;
|
||||
} else {
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Skipped, "skipped")
|
||||
.await;
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_ME_SECRET_FETCH,
|
||||
Some("middle proxy mode disabled".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_ME_PROXY_CONFIG_V4,
|
||||
Some("middle proxy mode disabled".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_ME_PROXY_CONFIG_V6,
|
||||
Some("middle proxy mode disabled".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_ME_POOL_CONSTRUCT,
|
||||
Some("middle proxy mode disabled".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_ME_POOL_INIT_STAGE1,
|
||||
Some("middle proxy mode disabled".to_string()),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
let me_pool: Option<Arc<MePool>> = me_startup::initialize_me_pool(
|
||||
use_middle_proxy,
|
||||
&config,
|
||||
&decision,
|
||||
&probe,
|
||||
&startup_tracker,
|
||||
upstream_manager.clone(),
|
||||
rng.clone(),
|
||||
stats.clone(),
|
||||
api_me_pool.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
// If ME failed to initialize, force direct-only mode.
|
||||
if me_pool.is_some() {
|
||||
startup_tracker
|
||||
.set_transport_mode("middle_proxy")
|
||||
.await;
|
||||
startup_tracker
|
||||
.set_degraded(false)
|
||||
.await;
|
||||
info!("Transport: Middle-End Proxy - all DC-over-RPC");
|
||||
} else {
|
||||
let _ = use_middle_proxy;
|
||||
use_middle_proxy = false;
|
||||
// Make runtime config reflect direct-only mode for handlers.
|
||||
config.general.use_middle_proxy = false;
|
||||
startup_tracker
|
||||
.set_transport_mode("direct")
|
||||
.await;
|
||||
startup_tracker
|
||||
.set_degraded(true)
|
||||
.await;
|
||||
if me2dc_fallback {
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Failed, "fallback_to_direct")
|
||||
.await;
|
||||
} else {
|
||||
startup_tracker
|
||||
.set_me_status(StartupMeStatus::Skipped, "skipped")
|
||||
.await;
|
||||
}
|
||||
info!("Transport: Direct DC - TCP - standard DC-over-TCP");
|
||||
}
|
||||
|
||||
// Freeze config after possible fallback decision
|
||||
let config = Arc::new(config);
|
||||
|
||||
let replay_checker = Arc::new(ReplayChecker::new(
|
||||
config.access.replay_check_len,
|
||||
Duration::from_secs(config.access.replay_window_secs),
|
||||
));
|
||||
|
||||
let buffer_pool = Arc::new(BufferPool::with_config(16 * 1024, 4096));
|
||||
|
||||
connectivity::run_startup_connectivity(
|
||||
&config,
|
||||
&me_pool,
|
||||
rng.clone(),
|
||||
&startup_tracker,
|
||||
upstream_manager.clone(),
|
||||
prefer_ipv6,
|
||||
&decision,
|
||||
process_started_at,
|
||||
api_me_pool.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
let runtime_watches = runtime_tasks::spawn_runtime_tasks(
|
||||
&config,
|
||||
&config_path,
|
||||
&probe,
|
||||
prefer_ipv6,
|
||||
decision.ipv4_dc,
|
||||
decision.ipv6_dc,
|
||||
&startup_tracker,
|
||||
stats.clone(),
|
||||
upstream_manager.clone(),
|
||||
replay_checker.clone(),
|
||||
me_pool.clone(),
|
||||
rng.clone(),
|
||||
ip_tracker.clone(),
|
||||
beobachten.clone(),
|
||||
api_config_tx.clone(),
|
||||
me_pool.clone(),
|
||||
)
|
||||
.await;
|
||||
let config_rx = runtime_watches.config_rx;
|
||||
let log_level_rx = runtime_watches.log_level_rx;
|
||||
let detected_ip_v4 = runtime_watches.detected_ip_v4;
|
||||
let detected_ip_v6 = runtime_watches.detected_ip_v6;
|
||||
|
||||
admission::configure_admission_gate(
|
||||
&config,
|
||||
me_pool.clone(),
|
||||
route_runtime.clone(),
|
||||
&admission_tx,
|
||||
config_rx.clone(),
|
||||
)
|
||||
.await;
|
||||
let _admission_tx_hold = admission_tx;
|
||||
|
||||
let bound = listeners::bind_listeners(
|
||||
&config,
|
||||
decision.ipv4_dc,
|
||||
decision.ipv6_dc,
|
||||
detected_ip_v4,
|
||||
detected_ip_v6,
|
||||
&startup_tracker,
|
||||
config_rx.clone(),
|
||||
admission_rx.clone(),
|
||||
stats.clone(),
|
||||
upstream_manager.clone(),
|
||||
replay_checker.clone(),
|
||||
buffer_pool.clone(),
|
||||
rng.clone(),
|
||||
me_pool.clone(),
|
||||
route_runtime.clone(),
|
||||
tls_cache.clone(),
|
||||
ip_tracker.clone(),
|
||||
beobachten.clone(),
|
||||
max_connections.clone(),
|
||||
)
|
||||
.await?;
|
||||
let listeners = bound.listeners;
|
||||
let has_unix_listener = bound.has_unix_listener;
|
||||
|
||||
if listeners.is_empty() && !has_unix_listener {
|
||||
error!("No listeners. Exiting.");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
runtime_tasks::apply_runtime_log_filter(
|
||||
has_rust_log,
|
||||
&effective_log_level,
|
||||
filter_handle,
|
||||
log_level_rx,
|
||||
)
|
||||
.await;
|
||||
|
||||
runtime_tasks::spawn_metrics_if_configured(
|
||||
&config,
|
||||
&startup_tracker,
|
||||
stats.clone(),
|
||||
beobachten.clone(),
|
||||
ip_tracker.clone(),
|
||||
config_rx.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
runtime_tasks::mark_runtime_ready(&startup_tracker).await;
|
||||
|
||||
listeners::spawn_tcp_accept_loops(
|
||||
listeners,
|
||||
config_rx.clone(),
|
||||
admission_rx.clone(),
|
||||
stats.clone(),
|
||||
upstream_manager.clone(),
|
||||
replay_checker.clone(),
|
||||
buffer_pool.clone(),
|
||||
rng.clone(),
|
||||
me_pool.clone(),
|
||||
route_runtime.clone(),
|
||||
tls_cache.clone(),
|
||||
ip_tracker.clone(),
|
||||
beobachten.clone(),
|
||||
max_connections.clone(),
|
||||
);
|
||||
|
||||
shutdown::wait_for_shutdown(process_started_at, me_pool).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
317
src/maestro/runtime_tasks.rs
Normal file
317
src/maestro/runtime_tasks.rs
Normal file
@@ -0,0 +1,317 @@
|
||||
use std::net::IpAddr;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use tokio::sync::{mpsc, watch};
|
||||
use tracing::{debug, warn};
|
||||
use tracing_subscriber::reload;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use crate::config::{LogLevel, ProxyConfig};
|
||||
use crate::config::hot_reload::spawn_config_watcher;
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::ip_tracker::UserIpTracker;
|
||||
use crate::metrics;
|
||||
use crate::network::probe::NetworkProbe;
|
||||
use crate::startup::{COMPONENT_CONFIG_WATCHER_START, COMPONENT_METRICS_START, COMPONENT_RUNTIME_READY, StartupTracker};
|
||||
use crate::stats::beobachten::BeobachtenStore;
|
||||
use crate::stats::telemetry::TelemetryPolicy;
|
||||
use crate::stats::{ReplayChecker, Stats};
|
||||
use crate::transport::middle_proxy::{MePool, MeReinitTrigger};
|
||||
use crate::transport::UpstreamManager;
|
||||
|
||||
use super::helpers::write_beobachten_snapshot;
|
||||
|
||||
pub(crate) struct RuntimeWatches {
|
||||
pub(crate) config_rx: watch::Receiver<Arc<ProxyConfig>>,
|
||||
pub(crate) log_level_rx: watch::Receiver<LogLevel>,
|
||||
pub(crate) detected_ip_v4: Option<IpAddr>,
|
||||
pub(crate) detected_ip_v6: Option<IpAddr>,
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) async fn spawn_runtime_tasks(
|
||||
config: &Arc<ProxyConfig>,
|
||||
config_path: &str,
|
||||
probe: &NetworkProbe,
|
||||
prefer_ipv6: bool,
|
||||
decision_ipv4_dc: bool,
|
||||
decision_ipv6_dc: bool,
|
||||
startup_tracker: &Arc<StartupTracker>,
|
||||
stats: Arc<Stats>,
|
||||
upstream_manager: Arc<UpstreamManager>,
|
||||
replay_checker: Arc<ReplayChecker>,
|
||||
me_pool: Option<Arc<MePool>>,
|
||||
rng: Arc<SecureRandom>,
|
||||
ip_tracker: Arc<UserIpTracker>,
|
||||
beobachten: Arc<BeobachtenStore>,
|
||||
api_config_tx: watch::Sender<Arc<ProxyConfig>>,
|
||||
me_pool_for_policy: Option<Arc<MePool>>,
|
||||
) -> RuntimeWatches {
|
||||
let um_clone = upstream_manager.clone();
|
||||
let dc_overrides_for_health = config.dc_overrides.clone();
|
||||
tokio::spawn(async move {
|
||||
um_clone
|
||||
.run_health_checks(
|
||||
prefer_ipv6,
|
||||
decision_ipv4_dc,
|
||||
decision_ipv6_dc,
|
||||
dc_overrides_for_health,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
|
||||
let rc_clone = replay_checker.clone();
|
||||
tokio::spawn(async move {
|
||||
rc_clone.run_periodic_cleanup().await;
|
||||
});
|
||||
|
||||
let detected_ip_v4: Option<IpAddr> = probe.detected_ipv4.map(IpAddr::V4);
|
||||
let detected_ip_v6: Option<IpAddr> = probe.detected_ipv6.map(IpAddr::V6);
|
||||
debug!(
|
||||
"Detected IPs: v4={:?} v6={:?}",
|
||||
detected_ip_v4, detected_ip_v6
|
||||
);
|
||||
|
||||
startup_tracker
|
||||
.start_component(
|
||||
COMPONENT_CONFIG_WATCHER_START,
|
||||
Some("spawn config hot-reload watcher".to_string()),
|
||||
)
|
||||
.await;
|
||||
let (config_rx, log_level_rx): (
|
||||
watch::Receiver<Arc<ProxyConfig>>,
|
||||
watch::Receiver<LogLevel>,
|
||||
) = spawn_config_watcher(
|
||||
PathBuf::from(config_path),
|
||||
config.clone(),
|
||||
detected_ip_v4,
|
||||
detected_ip_v6,
|
||||
);
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_CONFIG_WATCHER_START,
|
||||
Some("config hot-reload watcher started".to_string()),
|
||||
)
|
||||
.await;
|
||||
let mut config_rx_api_bridge = config_rx.clone();
|
||||
let api_config_tx_bridge = api_config_tx.clone();
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
if config_rx_api_bridge.changed().await.is_err() {
|
||||
break;
|
||||
}
|
||||
let cfg = config_rx_api_bridge.borrow_and_update().clone();
|
||||
api_config_tx_bridge.send_replace(cfg);
|
||||
}
|
||||
});
|
||||
|
||||
let stats_policy = stats.clone();
|
||||
let mut config_rx_policy = config_rx.clone();
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
if config_rx_policy.changed().await.is_err() {
|
||||
break;
|
||||
}
|
||||
let cfg = config_rx_policy.borrow_and_update().clone();
|
||||
stats_policy.apply_telemetry_policy(TelemetryPolicy::from_config(&cfg.general.telemetry));
|
||||
if let Some(pool) = &me_pool_for_policy {
|
||||
pool.update_runtime_transport_policy(
|
||||
cfg.general.me_socks_kdf_policy,
|
||||
cfg.general.me_route_backpressure_base_timeout_ms,
|
||||
cfg.general.me_route_backpressure_high_timeout_ms,
|
||||
cfg.general.me_route_backpressure_high_watermark_pct,
|
||||
cfg.general.me_reader_route_data_wait_ms,
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let ip_tracker_policy = ip_tracker.clone();
|
||||
let mut config_rx_ip_limits = config_rx.clone();
|
||||
tokio::spawn(async move {
|
||||
let mut prev_limits = config_rx_ip_limits.borrow().access.user_max_unique_ips.clone();
|
||||
let mut prev_mode = config_rx_ip_limits.borrow().access.user_max_unique_ips_mode;
|
||||
let mut prev_window = config_rx_ip_limits
|
||||
.borrow()
|
||||
.access
|
||||
.user_max_unique_ips_window_secs;
|
||||
|
||||
loop {
|
||||
if config_rx_ip_limits.changed().await.is_err() {
|
||||
break;
|
||||
}
|
||||
let cfg = config_rx_ip_limits.borrow_and_update().clone();
|
||||
|
||||
if prev_limits != cfg.access.user_max_unique_ips {
|
||||
ip_tracker_policy.load_limits(&cfg.access.user_max_unique_ips).await;
|
||||
prev_limits = cfg.access.user_max_unique_ips.clone();
|
||||
}
|
||||
|
||||
if prev_mode != cfg.access.user_max_unique_ips_mode
|
||||
|| prev_window != cfg.access.user_max_unique_ips_window_secs
|
||||
{
|
||||
ip_tracker_policy
|
||||
.set_limit_policy(
|
||||
cfg.access.user_max_unique_ips_mode,
|
||||
cfg.access.user_max_unique_ips_window_secs,
|
||||
)
|
||||
.await;
|
||||
prev_mode = cfg.access.user_max_unique_ips_mode;
|
||||
prev_window = cfg.access.user_max_unique_ips_window_secs;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let beobachten_writer = beobachten.clone();
|
||||
let config_rx_beobachten = config_rx.clone();
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
let cfg = config_rx_beobachten.borrow().clone();
|
||||
let sleep_secs = cfg.general.beobachten_flush_secs.max(1);
|
||||
|
||||
if cfg.general.beobachten {
|
||||
let ttl = std::time::Duration::from_secs(cfg.general.beobachten_minutes.saturating_mul(60));
|
||||
let path = cfg.general.beobachten_file.clone();
|
||||
let snapshot = beobachten_writer.snapshot_text(ttl);
|
||||
if let Err(e) = write_beobachten_snapshot(&path, &snapshot).await {
|
||||
warn!(error = %e, path = %path, "Failed to flush beobachten snapshot");
|
||||
}
|
||||
}
|
||||
|
||||
tokio::time::sleep(std::time::Duration::from_secs(sleep_secs)).await;
|
||||
}
|
||||
});
|
||||
|
||||
if let Some(pool) = me_pool {
|
||||
let reinit_trigger_capacity = config.general.me_reinit_trigger_channel.max(1);
|
||||
let (reinit_tx, reinit_rx) = mpsc::channel::<MeReinitTrigger>(reinit_trigger_capacity);
|
||||
|
||||
let pool_clone_sched = pool.clone();
|
||||
let rng_clone_sched = rng.clone();
|
||||
let config_rx_clone_sched = config_rx.clone();
|
||||
tokio::spawn(async move {
|
||||
crate::transport::middle_proxy::me_reinit_scheduler(
|
||||
pool_clone_sched,
|
||||
rng_clone_sched,
|
||||
config_rx_clone_sched,
|
||||
reinit_rx,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
|
||||
let pool_clone = pool.clone();
|
||||
let config_rx_clone = config_rx.clone();
|
||||
let reinit_tx_updater = reinit_tx.clone();
|
||||
tokio::spawn(async move {
|
||||
crate::transport::middle_proxy::me_config_updater(
|
||||
pool_clone,
|
||||
config_rx_clone,
|
||||
reinit_tx_updater,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
|
||||
let config_rx_clone_rot = config_rx.clone();
|
||||
let reinit_tx_rotation = reinit_tx.clone();
|
||||
tokio::spawn(async move {
|
||||
crate::transport::middle_proxy::me_rotation_task(config_rx_clone_rot, reinit_tx_rotation)
|
||||
.await;
|
||||
});
|
||||
}
|
||||
|
||||
RuntimeWatches {
|
||||
config_rx,
|
||||
log_level_rx,
|
||||
detected_ip_v4,
|
||||
detected_ip_v6,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn apply_runtime_log_filter(
|
||||
has_rust_log: bool,
|
||||
effective_log_level: &LogLevel,
|
||||
filter_handle: reload::Handle<EnvFilter, tracing_subscriber::Registry>,
|
||||
mut log_level_rx: watch::Receiver<LogLevel>,
|
||||
) {
|
||||
let runtime_filter = if has_rust_log {
|
||||
EnvFilter::from_default_env()
|
||||
} else if matches!(effective_log_level, LogLevel::Silent) {
|
||||
EnvFilter::new("warn,telemt::links=info")
|
||||
} else {
|
||||
EnvFilter::new(effective_log_level.to_filter_str())
|
||||
};
|
||||
filter_handle
|
||||
.reload(runtime_filter)
|
||||
.expect("Failed to switch log filter");
|
||||
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
if log_level_rx.changed().await.is_err() {
|
||||
break;
|
||||
}
|
||||
let level = log_level_rx.borrow_and_update().clone();
|
||||
let new_filter = tracing_subscriber::EnvFilter::new(level.to_filter_str());
|
||||
if let Err(e) = filter_handle.reload(new_filter) {
|
||||
tracing::error!("config reload: failed to update log filter: {}", e);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
pub(crate) async fn spawn_metrics_if_configured(
|
||||
config: &Arc<ProxyConfig>,
|
||||
startup_tracker: &Arc<StartupTracker>,
|
||||
stats: Arc<Stats>,
|
||||
beobachten: Arc<BeobachtenStore>,
|
||||
ip_tracker: Arc<UserIpTracker>,
|
||||
config_rx: watch::Receiver<Arc<ProxyConfig>>,
|
||||
) {
|
||||
if let Some(port) = config.server.metrics_port {
|
||||
startup_tracker
|
||||
.start_component(
|
||||
COMPONENT_METRICS_START,
|
||||
Some(format!("spawn metrics endpoint on {}", port)),
|
||||
)
|
||||
.await;
|
||||
let stats = stats.clone();
|
||||
let beobachten = beobachten.clone();
|
||||
let config_rx_metrics = config_rx.clone();
|
||||
let ip_tracker_metrics = ip_tracker.clone();
|
||||
let whitelist = config.server.metrics_whitelist.clone();
|
||||
tokio::spawn(async move {
|
||||
metrics::serve(
|
||||
port,
|
||||
stats,
|
||||
beobachten,
|
||||
ip_tracker_metrics,
|
||||
config_rx_metrics,
|
||||
whitelist,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_METRICS_START,
|
||||
Some("metrics task spawned".to_string()),
|
||||
)
|
||||
.await;
|
||||
} else {
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_METRICS_START,
|
||||
Some("server.metrics_port is not configured".to_string()),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn mark_runtime_ready(startup_tracker: &Arc<StartupTracker>) {
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_RUNTIME_READY,
|
||||
Some("startup pipeline is fully initialized".to_string()),
|
||||
)
|
||||
.await;
|
||||
startup_tracker.mark_ready().await;
|
||||
}
|
||||
42
src/maestro/shutdown.rs
Normal file
42
src/maestro/shutdown.rs
Normal file
@@ -0,0 +1,42 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use tokio::signal;
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
use crate::transport::middle_proxy::MePool;
|
||||
|
||||
use super::helpers::{format_uptime, unit_label};
|
||||
|
||||
pub(crate) async fn wait_for_shutdown(process_started_at: Instant, me_pool: Option<Arc<MePool>>) {
|
||||
match signal::ctrl_c().await {
|
||||
Ok(()) => {
|
||||
let shutdown_started_at = Instant::now();
|
||||
info!("Shutting down...");
|
||||
let uptime_secs = process_started_at.elapsed().as_secs();
|
||||
info!("Uptime: {}", format_uptime(uptime_secs));
|
||||
if let Some(pool) = &me_pool {
|
||||
match tokio::time::timeout(Duration::from_secs(2), pool.shutdown_send_close_conn_all())
|
||||
.await
|
||||
{
|
||||
Ok(total) => {
|
||||
info!(
|
||||
close_conn_sent = total,
|
||||
"ME shutdown: RPC_CLOSE_CONN broadcast completed"
|
||||
);
|
||||
}
|
||||
Err(_) => {
|
||||
warn!("ME shutdown: RPC_CLOSE_CONN broadcast timed out");
|
||||
}
|
||||
}
|
||||
}
|
||||
let shutdown_secs = shutdown_started_at.elapsed().as_secs();
|
||||
info!(
|
||||
"Shutdown completed successfully in {} {}.",
|
||||
shutdown_secs,
|
||||
unit_label(shutdown_secs, "second", "seconds")
|
||||
);
|
||||
}
|
||||
Err(e) => error!("Signal error: {}", e),
|
||||
}
|
||||
}
|
||||
165
src/maestro/tls_bootstrap.rs
Normal file
165
src/maestro/tls_bootstrap.rs
Normal file
@@ -0,0 +1,165 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use rand::Rng;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::startup::{COMPONENT_TLS_FRONT_BOOTSTRAP, StartupTracker};
|
||||
use crate::tls_front::TlsFrontCache;
|
||||
use crate::transport::UpstreamManager;
|
||||
|
||||
pub(crate) async fn bootstrap_tls_front(
|
||||
config: &ProxyConfig,
|
||||
tls_domains: &[String],
|
||||
upstream_manager: Arc<UpstreamManager>,
|
||||
startup_tracker: &Arc<StartupTracker>,
|
||||
) -> Option<Arc<TlsFrontCache>> {
|
||||
startup_tracker
|
||||
.start_component(
|
||||
COMPONENT_TLS_FRONT_BOOTSTRAP,
|
||||
Some("initialize TLS front cache/bootstrap tasks".to_string()),
|
||||
)
|
||||
.await;
|
||||
|
||||
let tls_cache: Option<Arc<TlsFrontCache>> = if config.censorship.tls_emulation {
|
||||
let cache = Arc::new(TlsFrontCache::new(
|
||||
tls_domains,
|
||||
config.censorship.fake_cert_len,
|
||||
&config.censorship.tls_front_dir,
|
||||
));
|
||||
cache.load_from_disk().await;
|
||||
|
||||
let port = config.censorship.mask_port;
|
||||
let proxy_protocol = config.censorship.mask_proxy_protocol;
|
||||
let mask_host = config
|
||||
.censorship
|
||||
.mask_host
|
||||
.clone()
|
||||
.unwrap_or_else(|| config.censorship.tls_domain.clone());
|
||||
let mask_unix_sock = config.censorship.mask_unix_sock.clone();
|
||||
let fetch_timeout = Duration::from_secs(5);
|
||||
|
||||
let cache_initial = cache.clone();
|
||||
let domains_initial = tls_domains.to_vec();
|
||||
let host_initial = mask_host.clone();
|
||||
let unix_sock_initial = mask_unix_sock.clone();
|
||||
let upstream_initial = upstream_manager.clone();
|
||||
tokio::spawn(async move {
|
||||
let mut join = tokio::task::JoinSet::new();
|
||||
for domain in domains_initial {
|
||||
let cache_domain = cache_initial.clone();
|
||||
let host_domain = host_initial.clone();
|
||||
let unix_sock_domain = unix_sock_initial.clone();
|
||||
let upstream_domain = upstream_initial.clone();
|
||||
join.spawn(async move {
|
||||
match crate::tls_front::fetcher::fetch_real_tls(
|
||||
&host_domain,
|
||||
port,
|
||||
&domain,
|
||||
fetch_timeout,
|
||||
Some(upstream_domain),
|
||||
proxy_protocol,
|
||||
unix_sock_domain.as_deref(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(res) => cache_domain.update_from_fetch(&domain, res).await,
|
||||
Err(e) => {
|
||||
warn!(domain = %domain, error = %e, "TLS emulation initial fetch failed")
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
while let Some(res) = join.join_next().await {
|
||||
if let Err(e) = res {
|
||||
warn!(error = %e, "TLS emulation initial fetch task join failed");
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let cache_timeout = cache.clone();
|
||||
let domains_timeout = tls_domains.to_vec();
|
||||
let fake_cert_len = config.censorship.fake_cert_len;
|
||||
tokio::spawn(async move {
|
||||
tokio::time::sleep(fetch_timeout).await;
|
||||
for domain in domains_timeout {
|
||||
let cached = cache_timeout.get(&domain).await;
|
||||
if cached.domain == "default" {
|
||||
warn!(
|
||||
domain = %domain,
|
||||
timeout_secs = fetch_timeout.as_secs(),
|
||||
fake_cert_len,
|
||||
"TLS-front fetch not ready within timeout; using cache/default fake cert fallback"
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let cache_refresh = cache.clone();
|
||||
let domains_refresh = tls_domains.to_vec();
|
||||
let host_refresh = mask_host.clone();
|
||||
let unix_sock_refresh = mask_unix_sock.clone();
|
||||
let upstream_refresh = upstream_manager.clone();
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
let base_secs = rand::rng().random_range(4 * 3600..=6 * 3600);
|
||||
let jitter_secs = rand::rng().random_range(0..=7200);
|
||||
tokio::time::sleep(Duration::from_secs(base_secs + jitter_secs)).await;
|
||||
|
||||
let mut join = tokio::task::JoinSet::new();
|
||||
for domain in domains_refresh.clone() {
|
||||
let cache_domain = cache_refresh.clone();
|
||||
let host_domain = host_refresh.clone();
|
||||
let unix_sock_domain = unix_sock_refresh.clone();
|
||||
let upstream_domain = upstream_refresh.clone();
|
||||
join.spawn(async move {
|
||||
match crate::tls_front::fetcher::fetch_real_tls(
|
||||
&host_domain,
|
||||
port,
|
||||
&domain,
|
||||
fetch_timeout,
|
||||
Some(upstream_domain),
|
||||
proxy_protocol,
|
||||
unix_sock_domain.as_deref(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(res) => cache_domain.update_from_fetch(&domain, res).await,
|
||||
Err(e) => {
|
||||
warn!(domain = %domain, error = %e, "TLS emulation refresh failed")
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
while let Some(res) = join.join_next().await {
|
||||
if let Err(e) = res {
|
||||
warn!(error = %e, "TLS emulation refresh task join failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Some(cache)
|
||||
} else {
|
||||
startup_tracker
|
||||
.skip_component(
|
||||
COMPONENT_TLS_FRONT_BOOTSTRAP,
|
||||
Some("censorship.tls_emulation is false".to_string()),
|
||||
)
|
||||
.await;
|
||||
None
|
||||
};
|
||||
|
||||
if tls_cache.is_some() {
|
||||
startup_tracker
|
||||
.complete_component(
|
||||
COMPONENT_TLS_FRONT_BOOTSTRAP,
|
||||
Some("tls front cache is initialized".to_string()),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
tls_cache
|
||||
}
|
||||
1251
src/main.rs
1251
src/main.rs
File diff suppressed because it is too large
Load Diff
531
src/metrics.rs
531
src/metrics.rs
@@ -17,6 +17,7 @@ use crate::config::ProxyConfig;
|
||||
use crate::ip_tracker::UserIpTracker;
|
||||
use crate::stats::beobachten::BeobachtenStore;
|
||||
use crate::stats::Stats;
|
||||
use crate::transport::{ListenOptions, create_listener};
|
||||
|
||||
pub async fn serve(
|
||||
port: u16,
|
||||
@@ -26,16 +27,90 @@ pub async fn serve(
|
||||
config_rx: tokio::sync::watch::Receiver<Arc<ProxyConfig>>,
|
||||
whitelist: Vec<IpNetwork>,
|
||||
) {
|
||||
let addr = SocketAddr::from(([0, 0, 0, 0], port));
|
||||
let listener = match TcpListener::bind(addr).await {
|
||||
Ok(l) => l,
|
||||
Err(e) => {
|
||||
warn!(error = %e, "Failed to bind metrics on {}", addr);
|
||||
return;
|
||||
}
|
||||
};
|
||||
info!("Metrics endpoint: http://{}/metrics and /beobachten", addr);
|
||||
let whitelist = Arc::new(whitelist);
|
||||
let mut listener_v4 = None;
|
||||
let mut listener_v6 = None;
|
||||
|
||||
let addr_v4 = SocketAddr::from(([0, 0, 0, 0], port));
|
||||
match bind_metrics_listener(addr_v4, false) {
|
||||
Ok(listener) => {
|
||||
info!("Metrics endpoint: http://{}/metrics and /beobachten", addr_v4);
|
||||
listener_v4 = Some(listener);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error = %e, "Failed to bind metrics on {}", addr_v4);
|
||||
}
|
||||
}
|
||||
|
||||
let addr_v6 = SocketAddr::from(([0, 0, 0, 0, 0, 0, 0, 0], port));
|
||||
match bind_metrics_listener(addr_v6, true) {
|
||||
Ok(listener) => {
|
||||
info!("Metrics endpoint: http://[::]:{}/metrics and /beobachten", port);
|
||||
listener_v6 = Some(listener);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error = %e, "Failed to bind metrics on {}", addr_v6);
|
||||
}
|
||||
}
|
||||
|
||||
match (listener_v4, listener_v6) {
|
||||
(None, None) => {
|
||||
warn!("Metrics listener is unavailable on both IPv4 and IPv6");
|
||||
}
|
||||
(Some(listener), None) | (None, Some(listener)) => {
|
||||
serve_listener(
|
||||
listener, stats, beobachten, ip_tracker, config_rx, whitelist,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Some(listener4), Some(listener6)) => {
|
||||
let stats_v6 = stats.clone();
|
||||
let beobachten_v6 = beobachten.clone();
|
||||
let ip_tracker_v6 = ip_tracker.clone();
|
||||
let config_rx_v6 = config_rx.clone();
|
||||
let whitelist_v6 = whitelist.clone();
|
||||
tokio::spawn(async move {
|
||||
serve_listener(
|
||||
listener6,
|
||||
stats_v6,
|
||||
beobachten_v6,
|
||||
ip_tracker_v6,
|
||||
config_rx_v6,
|
||||
whitelist_v6,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
serve_listener(
|
||||
listener4,
|
||||
stats,
|
||||
beobachten,
|
||||
ip_tracker,
|
||||
config_rx,
|
||||
whitelist,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn bind_metrics_listener(addr: SocketAddr, ipv6_only: bool) -> std::io::Result<TcpListener> {
|
||||
let options = ListenOptions {
|
||||
reuse_port: false,
|
||||
ipv6_only,
|
||||
..Default::default()
|
||||
};
|
||||
let socket = create_listener(addr, &options)?;
|
||||
TcpListener::from_std(socket.into())
|
||||
}
|
||||
|
||||
async fn serve_listener(
|
||||
listener: TcpListener,
|
||||
stats: Arc<Stats>,
|
||||
beobachten: Arc<BeobachtenStore>,
|
||||
ip_tracker: Arc<UserIpTracker>,
|
||||
config_rx: tokio::sync::watch::Receiver<Arc<ProxyConfig>>,
|
||||
whitelist: Arc<Vec<IpNetwork>>,
|
||||
) {
|
||||
loop {
|
||||
let (stream, peer) = match listener.accept().await {
|
||||
Ok(v) => v,
|
||||
@@ -689,6 +764,135 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
|
||||
}
|
||||
);
|
||||
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_writer_pick_total ME writer-pick outcomes by mode and result"
|
||||
);
|
||||
let _ = writeln!(out, "# TYPE telemt_me_writer_pick_total counter");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writer_pick_total{{mode=\"sorted_rr\",result=\"success_try\"}} {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writer_pick_sorted_rr_success_try_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writer_pick_total{{mode=\"sorted_rr\",result=\"success_fallback\"}} {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writer_pick_sorted_rr_success_fallback_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writer_pick_total{{mode=\"sorted_rr\",result=\"full\"}} {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writer_pick_sorted_rr_full_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writer_pick_total{{mode=\"sorted_rr\",result=\"closed\"}} {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writer_pick_sorted_rr_closed_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writer_pick_total{{mode=\"sorted_rr\",result=\"no_candidate\"}} {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writer_pick_sorted_rr_no_candidate_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writer_pick_total{{mode=\"p2c\",result=\"success_try\"}} {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writer_pick_p2c_success_try_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writer_pick_total{{mode=\"p2c\",result=\"success_fallback\"}} {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writer_pick_p2c_success_fallback_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writer_pick_total{{mode=\"p2c\",result=\"full\"}} {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writer_pick_p2c_full_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writer_pick_total{{mode=\"p2c\",result=\"closed\"}} {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writer_pick_p2c_closed_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writer_pick_total{{mode=\"p2c\",result=\"no_candidate\"}} {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writer_pick_p2c_no_candidate_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_writer_pick_blocking_fallback_total ME writer-pick blocking fallback attempts"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# TYPE telemt_me_writer_pick_blocking_fallback_total counter"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writer_pick_blocking_fallback_total {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writer_pick_blocking_fallback_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_writer_pick_mode_switch_total Writer-pick mode switches via runtime updates"
|
||||
);
|
||||
let _ = writeln!(out, "# TYPE telemt_me_writer_pick_mode_switch_total counter");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writer_pick_mode_switch_total {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writer_pick_mode_switch_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_socks_kdf_policy_total SOCKS KDF policy outcomes"
|
||||
@@ -968,6 +1172,229 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_adaptive_floor_cpu_cores_detected Runtime detected logical CPU cores for adaptive floor"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# TYPE telemt_me_adaptive_floor_cpu_cores_detected gauge"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_adaptive_floor_cpu_cores_detected {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_floor_cpu_cores_detected_gauge()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_adaptive_floor_cpu_cores_effective Runtime effective logical CPU cores for adaptive floor"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# TYPE telemt_me_adaptive_floor_cpu_cores_effective gauge"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_adaptive_floor_cpu_cores_effective {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_floor_cpu_cores_effective_gauge()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_adaptive_floor_global_cap_raw Runtime raw global adaptive floor cap"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# TYPE telemt_me_adaptive_floor_global_cap_raw gauge"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_adaptive_floor_global_cap_raw {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_floor_global_cap_raw_gauge()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_adaptive_floor_global_cap_effective Runtime effective global adaptive floor cap"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# TYPE telemt_me_adaptive_floor_global_cap_effective gauge"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_adaptive_floor_global_cap_effective {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_floor_global_cap_effective_gauge()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_adaptive_floor_target_writers_total Runtime adaptive floor target writers total"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# TYPE telemt_me_adaptive_floor_target_writers_total gauge"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_adaptive_floor_target_writers_total {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_floor_target_writers_total_gauge()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_adaptive_floor_active_cap_configured Runtime configured active writer cap"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# TYPE telemt_me_adaptive_floor_active_cap_configured gauge"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_adaptive_floor_active_cap_configured {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_floor_active_cap_configured_gauge()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_adaptive_floor_active_cap_effective Runtime effective active writer cap"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# TYPE telemt_me_adaptive_floor_active_cap_effective gauge"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_adaptive_floor_active_cap_effective {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_floor_active_cap_effective_gauge()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_adaptive_floor_warm_cap_configured Runtime configured warm writer cap"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# TYPE telemt_me_adaptive_floor_warm_cap_configured gauge"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_adaptive_floor_warm_cap_configured {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_floor_warm_cap_configured_gauge()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_adaptive_floor_warm_cap_effective Runtime effective warm writer cap"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# TYPE telemt_me_adaptive_floor_warm_cap_effective gauge"
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_adaptive_floor_warm_cap_effective {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_floor_warm_cap_effective_gauge()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_writers_active_current Current non-draining active ME writers"
|
||||
);
|
||||
let _ = writeln!(out, "# TYPE telemt_me_writers_active_current gauge");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writers_active_current {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writers_active_current_gauge()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_writers_warm_current Current non-draining warm ME writers"
|
||||
);
|
||||
let _ = writeln!(out, "# TYPE telemt_me_writers_warm_current gauge");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_writers_warm_current {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_writers_warm_current_gauge()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_floor_cap_block_total Reconnect attempts blocked by adaptive floor caps"
|
||||
);
|
||||
let _ = writeln!(out, "# TYPE telemt_me_floor_cap_block_total counter");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_floor_cap_block_total {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_floor_cap_block_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_floor_swap_idle_total Adaptive floor cap recovery via idle writer swap"
|
||||
);
|
||||
let _ = writeln!(out, "# TYPE telemt_me_floor_swap_idle_total counter");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_floor_swap_idle_total {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_floor_swap_idle_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_floor_swap_idle_failed_total Failed idle swap attempts under adaptive floor caps"
|
||||
);
|
||||
let _ = writeln!(out, "# TYPE telemt_me_floor_swap_idle_failed_total counter");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_floor_swap_idle_failed_total {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_floor_swap_idle_failed_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
|
||||
let _ = writeln!(out, "# HELP telemt_secure_padding_invalid_total Invalid secure frame lengths");
|
||||
let _ = writeln!(out, "# TYPE telemt_secure_padding_invalid_total counter");
|
||||
@@ -1199,6 +1626,48 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_no_writer_failfast_total ME route failfast errors due to missing writer in bounded wait window"
|
||||
);
|
||||
let _ = writeln!(out, "# TYPE telemt_me_no_writer_failfast_total counter");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_no_writer_failfast_total {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_no_writer_failfast_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_async_recovery_trigger_total Async ME recovery trigger attempts from route path"
|
||||
);
|
||||
let _ = writeln!(out, "# TYPE telemt_me_async_recovery_trigger_total counter");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_async_recovery_trigger_total {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_async_recovery_trigger_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_me_inline_recovery_total Legacy inline ME recovery attempts from route path"
|
||||
);
|
||||
let _ = writeln!(out, "# TYPE telemt_me_inline_recovery_total counter");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_me_inline_recovery_total {}",
|
||||
if me_allows_normal {
|
||||
stats.get_me_inline_recovery_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
|
||||
let unresolved_writer_losses = if me_allows_normal {
|
||||
stats
|
||||
@@ -1237,6 +1706,29 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
|
||||
let _ = writeln!(out, "# TYPE telemt_user_msgs_from_client counter");
|
||||
let _ = writeln!(out, "# HELP telemt_user_msgs_to_client Per-user messages sent");
|
||||
let _ = writeln!(out, "# TYPE telemt_user_msgs_to_client counter");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_ip_reservation_rollback_total IP reservation rollbacks caused by later limit checks"
|
||||
);
|
||||
let _ = writeln!(out, "# TYPE telemt_ip_reservation_rollback_total counter");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_ip_reservation_rollback_total{{reason=\"tcp_limit\"}} {}",
|
||||
if core_enabled {
|
||||
stats.get_ip_reservation_rollback_tcp_limit_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_ip_reservation_rollback_total{{reason=\"quota_limit\"}} {}",
|
||||
if core_enabled {
|
||||
stats.get_ip_reservation_rollback_quota_limit_total()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_telemetry_user_series_suppressed User-labeled metric series suppression flag"
|
||||
@@ -1267,11 +1759,21 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
|
||||
.collect();
|
||||
|
||||
let mut unique_users = BTreeSet::new();
|
||||
unique_users.extend(config.access.users.keys().cloned());
|
||||
unique_users.extend(config.access.user_max_unique_ips.keys().cloned());
|
||||
unique_users.extend(ip_counts.keys().cloned());
|
||||
let unique_users_vec: Vec<String> = unique_users.iter().cloned().collect();
|
||||
let recent_counts = ip_tracker
|
||||
.get_recent_counts_for_users(&unique_users_vec)
|
||||
.await;
|
||||
|
||||
let _ = writeln!(out, "# HELP telemt_user_unique_ips_current Per-user current number of unique active IPs");
|
||||
let _ = writeln!(out, "# TYPE telemt_user_unique_ips_current gauge");
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"# HELP telemt_user_unique_ips_recent_window Per-user unique IPs seen in configured observation window"
|
||||
);
|
||||
let _ = writeln!(out, "# TYPE telemt_user_unique_ips_recent_window gauge");
|
||||
let _ = writeln!(out, "# HELP telemt_user_unique_ips_limit Per-user configured unique IP limit (0 means unlimited)");
|
||||
let _ = writeln!(out, "# TYPE telemt_user_unique_ips_limit gauge");
|
||||
let _ = writeln!(out, "# HELP telemt_user_unique_ips_utilization Per-user unique IP usage ratio (0 for unlimited)");
|
||||
@@ -1286,6 +1788,12 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
|
||||
0.0
|
||||
};
|
||||
let _ = writeln!(out, "telemt_user_unique_ips_current{{user=\"{}\"}} {}", user, current);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"telemt_user_unique_ips_recent_window{{user=\"{}\"}} {}",
|
||||
user,
|
||||
recent_counts.get(&user).copied().unwrap_or(0)
|
||||
);
|
||||
let _ = writeln!(out, "telemt_user_unique_ips_limit{{user=\"{}\"}} {}", user, limit);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
@@ -1378,6 +1886,7 @@ mod tests {
|
||||
assert!(output.contains("telemt_user_msgs_from_client{user=\"alice\"} 1"));
|
||||
assert!(output.contains("telemt_user_msgs_to_client{user=\"alice\"} 2"));
|
||||
assert!(output.contains("telemt_user_unique_ips_current{user=\"alice\"} 1"));
|
||||
assert!(output.contains("telemt_user_unique_ips_recent_window{user=\"alice\"} 1"));
|
||||
assert!(output.contains("telemt_user_unique_ips_limit{user=\"alice\"} 4"));
|
||||
assert!(output.contains("telemt_user_unique_ips_utilization{user=\"alice\"} 0.250000"));
|
||||
}
|
||||
@@ -1391,7 +1900,8 @@ mod tests {
|
||||
assert!(output.contains("telemt_connections_total 0"));
|
||||
assert!(output.contains("telemt_connections_bad_total 0"));
|
||||
assert!(output.contains("telemt_handshake_timeouts_total 0"));
|
||||
assert!(!output.contains("user="));
|
||||
assert!(output.contains("telemt_user_unique_ips_current{user="));
|
||||
assert!(output.contains("telemt_user_unique_ips_recent_window{user="));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -1412,6 +1922,7 @@ mod tests {
|
||||
"# TYPE telemt_me_writer_removed_unexpected_minus_restored_total gauge"
|
||||
));
|
||||
assert!(output.contains("# TYPE telemt_user_unique_ips_current gauge"));
|
||||
assert!(output.contains("# TYPE telemt_user_unique_ips_recent_window gauge"));
|
||||
assert!(output.contains("# TYPE telemt_user_unique_ips_limit gauge"));
|
||||
assert!(output.contains("# TYPE telemt_user_unique_ips_utilization gauge"));
|
||||
}
|
||||
|
||||
@@ -280,6 +280,14 @@ fn detect_local_ip_v6() -> Option<Ipv6Addr> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn detect_interface_ipv4() -> Option<Ipv4Addr> {
|
||||
detect_local_ip_v4()
|
||||
}
|
||||
|
||||
pub fn detect_interface_ipv6() -> Option<Ipv6Addr> {
|
||||
detect_local_ip_v6()
|
||||
}
|
||||
|
||||
pub fn is_bogon(ip: IpAddr) -> bool {
|
||||
match ip {
|
||||
IpAddr::V4(v4) => is_bogon_v4(v4),
|
||||
|
||||
@@ -39,6 +39,7 @@ use crate::proxy::direct_relay::handle_via_direct;
|
||||
use crate::proxy::handshake::{HandshakeSuccess, handle_mtproto_handshake, handle_tls_handshake};
|
||||
use crate::proxy::masking::handle_bad_client;
|
||||
use crate::proxy::middle_relay::handle_via_middle_proxy;
|
||||
use crate::proxy::route_mode::{RelayRouteMode, RouteRuntimeController};
|
||||
|
||||
fn beobachten_ttl(config: &ProxyConfig) -> Duration {
|
||||
Duration::from_secs(config.general.beobachten_minutes.saturating_mul(60))
|
||||
@@ -80,6 +81,7 @@ pub async fn handle_client_stream<S>(
|
||||
buffer_pool: Arc<BufferPool>,
|
||||
rng: Arc<SecureRandom>,
|
||||
me_pool: Option<Arc<MePool>>,
|
||||
route_runtime: Arc<RouteRuntimeController>,
|
||||
tls_cache: Option<Arc<TlsFrontCache>>,
|
||||
ip_tracker: Arc<UserIpTracker>,
|
||||
beobachten: Arc<BeobachtenStore>,
|
||||
@@ -97,8 +99,11 @@ where
|
||||
.unwrap_or_else(|_| "0.0.0.0:443".parse().unwrap());
|
||||
|
||||
if proxy_protocol_enabled {
|
||||
match parse_proxy_protocol(&mut stream, peer).await {
|
||||
Ok(info) => {
|
||||
let proxy_header_timeout = Duration::from_millis(
|
||||
config.server.proxy_protocol_header_timeout_ms.max(1),
|
||||
);
|
||||
match timeout(proxy_header_timeout, parse_proxy_protocol(&mut stream, peer)).await {
|
||||
Ok(Ok(info)) => {
|
||||
debug!(
|
||||
peer = %peer,
|
||||
client = %info.src_addr,
|
||||
@@ -110,12 +115,18 @@ where
|
||||
local_addr = dst;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
Ok(Err(e)) => {
|
||||
stats.increment_connects_bad();
|
||||
warn!(peer = %peer, error = %e, "Invalid PROXY protocol header");
|
||||
record_beobachten_class(&beobachten, &config, peer.ip(), "other");
|
||||
return Err(e);
|
||||
}
|
||||
Err(_) => {
|
||||
stats.increment_connects_bad();
|
||||
warn!(peer = %peer, timeout_ms = proxy_header_timeout.as_millis(), "PROXY protocol header timeout");
|
||||
record_beobachten_class(&beobachten, &config, peer.ip(), "other");
|
||||
return Err(ProxyError::InvalidProxyProtocol);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -161,7 +172,7 @@ where
|
||||
|
||||
let (read_half, write_half) = tokio::io::split(stream);
|
||||
|
||||
let (mut tls_reader, tls_writer, _tls_user) = match handle_tls_handshake(
|
||||
let (mut tls_reader, tls_writer, tls_user) = match handle_tls_handshake(
|
||||
&handshake, read_half, write_half, real_peer,
|
||||
&config, &replay_checker, &rng, tls_cache.clone(),
|
||||
).await {
|
||||
@@ -190,7 +201,7 @@ where
|
||||
|
||||
let (crypto_reader, crypto_writer, success) = match handle_mtproto_handshake(
|
||||
&mtproto_handshake, tls_reader, tls_writer, real_peer,
|
||||
&config, &replay_checker, true,
|
||||
&config, &replay_checker, true, Some(tls_user.as_str()),
|
||||
).await {
|
||||
HandshakeResult::Success(result) => result,
|
||||
HandshakeResult::BadClient { reader: _, writer: _ } => {
|
||||
@@ -205,6 +216,7 @@ where
|
||||
RunningClientHandler::handle_authenticated_static(
|
||||
crypto_reader, crypto_writer, success,
|
||||
upstream_manager, stats, config, buffer_pool, rng, me_pool,
|
||||
route_runtime.clone(),
|
||||
local_addr, real_peer, ip_tracker.clone(),
|
||||
),
|
||||
)))
|
||||
@@ -234,7 +246,7 @@ where
|
||||
|
||||
let (crypto_reader, crypto_writer, success) = match handle_mtproto_handshake(
|
||||
&handshake, read_half, write_half, real_peer,
|
||||
&config, &replay_checker, false,
|
||||
&config, &replay_checker, false, None,
|
||||
).await {
|
||||
HandshakeResult::Success(result) => result,
|
||||
HandshakeResult::BadClient { reader, writer } => {
|
||||
@@ -265,6 +277,7 @@ where
|
||||
buffer_pool,
|
||||
rng,
|
||||
me_pool,
|
||||
route_runtime.clone(),
|
||||
local_addr,
|
||||
real_peer,
|
||||
ip_tracker.clone(),
|
||||
@@ -308,6 +321,8 @@ pub struct ClientHandler;
|
||||
pub struct RunningClientHandler {
|
||||
stream: TcpStream,
|
||||
peer: SocketAddr,
|
||||
real_peer_from_proxy: Option<SocketAddr>,
|
||||
real_peer_report: Arc<std::sync::Mutex<Option<SocketAddr>>>,
|
||||
config: Arc<ProxyConfig>,
|
||||
stats: Arc<Stats>,
|
||||
replay_checker: Arc<ReplayChecker>,
|
||||
@@ -315,6 +330,7 @@ pub struct RunningClientHandler {
|
||||
buffer_pool: Arc<BufferPool>,
|
||||
rng: Arc<SecureRandom>,
|
||||
me_pool: Option<Arc<MePool>>,
|
||||
route_runtime: Arc<RouteRuntimeController>,
|
||||
tls_cache: Option<Arc<TlsFrontCache>>,
|
||||
ip_tracker: Arc<UserIpTracker>,
|
||||
beobachten: Arc<BeobachtenStore>,
|
||||
@@ -332,14 +348,19 @@ impl ClientHandler {
|
||||
buffer_pool: Arc<BufferPool>,
|
||||
rng: Arc<SecureRandom>,
|
||||
me_pool: Option<Arc<MePool>>,
|
||||
route_runtime: Arc<RouteRuntimeController>,
|
||||
tls_cache: Option<Arc<TlsFrontCache>>,
|
||||
ip_tracker: Arc<UserIpTracker>,
|
||||
beobachten: Arc<BeobachtenStore>,
|
||||
proxy_protocol_enabled: bool,
|
||||
real_peer_report: Arc<std::sync::Mutex<Option<SocketAddr>>>,
|
||||
) -> RunningClientHandler {
|
||||
let normalized_peer = normalize_ip(peer);
|
||||
RunningClientHandler {
|
||||
stream,
|
||||
peer,
|
||||
peer: normalized_peer,
|
||||
real_peer_from_proxy: None,
|
||||
real_peer_report,
|
||||
config,
|
||||
stats,
|
||||
replay_checker,
|
||||
@@ -347,6 +368,7 @@ impl ClientHandler {
|
||||
buffer_pool,
|
||||
rng,
|
||||
me_pool,
|
||||
route_runtime,
|
||||
tls_cache,
|
||||
ip_tracker,
|
||||
beobachten,
|
||||
@@ -356,10 +378,8 @@ impl ClientHandler {
|
||||
}
|
||||
|
||||
impl RunningClientHandler {
|
||||
pub async fn run(mut self) -> Result<()> {
|
||||
pub async fn run(self) -> Result<()> {
|
||||
self.stats.increment_connects_all();
|
||||
|
||||
self.peer = normalize_ip(self.peer);
|
||||
let peer = self.peer;
|
||||
let _ip_tracker = self.ip_tracker.clone();
|
||||
debug!(peer = %peer, "New connection");
|
||||
@@ -415,8 +435,16 @@ impl RunningClientHandler {
|
||||
let mut local_addr = self.stream.local_addr().map_err(ProxyError::Io)?;
|
||||
|
||||
if self.proxy_protocol_enabled {
|
||||
match parse_proxy_protocol(&mut self.stream, self.peer).await {
|
||||
Ok(info) => {
|
||||
let proxy_header_timeout = Duration::from_millis(
|
||||
self.config.server.proxy_protocol_header_timeout_ms.max(1),
|
||||
);
|
||||
match timeout(
|
||||
proxy_header_timeout,
|
||||
parse_proxy_protocol(&mut self.stream, self.peer),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(info)) => {
|
||||
debug!(
|
||||
peer = %self.peer,
|
||||
client = %info.src_addr,
|
||||
@@ -424,11 +452,15 @@ impl RunningClientHandler {
|
||||
"PROXY protocol header parsed"
|
||||
);
|
||||
self.peer = normalize_ip(info.src_addr);
|
||||
self.real_peer_from_proxy = Some(self.peer);
|
||||
if let Ok(mut slot) = self.real_peer_report.lock() {
|
||||
*slot = Some(self.peer);
|
||||
}
|
||||
if let Some(dst) = info.dst_addr {
|
||||
local_addr = dst;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
Ok(Err(e)) => {
|
||||
self.stats.increment_connects_bad();
|
||||
warn!(peer = %self.peer, error = %e, "Invalid PROXY protocol header");
|
||||
record_beobachten_class(
|
||||
@@ -439,6 +471,21 @@ impl RunningClientHandler {
|
||||
);
|
||||
return Err(e);
|
||||
}
|
||||
Err(_) => {
|
||||
self.stats.increment_connects_bad();
|
||||
warn!(
|
||||
peer = %self.peer,
|
||||
timeout_ms = proxy_header_timeout.as_millis(),
|
||||
"PROXY protocol header timeout"
|
||||
);
|
||||
record_beobachten_class(
|
||||
&self.beobachten,
|
||||
&self.config,
|
||||
self.peer.ip(),
|
||||
"other",
|
||||
);
|
||||
return Err(ProxyError::InvalidProxyProtocol);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -494,7 +541,7 @@ impl RunningClientHandler {
|
||||
|
||||
let (read_half, write_half) = self.stream.into_split();
|
||||
|
||||
let (mut tls_reader, tls_writer, _tls_user) = match handle_tls_handshake(
|
||||
let (mut tls_reader, tls_writer, tls_user) = match handle_tls_handshake(
|
||||
&handshake,
|
||||
read_half,
|
||||
write_half,
|
||||
@@ -538,6 +585,7 @@ impl RunningClientHandler {
|
||||
&config,
|
||||
&replay_checker,
|
||||
true,
|
||||
Some(tls_user.as_str()),
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -564,6 +612,7 @@ impl RunningClientHandler {
|
||||
buffer_pool,
|
||||
self.rng,
|
||||
self.me_pool,
|
||||
self.route_runtime.clone(),
|
||||
local_addr,
|
||||
peer,
|
||||
self.ip_tracker,
|
||||
@@ -611,6 +660,7 @@ impl RunningClientHandler {
|
||||
&config,
|
||||
&replay_checker,
|
||||
false,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -643,6 +693,7 @@ impl RunningClientHandler {
|
||||
buffer_pool,
|
||||
self.rng,
|
||||
self.me_pool,
|
||||
self.route_runtime.clone(),
|
||||
local_addr,
|
||||
peer,
|
||||
self.ip_tracker,
|
||||
@@ -664,6 +715,7 @@ impl RunningClientHandler {
|
||||
buffer_pool: Arc<BufferPool>,
|
||||
rng: Arc<SecureRandom>,
|
||||
me_pool: Option<Arc<MePool>>,
|
||||
route_runtime: Arc<RouteRuntimeController>,
|
||||
local_addr: SocketAddr,
|
||||
peer_addr: SocketAddr,
|
||||
ip_tracker: Arc<UserIpTracker>,
|
||||
@@ -672,42 +724,20 @@ impl RunningClientHandler {
|
||||
R: AsyncRead + Unpin + Send + 'static,
|
||||
W: AsyncWrite + Unpin + Send + 'static,
|
||||
{
|
||||
let user = &success.user;
|
||||
let user = success.user.clone();
|
||||
|
||||
if let Err(e) = Self::check_user_limits_static(user, &config, &stats, peer_addr, &ip_tracker).await {
|
||||
if let Err(e) = Self::check_user_limits_static(&user, &config, &stats, peer_addr, &ip_tracker).await {
|
||||
warn!(user = %user, error = %e, "User limit exceeded");
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
// IP Cleanup Guard: автоматически удаляет IP при выходе из scope
|
||||
struct IpCleanupGuard {
|
||||
tracker: Arc<UserIpTracker>,
|
||||
user: String,
|
||||
ip: std::net::IpAddr,
|
||||
}
|
||||
|
||||
impl Drop for IpCleanupGuard {
|
||||
fn drop(&mut self) {
|
||||
let tracker = self.tracker.clone();
|
||||
let user = self.user.clone();
|
||||
let ip = self.ip;
|
||||
tokio::spawn(async move {
|
||||
tracker.remove_ip(&user, ip).await;
|
||||
debug!(user = %user, ip = %ip, "IP cleaned up on disconnect");
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let _cleanup = IpCleanupGuard {
|
||||
tracker: ip_tracker,
|
||||
user: user.clone(),
|
||||
ip: peer_addr.ip(),
|
||||
};
|
||||
|
||||
// Decide: middle proxy or direct
|
||||
if config.general.use_middle_proxy {
|
||||
let route_snapshot = route_runtime.snapshot();
|
||||
let session_id = rng.u64();
|
||||
let relay_result = if config.general.use_middle_proxy
|
||||
&& matches!(route_snapshot.mode, RelayRouteMode::Middle)
|
||||
{
|
||||
if let Some(ref pool) = me_pool {
|
||||
return handle_via_middle_proxy(
|
||||
handle_via_middle_proxy(
|
||||
client_reader,
|
||||
client_writer,
|
||||
success,
|
||||
@@ -717,24 +747,48 @@ impl RunningClientHandler {
|
||||
buffer_pool,
|
||||
local_addr,
|
||||
rng,
|
||||
route_runtime.subscribe(),
|
||||
route_snapshot,
|
||||
session_id,
|
||||
)
|
||||
.await;
|
||||
.await
|
||||
} else {
|
||||
warn!("use_middle_proxy=true but MePool not initialized, falling back to direct");
|
||||
handle_via_direct(
|
||||
client_reader,
|
||||
client_writer,
|
||||
success,
|
||||
upstream_manager,
|
||||
stats,
|
||||
config,
|
||||
buffer_pool,
|
||||
rng,
|
||||
route_runtime.subscribe(),
|
||||
route_snapshot,
|
||||
session_id,
|
||||
)
|
||||
.await
|
||||
}
|
||||
warn!("use_middle_proxy=true but MePool not initialized, falling back to direct");
|
||||
}
|
||||
} else {
|
||||
// Direct mode (original behavior)
|
||||
handle_via_direct(
|
||||
client_reader,
|
||||
client_writer,
|
||||
success,
|
||||
upstream_manager,
|
||||
stats,
|
||||
config,
|
||||
buffer_pool,
|
||||
rng,
|
||||
route_runtime.subscribe(),
|
||||
route_snapshot,
|
||||
session_id,
|
||||
)
|
||||
.await
|
||||
};
|
||||
|
||||
// Direct mode (original behavior)
|
||||
handle_via_direct(
|
||||
client_reader,
|
||||
client_writer,
|
||||
success,
|
||||
upstream_manager,
|
||||
stats,
|
||||
config,
|
||||
buffer_pool,
|
||||
rng,
|
||||
)
|
||||
.await
|
||||
ip_tracker.remove_ip(&user, peer_addr.ip()).await;
|
||||
relay_result
|
||||
}
|
||||
|
||||
async fn check_user_limits_static(
|
||||
@@ -752,22 +806,29 @@ impl RunningClientHandler {
|
||||
});
|
||||
}
|
||||
|
||||
let ip_reserved = match ip_tracker.check_and_add(user, peer_addr.ip()).await {
|
||||
Ok(()) => true,
|
||||
Err(reason) => {
|
||||
warn!(
|
||||
user = %user,
|
||||
ip = %peer_addr.ip(),
|
||||
reason = %reason,
|
||||
"IP limit exceeded"
|
||||
);
|
||||
return Err(ProxyError::ConnectionLimitExceeded {
|
||||
user: user.to_string(),
|
||||
});
|
||||
}
|
||||
};
|
||||
// IP limit check
|
||||
if let Err(reason) = ip_tracker.check_and_add(user, peer_addr.ip()).await {
|
||||
warn!(
|
||||
user = %user,
|
||||
ip = %peer_addr.ip(),
|
||||
reason = %reason,
|
||||
"IP limit exceeded"
|
||||
);
|
||||
return Err(ProxyError::ConnectionLimitExceeded {
|
||||
user: user.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(limit) = config.access.user_max_tcp_conns.get(user)
|
||||
&& stats.get_user_curr_connects(user) >= *limit as u64
|
||||
{
|
||||
if ip_reserved {
|
||||
ip_tracker.remove_ip(user, peer_addr.ip()).await;
|
||||
stats.increment_ip_reservation_rollback_tcp_limit_total();
|
||||
}
|
||||
return Err(ProxyError::ConnectionLimitExceeded {
|
||||
user: user.to_string(),
|
||||
});
|
||||
@@ -776,6 +837,10 @@ impl RunningClientHandler {
|
||||
if let Some(quota) = config.access.user_data_quota.get(user)
|
||||
&& stats.get_user_total_octets(user) >= *quota
|
||||
{
|
||||
if ip_reserved {
|
||||
ip_tracker.remove_ip(user, peer_addr.ip()).await;
|
||||
stats.increment_ip_reservation_rollback_quota_limit_total();
|
||||
}
|
||||
return Err(ProxyError::DataQuotaExceeded {
|
||||
user: user.to_string(),
|
||||
});
|
||||
|
||||
@@ -5,14 +5,19 @@ use std::sync::Arc;
|
||||
|
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::sync::watch;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::error::Result;
|
||||
use crate::error::{ProxyError, Result};
|
||||
use crate::protocol::constants::*;
|
||||
use crate::proxy::handshake::{HandshakeSuccess, encrypt_tg_nonce_with_ciphers, generate_tg_nonce};
|
||||
use crate::proxy::relay::relay_bidirectional;
|
||||
use crate::proxy::route_mode::{
|
||||
RelayRouteMode, RouteCutoverState, ROUTE_SWITCH_ERROR_MSG, affected_cutover_state,
|
||||
cutover_stagger_delay,
|
||||
};
|
||||
use crate::stats::Stats;
|
||||
use crate::stream::{BufferPool, CryptoReader, CryptoWriter};
|
||||
use crate::transport::UpstreamManager;
|
||||
@@ -26,6 +31,9 @@ pub(crate) async fn handle_via_direct<R, W>(
|
||||
config: Arc<ProxyConfig>,
|
||||
buffer_pool: Arc<BufferPool>,
|
||||
rng: Arc<SecureRandom>,
|
||||
mut route_rx: watch::Receiver<RouteCutoverState>,
|
||||
route_snapshot: RouteCutoverState,
|
||||
session_id: u64,
|
||||
) -> Result<()>
|
||||
where
|
||||
R: AsyncRead + Unpin + Send + 'static,
|
||||
@@ -34,7 +42,7 @@ where
|
||||
let user = &success.user;
|
||||
let dc_addr = get_dc_addr_static(success.dc_idx, &config)?;
|
||||
|
||||
info!(
|
||||
debug!(
|
||||
user = %user,
|
||||
peer = %success.peer,
|
||||
dc = success.dc_idx,
|
||||
@@ -57,18 +65,50 @@ where
|
||||
|
||||
stats.increment_user_connects(user);
|
||||
stats.increment_user_curr_connects(user);
|
||||
stats.increment_current_connections_direct();
|
||||
|
||||
let relay_result = relay_bidirectional(
|
||||
client_reader,
|
||||
client_writer,
|
||||
tg_reader,
|
||||
tg_writer,
|
||||
config.general.direct_relay_copy_buf_c2s_bytes,
|
||||
config.general.direct_relay_copy_buf_s2c_bytes,
|
||||
user,
|
||||
Arc::clone(&stats),
|
||||
buffer_pool,
|
||||
)
|
||||
.await;
|
||||
);
|
||||
tokio::pin!(relay_result);
|
||||
let relay_result = loop {
|
||||
if let Some(cutover) = affected_cutover_state(
|
||||
&route_rx,
|
||||
RelayRouteMode::Direct,
|
||||
route_snapshot.generation,
|
||||
) {
|
||||
let delay = cutover_stagger_delay(session_id, cutover.generation);
|
||||
warn!(
|
||||
user = %user,
|
||||
target_mode = cutover.mode.as_str(),
|
||||
cutover_generation = cutover.generation,
|
||||
delay_ms = delay.as_millis() as u64,
|
||||
"Cutover affected direct session, closing client connection"
|
||||
);
|
||||
tokio::time::sleep(delay).await;
|
||||
break Err(ProxyError::Proxy(ROUTE_SWITCH_ERROR_MSG.to_string()));
|
||||
}
|
||||
tokio::select! {
|
||||
result = &mut relay_result => {
|
||||
break result;
|
||||
}
|
||||
changed = route_rx.changed() => {
|
||||
if changed.is_err() {
|
||||
break relay_result.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
stats.decrement_current_connections_direct();
|
||||
stats.decrement_user_curr_connects(user);
|
||||
|
||||
match &relay_result {
|
||||
@@ -118,10 +158,16 @@ fn get_dc_addr_static(dc_idx: i16, config: &ProxyConfig) -> Result<SocketAddr> {
|
||||
// Unknown DC requested by client without override: log and fall back.
|
||||
if !config.dc_overrides.contains_key(&dc_key) {
|
||||
warn!(dc_idx = dc_idx, "Requested non-standard DC with no override; falling back to default cluster");
|
||||
if let Some(path) = &config.general.unknown_dc_log_path
|
||||
&& let Ok(mut file) = OpenOptions::new().create(true).append(true).open(path)
|
||||
if config.general.unknown_dc_file_log_enabled
|
||||
&& let Some(path) = &config.general.unknown_dc_log_path
|
||||
&& let Ok(handle) = tokio::runtime::Handle::try_current()
|
||||
{
|
||||
let _ = writeln!(file, "dc_idx={dc_idx}");
|
||||
let path = path.clone();
|
||||
handle.spawn_blocking(move || {
|
||||
if let Ok(mut file) = OpenOptions::new().create(true).append(true).open(path) {
|
||||
let _ = writeln!(file, "dc_idx={dc_idx}");
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
|
||||
use tracing::{debug, warn, trace, info};
|
||||
use tracing::{debug, warn, trace};
|
||||
use zeroize::Zeroize;
|
||||
|
||||
use crate::crypto::{sha256, AesCtr, SecureRandom};
|
||||
@@ -19,6 +19,31 @@ use crate::stats::ReplayChecker;
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::tls_front::{TlsFrontCache, emulator};
|
||||
|
||||
fn decode_user_secrets(
|
||||
config: &ProxyConfig,
|
||||
preferred_user: Option<&str>,
|
||||
) -> Vec<(String, Vec<u8>)> {
|
||||
let mut secrets = Vec::with_capacity(config.access.users.len());
|
||||
|
||||
if let Some(preferred) = preferred_user
|
||||
&& let Some(secret_hex) = config.access.users.get(preferred)
|
||||
&& let Ok(bytes) = hex::decode(secret_hex)
|
||||
{
|
||||
secrets.push((preferred.to_string(), bytes));
|
||||
}
|
||||
|
||||
for (name, secret_hex) in &config.access.users {
|
||||
if preferred_user.is_some_and(|preferred| preferred == name.as_str()) {
|
||||
continue;
|
||||
}
|
||||
if let Ok(bytes) = hex::decode(secret_hex) {
|
||||
secrets.push((name.clone(), bytes));
|
||||
}
|
||||
}
|
||||
|
||||
secrets
|
||||
}
|
||||
|
||||
/// Result of successful handshake
|
||||
///
|
||||
/// Key material (`dec_key`, `dec_iv`, `enc_key`, `enc_iv`) is
|
||||
@@ -82,11 +107,7 @@ where
|
||||
return HandshakeResult::BadClient { reader, writer };
|
||||
}
|
||||
|
||||
let secrets: Vec<(String, Vec<u8>)> = config.access.users.iter()
|
||||
.filter_map(|(name, hex)| {
|
||||
hex::decode(hex).ok().map(|bytes| (name.clone(), bytes))
|
||||
})
|
||||
.collect();
|
||||
let secrets = decode_user_secrets(config, None);
|
||||
|
||||
let validation = match tls::validate_tls_handshake(
|
||||
handshake,
|
||||
@@ -201,7 +222,7 @@ where
|
||||
return HandshakeResult::Error(ProxyError::Io(e));
|
||||
}
|
||||
|
||||
info!(
|
||||
debug!(
|
||||
peer = %peer,
|
||||
user = %validation.user,
|
||||
"TLS handshake successful"
|
||||
@@ -223,6 +244,7 @@ pub async fn handle_mtproto_handshake<R, W>(
|
||||
config: &ProxyConfig,
|
||||
replay_checker: &ReplayChecker,
|
||||
is_tls: bool,
|
||||
preferred_user: Option<&str>,
|
||||
) -> HandshakeResult<(CryptoReader<R>, CryptoWriter<W>, HandshakeSuccess), R, W>
|
||||
where
|
||||
R: AsyncRead + Unpin + Send,
|
||||
@@ -239,11 +261,9 @@ where
|
||||
|
||||
let enc_prekey_iv: Vec<u8> = dec_prekey_iv.iter().rev().copied().collect();
|
||||
|
||||
for (user, secret_hex) in &config.access.users {
|
||||
let secret = match hex::decode(secret_hex) {
|
||||
Ok(s) => s,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let decoded_users = decode_user_secrets(config, preferred_user);
|
||||
|
||||
for (user, secret) in decoded_users {
|
||||
|
||||
let dec_prekey = &dec_prekey_iv[..PREKEY_LEN];
|
||||
let dec_iv_bytes = &dec_prekey_iv[PREKEY_LEN..];
|
||||
@@ -311,7 +331,7 @@ where
|
||||
is_tls,
|
||||
};
|
||||
|
||||
info!(
|
||||
debug!(
|
||||
peer = %peer,
|
||||
user = %user,
|
||||
dc = dc_idx,
|
||||
|
||||
@@ -6,29 +6,36 @@ use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex, OnceLock};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use bytes::Bytes;
|
||||
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
use tracing::{debug, info, trace, warn};
|
||||
use tokio::sync::{mpsc, oneshot, watch};
|
||||
use tracing::{debug, trace, warn};
|
||||
|
||||
use crate::config::ProxyConfig;
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::error::{ProxyError, Result};
|
||||
use crate::protocol::constants::{*, secure_padding_len};
|
||||
use crate::proxy::handshake::HandshakeSuccess;
|
||||
use crate::proxy::route_mode::{
|
||||
RelayRouteMode, RouteCutoverState, ROUTE_SWITCH_ERROR_MSG, affected_cutover_state,
|
||||
cutover_stagger_delay,
|
||||
};
|
||||
use crate::stats::Stats;
|
||||
use crate::stream::{BufferPool, CryptoReader, CryptoWriter};
|
||||
use crate::transport::middle_proxy::{MePool, MeResponse, proto_flags_for_tag};
|
||||
|
||||
enum C2MeCommand {
|
||||
Data { payload: Vec<u8>, flags: u32 },
|
||||
Data { payload: Bytes, flags: u32 },
|
||||
Close,
|
||||
}
|
||||
|
||||
const DESYNC_DEDUP_WINDOW: Duration = Duration::from_secs(60);
|
||||
const DESYNC_ERROR_CLASS: &str = "frame_too_large_crypto_desync";
|
||||
const C2ME_CHANNEL_CAPACITY: usize = 1024;
|
||||
const C2ME_CHANNEL_CAPACITY_FALLBACK: usize = 128;
|
||||
const C2ME_SOFT_PRESSURE_MIN_FREE_SLOTS: usize = 64;
|
||||
const C2ME_SENDER_FAIRNESS_BUDGET: usize = 32;
|
||||
const ME_D2C_FLUSH_BATCH_MAX_FRAMES_MIN: usize = 1;
|
||||
const ME_D2C_FLUSH_BATCH_MAX_BYTES_MIN: usize = 4096;
|
||||
static DESYNC_DEDUP: OnceLock<Mutex<HashMap<u64, Instant>>> = OnceLock::new();
|
||||
|
||||
struct RelayForensicsState {
|
||||
@@ -43,6 +50,31 @@ struct RelayForensicsState {
|
||||
desync_all_full: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
struct MeD2cFlushPolicy {
|
||||
max_frames: usize,
|
||||
max_bytes: usize,
|
||||
max_delay: Duration,
|
||||
ack_flush_immediate: bool,
|
||||
}
|
||||
|
||||
impl MeD2cFlushPolicy {
|
||||
fn from_config(config: &ProxyConfig) -> Self {
|
||||
Self {
|
||||
max_frames: config
|
||||
.general
|
||||
.me_d2c_flush_batch_max_frames
|
||||
.max(ME_D2C_FLUSH_BATCH_MAX_FRAMES_MIN),
|
||||
max_bytes: config
|
||||
.general
|
||||
.me_d2c_flush_batch_max_bytes
|
||||
.max(ME_D2C_FLUSH_BATCH_MAX_BYTES_MIN),
|
||||
max_delay: Duration::from_micros(config.general.me_d2c_flush_batch_max_delay_us),
|
||||
ack_flush_immediate: config.general.me_d2c_ack_flush_immediate,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn hash_value<T: Hash>(value: &T) -> u64 {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
value.hash(&mut hasher);
|
||||
@@ -200,6 +232,9 @@ pub(crate) async fn handle_via_middle_proxy<R, W>(
|
||||
_buffer_pool: Arc<BufferPool>,
|
||||
local_addr: SocketAddr,
|
||||
rng: Arc<SecureRandom>,
|
||||
mut route_rx: watch::Receiver<RouteCutoverState>,
|
||||
route_snapshot: RouteCutoverState,
|
||||
session_id: u64,
|
||||
) -> Result<()>
|
||||
where
|
||||
R: AsyncRead + Unpin + Send + 'static,
|
||||
@@ -210,7 +245,7 @@ where
|
||||
let proto_tag = success.proto_tag;
|
||||
let pool_generation = me_pool.current_generation();
|
||||
|
||||
info!(
|
||||
debug!(
|
||||
user = %user,
|
||||
peer = %peer,
|
||||
dc = success.dc_idx,
|
||||
@@ -237,6 +272,28 @@ where
|
||||
|
||||
stats.increment_user_connects(&user);
|
||||
stats.increment_user_curr_connects(&user);
|
||||
stats.increment_current_connections_me();
|
||||
|
||||
if let Some(cutover) = affected_cutover_state(
|
||||
&route_rx,
|
||||
RelayRouteMode::Middle,
|
||||
route_snapshot.generation,
|
||||
) {
|
||||
let delay = cutover_stagger_delay(session_id, cutover.generation);
|
||||
warn!(
|
||||
conn_id,
|
||||
target_mode = cutover.mode.as_str(),
|
||||
cutover_generation = cutover.generation,
|
||||
delay_ms = delay.as_millis() as u64,
|
||||
"Cutover affected middle session before relay start, closing client connection"
|
||||
);
|
||||
tokio::time::sleep(delay).await;
|
||||
let _ = me_pool.send_close(conn_id).await;
|
||||
me_pool.registry().unregister(conn_id).await;
|
||||
stats.decrement_current_connections_me();
|
||||
stats.decrement_user_curr_connects(&user);
|
||||
return Err(ProxyError::Proxy(ROUTE_SWITCH_ERROR_MSG.to_string()));
|
||||
}
|
||||
|
||||
// Per-user ad_tag from access.user_ad_tags; fallback to general.ad_tag (hot-reloadable)
|
||||
let user_tag: Option<Vec<u8>> = config
|
||||
@@ -269,7 +326,11 @@ where
|
||||
|
||||
let frame_limit = config.general.max_client_frame;
|
||||
|
||||
let (c2me_tx, mut c2me_rx) = mpsc::channel::<C2MeCommand>(C2ME_CHANNEL_CAPACITY);
|
||||
let c2me_channel_capacity = config
|
||||
.general
|
||||
.me_c2me_channel_capacity
|
||||
.max(C2ME_CHANNEL_CAPACITY_FALLBACK);
|
||||
let (c2me_tx, mut c2me_rx) = mpsc::channel::<C2MeCommand>(c2me_channel_capacity);
|
||||
let me_pool_c2me = me_pool.clone();
|
||||
let effective_tag = effective_tag;
|
||||
let c2me_sender = tokio::spawn(async move {
|
||||
@@ -282,7 +343,7 @@ where
|
||||
success.dc_idx,
|
||||
peer,
|
||||
translated_local_addr,
|
||||
&payload,
|
||||
payload.as_ref(),
|
||||
flags,
|
||||
effective_tag.as_deref(),
|
||||
).await?;
|
||||
@@ -307,71 +368,152 @@ where
|
||||
let rng_clone = rng.clone();
|
||||
let user_clone = user.clone();
|
||||
let bytes_me2c_clone = bytes_me2c.clone();
|
||||
let d2c_flush_policy = MeD2cFlushPolicy::from_config(&config);
|
||||
let me_writer = tokio::spawn(async move {
|
||||
let mut writer = crypto_writer;
|
||||
let mut frame_buf = Vec::with_capacity(16 * 1024);
|
||||
loop {
|
||||
tokio::select! {
|
||||
msg = me_rx_task.recv() => {
|
||||
match msg {
|
||||
Some(MeResponse::Data { flags, data }) => {
|
||||
trace!(conn_id, bytes = data.len(), flags, "ME->C data");
|
||||
bytes_me2c_clone.fetch_add(data.len() as u64, Ordering::Relaxed);
|
||||
stats_clone.add_user_octets_to(&user_clone, data.len() as u64);
|
||||
write_client_payload(
|
||||
&mut writer,
|
||||
proto_tag,
|
||||
flags,
|
||||
&data,
|
||||
rng_clone.as_ref(),
|
||||
&mut frame_buf,
|
||||
)
|
||||
.await?;
|
||||
let Some(first) = msg else {
|
||||
debug!(conn_id, "ME channel closed");
|
||||
return Err(ProxyError::Proxy("ME connection lost".into()));
|
||||
};
|
||||
|
||||
// Drain all immediately queued ME responses and flush once.
|
||||
while let Ok(next) = me_rx_task.try_recv() {
|
||||
match next {
|
||||
MeResponse::Data { flags, data } => {
|
||||
trace!(conn_id, bytes = data.len(), flags, "ME->C data (batched)");
|
||||
bytes_me2c_clone.fetch_add(data.len() as u64, Ordering::Relaxed);
|
||||
stats_clone.add_user_octets_to(&user_clone, data.len() as u64);
|
||||
write_client_payload(
|
||||
&mut writer,
|
||||
proto_tag,
|
||||
flags,
|
||||
&data,
|
||||
rng_clone.as_ref(),
|
||||
&mut frame_buf,
|
||||
).await?;
|
||||
let mut batch_frames = 0usize;
|
||||
let mut batch_bytes = 0usize;
|
||||
let mut flush_immediately;
|
||||
|
||||
match process_me_writer_response(
|
||||
first,
|
||||
&mut writer,
|
||||
proto_tag,
|
||||
rng_clone.as_ref(),
|
||||
&mut frame_buf,
|
||||
stats_clone.as_ref(),
|
||||
&user_clone,
|
||||
bytes_me2c_clone.as_ref(),
|
||||
conn_id,
|
||||
d2c_flush_policy.ack_flush_immediate,
|
||||
false,
|
||||
).await? {
|
||||
MeWriterResponseOutcome::Continue { frames, bytes, flush_immediately: immediate } => {
|
||||
batch_frames = batch_frames.saturating_add(frames);
|
||||
batch_bytes = batch_bytes.saturating_add(bytes);
|
||||
flush_immediately = immediate;
|
||||
}
|
||||
MeWriterResponseOutcome::Close => {
|
||||
let _ = writer.flush().await;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
while !flush_immediately
|
||||
&& batch_frames < d2c_flush_policy.max_frames
|
||||
&& batch_bytes < d2c_flush_policy.max_bytes
|
||||
{
|
||||
let Ok(next) = me_rx_task.try_recv() else {
|
||||
break;
|
||||
};
|
||||
|
||||
match process_me_writer_response(
|
||||
next,
|
||||
&mut writer,
|
||||
proto_tag,
|
||||
rng_clone.as_ref(),
|
||||
&mut frame_buf,
|
||||
stats_clone.as_ref(),
|
||||
&user_clone,
|
||||
bytes_me2c_clone.as_ref(),
|
||||
conn_id,
|
||||
d2c_flush_policy.ack_flush_immediate,
|
||||
true,
|
||||
).await? {
|
||||
MeWriterResponseOutcome::Continue { frames, bytes, flush_immediately: immediate } => {
|
||||
batch_frames = batch_frames.saturating_add(frames);
|
||||
batch_bytes = batch_bytes.saturating_add(bytes);
|
||||
flush_immediately |= immediate;
|
||||
}
|
||||
MeWriterResponseOutcome::Close => {
|
||||
let _ = writer.flush().await;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !flush_immediately
|
||||
&& !d2c_flush_policy.max_delay.is_zero()
|
||||
&& batch_frames < d2c_flush_policy.max_frames
|
||||
&& batch_bytes < d2c_flush_policy.max_bytes
|
||||
{
|
||||
match tokio::time::timeout(d2c_flush_policy.max_delay, me_rx_task.recv()).await {
|
||||
Ok(Some(next)) => {
|
||||
match process_me_writer_response(
|
||||
next,
|
||||
&mut writer,
|
||||
proto_tag,
|
||||
rng_clone.as_ref(),
|
||||
&mut frame_buf,
|
||||
stats_clone.as_ref(),
|
||||
&user_clone,
|
||||
bytes_me2c_clone.as_ref(),
|
||||
conn_id,
|
||||
d2c_flush_policy.ack_flush_immediate,
|
||||
true,
|
||||
).await? {
|
||||
MeWriterResponseOutcome::Continue { frames, bytes, flush_immediately: immediate } => {
|
||||
batch_frames = batch_frames.saturating_add(frames);
|
||||
batch_bytes = batch_bytes.saturating_add(bytes);
|
||||
flush_immediately |= immediate;
|
||||
}
|
||||
MeResponse::Ack(confirm) => {
|
||||
trace!(conn_id, confirm, "ME->C quickack (batched)");
|
||||
write_client_ack(&mut writer, proto_tag, confirm).await?;
|
||||
}
|
||||
MeResponse::Close => {
|
||||
debug!(conn_id, "ME sent close (batched)");
|
||||
MeWriterResponseOutcome::Close => {
|
||||
let _ = writer.flush().await;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writer.flush().await.map_err(ProxyError::Io)?;
|
||||
}
|
||||
Some(MeResponse::Ack(confirm)) => {
|
||||
trace!(conn_id, confirm, "ME->C quickack");
|
||||
write_client_ack(&mut writer, proto_tag, confirm).await?;
|
||||
}
|
||||
Some(MeResponse::Close) => {
|
||||
debug!(conn_id, "ME sent close");
|
||||
let _ = writer.flush().await;
|
||||
return Ok(());
|
||||
}
|
||||
None => {
|
||||
debug!(conn_id, "ME channel closed");
|
||||
return Err(ProxyError::Proxy("ME connection lost".into()));
|
||||
while !flush_immediately
|
||||
&& batch_frames < d2c_flush_policy.max_frames
|
||||
&& batch_bytes < d2c_flush_policy.max_bytes
|
||||
{
|
||||
let Ok(extra) = me_rx_task.try_recv() else {
|
||||
break;
|
||||
};
|
||||
|
||||
match process_me_writer_response(
|
||||
extra,
|
||||
&mut writer,
|
||||
proto_tag,
|
||||
rng_clone.as_ref(),
|
||||
&mut frame_buf,
|
||||
stats_clone.as_ref(),
|
||||
&user_clone,
|
||||
bytes_me2c_clone.as_ref(),
|
||||
conn_id,
|
||||
d2c_flush_policy.ack_flush_immediate,
|
||||
true,
|
||||
).await? {
|
||||
MeWriterResponseOutcome::Continue { frames, bytes, flush_immediately: immediate } => {
|
||||
batch_frames = batch_frames.saturating_add(frames);
|
||||
batch_bytes = batch_bytes.saturating_add(bytes);
|
||||
flush_immediately |= immediate;
|
||||
}
|
||||
MeWriterResponseOutcome::Close => {
|
||||
let _ = writer.flush().await;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
debug!(conn_id, "ME channel closed");
|
||||
return Err(ProxyError::Proxy("ME connection lost".into()));
|
||||
}
|
||||
Err(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
writer.flush().await.map_err(ProxyError::Io)?;
|
||||
}
|
||||
_ = &mut stop_rx => {
|
||||
debug!(conn_id, "ME writer stop signal");
|
||||
@@ -384,46 +526,75 @@ where
|
||||
let mut main_result: Result<()> = Ok(());
|
||||
let mut client_closed = false;
|
||||
let mut frame_counter: u64 = 0;
|
||||
let mut route_watch_open = true;
|
||||
loop {
|
||||
match read_client_payload(
|
||||
&mut crypto_reader,
|
||||
proto_tag,
|
||||
frame_limit,
|
||||
&forensics,
|
||||
&mut frame_counter,
|
||||
&stats,
|
||||
).await {
|
||||
Ok(Some((payload, quickack))) => {
|
||||
trace!(conn_id, bytes = payload.len(), "C->ME frame");
|
||||
forensics.bytes_c2me = forensics
|
||||
.bytes_c2me
|
||||
.saturating_add(payload.len() as u64);
|
||||
stats.add_user_octets_from(&user, payload.len() as u64);
|
||||
let mut flags = proto_flags;
|
||||
if quickack {
|
||||
flags |= RPC_FLAG_QUICKACK;
|
||||
}
|
||||
if payload.len() >= 8 && payload[..8].iter().all(|b| *b == 0) {
|
||||
flags |= RPC_FLAG_NOT_ENCRYPTED;
|
||||
}
|
||||
// Keep client read loop lightweight: route heavy ME send path via a dedicated task.
|
||||
if enqueue_c2me_command(&c2me_tx, C2MeCommand::Data { payload, flags })
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
main_result = Err(ProxyError::Proxy("ME sender channel closed".into()));
|
||||
break;
|
||||
if let Some(cutover) = affected_cutover_state(
|
||||
&route_rx,
|
||||
RelayRouteMode::Middle,
|
||||
route_snapshot.generation,
|
||||
) {
|
||||
let delay = cutover_stagger_delay(session_id, cutover.generation);
|
||||
warn!(
|
||||
conn_id,
|
||||
target_mode = cutover.mode.as_str(),
|
||||
cutover_generation = cutover.generation,
|
||||
delay_ms = delay.as_millis() as u64,
|
||||
"Cutover affected middle session, closing client connection"
|
||||
);
|
||||
tokio::time::sleep(delay).await;
|
||||
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close).await;
|
||||
main_result = Err(ProxyError::Proxy(ROUTE_SWITCH_ERROR_MSG.to_string()));
|
||||
break;
|
||||
}
|
||||
|
||||
tokio::select! {
|
||||
changed = route_rx.changed(), if route_watch_open => {
|
||||
if changed.is_err() {
|
||||
route_watch_open = false;
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
debug!(conn_id, "Client EOF");
|
||||
client_closed = true;
|
||||
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close).await;
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
main_result = Err(e);
|
||||
break;
|
||||
payload_result = read_client_payload(
|
||||
&mut crypto_reader,
|
||||
proto_tag,
|
||||
frame_limit,
|
||||
&forensics,
|
||||
&mut frame_counter,
|
||||
&stats,
|
||||
) => {
|
||||
match payload_result {
|
||||
Ok(Some((payload, quickack))) => {
|
||||
trace!(conn_id, bytes = payload.len(), "C->ME frame");
|
||||
forensics.bytes_c2me = forensics
|
||||
.bytes_c2me
|
||||
.saturating_add(payload.len() as u64);
|
||||
stats.add_user_octets_from(&user, payload.len() as u64);
|
||||
let mut flags = proto_flags;
|
||||
if quickack {
|
||||
flags |= RPC_FLAG_QUICKACK;
|
||||
}
|
||||
if payload.len() >= 8 && payload[..8].iter().all(|b| *b == 0) {
|
||||
flags |= RPC_FLAG_NOT_ENCRYPTED;
|
||||
}
|
||||
// Keep client read loop lightweight: route heavy ME send path via a dedicated task.
|
||||
if enqueue_c2me_command(&c2me_tx, C2MeCommand::Data { payload, flags })
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
main_result = Err(ProxyError::Proxy("ME sender channel closed".into()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
debug!(conn_id, "Client EOF");
|
||||
client_closed = true;
|
||||
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close).await;
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
main_result = Err(e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -466,6 +637,7 @@ where
|
||||
"ME relay cleanup"
|
||||
);
|
||||
me_pool.registry().unregister(conn_id).await;
|
||||
stats.decrement_current_connections_me();
|
||||
stats.decrement_user_curr_connects(&user);
|
||||
result
|
||||
}
|
||||
@@ -477,7 +649,7 @@ async fn read_client_payload<R>(
|
||||
forensics: &RelayForensicsState,
|
||||
frame_counter: &mut u64,
|
||||
stats: &Stats,
|
||||
) -> Result<Option<(Vec<u8>, bool)>>
|
||||
) -> Result<Option<(Bytes, bool)>>
|
||||
where
|
||||
R: AsyncRead + Unpin + Send + 'static,
|
||||
{
|
||||
@@ -576,7 +748,82 @@ where
|
||||
payload.truncate(secure_payload_len);
|
||||
}
|
||||
*frame_counter += 1;
|
||||
return Ok(Some((payload, quickack)));
|
||||
return Ok(Some((Bytes::from(payload), quickack)));
|
||||
}
|
||||
}
|
||||
|
||||
/// Outcome of handling one ME response on the client-writer side.
enum MeWriterResponseOutcome {
    /// The response was written to the client; the caller may keep going.
    Continue {
        /// Number of frames accounted for by this response.
        frames: usize,
        /// Number of bytes accounted for by this response.
        bytes: usize,
        /// Whether the caller should flush now instead of batching further.
        flush_immediately: bool,
    },
    /// The ME side asked for the connection to be closed.
    Close,
}
|
||||
|
||||
async fn process_me_writer_response<W>(
|
||||
response: MeResponse,
|
||||
client_writer: &mut CryptoWriter<W>,
|
||||
proto_tag: ProtoTag,
|
||||
rng: &SecureRandom,
|
||||
frame_buf: &mut Vec<u8>,
|
||||
stats: &Stats,
|
||||
user: &str,
|
||||
bytes_me2c: &AtomicU64,
|
||||
conn_id: u64,
|
||||
ack_flush_immediate: bool,
|
||||
batched: bool,
|
||||
) -> Result<MeWriterResponseOutcome>
|
||||
where
|
||||
W: AsyncWrite + Unpin + Send + 'static,
|
||||
{
|
||||
match response {
|
||||
MeResponse::Data { flags, data } => {
|
||||
if batched {
|
||||
trace!(conn_id, bytes = data.len(), flags, "ME->C data (batched)");
|
||||
} else {
|
||||
trace!(conn_id, bytes = data.len(), flags, "ME->C data");
|
||||
}
|
||||
bytes_me2c.fetch_add(data.len() as u64, Ordering::Relaxed);
|
||||
stats.add_user_octets_to(user, data.len() as u64);
|
||||
write_client_payload(
|
||||
client_writer,
|
||||
proto_tag,
|
||||
flags,
|
||||
&data,
|
||||
rng,
|
||||
frame_buf,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(MeWriterResponseOutcome::Continue {
|
||||
frames: 1,
|
||||
bytes: data.len(),
|
||||
flush_immediately: false,
|
||||
})
|
||||
}
|
||||
MeResponse::Ack(confirm) => {
|
||||
if batched {
|
||||
trace!(conn_id, confirm, "ME->C quickack (batched)");
|
||||
} else {
|
||||
trace!(conn_id, confirm, "ME->C quickack");
|
||||
}
|
||||
write_client_ack(client_writer, proto_tag, confirm).await?;
|
||||
|
||||
Ok(MeWriterResponseOutcome::Continue {
|
||||
frames: 1,
|
||||
bytes: 4,
|
||||
flush_immediately: ack_flush_immediate,
|
||||
})
|
||||
}
|
||||
MeResponse::Close => {
|
||||
if batched {
|
||||
debug!(conn_id, "ME sent close (batched)");
|
||||
} else {
|
||||
debug!(conn_id, "ME sent close");
|
||||
}
|
||||
Ok(MeWriterResponseOutcome::Close)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -689,9 +936,7 @@ where
|
||||
client_writer
|
||||
.write_all(&bytes)
|
||||
.await
|
||||
.map_err(ProxyError::Io)?;
|
||||
// ACK should remain low-latency.
|
||||
client_writer.flush().await.map_err(ProxyError::Io)
|
||||
.map_err(ProxyError::Io)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -713,7 +958,7 @@ mod tests {
|
||||
enqueue_c2me_command(
|
||||
&tx,
|
||||
C2MeCommand::Data {
|
||||
payload: vec![1, 2, 3],
|
||||
payload: Bytes::from_static(&[1, 2, 3]),
|
||||
flags: 0,
|
||||
},
|
||||
)
|
||||
@@ -726,7 +971,7 @@ mod tests {
|
||||
.unwrap();
|
||||
match recv {
|
||||
C2MeCommand::Data { payload, flags } => {
|
||||
assert_eq!(payload, vec![1, 2, 3]);
|
||||
assert_eq!(payload.as_ref(), &[1, 2, 3]);
|
||||
assert_eq!(flags, 0);
|
||||
}
|
||||
C2MeCommand::Close => panic!("unexpected close command"),
|
||||
@@ -737,7 +982,7 @@ mod tests {
|
||||
async fn enqueue_c2me_command_falls_back_to_send_when_queue_is_full() {
|
||||
let (tx, mut rx) = mpsc::channel::<C2MeCommand>(1);
|
||||
tx.send(C2MeCommand::Data {
|
||||
payload: vec![9],
|
||||
payload: Bytes::from_static(&[9]),
|
||||
flags: 9,
|
||||
})
|
||||
.await
|
||||
@@ -748,7 +993,7 @@ mod tests {
|
||||
enqueue_c2me_command(
|
||||
&tx2,
|
||||
C2MeCommand::Data {
|
||||
payload: vec![7, 7],
|
||||
payload: Bytes::from_static(&[7, 7]),
|
||||
flags: 7,
|
||||
},
|
||||
)
|
||||
@@ -767,7 +1012,7 @@ mod tests {
|
||||
.unwrap();
|
||||
match recv {
|
||||
C2MeCommand::Data { payload, flags } => {
|
||||
assert_eq!(payload, vec![7, 7]);
|
||||
assert_eq!(payload.as_ref(), &[7, 7]);
|
||||
assert_eq!(flags, 7);
|
||||
}
|
||||
C2MeCommand::Close => panic!("unexpected close command"),
|
||||
|
||||
@@ -5,6 +5,7 @@ pub mod direct_relay;
|
||||
pub mod handshake;
|
||||
pub mod masking;
|
||||
pub mod middle_relay;
|
||||
pub mod route_mode;
|
||||
pub mod relay;
|
||||
|
||||
pub use client::ClientHandler;
|
||||
|
||||
@@ -57,7 +57,9 @@ use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::task::{Context, Poll};
|
||||
use std::time::Duration;
|
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, ReadBuf, copy_bidirectional};
|
||||
use tokio::io::{
|
||||
AsyncRead, AsyncWrite, AsyncWriteExt, ReadBuf, copy_bidirectional_with_sizes,
|
||||
};
|
||||
use tokio::time::Instant;
|
||||
use tracing::{debug, trace, warn};
|
||||
use crate::error::Result;
|
||||
@@ -296,9 +298,8 @@ impl<S: AsyncWrite + Unpin> AsyncWrite for StatsIo<S> {
|
||||
///
|
||||
/// ## API compatibility
|
||||
///
|
||||
/// Signature is identical to the previous implementation. The `_buffer_pool`
|
||||
/// parameter is retained for call-site compatibility — `copy_bidirectional`
|
||||
/// manages its own internal buffers (8 KB per direction).
|
||||
/// The `_buffer_pool` parameter is retained for call-site compatibility.
|
||||
/// Effective relay copy buffers are configured by `c2s_buf_size` / `s2c_buf_size`.
|
||||
///
|
||||
/// ## Guarantees preserved
|
||||
///
|
||||
@@ -312,6 +313,8 @@ pub async fn relay_bidirectional<CR, CW, SR, SW>(
|
||||
client_writer: CW,
|
||||
server_reader: SR,
|
||||
server_writer: SW,
|
||||
c2s_buf_size: usize,
|
||||
s2c_buf_size: usize,
|
||||
user: &str,
|
||||
stats: Arc<Stats>,
|
||||
_buffer_pool: Arc<BufferPool>,
|
||||
@@ -402,7 +405,12 @@ where
|
||||
// When the watchdog fires, select! drops the copy future,
|
||||
// releasing the &mut borrows on client and server.
|
||||
let copy_result = tokio::select! {
|
||||
result = copy_bidirectional(&mut client, &mut server) => Some(result),
|
||||
result = copy_bidirectional_with_sizes(
|
||||
&mut client,
|
||||
&mut server,
|
||||
c2s_buf_size.max(1),
|
||||
s2c_buf_size.max(1),
|
||||
) => Some(result),
|
||||
_ = watchdog => None, // Activity timeout — cancel relay
|
||||
};
|
||||
|
||||
@@ -463,4 +471,4 @@ where
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
142
src/proxy/route_mode.rs
Normal file
142
src/proxy/route_mode.rs
Normal file
@@ -0,0 +1,142 @@
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicU8, AtomicU64, Ordering};
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use tokio::sync::watch;
|
||||
|
||||
pub(crate) const ROUTE_SWITCH_ERROR_MSG: &str = "Route mode switched by cutover";
|
||||
|
||||
/// Which relay path is in effect, stored as a single byte.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub(crate) enum RelayRouteMode {
    Direct = 0,
    Middle = 1,
}

impl RelayRouteMode {
    /// Returns the `u8` discriminant of this mode.
    pub(crate) fn as_u8(self) -> u8 {
        self as u8
    }

    /// Decodes a raw byte; any value other than `1` maps to `Direct`.
    pub(crate) fn from_u8(value: u8) -> Self {
        if value == 1 {
            Self::Middle
        } else {
            Self::Direct
        }
    }

    /// Returns the lowercase label for this mode.
    pub(crate) fn as_str(self) -> &'static str {
        let label = match self {
            Self::Middle => "middle",
            Self::Direct => "direct",
        };
        label
    }
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct RouteCutoverState {
|
||||
pub mode: RelayRouteMode,
|
||||
pub generation: u64,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct RouteRuntimeController {
|
||||
mode: Arc<AtomicU8>,
|
||||
generation: Arc<AtomicU64>,
|
||||
direct_since_epoch_secs: Arc<AtomicU64>,
|
||||
tx: watch::Sender<RouteCutoverState>,
|
||||
}
|
||||
|
||||
impl RouteRuntimeController {
|
||||
pub(crate) fn new(initial_mode: RelayRouteMode) -> Self {
|
||||
let initial = RouteCutoverState {
|
||||
mode: initial_mode,
|
||||
generation: 0,
|
||||
};
|
||||
let (tx, _rx) = watch::channel(initial);
|
||||
let direct_since_epoch_secs = if matches!(initial_mode, RelayRouteMode::Direct) {
|
||||
now_epoch_secs()
|
||||
} else {
|
||||
0
|
||||
};
|
||||
Self {
|
||||
mode: Arc::new(AtomicU8::new(initial_mode.as_u8())),
|
||||
generation: Arc::new(AtomicU64::new(0)),
|
||||
direct_since_epoch_secs: Arc::new(AtomicU64::new(direct_since_epoch_secs)),
|
||||
tx,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn snapshot(&self) -> RouteCutoverState {
|
||||
RouteCutoverState {
|
||||
mode: RelayRouteMode::from_u8(self.mode.load(Ordering::Relaxed)),
|
||||
generation: self.generation.load(Ordering::Relaxed),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn subscribe(&self) -> watch::Receiver<RouteCutoverState> {
|
||||
self.tx.subscribe()
|
||||
}
|
||||
|
||||
pub(crate) fn direct_since_epoch_secs(&self) -> Option<u64> {
|
||||
let value = self.direct_since_epoch_secs.load(Ordering::Relaxed);
|
||||
(value > 0).then_some(value)
|
||||
}
|
||||
|
||||
pub(crate) fn set_mode(&self, mode: RelayRouteMode) -> Option<RouteCutoverState> {
|
||||
let previous = self.mode.swap(mode.as_u8(), Ordering::Relaxed);
|
||||
if previous == mode.as_u8() {
|
||||
return None;
|
||||
}
|
||||
if matches!(mode, RelayRouteMode::Direct) {
|
||||
self.direct_since_epoch_secs
|
||||
.store(now_epoch_secs(), Ordering::Relaxed);
|
||||
} else {
|
||||
self.direct_since_epoch_secs.store(0, Ordering::Relaxed);
|
||||
}
|
||||
let generation = self.generation.fetch_add(1, Ordering::Relaxed) + 1;
|
||||
let next = RouteCutoverState { mode, generation };
|
||||
self.tx.send_replace(next);
|
||||
Some(next)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whole seconds since the Unix epoch, or `0` if the system clock
/// reports a time before the epoch.
fn now_epoch_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
|
||||
|
||||
pub(crate) fn is_session_affected_by_cutover(
|
||||
current: RouteCutoverState,
|
||||
_session_mode: RelayRouteMode,
|
||||
session_generation: u64,
|
||||
) -> bool {
|
||||
current.generation > session_generation
|
||||
}
|
||||
|
||||
pub(crate) fn affected_cutover_state(
|
||||
rx: &watch::Receiver<RouteCutoverState>,
|
||||
session_mode: RelayRouteMode,
|
||||
session_generation: u64,
|
||||
) -> Option<RouteCutoverState> {
|
||||
let current = *rx.borrow();
|
||||
if is_session_affected_by_cutover(current, session_mode, session_generation) {
|
||||
return Some(current);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Derives a deterministic per-session delay between 1000 ms and 1999 ms.
///
/// `session_id` and `generation` are combined and scrambled with a
/// shift/multiply mixer (the shifts and multipliers match the SplitMix64
/// finalizer), then reduced modulo 1000 and offset by 1000 ms.
pub(crate) fn cutover_stagger_delay(session_id: u64, generation: u64) -> Duration {
    const SEED: u64 = 0x9e37_79b9_7f4a_7c15;
    const MULTIPLIER_A: u64 = 0xbf58_476d_1ce4_e5b9;
    const MULTIPLIER_B: u64 = 0x94d0_49bb_1331_11eb;
    const BASE_DELAY_MS: u64 = 1000;
    const JITTER_SPAN_MS: u64 = 1000;

    let seeded = session_id ^ generation.rotate_left(17) ^ SEED;
    let round_a = (seeded ^ (seeded >> 30)).wrapping_mul(MULTIPLIER_A);
    let round_b = (round_a ^ (round_a >> 27)).wrapping_mul(MULTIPLIER_B);
    let mixed = round_b ^ (round_b >> 31);

    Duration::from_millis(BASE_DELAY_MS + mixed % JITTER_SPAN_MS)
}
|
||||
373
src/startup.rs
Normal file
373
src/startup.rs
Normal file
@@ -0,0 +1,373 @@
|
||||
use std::time::{Instant, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
pub const COMPONENT_CONFIG_LOAD: &str = "config_load";
|
||||
pub const COMPONENT_TRACING_INIT: &str = "tracing_init";
|
||||
pub const COMPONENT_API_BOOTSTRAP: &str = "api_bootstrap";
|
||||
pub const COMPONENT_TLS_FRONT_BOOTSTRAP: &str = "tls_front_bootstrap";
|
||||
pub const COMPONENT_NETWORK_PROBE: &str = "network_probe";
|
||||
pub const COMPONENT_ME_SECRET_FETCH: &str = "me_secret_fetch";
|
||||
pub const COMPONENT_ME_PROXY_CONFIG_V4: &str = "me_proxy_config_fetch_v4";
|
||||
pub const COMPONENT_ME_PROXY_CONFIG_V6: &str = "me_proxy_config_fetch_v6";
|
||||
pub const COMPONENT_ME_POOL_CONSTRUCT: &str = "me_pool_construct";
|
||||
pub const COMPONENT_ME_POOL_INIT_STAGE1: &str = "me_pool_init_stage1";
|
||||
pub const COMPONENT_ME_CONNECTIVITY_PING: &str = "me_connectivity_ping";
|
||||
pub const COMPONENT_DC_CONNECTIVITY_PING: &str = "dc_connectivity_ping";
|
||||
pub const COMPONENT_LISTENERS_BIND: &str = "listeners_bind";
|
||||
pub const COMPONENT_CONFIG_WATCHER_START: &str = "config_watcher_start";
|
||||
pub const COMPONENT_METRICS_START: &str = "metrics_start";
|
||||
pub const COMPONENT_RUNTIME_READY: &str = "runtime_ready";
|
||||
|
||||
/// Overall startup state.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StartupStatus {
    Initializing,
    Ready,
}

impl StartupStatus {
    /// Returns the lowercase label for this status.
    pub fn as_str(self) -> &'static str {
        let label = match self {
            Self::Ready => "ready",
            Self::Initializing => "initializing",
        };
        label
    }
}
|
||||
|
||||
/// Lifecycle state of a single startup component.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StartupComponentStatus {
    Pending,
    Running,
    Ready,
    Failed,
    Skipped,
}

impl StartupComponentStatus {
    /// Returns the lowercase label for this status.
    pub fn as_str(self) -> &'static str {
        let label = match self {
            Self::Skipped => "skipped",
            Self::Failed => "failed",
            Self::Ready => "ready",
            Self::Running => "running",
            Self::Pending => "pending",
        };
        label
    }
}
|
||||
|
||||
/// Startup state of the ME subsystem.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StartupMeStatus {
    Pending,
    Initializing,
    Ready,
    Failed,
    Skipped,
}

impl StartupMeStatus {
    /// Returns the lowercase label for this status.
    pub fn as_str(self) -> &'static str {
        let label = match self {
            Self::Skipped => "skipped",
            Self::Failed => "failed",
            Self::Ready => "ready",
            Self::Initializing => "initializing",
            Self::Pending => "pending",
        };
        label
    }
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct StartupComponentSnapshot {
|
||||
pub id: &'static str,
|
||||
pub title: &'static str,
|
||||
pub weight: f64,
|
||||
pub status: StartupComponentStatus,
|
||||
pub started_at_epoch_ms: Option<u64>,
|
||||
pub finished_at_epoch_ms: Option<u64>,
|
||||
pub duration_ms: Option<u64>,
|
||||
pub attempts: u32,
|
||||
pub details: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct StartupMeSnapshot {
|
||||
pub status: StartupMeStatus,
|
||||
pub current_stage: String,
|
||||
pub init_attempt: u32,
|
||||
pub retry_limit: String,
|
||||
pub last_error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct StartupSnapshot {
|
||||
pub status: StartupStatus,
|
||||
pub degraded: bool,
|
||||
pub current_stage: String,
|
||||
pub started_at_epoch_secs: u64,
|
||||
pub ready_at_epoch_secs: Option<u64>,
|
||||
pub total_elapsed_ms: u64,
|
||||
pub transport_mode: String,
|
||||
pub me: StartupMeSnapshot,
|
||||
pub components: Vec<StartupComponentSnapshot>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct StartupComponent {
|
||||
id: &'static str,
|
||||
title: &'static str,
|
||||
weight: f64,
|
||||
status: StartupComponentStatus,
|
||||
started_at_epoch_ms: Option<u64>,
|
||||
finished_at_epoch_ms: Option<u64>,
|
||||
duration_ms: Option<u64>,
|
||||
attempts: u32,
|
||||
details: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct StartupState {
|
||||
status: StartupStatus,
|
||||
degraded: bool,
|
||||
current_stage: String,
|
||||
started_at_epoch_secs: u64,
|
||||
ready_at_epoch_secs: Option<u64>,
|
||||
transport_mode: String,
|
||||
me: StartupMeSnapshot,
|
||||
components: Vec<StartupComponent>,
|
||||
}
|
||||
|
||||
pub struct StartupTracker {
|
||||
started_at_instant: Instant,
|
||||
state: RwLock<StartupState>,
|
||||
}
|
||||
|
||||
impl StartupTracker {
|
||||
pub fn new(started_at_epoch_secs: u64) -> Self {
|
||||
Self {
|
||||
started_at_instant: Instant::now(),
|
||||
state: RwLock::new(StartupState {
|
||||
status: StartupStatus::Initializing,
|
||||
degraded: false,
|
||||
current_stage: COMPONENT_CONFIG_LOAD.to_string(),
|
||||
started_at_epoch_secs,
|
||||
ready_at_epoch_secs: None,
|
||||
transport_mode: "unknown".to_string(),
|
||||
me: StartupMeSnapshot {
|
||||
status: StartupMeStatus::Pending,
|
||||
current_stage: "pending".to_string(),
|
||||
init_attempt: 0,
|
||||
retry_limit: "unlimited".to_string(),
|
||||
last_error: None,
|
||||
},
|
||||
components: component_blueprint(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn set_transport_mode(&self, mode: &'static str) {
|
||||
self.state.write().await.transport_mode = mode.to_string();
|
||||
}
|
||||
|
||||
pub async fn set_degraded(&self, degraded: bool) {
|
||||
self.state.write().await.degraded = degraded;
|
||||
}
|
||||
|
||||
pub async fn start_component(&self, id: &'static str, details: Option<String>) {
|
||||
let mut guard = self.state.write().await;
|
||||
guard.current_stage = id.to_string();
|
||||
if let Some(component) = guard.components.iter_mut().find(|component| component.id == id) {
|
||||
if component.started_at_epoch_ms.is_none() {
|
||||
component.started_at_epoch_ms = Some(now_epoch_ms());
|
||||
}
|
||||
component.attempts = component.attempts.saturating_add(1);
|
||||
component.status = StartupComponentStatus::Running;
|
||||
component.details = normalize_details(details);
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn complete_component(&self, id: &'static str, details: Option<String>) {
|
||||
self.finish_component(id, StartupComponentStatus::Ready, details)
|
||||
.await;
|
||||
}
|
||||
|
||||
pub async fn fail_component(&self, id: &'static str, details: Option<String>) {
|
||||
self.finish_component(id, StartupComponentStatus::Failed, details)
|
||||
.await;
|
||||
}
|
||||
|
||||
pub async fn skip_component(&self, id: &'static str, details: Option<String>) {
|
||||
self.finish_component(id, StartupComponentStatus::Skipped, details)
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn finish_component(
|
||||
&self,
|
||||
id: &'static str,
|
||||
status: StartupComponentStatus,
|
||||
details: Option<String>,
|
||||
) {
|
||||
let mut guard = self.state.write().await;
|
||||
let finished_at = now_epoch_ms();
|
||||
if let Some(component) = guard.components.iter_mut().find(|component| component.id == id) {
|
||||
if component.started_at_epoch_ms.is_none() {
|
||||
component.started_at_epoch_ms = Some(finished_at);
|
||||
component.attempts = component.attempts.saturating_add(1);
|
||||
}
|
||||
component.finished_at_epoch_ms = Some(finished_at);
|
||||
component.duration_ms = component
|
||||
.started_at_epoch_ms
|
||||
.map(|started_at| finished_at.saturating_sub(started_at));
|
||||
component.status = status;
|
||||
component.details = normalize_details(details);
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn set_me_status(&self, status: StartupMeStatus, stage: &'static str) {
|
||||
let mut guard = self.state.write().await;
|
||||
guard.me.status = status;
|
||||
guard.me.current_stage = stage.to_string();
|
||||
}
|
||||
|
||||
pub async fn set_me_retry_limit(&self, retry_limit: String) {
|
||||
self.state.write().await.me.retry_limit = retry_limit;
|
||||
}
|
||||
|
||||
pub async fn set_me_init_attempt(&self, attempt: u32) {
|
||||
self.state.write().await.me.init_attempt = attempt;
|
||||
}
|
||||
|
||||
pub async fn set_me_last_error(&self, error: Option<String>) {
|
||||
self.state.write().await.me.last_error = normalize_details(error);
|
||||
}
|
||||
|
||||
pub async fn mark_ready(&self) {
|
||||
let mut guard = self.state.write().await;
|
||||
if guard.status == StartupStatus::Ready {
|
||||
return;
|
||||
}
|
||||
guard.status = StartupStatus::Ready;
|
||||
guard.current_stage = "ready".to_string();
|
||||
guard.ready_at_epoch_secs = Some(now_epoch_secs());
|
||||
}
|
||||
|
||||
pub async fn snapshot(&self) -> StartupSnapshot {
|
||||
let guard = self.state.read().await;
|
||||
StartupSnapshot {
|
||||
status: guard.status,
|
||||
degraded: guard.degraded,
|
||||
current_stage: guard.current_stage.clone(),
|
||||
started_at_epoch_secs: guard.started_at_epoch_secs,
|
||||
ready_at_epoch_secs: guard.ready_at_epoch_secs,
|
||||
total_elapsed_ms: self.started_at_instant.elapsed().as_millis() as u64,
|
||||
transport_mode: guard.transport_mode.clone(),
|
||||
me: guard.me.clone(),
|
||||
components: guard
|
||||
.components
|
||||
.iter()
|
||||
.map(|component| StartupComponentSnapshot {
|
||||
id: component.id,
|
||||
title: component.title,
|
||||
weight: component.weight,
|
||||
status: component.status,
|
||||
started_at_epoch_ms: component.started_at_epoch_ms,
|
||||
finished_at_epoch_ms: component.finished_at_epoch_ms,
|
||||
duration_ms: component.duration_ms,
|
||||
attempts: component.attempts,
|
||||
details: component.details.clone(),
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compute_progress_pct(snapshot: &StartupSnapshot, me_stage_progress: Option<f64>) -> f64 {
|
||||
if snapshot.status == StartupStatus::Ready {
|
||||
return 100.0;
|
||||
}
|
||||
|
||||
let mut total_weight = 0.0f64;
|
||||
let mut completed_weight = 0.0f64;
|
||||
|
||||
for component in &snapshot.components {
|
||||
total_weight += component.weight;
|
||||
let unit_progress = match component.status {
|
||||
StartupComponentStatus::Pending => 0.0,
|
||||
StartupComponentStatus::Running => {
|
||||
if component.id == COMPONENT_ME_POOL_INIT_STAGE1 {
|
||||
me_stage_progress.unwrap_or(0.0).clamp(0.0, 1.0)
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
StartupComponentStatus::Ready
|
||||
| StartupComponentStatus::Failed
|
||||
| StartupComponentStatus::Skipped => 1.0,
|
||||
};
|
||||
completed_weight += component.weight * unit_progress;
|
||||
}
|
||||
|
||||
if total_weight <= f64::EPSILON {
|
||||
0.0
|
||||
} else {
|
||||
((completed_weight / total_weight) * 100.0).clamp(0.0, 100.0)
|
||||
}
|
||||
}
|
||||
|
||||
fn component_blueprint() -> Vec<StartupComponent> {
|
||||
vec![
|
||||
component(COMPONENT_CONFIG_LOAD, "Config load", 5.0),
|
||||
component(COMPONENT_TRACING_INIT, "Tracing init", 3.0),
|
||||
component(COMPONENT_API_BOOTSTRAP, "API bootstrap", 5.0),
|
||||
component(COMPONENT_TLS_FRONT_BOOTSTRAP, "TLS front bootstrap", 5.0),
|
||||
component(COMPONENT_NETWORK_PROBE, "Network probe", 10.0),
|
||||
component(COMPONENT_ME_SECRET_FETCH, "ME secret fetch", 8.0),
|
||||
component(COMPONENT_ME_PROXY_CONFIG_V4, "ME config v4 fetch", 4.0),
|
||||
component(COMPONENT_ME_PROXY_CONFIG_V6, "ME config v6 fetch", 4.0),
|
||||
component(COMPONENT_ME_POOL_CONSTRUCT, "ME pool construct", 6.0),
|
||||
component(COMPONENT_ME_POOL_INIT_STAGE1, "ME pool init stage1", 24.0),
|
||||
component(COMPONENT_ME_CONNECTIVITY_PING, "ME connectivity ping", 6.0),
|
||||
component(COMPONENT_DC_CONNECTIVITY_PING, "DC connectivity ping", 8.0),
|
||||
component(COMPONENT_LISTENERS_BIND, "Listener bind", 8.0),
|
||||
component(COMPONENT_CONFIG_WATCHER_START, "Config watcher start", 2.0),
|
||||
component(COMPONENT_METRICS_START, "Metrics start", 1.0),
|
||||
component(COMPONENT_RUNTIME_READY, "Runtime ready", 1.0),
|
||||
]
|
||||
}
|
||||
|
||||
fn component(id: &'static str, title: &'static str, weight: f64) -> StartupComponent {
|
||||
StartupComponent {
|
||||
id,
|
||||
title,
|
||||
weight,
|
||||
status: StartupComponentStatus::Pending,
|
||||
started_at_epoch_ms: None,
|
||||
finished_at_epoch_ms: None,
|
||||
duration_ms: None,
|
||||
attempts: 0,
|
||||
details: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Caps a details string at 256 bytes.
///
/// Strings of 256 bytes or fewer are returned unchanged. Longer strings are
/// truncated to the largest UTF-8 character boundary at or below 256 bytes,
/// so the result is always valid UTF-8 and never longer than 256 bytes.
/// `None` is passed through.
///
/// The previous implementation sliced with `detail[..256]`, which panics when
/// byte 256 falls inside a multi-byte character (for example non-ASCII error
/// messages).
fn normalize_details(details: Option<String>) -> Option<String> {
    const MAX_DETAIL_BYTES: usize = 256;

    details.map(|mut detail| {
        if detail.len() > MAX_DETAIL_BYTES {
            // Walk back to a character boundary; index 0 is always one, so
            // the loop terminates.
            let mut cut = MAX_DETAIL_BYTES;
            while !detail.is_char_boundary(cut) {
                cut -= 1;
            }
            detail.truncate(cut);
        }
        detail
    })
}
|
||||
|
||||
/// Returns whole seconds since the Unix epoch, or `0` if the system clock
/// reports a time before the epoch.
fn now_epoch_secs() -> u64 {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
    since_epoch.map_or(0, |elapsed| elapsed.as_secs())
}
|
||||
|
||||
/// Returns milliseconds since the Unix epoch (truncated to `u64`), or `0` if
/// the system clock reports a time before the epoch.
fn now_epoch_ms() -> u64 {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
    since_epoch.map_or(0, |elapsed| elapsed.as_millis() as u64)
}
|
||||
507
src/stats/mod.rs
507
src/stats/mod.rs
@@ -6,7 +6,7 @@ pub mod beobachten;
|
||||
pub mod telemetry;
|
||||
|
||||
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering};
|
||||
use std::time::{Instant, Duration};
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
use dashmap::DashMap;
|
||||
use parking_lot::Mutex;
|
||||
use lru::LruCache;
|
||||
@@ -16,7 +16,7 @@ use std::collections::hash_map::DefaultHasher;
|
||||
use std::collections::VecDeque;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::config::MeTelemetryLevel;
|
||||
use crate::config::{MeTelemetryLevel, MeWriterPickMode};
|
||||
use self::telemetry::TelemetryPolicy;
|
||||
|
||||
// ============= Stats =============
|
||||
@@ -25,6 +25,8 @@ use self::telemetry::TelemetryPolicy;
|
||||
pub struct Stats {
|
||||
connects_all: AtomicU64,
|
||||
connects_bad: AtomicU64,
|
||||
current_connections_direct: AtomicU64,
|
||||
current_connections_me: AtomicU64,
|
||||
handshake_timeouts: AtomicU64,
|
||||
upstream_connect_attempt_total: AtomicU64,
|
||||
upstream_connect_success_total: AtomicU64,
|
||||
@@ -73,12 +75,38 @@ pub struct Stats {
|
||||
me_floor_mode_switch_total: AtomicU64,
|
||||
me_floor_mode_switch_static_to_adaptive_total: AtomicU64,
|
||||
me_floor_mode_switch_adaptive_to_static_total: AtomicU64,
|
||||
me_floor_cpu_cores_detected_gauge: AtomicU64,
|
||||
me_floor_cpu_cores_effective_gauge: AtomicU64,
|
||||
me_floor_global_cap_raw_gauge: AtomicU64,
|
||||
me_floor_global_cap_effective_gauge: AtomicU64,
|
||||
me_floor_target_writers_total_gauge: AtomicU64,
|
||||
me_floor_active_cap_configured_gauge: AtomicU64,
|
||||
me_floor_active_cap_effective_gauge: AtomicU64,
|
||||
me_floor_warm_cap_configured_gauge: AtomicU64,
|
||||
me_floor_warm_cap_effective_gauge: AtomicU64,
|
||||
me_writers_active_current_gauge: AtomicU64,
|
||||
me_writers_warm_current_gauge: AtomicU64,
|
||||
me_floor_cap_block_total: AtomicU64,
|
||||
me_floor_swap_idle_total: AtomicU64,
|
||||
me_floor_swap_idle_failed_total: AtomicU64,
|
||||
me_handshake_error_codes: DashMap<i32, AtomicU64>,
|
||||
me_route_drop_no_conn: AtomicU64,
|
||||
me_route_drop_channel_closed: AtomicU64,
|
||||
me_route_drop_queue_full: AtomicU64,
|
||||
me_route_drop_queue_full_base: AtomicU64,
|
||||
me_route_drop_queue_full_high: AtomicU64,
|
||||
me_writer_pick_sorted_rr_success_try_total: AtomicU64,
|
||||
me_writer_pick_sorted_rr_success_fallback_total: AtomicU64,
|
||||
me_writer_pick_sorted_rr_full_total: AtomicU64,
|
||||
me_writer_pick_sorted_rr_closed_total: AtomicU64,
|
||||
me_writer_pick_sorted_rr_no_candidate_total: AtomicU64,
|
||||
me_writer_pick_p2c_success_try_total: AtomicU64,
|
||||
me_writer_pick_p2c_success_fallback_total: AtomicU64,
|
||||
me_writer_pick_p2c_full_total: AtomicU64,
|
||||
me_writer_pick_p2c_closed_total: AtomicU64,
|
||||
me_writer_pick_p2c_no_candidate_total: AtomicU64,
|
||||
me_writer_pick_blocking_fallback_total: AtomicU64,
|
||||
me_writer_pick_mode_switch_total: AtomicU64,
|
||||
me_socks_kdf_strict_reject: AtomicU64,
|
||||
me_socks_kdf_compat_fallback: AtomicU64,
|
||||
secure_padding_invalid: AtomicU64,
|
||||
@@ -100,10 +128,16 @@ pub struct Stats {
|
||||
me_refill_failed_total: AtomicU64,
|
||||
me_writer_restored_same_endpoint_total: AtomicU64,
|
||||
me_writer_restored_fallback_total: AtomicU64,
|
||||
me_no_writer_failfast_total: AtomicU64,
|
||||
me_async_recovery_trigger_total: AtomicU64,
|
||||
me_inline_recovery_total: AtomicU64,
|
||||
ip_reservation_rollback_tcp_limit_total: AtomicU64,
|
||||
ip_reservation_rollback_quota_limit_total: AtomicU64,
|
||||
telemetry_core_enabled: AtomicBool,
|
||||
telemetry_user_enabled: AtomicBool,
|
||||
telemetry_me_level: AtomicU8,
|
||||
user_stats: DashMap<String, UserStats>,
|
||||
user_stats_last_cleanup_epoch_secs: AtomicU64,
|
||||
start_time: parking_lot::RwLock<Option<Instant>>,
|
||||
}
|
||||
|
||||
@@ -115,6 +149,7 @@ pub struct UserStats {
|
||||
pub octets_to_client: AtomicU64,
|
||||
pub msgs_from_client: AtomicU64,
|
||||
pub msgs_to_client: AtomicU64,
|
||||
pub last_seen_epoch_secs: AtomicU64,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
@@ -145,6 +180,72 @@ impl Stats {
|
||||
self.telemetry_me_level().allows_debug()
|
||||
}
|
||||
|
||||
fn decrement_atomic_saturating(counter: &AtomicU64) {
|
||||
let mut current = counter.load(Ordering::Relaxed);
|
||||
loop {
|
||||
if current == 0 {
|
||||
break;
|
||||
}
|
||||
match counter.compare_exchange_weak(
|
||||
current,
|
||||
current - 1,
|
||||
Ordering::Relaxed,
|
||||
Ordering::Relaxed,
|
||||
) {
|
||||
Ok(_) => break,
|
||||
Err(actual) => current = actual,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn now_epoch_secs() -> u64 {
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs()
|
||||
}
|
||||
|
||||
fn touch_user_stats(stats: &UserStats) {
|
||||
stats
|
||||
.last_seen_epoch_secs
|
||||
.store(Self::now_epoch_secs(), Ordering::Relaxed);
|
||||
}
|
||||
|
||||
fn maybe_cleanup_user_stats(&self) {
|
||||
const USER_STATS_CLEANUP_INTERVAL_SECS: u64 = 60;
|
||||
const USER_STATS_IDLE_TTL_SECS: u64 = 24 * 60 * 60;
|
||||
|
||||
let now_epoch_secs = Self::now_epoch_secs();
|
||||
let last_cleanup_epoch_secs = self
|
||||
.user_stats_last_cleanup_epoch_secs
|
||||
.load(Ordering::Relaxed);
|
||||
if now_epoch_secs.saturating_sub(last_cleanup_epoch_secs)
|
||||
< USER_STATS_CLEANUP_INTERVAL_SECS
|
||||
{
|
||||
return;
|
||||
}
|
||||
if self
|
||||
.user_stats_last_cleanup_epoch_secs
|
||||
.compare_exchange(
|
||||
last_cleanup_epoch_secs,
|
||||
now_epoch_secs,
|
||||
Ordering::AcqRel,
|
||||
Ordering::Relaxed,
|
||||
)
|
||||
.is_err()
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
self.user_stats.retain(|_, stats| {
|
||||
if stats.curr_connects.load(Ordering::Relaxed) > 0 {
|
||||
return true;
|
||||
}
|
||||
let last_seen_epoch_secs = stats.last_seen_epoch_secs.load(Ordering::Relaxed);
|
||||
now_epoch_secs.saturating_sub(last_seen_epoch_secs) <= USER_STATS_IDLE_TTL_SECS
|
||||
});
|
||||
}
|
||||
|
||||
pub fn apply_telemetry_policy(&self, policy: TelemetryPolicy) {
|
||||
self.telemetry_core_enabled
|
||||
.store(policy.core_enabled, Ordering::Relaxed);
|
||||
@@ -172,6 +273,18 @@ impl Stats {
|
||||
self.connects_bad.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_current_connections_direct(&self) {
|
||||
self.current_connections_direct.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
pub fn decrement_current_connections_direct(&self) {
|
||||
Self::decrement_atomic_saturating(&self.current_connections_direct);
|
||||
}
|
||||
pub fn increment_current_connections_me(&self) {
|
||||
self.current_connections_me.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
pub fn decrement_current_connections_me(&self) {
|
||||
Self::decrement_atomic_saturating(&self.current_connections_me);
|
||||
}
|
||||
pub fn increment_handshake_timeouts(&self) {
|
||||
if self.telemetry_core_enabled() {
|
||||
self.handshake_timeouts.fetch_add(1, Ordering::Relaxed);
|
||||
@@ -396,6 +509,93 @@ impl Stats {
|
||||
self.me_route_drop_queue_full_high.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_me_writer_pick_success_try_total(&self, mode: MeWriterPickMode) {
|
||||
if !self.telemetry_me_allows_normal() {
|
||||
return;
|
||||
}
|
||||
match mode {
|
||||
MeWriterPickMode::SortedRr => {
|
||||
self.me_writer_pick_sorted_rr_success_try_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
MeWriterPickMode::P2c => {
|
||||
self.me_writer_pick_p2c_success_try_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn increment_me_writer_pick_success_fallback_total(&self, mode: MeWriterPickMode) {
|
||||
if !self.telemetry_me_allows_normal() {
|
||||
return;
|
||||
}
|
||||
match mode {
|
||||
MeWriterPickMode::SortedRr => {
|
||||
self.me_writer_pick_sorted_rr_success_fallback_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
MeWriterPickMode::P2c => {
|
||||
self.me_writer_pick_p2c_success_fallback_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn increment_me_writer_pick_full_total(&self, mode: MeWriterPickMode) {
|
||||
if !self.telemetry_me_allows_normal() {
|
||||
return;
|
||||
}
|
||||
match mode {
|
||||
MeWriterPickMode::SortedRr => {
|
||||
self.me_writer_pick_sorted_rr_full_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
MeWriterPickMode::P2c => {
|
||||
self.me_writer_pick_p2c_full_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn increment_me_writer_pick_closed_total(&self, mode: MeWriterPickMode) {
|
||||
if !self.telemetry_me_allows_normal() {
|
||||
return;
|
||||
}
|
||||
match mode {
|
||||
MeWriterPickMode::SortedRr => {
|
||||
self.me_writer_pick_sorted_rr_closed_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
MeWriterPickMode::P2c => {
|
||||
self.me_writer_pick_p2c_closed_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn increment_me_writer_pick_no_candidate_total(&self, mode: MeWriterPickMode) {
|
||||
if !self.telemetry_me_allows_normal() {
|
||||
return;
|
||||
}
|
||||
match mode {
|
||||
MeWriterPickMode::SortedRr => {
|
||||
self.me_writer_pick_sorted_rr_no_candidate_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
MeWriterPickMode::P2c => {
|
||||
self.me_writer_pick_p2c_no_candidate_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn increment_me_writer_pick_blocking_fallback_total(&self) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_writer_pick_blocking_fallback_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_me_writer_pick_mode_switch_total(&self) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_writer_pick_mode_switch_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_me_socks_kdf_strict_reject(&self) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_socks_kdf_strict_reject.fetch_add(1, Ordering::Relaxed);
|
||||
@@ -522,6 +722,34 @@ impl Stats {
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_me_no_writer_failfast_total(&self) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_no_writer_failfast_total.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_me_async_recovery_trigger_total(&self) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_async_recovery_trigger_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_me_inline_recovery_total(&self) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_inline_recovery_total.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_ip_reservation_rollback_tcp_limit_total(&self) {
|
||||
if self.telemetry_core_enabled() {
|
||||
self.ip_reservation_rollback_tcp_limit_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_ip_reservation_rollback_quota_limit_total(&self) {
|
||||
if self.telemetry_core_enabled() {
|
||||
self.ip_reservation_rollback_quota_limit_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_me_endpoint_quarantine_total(&self) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_endpoint_quarantine_total
|
||||
@@ -611,8 +839,100 @@ impl Stats {
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn set_me_floor_cpu_cores_detected_gauge(&self, value: u64) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_floor_cpu_cores_detected_gauge
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn set_me_floor_cpu_cores_effective_gauge(&self, value: u64) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_floor_cpu_cores_effective_gauge
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn set_me_floor_global_cap_raw_gauge(&self, value: u64) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_floor_global_cap_raw_gauge
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn set_me_floor_global_cap_effective_gauge(&self, value: u64) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_floor_global_cap_effective_gauge
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn set_me_floor_target_writers_total_gauge(&self, value: u64) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_floor_target_writers_total_gauge
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn set_me_floor_active_cap_configured_gauge(&self, value: u64) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_floor_active_cap_configured_gauge
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn set_me_floor_active_cap_effective_gauge(&self, value: u64) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_floor_active_cap_effective_gauge
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn set_me_floor_warm_cap_configured_gauge(&self, value: u64) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_floor_warm_cap_configured_gauge
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn set_me_floor_warm_cap_effective_gauge(&self, value: u64) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_floor_warm_cap_effective_gauge
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn set_me_writers_active_current_gauge(&self, value: u64) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_writers_active_current_gauge
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn set_me_writers_warm_current_gauge(&self, value: u64) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_writers_warm_current_gauge
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_me_floor_cap_block_total(&self) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_floor_cap_block_total.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_me_floor_swap_idle_total(&self) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_floor_swap_idle_total.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn increment_me_floor_swap_idle_failed_total(&self) {
|
||||
if self.telemetry_me_allows_normal() {
|
||||
self.me_floor_swap_idle_failed_total
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
pub fn get_connects_all(&self) -> u64 { self.connects_all.load(Ordering::Relaxed) }
|
||||
pub fn get_connects_bad(&self) -> u64 { self.connects_bad.load(Ordering::Relaxed) }
|
||||
pub fn get_current_connections_direct(&self) -> u64 {
|
||||
self.current_connections_direct.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_current_connections_me(&self) -> u64 {
|
||||
self.current_connections_me.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_current_connections_total(&self) -> u64 {
|
||||
self.get_current_connections_direct()
|
||||
.saturating_add(self.get_current_connections_me())
|
||||
}
|
||||
pub fn get_me_keepalive_sent(&self) -> u64 { self.me_keepalive_sent.load(Ordering::Relaxed) }
|
||||
pub fn get_me_keepalive_failed(&self) -> u64 { self.me_keepalive_failed.load(Ordering::Relaxed) }
|
||||
pub fn get_me_keepalive_pong(&self) -> u64 { self.me_keepalive_pong.load(Ordering::Relaxed) }
|
||||
@@ -706,6 +1026,58 @@ impl Stats {
|
||||
self.me_floor_mode_switch_adaptive_to_static_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_floor_cpu_cores_detected_gauge(&self) -> u64 {
|
||||
self.me_floor_cpu_cores_detected_gauge
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_floor_cpu_cores_effective_gauge(&self) -> u64 {
|
||||
self.me_floor_cpu_cores_effective_gauge
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_floor_global_cap_raw_gauge(&self) -> u64 {
|
||||
self.me_floor_global_cap_raw_gauge.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_floor_global_cap_effective_gauge(&self) -> u64 {
|
||||
self.me_floor_global_cap_effective_gauge
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_floor_target_writers_total_gauge(&self) -> u64 {
|
||||
self.me_floor_target_writers_total_gauge
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_floor_active_cap_configured_gauge(&self) -> u64 {
|
||||
self.me_floor_active_cap_configured_gauge
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_floor_active_cap_effective_gauge(&self) -> u64 {
|
||||
self.me_floor_active_cap_effective_gauge
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_floor_warm_cap_configured_gauge(&self) -> u64 {
|
||||
self.me_floor_warm_cap_configured_gauge
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_floor_warm_cap_effective_gauge(&self) -> u64 {
|
||||
self.me_floor_warm_cap_effective_gauge
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writers_active_current_gauge(&self) -> u64 {
|
||||
self.me_writers_active_current_gauge
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writers_warm_current_gauge(&self) -> u64 {
|
||||
self.me_writers_warm_current_gauge
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_floor_cap_block_total(&self) -> u64 {
|
||||
self.me_floor_cap_block_total.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_floor_swap_idle_total(&self) -> u64 {
|
||||
self.me_floor_swap_idle_total.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_floor_swap_idle_failed_total(&self) -> u64 {
|
||||
self.me_floor_swap_idle_failed_total.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_handshake_error_code_counts(&self) -> Vec<(i32, u64)> {
|
||||
let mut out: Vec<(i32, u64)> = self
|
||||
.me_handshake_error_codes
|
||||
@@ -728,6 +1100,52 @@ impl Stats {
|
||||
pub fn get_me_route_drop_queue_full_high(&self) -> u64 {
|
||||
self.me_route_drop_queue_full_high.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writer_pick_sorted_rr_success_try_total(&self) -> u64 {
|
||||
self.me_writer_pick_sorted_rr_success_try_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writer_pick_sorted_rr_success_fallback_total(&self) -> u64 {
|
||||
self.me_writer_pick_sorted_rr_success_fallback_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writer_pick_sorted_rr_full_total(&self) -> u64 {
|
||||
self.me_writer_pick_sorted_rr_full_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writer_pick_sorted_rr_closed_total(&self) -> u64 {
|
||||
self.me_writer_pick_sorted_rr_closed_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writer_pick_sorted_rr_no_candidate_total(&self) -> u64 {
|
||||
self.me_writer_pick_sorted_rr_no_candidate_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writer_pick_p2c_success_try_total(&self) -> u64 {
|
||||
self.me_writer_pick_p2c_success_try_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writer_pick_p2c_success_fallback_total(&self) -> u64 {
|
||||
self.me_writer_pick_p2c_success_fallback_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writer_pick_p2c_full_total(&self) -> u64 {
|
||||
self.me_writer_pick_p2c_full_total.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writer_pick_p2c_closed_total(&self) -> u64 {
|
||||
self.me_writer_pick_p2c_closed_total.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writer_pick_p2c_no_candidate_total(&self) -> u64 {
|
||||
self.me_writer_pick_p2c_no_candidate_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writer_pick_blocking_fallback_total(&self) -> u64 {
|
||||
self.me_writer_pick_blocking_fallback_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_writer_pick_mode_switch_total(&self) -> u64 {
|
||||
self.me_writer_pick_mode_switch_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_socks_kdf_strict_reject(&self) -> u64 {
|
||||
self.me_socks_kdf_strict_reject.load(Ordering::Relaxed)
|
||||
}
|
||||
@@ -791,25 +1209,58 @@ impl Stats {
|
||||
pub fn get_me_writer_restored_fallback_total(&self) -> u64 {
|
||||
self.me_writer_restored_fallback_total.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_no_writer_failfast_total(&self) -> u64 {
|
||||
self.me_no_writer_failfast_total.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_async_recovery_trigger_total(&self) -> u64 {
|
||||
self.me_async_recovery_trigger_total.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_me_inline_recovery_total(&self) -> u64 {
|
||||
self.me_inline_recovery_total.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_ip_reservation_rollback_tcp_limit_total(&self) -> u64 {
|
||||
self.ip_reservation_rollback_tcp_limit_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
pub fn get_ip_reservation_rollback_quota_limit_total(&self) -> u64 {
|
||||
self.ip_reservation_rollback_quota_limit_total
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub fn increment_user_connects(&self, user: &str) {
|
||||
if !self.telemetry_user_enabled() {
|
||||
return;
|
||||
}
|
||||
self.user_stats.entry(user.to_string()).or_default()
|
||||
.connects.fetch_add(1, Ordering::Relaxed);
|
||||
self.maybe_cleanup_user_stats();
|
||||
if let Some(stats) = self.user_stats.get(user) {
|
||||
Self::touch_user_stats(stats.value());
|
||||
stats.connects.fetch_add(1, Ordering::Relaxed);
|
||||
return;
|
||||
}
|
||||
let stats = self.user_stats.entry(user.to_string()).or_default();
|
||||
Self::touch_user_stats(stats.value());
|
||||
stats.connects.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn increment_user_curr_connects(&self, user: &str) {
|
||||
if !self.telemetry_user_enabled() {
|
||||
return;
|
||||
}
|
||||
self.user_stats.entry(user.to_string()).or_default()
|
||||
.curr_connects.fetch_add(1, Ordering::Relaxed);
|
||||
self.maybe_cleanup_user_stats();
|
||||
if let Some(stats) = self.user_stats.get(user) {
|
||||
Self::touch_user_stats(stats.value());
|
||||
stats.curr_connects.fetch_add(1, Ordering::Relaxed);
|
||||
return;
|
||||
}
|
||||
let stats = self.user_stats.entry(user.to_string()).or_default();
|
||||
Self::touch_user_stats(stats.value());
|
||||
stats.curr_connects.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn decrement_user_curr_connects(&self, user: &str) {
|
||||
self.maybe_cleanup_user_stats();
|
||||
if let Some(stats) = self.user_stats.get(user) {
|
||||
Self::touch_user_stats(stats.value());
|
||||
let counter = &stats.curr_connects;
|
||||
let mut current = counter.load(Ordering::Relaxed);
|
||||
loop {
|
||||
@@ -839,32 +1290,60 @@ impl Stats {
|
||||
if !self.telemetry_user_enabled() {
|
||||
return;
|
||||
}
|
||||
self.user_stats.entry(user.to_string()).or_default()
|
||||
.octets_from_client.fetch_add(bytes, Ordering::Relaxed);
|
||||
self.maybe_cleanup_user_stats();
|
||||
if let Some(stats) = self.user_stats.get(user) {
|
||||
Self::touch_user_stats(stats.value());
|
||||
stats.octets_from_client.fetch_add(bytes, Ordering::Relaxed);
|
||||
return;
|
||||
}
|
||||
let stats = self.user_stats.entry(user.to_string()).or_default();
|
||||
Self::touch_user_stats(stats.value());
|
||||
stats.octets_from_client.fetch_add(bytes, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn add_user_octets_to(&self, user: &str, bytes: u64) {
|
||||
if !self.telemetry_user_enabled() {
|
||||
return;
|
||||
}
|
||||
self.user_stats.entry(user.to_string()).or_default()
|
||||
.octets_to_client.fetch_add(bytes, Ordering::Relaxed);
|
||||
self.maybe_cleanup_user_stats();
|
||||
if let Some(stats) = self.user_stats.get(user) {
|
||||
Self::touch_user_stats(stats.value());
|
||||
stats.octets_to_client.fetch_add(bytes, Ordering::Relaxed);
|
||||
return;
|
||||
}
|
||||
let stats = self.user_stats.entry(user.to_string()).or_default();
|
||||
Self::touch_user_stats(stats.value());
|
||||
stats.octets_to_client.fetch_add(bytes, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn increment_user_msgs_from(&self, user: &str) {
|
||||
if !self.telemetry_user_enabled() {
|
||||
return;
|
||||
}
|
||||
self.user_stats.entry(user.to_string()).or_default()
|
||||
.msgs_from_client.fetch_add(1, Ordering::Relaxed);
|
||||
self.maybe_cleanup_user_stats();
|
||||
if let Some(stats) = self.user_stats.get(user) {
|
||||
Self::touch_user_stats(stats.value());
|
||||
stats.msgs_from_client.fetch_add(1, Ordering::Relaxed);
|
||||
return;
|
||||
}
|
||||
let stats = self.user_stats.entry(user.to_string()).or_default();
|
||||
Self::touch_user_stats(stats.value());
|
||||
stats.msgs_from_client.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn increment_user_msgs_to(&self, user: &str) {
|
||||
if !self.telemetry_user_enabled() {
|
||||
return;
|
||||
}
|
||||
self.user_stats.entry(user.to_string()).or_default()
|
||||
.msgs_to_client.fetch_add(1, Ordering::Relaxed);
|
||||
self.maybe_cleanup_user_stats();
|
||||
if let Some(stats) = self.user_stats.get(user) {
|
||||
Self::touch_user_stats(stats.value());
|
||||
stats.msgs_to_client.fetch_add(1, Ordering::Relaxed);
|
||||
return;
|
||||
}
|
||||
let stats = self.user_stats.entry(user.to_string()).or_default();
|
||||
Self::touch_user_stats(stats.value());
|
||||
stats.msgs_to_client.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn get_user_total_octets(&self, user: &str) -> u64 {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use bytes::Bytes;
|
||||
|
||||
use crate::crypto::{AesCbc, crc32, crc32c};
|
||||
use crate::error::{ProxyError, Result};
|
||||
@@ -6,8 +7,8 @@ use crate::protocol::constants::*;
|
||||
|
||||
/// Commands sent to dedicated writer tasks to avoid mutex contention on TCP writes.
|
||||
pub(crate) enum WriterCommand {
|
||||
Data(Vec<u8>),
|
||||
DataAndFlush(Vec<u8>),
|
||||
Data(Bytes),
|
||||
DataAndFlush(Bytes),
|
||||
Close,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::collections::HashMap;
|
||||
use std::hash::{DefaultHasher, Hash, Hasher};
|
||||
use std::net::IpAddr;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -14,6 +15,7 @@ use crate::error::Result;
|
||||
use super::MePool;
|
||||
use super::rotation::{MeReinitTrigger, enqueue_reinit_trigger};
|
||||
use super::secret::download_proxy_secret_with_max_len;
|
||||
use super::selftest::record_timeskew_sample;
|
||||
use std::time::SystemTime;
|
||||
|
||||
async fn retry_fetch(url: &str) -> Option<ProxyConfigData> {
|
||||
@@ -42,6 +44,88 @@ pub struct ProxyConfigData {
|
||||
pub proxy_for_lines: u32,
|
||||
}
|
||||
|
||||
pub fn parse_proxy_config_text(text: &str, http_status: u16) -> ProxyConfigData {
|
||||
let mut map: HashMap<i32, Vec<(IpAddr, u16)>> = HashMap::new();
|
||||
let mut proxy_for_lines: u32 = 0;
|
||||
for line in text.lines() {
|
||||
if let Some((dc, ip, port)) = parse_proxy_line(line) {
|
||||
map.entry(dc).or_default().push((ip, port));
|
||||
proxy_for_lines = proxy_for_lines.saturating_add(1);
|
||||
}
|
||||
}
|
||||
|
||||
let default_dc = text.lines().find_map(|l| {
|
||||
let t = l.trim();
|
||||
if let Some(rest) = t.strip_prefix("default") {
|
||||
return rest.trim().trim_end_matches(';').parse::<i32>().ok();
|
||||
}
|
||||
None
|
||||
});
|
||||
|
||||
ProxyConfigData {
|
||||
map,
|
||||
default_dc,
|
||||
http_status,
|
||||
proxy_for_lines,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn load_proxy_config_cache(path: &str) -> Result<ProxyConfigData> {
|
||||
let text = tokio::fs::read_to_string(path).await.map_err(|e| {
|
||||
crate::error::ProxyError::Proxy(format!("read proxy-config cache '{path}' failed: {e}"))
|
||||
})?;
|
||||
Ok(parse_proxy_config_text(&text, 200))
|
||||
}
|
||||
|
||||
pub async fn save_proxy_config_cache(path: &str, raw_text: &str) -> Result<()> {
|
||||
if let Some(parent) = Path::new(path).parent()
|
||||
&& !parent.as_os_str().is_empty()
|
||||
{
|
||||
tokio::fs::create_dir_all(parent).await.map_err(|e| {
|
||||
crate::error::ProxyError::Proxy(format!(
|
||||
"create proxy-config cache dir '{}' failed: {e}",
|
||||
parent.display()
|
||||
))
|
||||
})?;
|
||||
}
|
||||
|
||||
tokio::fs::write(path, raw_text).await.map_err(|e| {
|
||||
crate::error::ProxyError::Proxy(format!("write proxy-config cache '{path}' failed: {e}"))
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn fetch_proxy_config_with_raw(url: &str) -> Result<(ProxyConfigData, String)> {
|
||||
let resp = reqwest::get(url)
|
||||
.await
|
||||
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config GET failed: {e}")))?
|
||||
;
|
||||
let http_status = resp.status().as_u16();
|
||||
|
||||
if let Some(date) = resp.headers().get(reqwest::header::DATE)
|
||||
&& let Ok(date_str) = date.to_str()
|
||||
&& let Ok(server_time) = httpdate::parse_http_date(date_str)
|
||||
&& let Ok(skew) = SystemTime::now().duration_since(server_time).or_else(|e| {
|
||||
server_time.duration_since(SystemTime::now()).map_err(|_| e)
|
||||
})
|
||||
{
|
||||
let skew_secs = skew.as_secs();
|
||||
record_timeskew_sample("proxy_config_date_header", skew_secs);
|
||||
if skew_secs > 60 {
|
||||
warn!(skew_secs, "Time skew >60s detected from fetch_proxy_config Date header");
|
||||
} else if skew_secs > 30 {
|
||||
warn!(skew_secs, "Time skew >30s detected from fetch_proxy_config Date header");
|
||||
}
|
||||
}
|
||||
|
||||
let text = resp
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config read failed: {e}")))?;
|
||||
let parsed = parse_proxy_config_text(&text, http_status);
|
||||
Ok((parsed, text))
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct StableSnapshot {
|
||||
candidate_hash: Option<u64>,
|
||||
@@ -170,61 +254,9 @@ fn parse_proxy_line(line: &str) -> Option<(i32, IpAddr, u16)> {
|
||||
}
|
||||
|
||||
pub async fn fetch_proxy_config(url: &str) -> Result<ProxyConfigData> {
|
||||
let resp = reqwest::get(url)
|
||||
fetch_proxy_config_with_raw(url)
|
||||
.await
|
||||
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config GET failed: {e}")))?
|
||||
;
|
||||
let http_status = resp.status().as_u16();
|
||||
|
||||
if let Some(date) = resp.headers().get(reqwest::header::DATE)
|
||||
&& let Ok(date_str) = date.to_str()
|
||||
&& let Ok(server_time) = httpdate::parse_http_date(date_str)
|
||||
&& let Ok(skew) = SystemTime::now().duration_since(server_time).or_else(|e| {
|
||||
server_time.duration_since(SystemTime::now()).map_err(|_| e)
|
||||
})
|
||||
{
|
||||
let skew_secs = skew.as_secs();
|
||||
if skew_secs > 60 {
|
||||
warn!(skew_secs, "Time skew >60s detected from fetch_proxy_config Date header");
|
||||
} else if skew_secs > 30 {
|
||||
warn!(skew_secs, "Time skew >30s detected from fetch_proxy_config Date header");
|
||||
}
|
||||
}
|
||||
|
||||
let text = resp
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config read failed: {e}")))?;
|
||||
|
||||
let mut map: HashMap<i32, Vec<(IpAddr, u16)>> = HashMap::new();
|
||||
let mut proxy_for_lines: u32 = 0;
|
||||
for line in text.lines() {
|
||||
if let Some((dc, ip, port)) = parse_proxy_line(line) {
|
||||
map.entry(dc).or_default().push((ip, port));
|
||||
proxy_for_lines = proxy_for_lines.saturating_add(1);
|
||||
}
|
||||
}
|
||||
|
||||
let default_dc = text
|
||||
.lines()
|
||||
.find_map(|l| {
|
||||
let t = l.trim();
|
||||
if let Some(rest) = t.strip_prefix("default") {
|
||||
return rest
|
||||
.trim()
|
||||
.trim_end_matches(';')
|
||||
.parse::<i32>()
|
||||
.ok();
|
||||
}
|
||||
None
|
||||
});
|
||||
|
||||
Ok(ProxyConfigData {
|
||||
map,
|
||||
default_dc,
|
||||
http_status,
|
||||
proxy_for_lines,
|
||||
})
|
||||
.map(|(parsed, _raw)| parsed)
|
||||
}
|
||||
|
||||
fn snapshot_passes_guards(
|
||||
@@ -276,6 +308,8 @@ async fn run_update_cycle(
|
||||
cfg.general.me_bind_stale_ttl_secs,
|
||||
cfg.general.me_secret_atomic_snapshot,
|
||||
cfg.general.me_deterministic_writer_sort,
|
||||
cfg.general.me_writer_pick_mode,
|
||||
cfg.general.me_writer_pick_sample_size,
|
||||
cfg.general.me_single_endpoint_shadow_writers,
|
||||
cfg.general.me_single_endpoint_outage_mode_enabled,
|
||||
cfg.general.me_single_endpoint_outage_disable_quarantine,
|
||||
@@ -285,7 +319,19 @@ async fn run_update_cycle(
|
||||
cfg.general.me_floor_mode,
|
||||
cfg.general.me_adaptive_floor_idle_secs,
|
||||
cfg.general.me_adaptive_floor_min_writers_single_endpoint,
|
||||
cfg.general.me_adaptive_floor_min_writers_multi_endpoint,
|
||||
cfg.general.me_adaptive_floor_recover_grace_secs,
|
||||
cfg.general.me_adaptive_floor_writers_per_core_total,
|
||||
cfg.general.me_adaptive_floor_cpu_cores_override,
|
||||
cfg.general.me_adaptive_floor_max_extra_writers_single_per_core,
|
||||
cfg.general.me_adaptive_floor_max_extra_writers_multi_per_core,
|
||||
cfg.general.me_adaptive_floor_max_active_writers_per_core,
|
||||
cfg.general.me_adaptive_floor_max_warm_writers_per_core,
|
||||
cfg.general.me_adaptive_floor_max_active_writers_global,
|
||||
cfg.general.me_adaptive_floor_max_warm_writers_global,
|
||||
cfg.general.me_health_interval_ms_unhealthy,
|
||||
cfg.general.me_health_interval_ms_healthy,
|
||||
cfg.general.me_warn_rate_limit_ms,
|
||||
);
|
||||
|
||||
let required_cfg_snapshots = cfg.general.me_config_stable_snapshots.max(1);
|
||||
@@ -488,6 +534,8 @@ pub async fn me_config_updater(
|
||||
cfg.general.me_bind_stale_ttl_secs,
|
||||
cfg.general.me_secret_atomic_snapshot,
|
||||
cfg.general.me_deterministic_writer_sort,
|
||||
cfg.general.me_writer_pick_mode,
|
||||
cfg.general.me_writer_pick_sample_size,
|
||||
cfg.general.me_single_endpoint_shadow_writers,
|
||||
cfg.general.me_single_endpoint_outage_mode_enabled,
|
||||
cfg.general.me_single_endpoint_outage_disable_quarantine,
|
||||
@@ -497,7 +545,19 @@ pub async fn me_config_updater(
|
||||
cfg.general.me_floor_mode,
|
||||
cfg.general.me_adaptive_floor_idle_secs,
|
||||
cfg.general.me_adaptive_floor_min_writers_single_endpoint,
|
||||
cfg.general.me_adaptive_floor_min_writers_multi_endpoint,
|
||||
cfg.general.me_adaptive_floor_recover_grace_secs,
|
||||
cfg.general.me_adaptive_floor_writers_per_core_total,
|
||||
cfg.general.me_adaptive_floor_cpu_cores_override,
|
||||
cfg.general.me_adaptive_floor_max_extra_writers_single_per_core,
|
||||
cfg.general.me_adaptive_floor_max_extra_writers_multi_per_core,
|
||||
cfg.general.me_adaptive_floor_max_active_writers_per_core,
|
||||
cfg.general.me_adaptive_floor_max_warm_writers_per_core,
|
||||
cfg.general.me_adaptive_floor_max_active_writers_global,
|
||||
cfg.general.me_adaptive_floor_max_warm_writers_global,
|
||||
cfg.general.me_health_interval_ms_unhealthy,
|
||||
cfg.general.me_health_interval_ms_healthy,
|
||||
cfg.general.me_warn_rate_limit_ms,
|
||||
);
|
||||
let new_secs = cfg.general.effective_update_every_secs().max(1);
|
||||
if new_secs == update_every_secs {
|
||||
|
||||
@@ -33,6 +33,7 @@ use super::codec::{
|
||||
cbc_decrypt_inplace, cbc_encrypt_padded, parse_handshake_flags, parse_nonce_payload,
|
||||
read_rpc_frame_plaintext, rpc_crc,
|
||||
};
|
||||
use super::selftest::{BndAddrStatus, BndPortStatus, record_bnd_status, record_upstream_bnd_status};
|
||||
use super::wire::{extract_ip_material, IpMaterial};
|
||||
use super::MePool;
|
||||
|
||||
@@ -84,38 +85,7 @@ impl MePool {
|
||||
}
|
||||
|
||||
async fn resolve_dc_idx_for_endpoint(&self, addr: SocketAddr) -> Option<i16> {
|
||||
if addr.is_ipv4() {
|
||||
let map = self.proxy_map_v4.read().await;
|
||||
for (dc, addrs) in map.iter() {
|
||||
if addrs
|
||||
.iter()
|
||||
.any(|(ip, port)| SocketAddr::new(*ip, *port) == addr)
|
||||
{
|
||||
let abs_dc = dc.abs();
|
||||
if abs_dc > 0
|
||||
&& let Ok(dc_idx) = i16::try_from(abs_dc)
|
||||
{
|
||||
return Some(dc_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let map = self.proxy_map_v6.read().await;
|
||||
for (dc, addrs) in map.iter() {
|
||||
if addrs
|
||||
.iter()
|
||||
.any(|(ip, port)| SocketAddr::new(*ip, *port) == addr)
|
||||
{
|
||||
let abs_dc = dc.abs();
|
||||
if abs_dc > 0
|
||||
&& let Ok(dc_idx) = i16::try_from(abs_dc)
|
||||
{
|
||||
return Some(dc_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
i16::try_from(self.resolve_dc_for_endpoint(addr).await).ok()
|
||||
}
|
||||
|
||||
fn direct_bind_ip_for_stun(
|
||||
@@ -162,14 +132,27 @@ impl MePool {
|
||||
)
|
||||
}
|
||||
|
||||
fn bnd_port_status(bound: Option<SocketAddr>) -> BndPortStatus {
|
||||
match bound {
|
||||
Some(addr) if addr.port() == 0 => BndPortStatus::Zero,
|
||||
Some(_) => BndPortStatus::Ok,
|
||||
None => BndPortStatus::Error,
|
||||
}
|
||||
}
|
||||
|
||||
/// TCP connect with timeout + return RTT in milliseconds.
|
||||
pub(crate) async fn connect_tcp(
|
||||
&self,
|
||||
addr: SocketAddr,
|
||||
dc_idx_override: Option<i16>,
|
||||
) -> Result<(TcpStream, f64, Option<UpstreamEgressInfo>)> {
|
||||
let start = Instant::now();
|
||||
let (stream, upstream_egress) = if let Some(upstream) = &self.upstream {
|
||||
let dc_idx = self.resolve_dc_idx_for_endpoint(addr).await;
|
||||
let dc_idx = if let Some(dc_idx) = dc_idx_override {
|
||||
Some(dc_idx)
|
||||
} else {
|
||||
self.resolve_dc_idx_for_endpoint(addr).await
|
||||
};
|
||||
let (stream, egress) = upstream.connect_with_details(addr, dc_idx, None).await?;
|
||||
(stream, Some(egress))
|
||||
} else {
|
||||
@@ -216,10 +199,26 @@ impl MePool {
|
||||
|
||||
fn configure_keepalive(stream: &TcpStream) -> std::io::Result<()> {
|
||||
let sock = SockRef::from(stream);
|
||||
let ka = TcpKeepalive::new()
|
||||
.with_time(Duration::from_secs(30))
|
||||
.with_interval(Duration::from_secs(10))
|
||||
.with_retries(3);
|
||||
let ka = TcpKeepalive::new().with_time(Duration::from_secs(30));
|
||||
|
||||
// Mirror socket2 v0.5.10 target gate for with_retries(), the stricter method.
|
||||
#[cfg(any(
|
||||
target_os = "android",
|
||||
target_os = "dragonfly",
|
||||
target_os = "freebsd",
|
||||
target_os = "fuchsia",
|
||||
target_os = "illumos",
|
||||
target_os = "ios",
|
||||
target_os = "visionos",
|
||||
target_os = "linux",
|
||||
target_os = "macos",
|
||||
target_os = "netbsd",
|
||||
target_os = "tvos",
|
||||
target_os = "watchos",
|
||||
target_os = "cygwin",
|
||||
))]
|
||||
let ka = ka.with_interval(Duration::from_secs(10)).with_retries(3);
|
||||
|
||||
sock.set_tcp_keepalive(&ka)?;
|
||||
sock.set_keepalive(true)?;
|
||||
Ok(())
|
||||
@@ -265,7 +264,27 @@ impl MePool {
|
||||
IpFamily::V6
|
||||
};
|
||||
let is_socks_route = Self::is_socks_route(upstream_egress);
|
||||
let raw_socks_bound_addr = if is_socks_route {
|
||||
upstream_egress.and_then(|info| info.socks_bound_addr)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let socks_bound_addr = Self::select_socks_bound_addr(family, upstream_egress);
|
||||
let bnd_addr_status = if !is_socks_route {
|
||||
BndAddrStatus::Error
|
||||
} else if raw_socks_bound_addr.is_some() && socks_bound_addr.is_none() {
|
||||
BndAddrStatus::Bogon
|
||||
} else if socks_bound_addr.is_some() {
|
||||
BndAddrStatus::Ok
|
||||
} else {
|
||||
BndAddrStatus::Error
|
||||
};
|
||||
let bnd_port_status = if is_socks_route {
|
||||
Self::bnd_port_status(raw_socks_bound_addr)
|
||||
} else {
|
||||
BndPortStatus::Error
|
||||
};
|
||||
record_bnd_status(bnd_addr_status, bnd_port_status, raw_socks_bound_addr);
|
||||
let reflected = if let Some(bound) = socks_bound_addr {
|
||||
Some(bound)
|
||||
} else if is_socks_route {
|
||||
@@ -296,6 +315,18 @@ impl MePool {
|
||||
|
||||
let local_addr_nat = self.translate_our_addr_with_reflection(local_addr, reflected);
|
||||
let peer_addr_nat = SocketAddr::new(self.translate_ip_for_nat(peer_addr.ip()), peer_addr.port());
|
||||
if let Some(upstream_info) = upstream_egress {
|
||||
let client_ip_for_kdf = socks_bound_addr
|
||||
.map(|value| value.ip())
|
||||
.unwrap_or(local_addr_nat.ip());
|
||||
record_upstream_bnd_status(
|
||||
upstream_info.upstream_id,
|
||||
bnd_addr_status,
|
||||
bnd_port_status,
|
||||
raw_socks_bound_addr,
|
||||
Some(client_ip_for_kdf),
|
||||
);
|
||||
}
|
||||
let (mut rd, mut wr) = tokio::io::split(stream);
|
||||
|
||||
let my_nonce: [u8; 16] = rng.bytes(16).try_into().unwrap();
|
||||
@@ -387,9 +418,11 @@ impl MePool {
|
||||
socks_bound_addr.map(|value| value.ip()),
|
||||
client_port_source,
|
||||
);
|
||||
let mut kdf_fingerprint_guard = self.kdf_material_fingerprint.lock().await;
|
||||
if let Some((prev_fingerprint, prev_client_port)) =
|
||||
let previous_kdf_fingerprint = {
|
||||
let kdf_fingerprint_guard = self.kdf_material_fingerprint.read().await;
|
||||
kdf_fingerprint_guard.get(&peer_addr_nat).copied()
|
||||
};
|
||||
if let Some((prev_fingerprint, prev_client_port)) = previous_kdf_fingerprint
|
||||
{
|
||||
if prev_fingerprint != kdf_fingerprint {
|
||||
self.stats.increment_me_kdf_drift_total();
|
||||
@@ -416,6 +449,9 @@ impl MePool {
|
||||
);
|
||||
}
|
||||
}
|
||||
// Keep fingerprint updates eventually consistent for diagnostics while avoiding
|
||||
// serializing all concurrent handshakes on a single async mutex.
|
||||
let mut kdf_fingerprint_guard = self.kdf_material_fingerprint.write().await;
|
||||
kdf_fingerprint_guard.insert(peer_addr_nat, (kdf_fingerprint, client_port_for_kdf));
|
||||
drop(kdf_fingerprint_guard);
|
||||
|
||||
@@ -677,3 +713,66 @@ fn hex_dump(data: &[u8]) -> String {
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::io::ErrorKind;
|
||||
use tokio::net::{TcpListener, TcpStream};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_configure_keepalive_loopback() {
|
||||
let listener = match TcpListener::bind("127.0.0.1:0").await {
|
||||
Ok(listener) => listener,
|
||||
Err(error) if error.kind() == ErrorKind::PermissionDenied => return,
|
||||
Err(error) => panic!("bind failed: {error}"),
|
||||
};
|
||||
|
||||
let addr = match listener.local_addr() {
|
||||
Ok(addr) => addr,
|
||||
Err(error) => panic!("local_addr failed: {error}"),
|
||||
};
|
||||
|
||||
let stream = match TcpStream::connect(addr).await {
|
||||
Ok(stream) => stream,
|
||||
Err(error) if error.kind() == ErrorKind::PermissionDenied => return,
|
||||
Err(error) => panic!("connect failed: {error}"),
|
||||
};
|
||||
|
||||
if let Err(error) = MePool::configure_keepalive(&stream) {
|
||||
if error.kind() == ErrorKind::PermissionDenied {
|
||||
return;
|
||||
}
|
||||
panic!("configure_keepalive failed: {error}");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_os = "openbsd")]
|
||||
fn test_openbsd_keepalive_cfg_path_compiles() {
|
||||
let _ka = TcpKeepalive::new().with_time(Duration::from_secs(30));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(any(
|
||||
target_os = "android",
|
||||
target_os = "dragonfly",
|
||||
target_os = "freebsd",
|
||||
target_os = "fuchsia",
|
||||
target_os = "illumos",
|
||||
target_os = "ios",
|
||||
target_os = "visionos",
|
||||
target_os = "linux",
|
||||
target_os = "macos",
|
||||
target_os = "netbsd",
|
||||
target_os = "tvos",
|
||||
target_os = "watchos",
|
||||
target_os = "cygwin",
|
||||
))]
|
||||
fn test_retry_keepalive_cfg_path_compiles() {
|
||||
let _ka = TcpKeepalive::new()
|
||||
.with_time(Duration::from_secs(30))
|
||||
.with_interval(Duration::from_secs(10))
|
||||
.with_retries(3);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,6 @@ use crate::network::IpFamily;
|
||||
|
||||
use super::MePool;
|
||||
|
||||
const HEALTH_INTERVAL_SECS: u64 = 1;
|
||||
const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff
|
||||
#[allow(dead_code)]
|
||||
const MAX_CONCURRENT_PER_DC_DEFAULT: usize = 1;
|
||||
@@ -22,6 +21,34 @@ const IDLE_REFRESH_TRIGGER_BASE_SECS: u64 = 45;
|
||||
const IDLE_REFRESH_TRIGGER_JITTER_SECS: u64 = 5;
|
||||
const IDLE_REFRESH_RETRY_SECS: u64 = 8;
|
||||
const IDLE_REFRESH_SUCCESS_GUARD_SECS: u64 = 5;
|
||||
const HEALTH_RECONNECT_BUDGET_PER_CORE: usize = 2;
|
||||
const HEALTH_RECONNECT_BUDGET_PER_DC: usize = 1;
|
||||
const HEALTH_RECONNECT_BUDGET_MIN: usize = 4;
|
||||
const HEALTH_RECONNECT_BUDGET_MAX: usize = 128;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct DcFloorPlanEntry {
|
||||
dc: i32,
|
||||
endpoints: Vec<SocketAddr>,
|
||||
alive: usize,
|
||||
min_required: usize,
|
||||
target_required: usize,
|
||||
max_required: usize,
|
||||
has_bound_clients: bool,
|
||||
floor_capped: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct FamilyFloorPlan {
|
||||
by_dc: HashMap<i32, DcFloorPlanEntry>,
|
||||
active_cap_configured_total: usize,
|
||||
active_cap_effective_total: usize,
|
||||
warm_cap_configured_total: usize,
|
||||
warm_cap_effective_total: usize,
|
||||
active_writers_current: usize,
|
||||
warm_writers_current: usize,
|
||||
target_writers_total: usize,
|
||||
}
|
||||
|
||||
pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_connections: usize) {
|
||||
let mut backoff: HashMap<(i32, IpFamily), u64> = HashMap::new();
|
||||
@@ -34,10 +61,18 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
|
||||
let mut idle_refresh_next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new();
|
||||
let mut adaptive_idle_since: HashMap<(i32, IpFamily), Instant> = HashMap::new();
|
||||
let mut adaptive_recover_until: HashMap<(i32, IpFamily), Instant> = HashMap::new();
|
||||
let mut floor_warn_next_allowed: HashMap<(i32, IpFamily), Instant> = HashMap::new();
|
||||
let mut degraded_interval = true;
|
||||
loop {
|
||||
tokio::time::sleep(Duration::from_secs(HEALTH_INTERVAL_SECS)).await;
|
||||
let interval = if degraded_interval {
|
||||
pool.health_interval_unhealthy()
|
||||
} else {
|
||||
pool.health_interval_healthy()
|
||||
};
|
||||
tokio::time::sleep(interval).await;
|
||||
pool.prune_closed_writers().await;
|
||||
check_family(
|
||||
reap_draining_writers(&pool).await;
|
||||
let v4_degraded = check_family(
|
||||
IpFamily::V4,
|
||||
&pool,
|
||||
&rng,
|
||||
@@ -51,9 +86,10 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
|
||||
&mut idle_refresh_next_attempt,
|
||||
&mut adaptive_idle_since,
|
||||
&mut adaptive_recover_until,
|
||||
&mut floor_warn_next_allowed,
|
||||
)
|
||||
.await;
|
||||
check_family(
|
||||
let v6_degraded = check_family(
|
||||
IpFamily::V6,
|
||||
&pool,
|
||||
&rng,
|
||||
@@ -67,8 +103,32 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
|
||||
&mut idle_refresh_next_attempt,
|
||||
&mut adaptive_idle_since,
|
||||
&mut adaptive_recover_until,
|
||||
&mut floor_warn_next_allowed,
|
||||
)
|
||||
.await;
|
||||
degraded_interval = v4_degraded || v6_degraded;
|
||||
}
|
||||
}
|
||||
|
||||
async fn reap_draining_writers(pool: &Arc<MePool>) {
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let writers = pool.writers.read().await.clone();
|
||||
for writer in writers {
|
||||
if !writer.draining.load(std::sync::atomic::Ordering::Relaxed) {
|
||||
continue;
|
||||
}
|
||||
if pool.registry.is_writer_empty(writer.id).await {
|
||||
pool.remove_writer_and_close_clients(writer.id).await;
|
||||
continue;
|
||||
}
|
||||
let deadline_epoch_secs = writer
|
||||
.drain_deadline_epoch_secs
|
||||
.load(std::sync::atomic::Ordering::Relaxed);
|
||||
if deadline_epoch_secs != 0 && now_epoch_secs >= deadline_epoch_secs {
|
||||
warn!(writer_id = writer.id, "Drain timeout, force-closing");
|
||||
pool.stats.increment_pool_force_close_total();
|
||||
pool.remove_writer_and_close_clients(writer.id).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,71 +146,107 @@ async fn check_family(
|
||||
idle_refresh_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
) {
|
||||
floor_warn_next_allowed: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
) -> bool {
|
||||
let enabled = match family {
|
||||
IpFamily::V4 => pool.decision.ipv4_me,
|
||||
IpFamily::V6 => pool.decision.ipv6_me,
|
||||
};
|
||||
if !enabled {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
let map = match family {
|
||||
IpFamily::V4 => pool.proxy_map_v4.read().await.clone(),
|
||||
IpFamily::V6 => pool.proxy_map_v6.read().await.clone(),
|
||||
};
|
||||
let mut family_degraded = false;
|
||||
|
||||
let mut dc_endpoints = HashMap::<i32, Vec<SocketAddr>>::new();
|
||||
for (dc, addrs) in map {
|
||||
let entry = dc_endpoints.entry(dc.abs()).or_default();
|
||||
for (ip, port) in addrs {
|
||||
let map_guard = match family {
|
||||
IpFamily::V4 => pool.proxy_map_v4.read().await,
|
||||
IpFamily::V6 => pool.proxy_map_v6.read().await,
|
||||
};
|
||||
for (dc, addrs) in map_guard.iter() {
|
||||
let entry = dc_endpoints.entry(*dc).or_default();
|
||||
for (ip, port) in addrs.iter().copied() {
|
||||
entry.push(SocketAddr::new(ip, port));
|
||||
}
|
||||
}
|
||||
drop(map_guard);
|
||||
for endpoints in dc_endpoints.values_mut() {
|
||||
endpoints.sort_unstable();
|
||||
endpoints.dedup();
|
||||
}
|
||||
let mut reconnect_budget = health_reconnect_budget(pool, dc_endpoints.len());
|
||||
|
||||
if pool.floor_mode() == MeFloorMode::Static {
|
||||
adaptive_idle_since.clear();
|
||||
adaptive_recover_until.clear();
|
||||
}
|
||||
|
||||
let mut live_addr_counts = HashMap::<SocketAddr, usize>::new();
|
||||
let mut live_writer_ids_by_addr = HashMap::<SocketAddr, Vec<u64>>::new();
|
||||
let mut live_addr_counts = HashMap::<(i32, SocketAddr), usize>::new();
|
||||
let mut live_writer_ids_by_addr = HashMap::<(i32, SocketAddr), Vec<u64>>::new();
|
||||
for writer in pool.writers.read().await.iter().filter(|w| {
|
||||
!w.draining.load(std::sync::atomic::Ordering::Relaxed)
|
||||
}) {
|
||||
*live_addr_counts.entry(writer.addr).or_insert(0) += 1;
|
||||
if !matches!(
|
||||
super::pool::WriterContour::from_u8(
|
||||
writer.contour.load(std::sync::atomic::Ordering::Relaxed),
|
||||
),
|
||||
super::pool::WriterContour::Active
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
let key = (writer.writer_dc, writer.addr);
|
||||
*live_addr_counts.entry(key).or_insert(0) += 1;
|
||||
live_writer_ids_by_addr
|
||||
.entry(writer.addr)
|
||||
.entry(key)
|
||||
.or_default()
|
||||
.push(writer.id);
|
||||
}
|
||||
let writer_idle_since = pool.registry.writer_idle_since_snapshot().await;
|
||||
let bound_clients_by_writer = pool
|
||||
.registry
|
||||
.writer_activity_snapshot()
|
||||
.await
|
||||
.bound_clients_by_writer;
|
||||
let floor_plan = build_family_floor_plan(
|
||||
pool,
|
||||
family,
|
||||
&dc_endpoints,
|
||||
&live_addr_counts,
|
||||
&live_writer_ids_by_addr,
|
||||
&bound_clients_by_writer,
|
||||
adaptive_idle_since,
|
||||
adaptive_recover_until,
|
||||
)
|
||||
.await;
|
||||
pool.set_adaptive_floor_runtime_caps(
|
||||
floor_plan.active_cap_configured_total,
|
||||
floor_plan.active_cap_effective_total,
|
||||
floor_plan.warm_cap_configured_total,
|
||||
floor_plan.warm_cap_effective_total,
|
||||
floor_plan.target_writers_total,
|
||||
floor_plan.active_writers_current,
|
||||
floor_plan.warm_writers_current,
|
||||
);
|
||||
|
||||
for (dc, endpoints) in dc_endpoints {
|
||||
if endpoints.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let key = (dc, family);
|
||||
let reduce_for_idle = should_reduce_floor_for_idle(
|
||||
pool,
|
||||
key,
|
||||
&endpoints,
|
||||
&live_writer_ids_by_addr,
|
||||
adaptive_idle_since,
|
||||
adaptive_recover_until,
|
||||
)
|
||||
.await;
|
||||
let required = pool.required_writers_for_dc_with_floor_mode(endpoints.len(), reduce_for_idle);
|
||||
let required = floor_plan
|
||||
.by_dc
|
||||
.get(&dc)
|
||||
.map(|entry| entry.target_required)
|
||||
.unwrap_or_else(|| {
|
||||
pool.required_writers_for_dc_with_floor_mode(endpoints.len(), false)
|
||||
});
|
||||
let alive = endpoints
|
||||
.iter()
|
||||
.map(|addr| *live_addr_counts.get(addr).unwrap_or(&0))
|
||||
.map(|addr| *live_addr_counts.get(&(dc, *addr)).unwrap_or(&0))
|
||||
.sum::<usize>();
|
||||
|
||||
if endpoints.len() == 1 && pool.single_endpoint_outage_mode_enabled() && alive == 0 {
|
||||
family_degraded = true;
|
||||
if single_endpoint_outage.insert(key) {
|
||||
pool.stats.increment_me_single_endpoint_outage_enter_total();
|
||||
warn!(
|
||||
@@ -170,6 +266,7 @@ async fn check_family(
|
||||
required,
|
||||
outage_backoff,
|
||||
outage_next_attempt,
|
||||
&mut reconnect_budget,
|
||||
)
|
||||
.await;
|
||||
continue;
|
||||
@@ -205,6 +302,7 @@ async fn check_family(
|
||||
required,
|
||||
&live_writer_ids_by_addr,
|
||||
&writer_idle_since,
|
||||
&bound_clients_by_writer,
|
||||
idle_refresh_next_attempt,
|
||||
)
|
||||
.await;
|
||||
@@ -218,14 +316,34 @@ async fn check_family(
|
||||
alive,
|
||||
required,
|
||||
&live_writer_ids_by_addr,
|
||||
&bound_clients_by_writer,
|
||||
shadow_rotate_deadline,
|
||||
)
|
||||
.await;
|
||||
continue;
|
||||
}
|
||||
let missing = required - alive;
|
||||
family_degraded = true;
|
||||
|
||||
let now = Instant::now();
|
||||
if reconnect_budget == 0 {
|
||||
let base_ms = pool.me_reconnect_backoff_base.as_millis() as u64;
|
||||
let next_ms = (*backoff.get(&key).unwrap_or(&base_ms)).max(base_ms);
|
||||
let jitter = next_ms / JITTER_FRAC_NUM;
|
||||
let wait = Duration::from_millis(next_ms)
|
||||
+ Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
|
||||
next_attempt.insert(key, now + wait);
|
||||
debug!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
alive,
|
||||
required,
|
||||
endpoint_count = endpoints.len(),
|
||||
reconnect_budget,
|
||||
"Skipping reconnect due to per-tick health reconnect budget"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
if let Some(ts) = next_attempt.get(&key)
|
||||
&& now < *ts
|
||||
{
|
||||
@@ -236,7 +354,10 @@ async fn check_family(
|
||||
if *inflight.get(&key).unwrap_or(&0) >= max_concurrent {
|
||||
continue;
|
||||
}
|
||||
if pool.has_refill_inflight_for_endpoints(&endpoints).await {
|
||||
if pool
|
||||
.has_refill_inflight_for_dc_key(super::pool::RefillDcKey { dc, family })
|
||||
.await
|
||||
{
|
||||
debug!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
@@ -251,9 +372,44 @@ async fn check_family(
|
||||
|
||||
let mut restored = 0usize;
|
||||
for _ in 0..missing {
|
||||
if reconnect_budget == 0 {
|
||||
break;
|
||||
}
|
||||
reconnect_budget = reconnect_budget.saturating_sub(1);
|
||||
if pool.active_contour_writer_count_total().await
|
||||
>= floor_plan.active_cap_effective_total
|
||||
{
|
||||
let swapped = maybe_swap_idle_writer_for_cap(
|
||||
pool,
|
||||
rng,
|
||||
dc,
|
||||
family,
|
||||
&endpoints,
|
||||
&live_writer_ids_by_addr,
|
||||
&writer_idle_since,
|
||||
&bound_clients_by_writer,
|
||||
)
|
||||
.await;
|
||||
if swapped {
|
||||
pool.stats.increment_me_floor_swap_idle_total();
|
||||
restored += 1;
|
||||
continue;
|
||||
}
|
||||
pool.stats.increment_me_floor_cap_block_total();
|
||||
pool.stats.increment_me_floor_swap_idle_failed_total();
|
||||
debug!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
alive,
|
||||
required,
|
||||
active_cap_effective_total = floor_plan.active_cap_effective_total,
|
||||
"Adaptive floor cap reached, reconnect attempt blocked"
|
||||
);
|
||||
break;
|
||||
}
|
||||
let res = tokio::time::timeout(
|
||||
pool.me_one_timeout,
|
||||
pool.connect_endpoints_round_robin(&endpoints, rng.as_ref()),
|
||||
pool.connect_endpoints_round_robin(dc, &endpoints, rng.as_ref()),
|
||||
)
|
||||
.await;
|
||||
match res {
|
||||
@@ -295,20 +451,373 @@ async fn check_family(
|
||||
let wait = Duration::from_millis(next_ms)
|
||||
+ Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
|
||||
next_attempt.insert(key, now + wait);
|
||||
warn!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
alive = now_alive,
|
||||
required,
|
||||
endpoint_count = endpoints.len(),
|
||||
backoff_ms = next_ms,
|
||||
"DC writer floor is below required level, scheduled reconnect"
|
||||
);
|
||||
if pool.is_runtime_ready() {
|
||||
let warn_cooldown = pool.warn_rate_limit_duration();
|
||||
if should_emit_rate_limited_warn(
|
||||
floor_warn_next_allowed,
|
||||
key,
|
||||
now,
|
||||
warn_cooldown,
|
||||
) {
|
||||
warn!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
alive = now_alive,
|
||||
required,
|
||||
endpoint_count = endpoints.len(),
|
||||
backoff_ms = next_ms,
|
||||
"DC writer floor is below required level, scheduled reconnect"
|
||||
);
|
||||
}
|
||||
} else {
|
||||
info!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
alive = now_alive,
|
||||
required,
|
||||
endpoint_count = endpoints.len(),
|
||||
backoff_ms = next_ms,
|
||||
"DC writer floor is below required level during startup, scheduled reconnect"
|
||||
);
|
||||
}
|
||||
}
|
||||
if let Some(v) = inflight.get_mut(&key) {
|
||||
*v = v.saturating_sub(1);
|
||||
}
|
||||
}
|
||||
|
||||
family_degraded
|
||||
}
|
||||
|
||||
fn health_reconnect_budget(pool: &Arc<MePool>, dc_groups: usize) -> usize {
|
||||
let cpu_cores = pool.adaptive_floor_effective_cpu_cores().max(1);
|
||||
let by_cpu = cpu_cores.saturating_mul(HEALTH_RECONNECT_BUDGET_PER_CORE);
|
||||
let by_dc = dc_groups.saturating_mul(HEALTH_RECONNECT_BUDGET_PER_DC);
|
||||
by_cpu
|
||||
.saturating_add(by_dc)
|
||||
.clamp(HEALTH_RECONNECT_BUDGET_MIN, HEALTH_RECONNECT_BUDGET_MAX)
|
||||
}
|
||||
|
||||
fn should_emit_rate_limited_warn(
|
||||
next_allowed: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
key: (i32, IpFamily),
|
||||
now: Instant,
|
||||
cooldown: Duration,
|
||||
) -> bool {
|
||||
let Some(ready_at) = next_allowed.get(&key).copied() else {
|
||||
next_allowed.insert(key, now + cooldown);
|
||||
return true;
|
||||
};
|
||||
if now >= ready_at {
|
||||
next_allowed.insert(key, now + cooldown);
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn adaptive_floor_class_min(
|
||||
pool: &Arc<MePool>,
|
||||
endpoint_count: usize,
|
||||
base_required: usize,
|
||||
) -> usize {
|
||||
if endpoint_count <= 1 {
|
||||
let min_single = (pool
|
||||
.me_adaptive_floor_min_writers_single_endpoint
|
||||
.load(std::sync::atomic::Ordering::Relaxed) as usize)
|
||||
.max(1);
|
||||
min_single.min(base_required.max(1))
|
||||
} else {
|
||||
pool.adaptive_floor_min_writers_multi_endpoint()
|
||||
.min(base_required.max(1))
|
||||
}
|
||||
}
|
||||
|
||||
fn adaptive_floor_class_max(
|
||||
pool: &Arc<MePool>,
|
||||
endpoint_count: usize,
|
||||
base_required: usize,
|
||||
cpu_cores: usize,
|
||||
) -> usize {
|
||||
let extra_per_core = if endpoint_count <= 1 {
|
||||
pool.adaptive_floor_max_extra_single_per_core()
|
||||
} else {
|
||||
pool.adaptive_floor_max_extra_multi_per_core()
|
||||
};
|
||||
base_required.saturating_add(cpu_cores.saturating_mul(extra_per_core))
|
||||
}
|
||||
|
||||
fn list_writer_ids_for_endpoints(
|
||||
dc: i32,
|
||||
endpoints: &[SocketAddr],
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
) -> Vec<u64> {
|
||||
let mut out = Vec::<u64>::new();
|
||||
for endpoint in endpoints {
|
||||
if let Some(ids) = live_writer_ids_by_addr.get(&(dc, *endpoint)) {
|
||||
out.extend(ids.iter().copied());
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
async fn build_family_floor_plan(
|
||||
pool: &Arc<MePool>,
|
||||
family: IpFamily,
|
||||
dc_endpoints: &HashMap<i32, Vec<SocketAddr>>,
|
||||
live_addr_counts: &HashMap<(i32, SocketAddr), usize>,
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
) -> FamilyFloorPlan {
|
||||
let mut entries = Vec::<DcFloorPlanEntry>::new();
|
||||
let mut by_dc = HashMap::<i32, DcFloorPlanEntry>::new();
|
||||
let mut family_active_total = 0usize;
|
||||
|
||||
let floor_mode = pool.floor_mode();
|
||||
let is_adaptive = floor_mode == MeFloorMode::Adaptive;
|
||||
let cpu_cores = pool.adaptive_floor_effective_cpu_cores().max(1);
|
||||
let (active_writers_current, warm_writers_current, _) =
|
||||
pool.non_draining_writer_counts_by_contour().await;
|
||||
|
||||
for (dc, endpoints) in dc_endpoints {
|
||||
if endpoints.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let key = (*dc, family);
|
||||
let reduce_for_idle = should_reduce_floor_for_idle(
|
||||
pool,
|
||||
key,
|
||||
*dc,
|
||||
endpoints,
|
||||
live_writer_ids_by_addr,
|
||||
bound_clients_by_writer,
|
||||
adaptive_idle_since,
|
||||
adaptive_recover_until,
|
||||
)
|
||||
.await;
|
||||
let base_required = pool.required_writers_for_dc(endpoints.len()).max(1);
|
||||
let min_required = if is_adaptive {
|
||||
adaptive_floor_class_min(pool, endpoints.len(), base_required)
|
||||
} else {
|
||||
base_required
|
||||
};
|
||||
let mut max_required = if is_adaptive {
|
||||
adaptive_floor_class_max(pool, endpoints.len(), base_required, cpu_cores)
|
||||
} else {
|
||||
base_required
|
||||
};
|
||||
if max_required < min_required {
|
||||
max_required = min_required;
|
||||
}
|
||||
let desired_raw = if is_adaptive && reduce_for_idle {
|
||||
min_required
|
||||
} else {
|
||||
base_required
|
||||
};
|
||||
let target_required = desired_raw.clamp(min_required, max_required);
|
||||
let alive = endpoints
|
||||
.iter()
|
||||
.map(|endpoint| live_addr_counts.get(&(*dc, *endpoint)).copied().unwrap_or(0))
|
||||
.sum::<usize>();
|
||||
family_active_total = family_active_total.saturating_add(alive);
|
||||
let writer_ids = list_writer_ids_for_endpoints(*dc, endpoints, live_writer_ids_by_addr);
|
||||
let has_bound_clients = has_bound_clients_on_endpoint(&writer_ids, bound_clients_by_writer);
|
||||
|
||||
entries.push(DcFloorPlanEntry {
|
||||
dc: *dc,
|
||||
endpoints: endpoints.clone(),
|
||||
alive,
|
||||
min_required,
|
||||
target_required,
|
||||
max_required,
|
||||
has_bound_clients,
|
||||
floor_capped: false,
|
||||
});
|
||||
}
|
||||
|
||||
if entries.is_empty() {
|
||||
let active_cap_configured_total = pool.adaptive_floor_active_cap_configured_total();
|
||||
let warm_cap_configured_total = pool.adaptive_floor_warm_cap_configured_total();
|
||||
return FamilyFloorPlan {
|
||||
by_dc,
|
||||
active_cap_configured_total,
|
||||
active_cap_effective_total: active_cap_configured_total,
|
||||
warm_cap_configured_total,
|
||||
warm_cap_effective_total: warm_cap_configured_total,
|
||||
active_writers_current,
|
||||
warm_writers_current,
|
||||
target_writers_total: 0,
|
||||
};
|
||||
}
|
||||
|
||||
if !is_adaptive {
|
||||
let target_total = entries
|
||||
.iter()
|
||||
.map(|entry| entry.target_required)
|
||||
.sum::<usize>();
|
||||
let active_cap_configured_total = pool.adaptive_floor_active_cap_configured_total();
|
||||
let warm_cap_configured_total = pool.adaptive_floor_warm_cap_configured_total();
|
||||
for entry in entries {
|
||||
by_dc.insert(entry.dc, entry);
|
||||
}
|
||||
return FamilyFloorPlan {
|
||||
by_dc,
|
||||
active_cap_configured_total,
|
||||
active_cap_effective_total: active_cap_configured_total.max(target_total),
|
||||
warm_cap_configured_total,
|
||||
warm_cap_effective_total: warm_cap_configured_total,
|
||||
active_writers_current,
|
||||
warm_writers_current,
|
||||
target_writers_total: target_total,
|
||||
};
|
||||
}
|
||||
|
||||
let active_cap_configured_total = pool.adaptive_floor_active_cap_configured_total();
|
||||
let warm_cap_configured_total = pool.adaptive_floor_warm_cap_configured_total();
|
||||
let other_active = active_writers_current.saturating_sub(family_active_total);
|
||||
let min_sum = entries
|
||||
.iter()
|
||||
.map(|entry| entry.min_required)
|
||||
.sum::<usize>();
|
||||
let mut target_sum = entries
|
||||
.iter()
|
||||
.map(|entry| entry.target_required)
|
||||
.sum::<usize>();
|
||||
let family_cap = active_cap_configured_total
|
||||
.saturating_sub(other_active)
|
||||
.max(min_sum);
|
||||
if target_sum > family_cap {
|
||||
entries.sort_by_key(|entry| {
|
||||
(
|
||||
entry.has_bound_clients,
|
||||
std::cmp::Reverse(entry.target_required.saturating_sub(entry.min_required)),
|
||||
std::cmp::Reverse(entry.alive),
|
||||
entry.dc.abs(),
|
||||
entry.dc,
|
||||
entry.endpoints.len(),
|
||||
entry.max_required,
|
||||
)
|
||||
});
|
||||
let mut changed = true;
|
||||
while target_sum > family_cap && changed {
|
||||
changed = false;
|
||||
for entry in &mut entries {
|
||||
if target_sum <= family_cap {
|
||||
break;
|
||||
}
|
||||
if entry.target_required > entry.min_required {
|
||||
entry.target_required -= 1;
|
||||
entry.floor_capped = true;
|
||||
target_sum -= 1;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for entry in entries {
|
||||
by_dc.insert(entry.dc, entry);
|
||||
}
|
||||
let active_cap_effective_total =
|
||||
active_cap_configured_total.max(other_active.saturating_add(min_sum));
|
||||
let target_writers_total = other_active.saturating_add(target_sum);
|
||||
FamilyFloorPlan {
|
||||
by_dc,
|
||||
active_cap_configured_total,
|
||||
active_cap_effective_total,
|
||||
warm_cap_configured_total,
|
||||
warm_cap_effective_total: warm_cap_configured_total,
|
||||
active_writers_current,
|
||||
warm_writers_current,
|
||||
target_writers_total,
|
||||
}
|
||||
}
|
||||
|
||||
async fn maybe_swap_idle_writer_for_cap(
|
||||
pool: &Arc<MePool>,
|
||||
rng: &Arc<SecureRandom>,
|
||||
dc: i32,
|
||||
family: IpFamily,
|
||||
endpoints: &[SocketAddr],
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
writer_idle_since: &HashMap<u64, u64>,
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
) -> bool {
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let mut candidate: Option<(u64, SocketAddr, u64)> = None;
|
||||
for endpoint in endpoints {
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(&(dc, *endpoint)) else {
|
||||
continue;
|
||||
};
|
||||
for writer_id in writer_ids {
|
||||
if bound_clients_by_writer.get(writer_id).copied().unwrap_or(0) > 0 {
|
||||
continue;
|
||||
}
|
||||
let Some(idle_since_epoch_secs) = writer_idle_since.get(writer_id).copied() else {
|
||||
continue;
|
||||
};
|
||||
let idle_age_secs = now_epoch_secs.saturating_sub(idle_since_epoch_secs);
|
||||
if candidate
|
||||
.as_ref()
|
||||
.map(|(_, _, age)| idle_age_secs > *age)
|
||||
.unwrap_or(true)
|
||||
{
|
||||
candidate = Some((*writer_id, *endpoint, idle_age_secs));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let Some((old_writer_id, endpoint, idle_age_secs)) = candidate else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let connected = match tokio::time::timeout(
|
||||
pool.me_one_timeout,
|
||||
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(())) => true,
|
||||
Ok(Err(error)) => {
|
||||
debug!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
%endpoint,
|
||||
old_writer_id,
|
||||
idle_age_secs,
|
||||
%error,
|
||||
"Adaptive floor cap swap connect failed"
|
||||
);
|
||||
false
|
||||
}
|
||||
Err(_) => {
|
||||
debug!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
%endpoint,
|
||||
old_writer_id,
|
||||
idle_age_secs,
|
||||
"Adaptive floor cap swap connect timed out"
|
||||
);
|
||||
false
|
||||
}
|
||||
};
|
||||
if !connected {
|
||||
return false;
|
||||
}
|
||||
|
||||
pool.mark_writer_draining_with_timeout(old_writer_id, pool.force_close_timeout(), false)
|
||||
.await;
|
||||
info!(
|
||||
dc = %dc,
|
||||
?family,
|
||||
%endpoint,
|
||||
old_writer_id,
|
||||
idle_age_secs,
|
||||
"Adaptive floor cap swap: idle writer rotated"
|
||||
);
|
||||
true
|
||||
}
|
||||
|
||||
async fn maybe_refresh_idle_writer_for_dc(
|
||||
@@ -320,8 +829,9 @@ async fn maybe_refresh_idle_writer_for_dc(
|
||||
endpoints: &[SocketAddr],
|
||||
alive: usize,
|
||||
required: usize,
|
||||
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
writer_idle_since: &HashMap<u64, u64>,
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
idle_refresh_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
) {
|
||||
if alive < required {
|
||||
@@ -338,10 +848,13 @@ async fn maybe_refresh_idle_writer_for_dc(
|
||||
let now_epoch_secs = MePool::now_epoch_secs();
|
||||
let mut candidate: Option<(u64, SocketAddr, u64, u64)> = None;
|
||||
for endpoint in endpoints {
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(endpoint) else {
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(&(dc, *endpoint)) else {
|
||||
continue;
|
||||
};
|
||||
for writer_id in writer_ids {
|
||||
if bound_clients_by_writer.get(writer_id).copied().unwrap_or(0) > 0 {
|
||||
continue;
|
||||
}
|
||||
let Some(idle_since_epoch_secs) = writer_idle_since.get(writer_id).copied() else {
|
||||
continue;
|
||||
};
|
||||
@@ -365,7 +878,12 @@ async fn maybe_refresh_idle_writer_for_dc(
|
||||
return;
|
||||
};
|
||||
|
||||
let rotate_ok = match tokio::time::timeout(pool.me_one_timeout, pool.connect_one(endpoint, rng.as_ref())).await {
|
||||
let rotate_ok = match tokio::time::timeout(
|
||||
pool.me_one_timeout,
|
||||
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(())) => true,
|
||||
Ok(Err(error)) => {
|
||||
debug!(
|
||||
@@ -421,24 +939,22 @@ async fn maybe_refresh_idle_writer_for_dc(
|
||||
async fn should_reduce_floor_for_idle(
|
||||
pool: &Arc<MePool>,
|
||||
key: (i32, IpFamily),
|
||||
dc: i32,
|
||||
endpoints: &[SocketAddr],
|
||||
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
adaptive_idle_since: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
adaptive_recover_until: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
) -> bool {
|
||||
if endpoints.len() != 1 || pool.floor_mode() != MeFloorMode::Adaptive {
|
||||
if pool.floor_mode() != MeFloorMode::Adaptive {
|
||||
adaptive_idle_since.remove(&key);
|
||||
adaptive_recover_until.remove(&key);
|
||||
return false;
|
||||
}
|
||||
|
||||
let now = Instant::now();
|
||||
let endpoint = endpoints[0];
|
||||
let writer_ids = live_writer_ids_by_addr
|
||||
.get(&endpoint)
|
||||
.map(Vec::as_slice)
|
||||
.unwrap_or(&[]);
|
||||
let has_bound_clients = has_bound_clients_on_endpoint(pool, writer_ids).await;
|
||||
let writer_ids = list_writer_ids_for_endpoints(dc, endpoints, live_writer_ids_by_addr);
|
||||
let has_bound_clients = has_bound_clients_on_endpoint(&writer_ids, bound_clients_by_writer);
|
||||
if has_bound_clients {
|
||||
adaptive_idle_since.remove(&key);
|
||||
adaptive_recover_until.insert(key, now + pool.adaptive_floor_recover_grace_duration());
|
||||
@@ -457,13 +973,13 @@ async fn should_reduce_floor_for_idle(
|
||||
now.saturating_duration_since(*idle_since) >= pool.adaptive_floor_idle_duration()
|
||||
}
|
||||
|
||||
async fn has_bound_clients_on_endpoint(pool: &Arc<MePool>, writer_ids: &[u64]) -> bool {
|
||||
for writer_id in writer_ids {
|
||||
if !pool.registry.is_writer_empty(*writer_id).await {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
/// Reports whether any writer in `writer_ids` currently has bound clients.
///
/// A writer counts as bound when its entry in `bound_clients_by_writer` is
/// greater than zero. Writers missing from the map are treated as having zero
/// bound clients. Returns `false` for an empty `writer_ids` slice.
fn has_bound_clients_on_endpoint(
    writer_ids: &[u64],
    bound_clients_by_writer: &HashMap<u64, usize>,
) -> bool {
    for writer_id in writer_ids {
        let bound = match bound_clients_by_writer.get(writer_id) {
            Some(count) => *count,
            None => 0,
        };
        if bound > 0 {
            return true;
        }
    }
    false
}
|
||||
|
||||
async fn recover_single_endpoint_outage(
|
||||
@@ -474,6 +990,7 @@ async fn recover_single_endpoint_outage(
|
||||
required: usize,
|
||||
outage_backoff: &mut HashMap<(i32, IpFamily), u64>,
|
||||
outage_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
reconnect_budget: &mut usize,
|
||||
) {
|
||||
let now = Instant::now();
|
||||
if let Some(ts) = outage_next_attempt.get(&key)
|
||||
@@ -483,6 +1000,18 @@ async fn recover_single_endpoint_outage(
|
||||
}
|
||||
|
||||
let (min_backoff_ms, max_backoff_ms) = pool.single_endpoint_outage_backoff_bounds_ms();
|
||||
if *reconnect_budget == 0 {
|
||||
outage_next_attempt.insert(key, now + Duration::from_millis(min_backoff_ms.max(250)));
|
||||
debug!(
|
||||
dc = %key.0,
|
||||
family = ?key.1,
|
||||
%endpoint,
|
||||
required,
|
||||
"Single-endpoint outage reconnect deferred by health reconnect budget"
|
||||
);
|
||||
return;
|
||||
}
|
||||
*reconnect_budget = (*reconnect_budget).saturating_sub(1);
|
||||
pool.stats
|
||||
.increment_me_single_endpoint_outage_reconnect_attempt_total();
|
||||
|
||||
@@ -490,7 +1019,12 @@ async fn recover_single_endpoint_outage(
|
||||
let attempt_ok = if bypass_quarantine {
|
||||
pool.stats
|
||||
.increment_me_single_endpoint_quarantine_bypass_total();
|
||||
match tokio::time::timeout(pool.me_one_timeout, pool.connect_one(endpoint, rng.as_ref())).await {
|
||||
match tokio::time::timeout(
|
||||
pool.me_one_timeout,
|
||||
pool.connect_one_for_dc(endpoint, key.0, rng.as_ref()),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(())) => true,
|
||||
Ok(Err(e)) => {
|
||||
debug!(
|
||||
@@ -516,7 +1050,7 @@ async fn recover_single_endpoint_outage(
|
||||
let one_endpoint = [endpoint];
|
||||
match tokio::time::timeout(
|
||||
pool.me_one_timeout,
|
||||
pool.connect_endpoints_round_robin(&one_endpoint, rng.as_ref()),
|
||||
pool.connect_endpoints_round_robin(key.0, &one_endpoint, rng.as_ref()),
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -580,7 +1114,8 @@ async fn maybe_rotate_single_endpoint_shadow(
|
||||
endpoints: &[SocketAddr],
|
||||
alive: usize,
|
||||
required: usize,
|
||||
live_writer_ids_by_addr: &HashMap<SocketAddr, Vec<u64>>,
|
||||
live_writer_ids_by_addr: &HashMap<(i32, SocketAddr), Vec<u64>>,
|
||||
bound_clients_by_writer: &HashMap<u64, usize>,
|
||||
shadow_rotate_deadline: &mut HashMap<(i32, IpFamily), Instant>,
|
||||
) {
|
||||
if endpoints.len() != 1 || alive < required {
|
||||
@@ -612,14 +1147,14 @@ async fn maybe_rotate_single_endpoint_shadow(
|
||||
return;
|
||||
}
|
||||
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(&endpoint) else {
|
||||
let Some(writer_ids) = live_writer_ids_by_addr.get(&(dc, endpoint)) else {
|
||||
shadow_rotate_deadline.insert(key, now + Duration::from_secs(SHADOW_ROTATE_RETRY_SECS));
|
||||
return;
|
||||
};
|
||||
|
||||
let mut candidate_writer_id = None;
|
||||
for writer_id in writer_ids {
|
||||
if pool.registry.is_writer_empty(*writer_id).await {
|
||||
if bound_clients_by_writer.get(writer_id).copied().unwrap_or(0) == 0 {
|
||||
candidate_writer_id = Some(*writer_id);
|
||||
break;
|
||||
}
|
||||
@@ -638,7 +1173,12 @@ async fn maybe_rotate_single_endpoint_shadow(
|
||||
return;
|
||||
};
|
||||
|
||||
let rotate_ok = match tokio::time::timeout(pool.me_one_timeout, pool.connect_one(endpoint, rng.as_ref())).await {
|
||||
let rotate_ok = match tokio::time::timeout(
|
||||
pool.me_one_timeout,
|
||||
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(())) => true,
|
||||
Ok(Err(e)) => {
|
||||
debug!(
|
||||
|
||||
@@ -10,6 +10,7 @@ mod pool_init;
|
||||
mod pool_nat;
|
||||
mod pool_refill;
|
||||
mod pool_reinit;
|
||||
mod pool_runtime_api;
|
||||
mod pool_writer;
|
||||
mod ping;
|
||||
mod reader;
|
||||
@@ -17,7 +18,9 @@ mod registry;
|
||||
mod rotation;
|
||||
mod send;
|
||||
mod secret;
|
||||
mod selftest;
|
||||
mod wire;
|
||||
mod pool_status;
|
||||
|
||||
use bytes::Bytes;
|
||||
|
||||
@@ -29,8 +32,15 @@ pub use pool::MePool;
|
||||
pub use pool_nat::{stun_probe, detect_public_ip};
|
||||
pub use registry::ConnRegistry;
|
||||
pub use secret::fetch_proxy_secret;
|
||||
pub use config_updater::{fetch_proxy_config, me_config_updater};
|
||||
#[allow(unused_imports)]
|
||||
pub use config_updater::{
|
||||
ProxyConfigData, fetch_proxy_config, fetch_proxy_config_with_raw, load_proxy_config_cache,
|
||||
me_config_updater, save_proxy_config_cache,
|
||||
};
|
||||
pub use rotation::{MeReinitTrigger, me_reinit_scheduler, me_rotation_task};
|
||||
pub(crate) use selftest::{
|
||||
bnd_snapshot, timeskew_snapshot, upstream_bnd_snapshots,
|
||||
};
|
||||
pub use wire::proto_flags_for_tag;
|
||||
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -331,7 +331,7 @@ pub async fn run_me_ping(pool: &Arc<MePool>, rng: &SecureRandom) -> Vec<MePingRe
|
||||
let mut error = None;
|
||||
let mut route = None;
|
||||
|
||||
match pool.connect_tcp(addr).await {
|
||||
match pool.connect_tcp(addr, None).await {
|
||||
Ok((stream, conn_rtt, upstream_egress)) => {
|
||||
connect_ms = Some(conn_rtt);
|
||||
route = route_from_egress(upstream_egress);
|
||||
|
||||
@@ -7,7 +7,9 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
use tokio::sync::{Mutex, Notify, RwLock, mpsc};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::config::{MeBindStaleMode, MeFloorMode, MeSocksKdfPolicy};
|
||||
use crate::config::{
|
||||
MeBindStaleMode, MeFloorMode, MeRouteNoWriterMode, MeSocksKdfPolicy, MeWriterPickMode,
|
||||
};
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::network::IpFamily;
|
||||
use crate::network::probe::NetworkDecision;
|
||||
@@ -22,18 +24,27 @@ pub(super) struct RefillDcKey {
|
||||
pub family: IpFamily,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub(super) struct RefillEndpointKey {
|
||||
pub dc: i32,
|
||||
pub addr: SocketAddr,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct MeWriter {
|
||||
pub id: u64,
|
||||
pub addr: SocketAddr,
|
||||
pub writer_dc: i32,
|
||||
pub generation: u64,
|
||||
pub contour: Arc<AtomicU8>,
|
||||
pub created_at: Instant,
|
||||
pub tx: mpsc::Sender<WriterCommand>,
|
||||
pub cancel: CancellationToken,
|
||||
pub degraded: Arc<AtomicBool>,
|
||||
pub rtt_ema_ms_x10: Arc<AtomicU32>,
|
||||
pub draining: Arc<AtomicBool>,
|
||||
pub draining_started_at_epoch_secs: Arc<AtomicU64>,
|
||||
pub drain_deadline_epoch_secs: Arc<AtomicU64>,
|
||||
pub allow_drain_fallback: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
@@ -95,6 +106,7 @@ pub struct MePool {
|
||||
pub(super) me_keepalive_jitter: Duration,
|
||||
pub(super) me_keepalive_payload_random: bool,
|
||||
pub(super) rpc_proxy_req_every_secs: AtomicU64,
|
||||
pub(super) writer_cmd_channel_capacity: usize,
|
||||
pub(super) me_warmup_stagger_enabled: bool,
|
||||
pub(super) me_warmup_step_delay: Duration,
|
||||
pub(super) me_warmup_step_jitter: Duration,
|
||||
@@ -111,16 +123,40 @@ pub struct MePool {
|
||||
pub(super) me_floor_mode: AtomicU8,
|
||||
pub(super) me_adaptive_floor_idle_secs: AtomicU64,
|
||||
pub(super) me_adaptive_floor_min_writers_single_endpoint: AtomicU8,
|
||||
pub(super) me_adaptive_floor_min_writers_multi_endpoint: AtomicU8,
|
||||
pub(super) me_adaptive_floor_recover_grace_secs: AtomicU64,
|
||||
pub(super) me_adaptive_floor_writers_per_core_total: AtomicU32,
|
||||
pub(super) me_adaptive_floor_cpu_cores_override: AtomicU32,
|
||||
pub(super) me_adaptive_floor_max_extra_writers_single_per_core: AtomicU32,
|
||||
pub(super) me_adaptive_floor_max_extra_writers_multi_per_core: AtomicU32,
|
||||
pub(super) me_adaptive_floor_max_active_writers_per_core: AtomicU32,
|
||||
pub(super) me_adaptive_floor_max_warm_writers_per_core: AtomicU32,
|
||||
pub(super) me_adaptive_floor_max_active_writers_global: AtomicU32,
|
||||
pub(super) me_adaptive_floor_max_warm_writers_global: AtomicU32,
|
||||
pub(super) me_adaptive_floor_cpu_cores_detected: AtomicU32,
|
||||
pub(super) me_adaptive_floor_cpu_cores_effective: AtomicU32,
|
||||
pub(super) me_adaptive_floor_global_cap_raw: AtomicU64,
|
||||
pub(super) me_adaptive_floor_global_cap_effective: AtomicU64,
|
||||
pub(super) me_adaptive_floor_target_writers_total: AtomicU64,
|
||||
pub(super) me_adaptive_floor_active_cap_configured: AtomicU64,
|
||||
pub(super) me_adaptive_floor_active_cap_effective: AtomicU64,
|
||||
pub(super) me_adaptive_floor_warm_cap_configured: AtomicU64,
|
||||
pub(super) me_adaptive_floor_warm_cap_effective: AtomicU64,
|
||||
pub(super) me_adaptive_floor_active_writers_current: AtomicU64,
|
||||
pub(super) me_adaptive_floor_warm_writers_current: AtomicU64,
|
||||
pub(super) proxy_map_v4: Arc<RwLock<HashMap<i32, Vec<(IpAddr, u16)>>>>,
|
||||
pub(super) proxy_map_v6: Arc<RwLock<HashMap<i32, Vec<(IpAddr, u16)>>>>,
|
||||
pub(super) endpoint_dc_map: Arc<RwLock<HashMap<SocketAddr, Option<i32>>>>,
|
||||
pub(super) default_dc: AtomicI32,
|
||||
pub(super) next_writer_id: AtomicU64,
|
||||
pub(super) ping_tracker: Arc<Mutex<HashMap<i64, (std::time::Instant, u64)>>>,
|
||||
pub(super) ping_tracker_last_cleanup_epoch_ms: AtomicU64,
|
||||
pub(super) rtt_stats: Arc<Mutex<HashMap<u64, (f64, f64)>>>,
|
||||
pub(super) nat_reflection_cache: Arc<Mutex<NatReflectionCache>>,
|
||||
pub(super) nat_reflection_singleflight_v4: Arc<Mutex<()>>,
|
||||
pub(super) nat_reflection_singleflight_v6: Arc<Mutex<()>>,
|
||||
pub(super) writer_available: Arc<Notify>,
|
||||
pub(super) refill_inflight: Arc<Mutex<HashSet<SocketAddr>>>,
|
||||
pub(super) refill_inflight: Arc<Mutex<HashSet<RefillEndpointKey>>>,
|
||||
pub(super) refill_inflight_dc: Arc<Mutex<HashSet<RefillDcKey>>>,
|
||||
pub(super) conn_count: AtomicUsize,
|
||||
pub(super) stats: Arc<crate::stats::Stats>,
|
||||
@@ -132,7 +168,7 @@ pub struct MePool {
|
||||
pub(super) pending_hardswap_map_hash: AtomicU64,
|
||||
pub(super) hardswap: AtomicBool,
|
||||
pub(super) endpoint_quarantine: Arc<Mutex<HashMap<SocketAddr, Instant>>>,
|
||||
pub(super) kdf_material_fingerprint: Arc<Mutex<HashMap<SocketAddr, (u64, u16)>>>,
|
||||
pub(super) kdf_material_fingerprint: Arc<RwLock<HashMap<SocketAddr, (u64, u16)>>>,
|
||||
pub(super) me_pool_drain_ttl_secs: AtomicU64,
|
||||
pub(super) me_pool_force_close_secs: AtomicU64,
|
||||
pub(super) me_pool_min_fresh_ratio_permille: AtomicU32,
|
||||
@@ -144,8 +180,20 @@ pub struct MePool {
|
||||
pub(super) me_bind_stale_ttl_secs: AtomicU64,
|
||||
pub(super) secret_atomic_snapshot: AtomicBool,
|
||||
pub(super) me_deterministic_writer_sort: AtomicBool,
|
||||
pub(super) me_writer_pick_mode: AtomicU8,
|
||||
pub(super) me_writer_pick_sample_size: AtomicU8,
|
||||
pub(super) me_socks_kdf_policy: AtomicU8,
|
||||
pub(super) me_reader_route_data_wait_ms: Arc<AtomicU64>,
|
||||
pub(super) me_route_no_writer_mode: AtomicU8,
|
||||
pub(super) me_route_no_writer_wait: Duration,
|
||||
pub(super) me_route_inline_recovery_attempts: u32,
|
||||
pub(super) me_route_inline_recovery_wait: Duration,
|
||||
pub(super) me_health_interval_ms_unhealthy: AtomicU64,
|
||||
pub(super) me_health_interval_ms_healthy: AtomicU64,
|
||||
pub(super) me_warn_rate_limit_ms: AtomicU64,
|
||||
pub(super) runtime_ready: AtomicBool,
|
||||
pool_size: usize,
|
||||
pub(super) preferred_endpoints_by_dc: Arc<RwLock<HashMap<i32, Vec<SocketAddr>>>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
@@ -210,7 +258,16 @@ impl MePool {
|
||||
me_floor_mode: MeFloorMode,
|
||||
me_adaptive_floor_idle_secs: u64,
|
||||
me_adaptive_floor_min_writers_single_endpoint: u8,
|
||||
me_adaptive_floor_min_writers_multi_endpoint: u8,
|
||||
me_adaptive_floor_recover_grace_secs: u64,
|
||||
me_adaptive_floor_writers_per_core_total: u16,
|
||||
me_adaptive_floor_cpu_cores_override: u16,
|
||||
me_adaptive_floor_max_extra_writers_single_per_core: u16,
|
||||
me_adaptive_floor_max_extra_writers_multi_per_core: u16,
|
||||
me_adaptive_floor_max_active_writers_per_core: u16,
|
||||
me_adaptive_floor_max_warm_writers_per_core: u16,
|
||||
me_adaptive_floor_max_active_writers_global: u32,
|
||||
me_adaptive_floor_max_warm_writers_global: u32,
|
||||
hardswap: bool,
|
||||
me_pool_drain_ttl_secs: u64,
|
||||
me_pool_force_close_secs: u64,
|
||||
@@ -223,12 +280,29 @@ impl MePool {
|
||||
me_bind_stale_ttl_secs: u64,
|
||||
me_secret_atomic_snapshot: bool,
|
||||
me_deterministic_writer_sort: bool,
|
||||
me_writer_pick_mode: MeWriterPickMode,
|
||||
me_writer_pick_sample_size: u8,
|
||||
me_socks_kdf_policy: MeSocksKdfPolicy,
|
||||
me_writer_cmd_channel_capacity: usize,
|
||||
me_route_channel_capacity: usize,
|
||||
me_route_backpressure_base_timeout_ms: u64,
|
||||
me_route_backpressure_high_timeout_ms: u64,
|
||||
me_route_backpressure_high_watermark_pct: u8,
|
||||
me_reader_route_data_wait_ms: u64,
|
||||
me_health_interval_ms_unhealthy: u64,
|
||||
me_health_interval_ms_healthy: u64,
|
||||
me_warn_rate_limit_ms: u64,
|
||||
me_route_no_writer_mode: MeRouteNoWriterMode,
|
||||
me_route_no_writer_wait_ms: u64,
|
||||
me_route_inline_recovery_attempts: u32,
|
||||
me_route_inline_recovery_wait_ms: u64,
|
||||
) -> Arc<Self> {
|
||||
let registry = Arc::new(ConnRegistry::new());
|
||||
let endpoint_dc_map = Self::build_endpoint_dc_map_from_maps(&proxy_map_v4, &proxy_map_v6);
|
||||
let preferred_endpoints_by_dc =
|
||||
Self::build_preferred_endpoints_by_dc(&decision, &proxy_map_v4, &proxy_map_v6);
|
||||
let registry = Arc::new(ConnRegistry::with_route_channel_capacity(
|
||||
me_route_channel_capacity,
|
||||
));
|
||||
registry.update_route_backpressure_policy(
|
||||
me_route_backpressure_base_timeout_ms,
|
||||
me_route_backpressure_high_timeout_ms,
|
||||
@@ -275,6 +349,7 @@ impl MePool {
|
||||
me_keepalive_jitter: Duration::from_secs(me_keepalive_jitter_secs),
|
||||
me_keepalive_payload_random,
|
||||
rpc_proxy_req_every_secs: AtomicU64::new(rpc_proxy_req_every_secs),
|
||||
writer_cmd_channel_capacity: me_writer_cmd_channel_capacity.max(1),
|
||||
me_warmup_stagger_enabled,
|
||||
me_warmup_step_delay: Duration::from_millis(me_warmup_step_delay_ms),
|
||||
me_warmup_step_jitter: Duration::from_millis(me_warmup_step_jitter_ms),
|
||||
@@ -303,17 +378,59 @@ impl MePool {
|
||||
me_adaptive_floor_min_writers_single_endpoint: AtomicU8::new(
|
||||
me_adaptive_floor_min_writers_single_endpoint,
|
||||
),
|
||||
me_adaptive_floor_min_writers_multi_endpoint: AtomicU8::new(
|
||||
me_adaptive_floor_min_writers_multi_endpoint,
|
||||
),
|
||||
me_adaptive_floor_recover_grace_secs: AtomicU64::new(
|
||||
me_adaptive_floor_recover_grace_secs,
|
||||
),
|
||||
me_adaptive_floor_writers_per_core_total: AtomicU32::new(
|
||||
me_adaptive_floor_writers_per_core_total as u32,
|
||||
),
|
||||
me_adaptive_floor_cpu_cores_override: AtomicU32::new(
|
||||
me_adaptive_floor_cpu_cores_override as u32,
|
||||
),
|
||||
me_adaptive_floor_max_extra_writers_single_per_core: AtomicU32::new(
|
||||
me_adaptive_floor_max_extra_writers_single_per_core as u32,
|
||||
),
|
||||
me_adaptive_floor_max_extra_writers_multi_per_core: AtomicU32::new(
|
||||
me_adaptive_floor_max_extra_writers_multi_per_core as u32,
|
||||
),
|
||||
me_adaptive_floor_max_active_writers_per_core: AtomicU32::new(
|
||||
me_adaptive_floor_max_active_writers_per_core as u32,
|
||||
),
|
||||
me_adaptive_floor_max_warm_writers_per_core: AtomicU32::new(
|
||||
me_adaptive_floor_max_warm_writers_per_core as u32,
|
||||
),
|
||||
me_adaptive_floor_max_active_writers_global: AtomicU32::new(
|
||||
me_adaptive_floor_max_active_writers_global,
|
||||
),
|
||||
me_adaptive_floor_max_warm_writers_global: AtomicU32::new(
|
||||
me_adaptive_floor_max_warm_writers_global,
|
||||
),
|
||||
me_adaptive_floor_cpu_cores_detected: AtomicU32::new(1),
|
||||
me_adaptive_floor_cpu_cores_effective: AtomicU32::new(1),
|
||||
me_adaptive_floor_global_cap_raw: AtomicU64::new(0),
|
||||
me_adaptive_floor_global_cap_effective: AtomicU64::new(0),
|
||||
me_adaptive_floor_target_writers_total: AtomicU64::new(0),
|
||||
me_adaptive_floor_active_cap_configured: AtomicU64::new(0),
|
||||
me_adaptive_floor_active_cap_effective: AtomicU64::new(0),
|
||||
me_adaptive_floor_warm_cap_configured: AtomicU64::new(0),
|
||||
me_adaptive_floor_warm_cap_effective: AtomicU64::new(0),
|
||||
me_adaptive_floor_active_writers_current: AtomicU64::new(0),
|
||||
me_adaptive_floor_warm_writers_current: AtomicU64::new(0),
|
||||
pool_size: 2,
|
||||
proxy_map_v4: Arc::new(RwLock::new(proxy_map_v4)),
|
||||
proxy_map_v6: Arc::new(RwLock::new(proxy_map_v6)),
|
||||
default_dc: AtomicI32::new(default_dc.unwrap_or(0)),
|
||||
endpoint_dc_map: Arc::new(RwLock::new(endpoint_dc_map)),
|
||||
default_dc: AtomicI32::new(default_dc.unwrap_or(2)),
|
||||
next_writer_id: AtomicU64::new(1),
|
||||
ping_tracker: Arc::new(Mutex::new(HashMap::new())),
|
||||
ping_tracker_last_cleanup_epoch_ms: AtomicU64::new(0),
|
||||
rtt_stats: Arc::new(Mutex::new(HashMap::new())),
|
||||
nat_reflection_cache: Arc::new(Mutex::new(NatReflectionCache::default())),
|
||||
nat_reflection_singleflight_v4: Arc::new(Mutex::new(())),
|
||||
nat_reflection_singleflight_v6: Arc::new(Mutex::new(())),
|
||||
writer_available: Arc::new(Notify::new()),
|
||||
refill_inflight: Arc::new(Mutex::new(HashSet::new())),
|
||||
refill_inflight_dc: Arc::new(Mutex::new(HashSet::new())),
|
||||
@@ -326,7 +443,7 @@ impl MePool {
|
||||
pending_hardswap_map_hash: AtomicU64::new(0),
|
||||
hardswap: AtomicBool::new(hardswap),
|
||||
endpoint_quarantine: Arc::new(Mutex::new(HashMap::new())),
|
||||
kdf_material_fingerprint: Arc::new(Mutex::new(HashMap::new())),
|
||||
kdf_material_fingerprint: Arc::new(RwLock::new(HashMap::new())),
|
||||
me_pool_drain_ttl_secs: AtomicU64::new(me_pool_drain_ttl_secs),
|
||||
me_pool_force_close_secs: AtomicU64::new(me_pool_force_close_secs),
|
||||
me_pool_min_fresh_ratio_permille: AtomicU32::new(Self::ratio_to_permille(
|
||||
@@ -342,7 +459,19 @@ impl MePool {
|
||||
me_bind_stale_ttl_secs: AtomicU64::new(me_bind_stale_ttl_secs),
|
||||
secret_atomic_snapshot: AtomicBool::new(me_secret_atomic_snapshot),
|
||||
me_deterministic_writer_sort: AtomicBool::new(me_deterministic_writer_sort),
|
||||
me_writer_pick_mode: AtomicU8::new(me_writer_pick_mode.as_u8()),
|
||||
me_writer_pick_sample_size: AtomicU8::new(me_writer_pick_sample_size.clamp(2, 4)),
|
||||
me_socks_kdf_policy: AtomicU8::new(me_socks_kdf_policy.as_u8()),
|
||||
me_reader_route_data_wait_ms: Arc::new(AtomicU64::new(me_reader_route_data_wait_ms)),
|
||||
me_route_no_writer_mode: AtomicU8::new(me_route_no_writer_mode.as_u8()),
|
||||
me_route_no_writer_wait: Duration::from_millis(me_route_no_writer_wait_ms),
|
||||
me_route_inline_recovery_attempts,
|
||||
me_route_inline_recovery_wait: Duration::from_millis(me_route_inline_recovery_wait_ms),
|
||||
me_health_interval_ms_unhealthy: AtomicU64::new(me_health_interval_ms_unhealthy.max(1)),
|
||||
me_health_interval_ms_healthy: AtomicU64::new(me_health_interval_ms_healthy.max(1)),
|
||||
me_warn_rate_limit_ms: AtomicU64::new(me_warn_rate_limit_ms.max(1)),
|
||||
runtime_ready: AtomicBool::new(false),
|
||||
preferred_endpoints_by_dc: Arc::new(RwLock::new(preferred_endpoints_by_dc)),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -350,6 +479,14 @@ impl MePool {
|
||||
self.active_generation.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub fn set_runtime_ready(&self, ready: bool) {
|
||||
self.runtime_ready.store(ready, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn is_runtime_ready(&self) -> bool {
|
||||
self.runtime_ready.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub fn update_runtime_reinit_policy(
|
||||
&self,
|
||||
hardswap: bool,
|
||||
@@ -364,6 +501,8 @@ impl MePool {
|
||||
bind_stale_ttl_secs: u64,
|
||||
secret_atomic_snapshot: bool,
|
||||
deterministic_writer_sort: bool,
|
||||
writer_pick_mode: MeWriterPickMode,
|
||||
writer_pick_sample_size: u8,
|
||||
single_endpoint_shadow_writers: u8,
|
||||
single_endpoint_outage_mode_enabled: bool,
|
||||
single_endpoint_outage_disable_quarantine: bool,
|
||||
@@ -373,7 +512,19 @@ impl MePool {
|
||||
floor_mode: MeFloorMode,
|
||||
adaptive_floor_idle_secs: u64,
|
||||
adaptive_floor_min_writers_single_endpoint: u8,
|
||||
adaptive_floor_min_writers_multi_endpoint: u8,
|
||||
adaptive_floor_recover_grace_secs: u64,
|
||||
adaptive_floor_writers_per_core_total: u16,
|
||||
adaptive_floor_cpu_cores_override: u16,
|
||||
adaptive_floor_max_extra_writers_single_per_core: u16,
|
||||
adaptive_floor_max_extra_writers_multi_per_core: u16,
|
||||
adaptive_floor_max_active_writers_per_core: u16,
|
||||
adaptive_floor_max_warm_writers_per_core: u16,
|
||||
adaptive_floor_max_active_writers_global: u32,
|
||||
adaptive_floor_max_warm_writers_global: u32,
|
||||
me_health_interval_ms_unhealthy: u64,
|
||||
me_health_interval_ms_healthy: u64,
|
||||
me_warn_rate_limit_ms: u64,
|
||||
) {
|
||||
self.hardswap.store(hardswap, Ordering::Relaxed);
|
||||
self.me_pool_drain_ttl_secs
|
||||
@@ -398,6 +549,14 @@ impl MePool {
|
||||
.store(secret_atomic_snapshot, Ordering::Relaxed);
|
||||
self.me_deterministic_writer_sort
|
||||
.store(deterministic_writer_sort, Ordering::Relaxed);
|
||||
let previous_writer_pick_mode = self.writer_pick_mode();
|
||||
self.me_writer_pick_mode
|
||||
.store(writer_pick_mode.as_u8(), Ordering::Relaxed);
|
||||
self.me_writer_pick_sample_size
|
||||
.store(writer_pick_sample_size.clamp(2, 4), Ordering::Relaxed);
|
||||
if previous_writer_pick_mode != writer_pick_mode {
|
||||
self.stats.increment_me_writer_pick_mode_switch_total();
|
||||
}
|
||||
self.me_single_endpoint_shadow_writers
|
||||
.store(single_endpoint_shadow_writers, Ordering::Relaxed);
|
||||
self.me_single_endpoint_outage_mode_enabled
|
||||
@@ -417,8 +576,44 @@ impl MePool {
|
||||
.store(adaptive_floor_idle_secs, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_min_writers_single_endpoint
|
||||
.store(adaptive_floor_min_writers_single_endpoint, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_min_writers_multi_endpoint
|
||||
.store(adaptive_floor_min_writers_multi_endpoint, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_recover_grace_secs
|
||||
.store(adaptive_floor_recover_grace_secs, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_writers_per_core_total
|
||||
.store(adaptive_floor_writers_per_core_total as u32, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_cpu_cores_override
|
||||
.store(adaptive_floor_cpu_cores_override as u32, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_max_extra_writers_single_per_core
|
||||
.store(
|
||||
adaptive_floor_max_extra_writers_single_per_core as u32,
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
self.me_adaptive_floor_max_extra_writers_multi_per_core
|
||||
.store(
|
||||
adaptive_floor_max_extra_writers_multi_per_core as u32,
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
self.me_adaptive_floor_max_active_writers_per_core
|
||||
.store(
|
||||
adaptive_floor_max_active_writers_per_core as u32,
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
self.me_adaptive_floor_max_warm_writers_per_core
|
||||
.store(
|
||||
adaptive_floor_max_warm_writers_per_core as u32,
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
self.me_adaptive_floor_max_active_writers_global
|
||||
.store(adaptive_floor_max_active_writers_global, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_max_warm_writers_global
|
||||
.store(adaptive_floor_max_warm_writers_global, Ordering::Relaxed);
|
||||
self.me_health_interval_ms_unhealthy
|
||||
.store(me_health_interval_ms_unhealthy.max(1), Ordering::Relaxed);
|
||||
self.me_health_interval_ms_healthy
|
||||
.store(me_health_interval_ms_healthy.max(1), Ordering::Relaxed);
|
||||
self.me_warn_rate_limit_ms
|
||||
.store(me_warn_rate_limit_ms.max(1), Ordering::Relaxed);
|
||||
if previous_floor_mode != floor_mode {
|
||||
self.stats.increment_me_floor_mode_switch_total();
|
||||
match (previous_floor_mode, floor_mode) {
|
||||
@@ -458,9 +653,12 @@ impl MePool {
|
||||
route_backpressure_base_timeout_ms: u64,
|
||||
route_backpressure_high_timeout_ms: u64,
|
||||
route_backpressure_high_watermark_pct: u8,
|
||||
reader_route_data_wait_ms: u64,
|
||||
) {
|
||||
self.me_socks_kdf_policy
|
||||
.store(socks_kdf_policy.as_u8(), Ordering::Relaxed);
|
||||
self.me_reader_route_data_wait_ms
|
||||
.store(reader_route_data_wait_ms, Ordering::Relaxed);
|
||||
self.registry.update_route_backpressure_policy(
|
||||
route_backpressure_base_timeout_ms,
|
||||
route_backpressure_high_timeout_ms,
|
||||
@@ -489,6 +687,28 @@ impl MePool {
|
||||
self.proxy_secret.read().await.key_selector
|
||||
}
|
||||
|
||||
pub(super) async fn non_draining_writer_counts_by_contour(&self) -> (usize, usize, usize) {
|
||||
let ws = self.writers.read().await;
|
||||
let mut active = 0usize;
|
||||
let mut warm = 0usize;
|
||||
for writer in ws.iter() {
|
||||
if writer.draining.load(Ordering::Relaxed) {
|
||||
continue;
|
||||
}
|
||||
match WriterContour::from_u8(writer.contour.load(Ordering::Relaxed)) {
|
||||
WriterContour::Active => active = active.saturating_add(1),
|
||||
WriterContour::Warm => warm = warm.saturating_add(1),
|
||||
WriterContour::Draining => {}
|
||||
}
|
||||
}
|
||||
(active, warm, active.saturating_add(warm))
|
||||
}
|
||||
|
||||
/// Returns the active-contour component of
/// `non_draining_writer_counts_by_contour`, discarding the warm and total counts.
pub(super) async fn active_contour_writer_count_total(&self) -> usize {
    let counts = self.non_draining_writer_counts_by_contour().await;
    counts.0
}
|
||||
|
||||
/// Takes the read lock on `proxy_secret` and returns a clone of the guarded
/// `SecretSnapshot`.
pub(super) async fn secret_snapshot(&self) -> SecretSnapshot {
    let guard = self.proxy_secret.read().await;
    (*guard).clone()
}
|
||||
@@ -497,6 +717,16 @@ impl MePool {
|
||||
MeBindStaleMode::from_u8(self.me_bind_stale_mode.load(Ordering::Relaxed))
|
||||
}
|
||||
|
||||
pub(super) fn writer_pick_mode(&self) -> MeWriterPickMode {
|
||||
MeWriterPickMode::from_u8(self.me_writer_pick_mode.load(Ordering::Relaxed))
|
||||
}
|
||||
|
||||
pub(super) fn writer_pick_sample_size(&self) -> usize {
|
||||
self.me_writer_pick_sample_size
|
||||
.load(Ordering::Relaxed)
|
||||
.clamp(2, 4) as usize
|
||||
}
|
||||
|
||||
pub(super) fn required_writers_for_dc(&self, endpoint_count: usize) -> usize {
|
||||
if endpoint_count == 0 {
|
||||
return 0;
|
||||
@@ -525,6 +755,224 @@ impl MePool {
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn adaptive_floor_min_writers_multi_endpoint(&self) -> usize {
|
||||
(self
|
||||
.me_adaptive_floor_min_writers_multi_endpoint
|
||||
.load(Ordering::Relaxed) as usize)
|
||||
.max(1)
|
||||
}
|
||||
|
||||
pub(super) fn adaptive_floor_max_extra_single_per_core(&self) -> usize {
|
||||
self.me_adaptive_floor_max_extra_writers_single_per_core
|
||||
.load(Ordering::Relaxed) as usize
|
||||
}
|
||||
|
||||
pub(super) fn adaptive_floor_max_extra_multi_per_core(&self) -> usize {
|
||||
self.me_adaptive_floor_max_extra_writers_multi_per_core
|
||||
.load(Ordering::Relaxed) as usize
|
||||
}
|
||||
|
||||
pub(super) fn adaptive_floor_max_active_writers_per_core(&self) -> usize {
|
||||
(self
|
||||
.me_adaptive_floor_max_active_writers_per_core
|
||||
.load(Ordering::Relaxed) as usize)
|
||||
.max(1)
|
||||
}
|
||||
|
||||
pub(super) fn adaptive_floor_max_warm_writers_per_core(&self) -> usize {
|
||||
(self
|
||||
.me_adaptive_floor_max_warm_writers_per_core
|
||||
.load(Ordering::Relaxed) as usize)
|
||||
.max(1)
|
||||
}
|
||||
|
||||
pub(super) fn adaptive_floor_max_active_writers_global(&self) -> usize {
|
||||
(self
|
||||
.me_adaptive_floor_max_active_writers_global
|
||||
.load(Ordering::Relaxed) as usize)
|
||||
.max(1)
|
||||
}
|
||||
|
||||
pub(super) fn adaptive_floor_max_warm_writers_global(&self) -> usize {
|
||||
(self
|
||||
.me_adaptive_floor_max_warm_writers_global
|
||||
.load(Ordering::Relaxed) as usize)
|
||||
.max(1)
|
||||
}
|
||||
|
||||
/// Returns the parallelism reported by `std::thread::available_parallelism`,
/// falling back to 1 when the query fails. The result is never less than 1.
pub(super) fn adaptive_floor_detected_cpu_cores(&self) -> usize {
    match std::thread::available_parallelism() {
        Ok(parallelism) => parallelism.get().max(1),
        Err(_) => 1,
    }
}
|
||||
|
||||
pub(super) fn adaptive_floor_effective_cpu_cores(&self) -> usize {
|
||||
let detected = self.adaptive_floor_detected_cpu_cores();
|
||||
let override_cores = self
|
||||
.me_adaptive_floor_cpu_cores_override
|
||||
.load(Ordering::Relaxed) as usize;
|
||||
let effective = if override_cores == 0 {
|
||||
detected
|
||||
} else {
|
||||
override_cores.max(1)
|
||||
};
|
||||
self.me_adaptive_floor_cpu_cores_detected
|
||||
.store(detected as u32, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_cpu_cores_effective
|
||||
.store(effective as u32, Ordering::Relaxed);
|
||||
self.stats
|
||||
.set_me_floor_cpu_cores_detected_gauge(detected as u64);
|
||||
self.stats
|
||||
.set_me_floor_cpu_cores_effective_gauge(effective as u64);
|
||||
effective
|
||||
}
|
||||
|
||||
// Keeps per-contour (active/warm) writer budget bounded by CPU count.
|
||||
// Baseline is 86 writers on the first core and +48 for each extra core.
|
||||
fn adaptive_floor_cpu_budget_per_contour_cap(&self, cores: usize) -> usize {
|
||||
const FIRST_CORE_WRITER_BUDGET: usize = 86;
|
||||
const EXTRA_CORE_WRITER_BUDGET: usize = 48;
|
||||
if cores == 0 {
|
||||
return FIRST_CORE_WRITER_BUDGET;
|
||||
}
|
||||
FIRST_CORE_WRITER_BUDGET.saturating_add(
|
||||
cores
|
||||
.saturating_sub(1)
|
||||
.saturating_mul(EXTRA_CORE_WRITER_BUDGET),
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn adaptive_floor_active_cap_configured_total(&self) -> usize {
|
||||
let cores = self.adaptive_floor_effective_cpu_cores();
|
||||
let per_contour_budget = self.adaptive_floor_cpu_budget_per_contour_cap(cores);
|
||||
let configured = cores
|
||||
.saturating_mul(self.adaptive_floor_max_active_writers_per_core())
|
||||
.min(self.adaptive_floor_max_active_writers_global())
|
||||
.min(per_contour_budget)
|
||||
.max(1);
|
||||
self.me_adaptive_floor_active_cap_configured
|
||||
.store(configured as u64, Ordering::Relaxed);
|
||||
self.stats
|
||||
.set_me_floor_active_cap_configured_gauge(configured as u64);
|
||||
configured
|
||||
}
|
||||
|
||||
pub(super) fn adaptive_floor_warm_cap_configured_total(&self) -> usize {
|
||||
let cores = self.adaptive_floor_effective_cpu_cores();
|
||||
let per_contour_budget = self.adaptive_floor_cpu_budget_per_contour_cap(cores);
|
||||
let configured = cores
|
||||
.saturating_mul(self.adaptive_floor_max_warm_writers_per_core())
|
||||
.min(self.adaptive_floor_max_warm_writers_global())
|
||||
.min(per_contour_budget)
|
||||
.max(1);
|
||||
self.me_adaptive_floor_warm_cap_configured
|
||||
.store(configured as u64, Ordering::Relaxed);
|
||||
self.stats
|
||||
.set_me_floor_warm_cap_configured_gauge(configured as u64);
|
||||
configured
|
||||
}
|
||||
|
||||
pub(super) fn set_adaptive_floor_runtime_caps(
|
||||
&self,
|
||||
active_cap_configured: usize,
|
||||
active_cap_effective: usize,
|
||||
warm_cap_configured: usize,
|
||||
warm_cap_effective: usize,
|
||||
target_writers_total: usize,
|
||||
active_writers_current: usize,
|
||||
warm_writers_current: usize,
|
||||
) {
|
||||
self.me_adaptive_floor_global_cap_raw
|
||||
.store(active_cap_configured as u64, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_global_cap_effective
|
||||
.store(active_cap_effective as u64, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_target_writers_total
|
||||
.store(target_writers_total as u64, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_active_cap_configured
|
||||
.store(active_cap_configured as u64, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_active_cap_effective
|
||||
.store(active_cap_effective as u64, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_warm_cap_configured
|
||||
.store(warm_cap_configured as u64, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_warm_cap_effective
|
||||
.store(warm_cap_effective as u64, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_active_writers_current
|
||||
.store(active_writers_current as u64, Ordering::Relaxed);
|
||||
self.me_adaptive_floor_warm_writers_current
|
||||
.store(warm_writers_current as u64, Ordering::Relaxed);
|
||||
self.stats
|
||||
.set_me_floor_global_cap_raw_gauge(active_cap_configured as u64);
|
||||
self.stats
|
||||
.set_me_floor_global_cap_effective_gauge(active_cap_effective as u64);
|
||||
self.stats
|
||||
.set_me_floor_target_writers_total_gauge(target_writers_total as u64);
|
||||
self.stats
|
||||
.set_me_floor_active_cap_configured_gauge(active_cap_configured as u64);
|
||||
self.stats
|
||||
.set_me_floor_active_cap_effective_gauge(active_cap_effective as u64);
|
||||
self.stats
|
||||
.set_me_floor_warm_cap_configured_gauge(warm_cap_configured as u64);
|
||||
self.stats
|
||||
.set_me_floor_warm_cap_effective_gauge(warm_cap_effective as u64);
|
||||
self.stats
|
||||
.set_me_writers_active_current_gauge(active_writers_current as u64);
|
||||
self.stats
|
||||
.set_me_writers_warm_current_gauge(warm_writers_current as u64);
|
||||
}
|
||||
|
||||
pub(super) async fn active_coverage_required_total(&self) -> usize {
|
||||
let mut endpoints_by_dc = HashMap::<i32, HashSet<SocketAddr>>::new();
|
||||
|
||||
if self.decision.ipv4_me {
|
||||
let map = self.proxy_map_v4.read().await;
|
||||
for (dc, addrs) in map.iter() {
|
||||
let entry = endpoints_by_dc.entry(*dc).or_default();
|
||||
for (ip, port) in addrs.iter().copied() {
|
||||
entry.insert(SocketAddr::new(ip, port));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.decision.ipv6_me {
|
||||
let map = self.proxy_map_v6.read().await;
|
||||
for (dc, addrs) in map.iter() {
|
||||
let entry = endpoints_by_dc.entry(*dc).or_default();
|
||||
for (ip, port) in addrs.iter().copied() {
|
||||
entry.insert(SocketAddr::new(ip, port));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
endpoints_by_dc
|
||||
.values()
|
||||
.map(|endpoints| self.required_writers_for_dc_with_floor_mode(endpoints.len(), false))
|
||||
.sum()
|
||||
}
|
||||
|
||||
pub(super) async fn can_open_writer_for_contour(
|
||||
&self,
|
||||
contour: WriterContour,
|
||||
allow_coverage_override: bool,
|
||||
) -> bool {
|
||||
let (active_writers, warm_writers, _) = self.non_draining_writer_counts_by_contour().await;
|
||||
match contour {
|
||||
WriterContour::Active => {
|
||||
let active_cap = self.adaptive_floor_active_cap_configured_total();
|
||||
if active_writers < active_cap {
|
||||
return true;
|
||||
}
|
||||
if !allow_coverage_override {
|
||||
return false;
|
||||
}
|
||||
let coverage_required = self.active_coverage_required_total().await;
|
||||
active_writers < coverage_required
|
||||
}
|
||||
WriterContour::Warm => warm_writers < self.adaptive_floor_warm_cap_configured_total(),
|
||||
WriterContour::Draining => true,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn required_writers_for_dc_with_floor_mode(
|
||||
&self,
|
||||
endpoint_count: usize,
|
||||
@@ -534,13 +982,20 @@ impl MePool {
|
||||
if !reduce_for_idle {
|
||||
return base_required;
|
||||
}
|
||||
if endpoint_count != 1 || self.floor_mode() != MeFloorMode::Adaptive {
|
||||
if self.floor_mode() != MeFloorMode::Adaptive {
|
||||
return base_required;
|
||||
}
|
||||
let min_writers = (self
|
||||
.me_adaptive_floor_min_writers_single_endpoint
|
||||
.load(Ordering::Relaxed) as usize)
|
||||
.max(1);
|
||||
let min_writers = if endpoint_count == 1 {
|
||||
(self
|
||||
.me_adaptive_floor_min_writers_single_endpoint
|
||||
.load(Ordering::Relaxed) as usize)
|
||||
.max(1)
|
||||
} else {
|
||||
(self
|
||||
.me_adaptive_floor_min_writers_multi_endpoint
|
||||
.load(Ordering::Relaxed) as usize)
|
||||
.max(1)
|
||||
};
|
||||
base_required.min(min_writers)
|
||||
}
|
||||
|
||||
@@ -599,6 +1054,51 @@ impl MePool {
|
||||
order
|
||||
}
|
||||
|
||||
pub(super) fn default_dc_for_routing(&self) -> i32 {
|
||||
let dc = self.default_dc.load(Ordering::Relaxed);
|
||||
if dc == 0 { 2 } else { dc }
|
||||
}
|
||||
|
||||
/// Reports whether `dc` has at least one endpoint in an enabled proxy map.
///
/// The IPv4 map is consulted first (when `decision.ipv4_me`), then the IPv6
/// map (when `decision.ipv6_me`). A key that maps to an empty list does not
/// count.
pub(super) async fn has_configured_endpoints_for_dc(&self, dc: i32) -> bool {
    if self.decision.ipv4_me {
        let found_v4 = self
            .proxy_map_v4
            .read()
            .await
            .get(&dc)
            .is_some_and(|endpoints| !endpoints.is_empty());
        if found_v4 {
            return true;
        }
    }

    if self.decision.ipv6_me {
        let found_v6 = self
            .proxy_map_v6
            .read()
            .await
            .get(&dc)
            .is_some_and(|endpoints| !endpoints.is_empty());
        if found_v6 {
            return true;
        }
    }

    false
}
|
||||
|
||||
/// Picks the DC to route to and reports whether a fallback was applied.
///
/// Returns `(target_dc, false)` when `target_dc` is non-zero and has
/// configured endpoints; otherwise returns `(default_dc_for_routing(), true)`.
pub(super) async fn resolve_target_dc_for_routing(&self, target_dc: i32) -> (i32, bool) {
    // Short-circuit keeps the endpoint lookup from running for `target_dc == 0`.
    let usable = target_dc != 0 && self.has_configured_endpoints_for_dc(target_dc).await;
    if usable {
        (target_dc, false)
    } else {
        (self.default_dc_for_routing(), true)
    }
}
|
||||
|
||||
/// Looks up the DC recorded for `addr` in `endpoint_dc_map`.
///
/// Falls back to `default_dc_for_routing()` when the endpoint is unknown or
/// its entry holds `None`.
pub(super) async fn resolve_dc_for_endpoint(&self, addr: SocketAddr) -> i32 {
    let known_dc = self
        .endpoint_dc_map
        .read()
        .await
        .get(&addr)
        .copied()
        .flatten();

    match known_dc {
        Some(dc) => dc,
        None => self.default_dc_for_routing(),
    }
}
|
||||
|
||||
pub(super) async fn proxy_map_for_family(
|
||||
&self,
|
||||
family: IpFamily,
|
||||
@@ -608,4 +1108,123 @@ impl MePool {
|
||||
IpFamily::V6 => self.proxy_map_v6.read().await.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Records that endpoint `ip:port` belongs to `dc`.
///
/// A new endpoint is inserted as `Some(dc)`. If the endpoint is already
/// mapped to a different DC, its entry is replaced by `None`; an entry that
/// is already `None`, or already equal to `Some(dc)`, is left untouched.
fn merge_endpoint_dc(
    endpoint_dc_map: &mut HashMap<SocketAddr, Option<i32>>,
    dc: i32,
    ip: IpAddr,
    port: u16,
) {
    endpoint_dc_map
        .entry(SocketAddr::new(ip, port))
        .and_modify(|known| {
            if known.is_some_and(|known_dc| known_dc != dc) {
                *known = None;
            }
        })
        .or_insert(Some(dc));
}
|
||||
|
||||
fn build_preferred_endpoints_by_dc(
|
||||
decision: &NetworkDecision,
|
||||
map_v4: &HashMap<i32, Vec<(IpAddr, u16)>>,
|
||||
map_v6: &HashMap<i32, Vec<(IpAddr, u16)>>,
|
||||
) -> HashMap<i32, Vec<SocketAddr>> {
|
||||
let mut out = HashMap::<i32, Vec<SocketAddr>>::new();
|
||||
let mut dcs = HashSet::<i32>::new();
|
||||
dcs.extend(map_v4.keys().copied());
|
||||
dcs.extend(map_v6.keys().copied());
|
||||
|
||||
for dc in dcs {
|
||||
let v4 = map_v4
|
||||
.get(&dc)
|
||||
.map(|items| {
|
||||
items
|
||||
.iter()
|
||||
.map(|(ip, port)| SocketAddr::new(*ip, *port))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
let v6 = map_v6
|
||||
.get(&dc)
|
||||
.map(|items| {
|
||||
items
|
||||
.iter()
|
||||
.map(|(ip, port)| SocketAddr::new(*ip, *port))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut selected = if decision.effective_multipath {
|
||||
let mut both = Vec::<SocketAddr>::with_capacity(v4.len().saturating_add(v6.len()));
|
||||
if decision.prefer_ipv6() {
|
||||
both.extend(v6.iter().copied());
|
||||
both.extend(v4.iter().copied());
|
||||
} else {
|
||||
both.extend(v4.iter().copied());
|
||||
both.extend(v6.iter().copied());
|
||||
}
|
||||
both
|
||||
} else if decision.prefer_ipv6() {
|
||||
if !v6.is_empty() { v6 } else { v4 }
|
||||
} else if !v4.is_empty() {
|
||||
v4
|
||||
} else {
|
||||
v6
|
||||
};
|
||||
|
||||
selected.sort_unstable();
|
||||
selected.dedup();
|
||||
out.insert(dc, selected);
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
fn build_endpoint_dc_map_from_maps(
|
||||
map_v4: &HashMap<i32, Vec<(IpAddr, u16)>>,
|
||||
map_v6: &HashMap<i32, Vec<(IpAddr, u16)>>,
|
||||
) -> HashMap<SocketAddr, Option<i32>> {
|
||||
let mut endpoint_dc_map = HashMap::<SocketAddr, Option<i32>>::new();
|
||||
for (dc, endpoints) in map_v4 {
|
||||
for (ip, port) in endpoints {
|
||||
Self::merge_endpoint_dc(&mut endpoint_dc_map, *dc, *ip, *port);
|
||||
}
|
||||
}
|
||||
for (dc, endpoints) in map_v6 {
|
||||
for (ip, port) in endpoints {
|
||||
Self::merge_endpoint_dc(&mut endpoint_dc_map, *dc, *ip, *port);
|
||||
}
|
||||
}
|
||||
endpoint_dc_map
|
||||
}
|
||||
|
||||
pub(super) async fn rebuild_endpoint_dc_map(&self) {
|
||||
let map_v4 = self.proxy_map_v4.read().await.clone();
|
||||
let map_v6 = self.proxy_map_v6.read().await.clone();
|
||||
let rebuilt = Self::build_endpoint_dc_map_from_maps(&map_v4, &map_v6);
|
||||
let preferred = Self::build_preferred_endpoints_by_dc(&self.decision, &map_v4, &map_v6);
|
||||
*self.endpoint_dc_map.write().await = rebuilt;
|
||||
*self.preferred_endpoints_by_dc.write().await = preferred;
|
||||
}
|
||||
|
||||
/// Returns a copy of the preferred endpoint list stored for `dc`, or an
/// empty vector when `preferred_endpoints_by_dc` has no entry for it.
pub(super) async fn preferred_endpoints_for_dc(&self, dc: i32) -> Vec<SocketAddr> {
    let by_dc = self.preferred_endpoints_by_dc.read().await;
    match by_dc.get(&dc) {
        Some(endpoints) => endpoints.clone(),
        None => Vec::new(),
    }
}
|
||||
|
||||
pub(super) fn health_interval_unhealthy(&self) -> Duration {
|
||||
Duration::from_millis(self.me_health_interval_ms_unhealthy.load(Ordering::Relaxed).max(1))
|
||||
}
|
||||
|
||||
pub(super) fn health_interval_healthy(&self) -> Duration {
|
||||
Duration::from_millis(self.me_health_interval_ms_healthy.load(Ordering::Relaxed).max(1))
|
||||
}
|
||||
|
||||
pub(super) fn warn_rate_limit_duration(&self) -> Duration {
|
||||
Duration::from_millis(self.me_warn_rate_limit_ms.load(Ordering::Relaxed).max(1))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -54,6 +54,7 @@ impl MePool {
|
||||
&& let Some(addrs) = guard.get(&k).cloned()
|
||||
{
|
||||
guard.insert(-k, addrs);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -65,9 +66,14 @@ impl MePool {
|
||||
&& let Some(addrs) = guard.get(&k).cloned()
|
||||
{
|
||||
guard.insert(-k, addrs);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if changed {
|
||||
self.rebuild_endpoint_dc_map().await;
|
||||
self.writer_available.notify_waiters();
|
||||
}
|
||||
if changed {
|
||||
SnapshotApplyOutcome::AppliedChanged
|
||||
} else {
|
||||
@@ -104,7 +110,10 @@ impl MePool {
|
||||
pub async fn reconnect_all(self: &Arc<Self>) {
|
||||
let ws = self.writers.read().await.clone();
|
||||
for w in ws {
|
||||
if let Ok(()) = self.connect_one(w.addr, self.rng.as_ref()).await {
|
||||
if let Ok(()) = self
|
||||
.connect_one_for_dc(w.addr, w.writer_dc, self.rng.as_ref())
|
||||
.await
|
||||
{
|
||||
self.mark_writer_draining(w.id).await;
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::HashSet;
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -14,10 +14,12 @@ use super::pool::MePool;
|
||||
impl MePool {
|
||||
pub async fn init(self: &Arc<Self>, pool_size: usize, rng: &Arc<SecureRandom>) -> Result<()> {
|
||||
let family_order = self.family_order();
|
||||
let connect_concurrency = self.me_reconnect_max_concurrent_per_dc.max(1) as usize;
|
||||
let ks = self.key_selector().await;
|
||||
info!(
|
||||
me_servers = self.proxy_map_v4.read().await.len(),
|
||||
pool_size,
|
||||
connect_concurrency,
|
||||
key_selector = format_args!("0x{ks:08x}"),
|
||||
secret_len = self.proxy_secret.read().await.secret.len(),
|
||||
"Initializing ME pool"
|
||||
@@ -25,39 +27,54 @@ impl MePool {
|
||||
|
||||
for family in family_order {
|
||||
let map = self.proxy_map_for_family(family).await;
|
||||
let mut grouped_dc_addrs: HashMap<i32, Vec<(IpAddr, u16)>> = HashMap::new();
|
||||
for (dc, addrs) in map {
|
||||
if addrs.is_empty() {
|
||||
continue;
|
||||
}
|
||||
grouped_dc_addrs.entry(dc.abs()).or_default().extend(addrs);
|
||||
}
|
||||
let mut dc_addrs: Vec<(i32, Vec<(IpAddr, u16)>)> = grouped_dc_addrs
|
||||
let mut dc_addrs: Vec<(i32, Vec<(IpAddr, u16)>)> = map
|
||||
.into_iter()
|
||||
.map(|(dc, mut addrs)| {
|
||||
addrs.sort_unstable();
|
||||
addrs.dedup();
|
||||
(dc, addrs)
|
||||
})
|
||||
.filter(|(_, addrs)| !addrs.is_empty())
|
||||
.collect();
|
||||
dc_addrs.sort_unstable_by_key(|(dc, _)| *dc);
|
||||
dc_addrs.sort_by_key(|(_, addrs)| (addrs.len() != 1, addrs.len()));
|
||||
|
||||
// Ensure at least one live writer per DC group; run missing DCs in parallel.
|
||||
// Stage 1: build base coverage for conditional-cast.
|
||||
// Single-endpoint DCs are prefilled first; multi-endpoint DCs require one live writer.
|
||||
let mut join = tokio::task::JoinSet::new();
|
||||
for (dc, addrs) in dc_addrs.iter().cloned() {
|
||||
if addrs.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let target_writers = if addrs.len() == 1 {
|
||||
self.required_writers_for_dc_with_floor_mode(addrs.len(), false)
|
||||
} else {
|
||||
1usize
|
||||
};
|
||||
let endpoints: HashSet<SocketAddr> = addrs
|
||||
.iter()
|
||||
.map(|(ip, port)| SocketAddr::new(*ip, *port))
|
||||
.collect();
|
||||
if self.active_writer_count_for_endpoints(&endpoints).await > 0 {
|
||||
if self
|
||||
.active_writer_count_for_dc_endpoints(dc, &endpoints)
|
||||
.await
|
||||
>= target_writers
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let pool = Arc::clone(self);
|
||||
let rng_clone = Arc::clone(rng);
|
||||
join.spawn(async move { pool.connect_primary_for_dc(dc, addrs, rng_clone).await });
|
||||
join.spawn(async move {
|
||||
pool.connect_primary_for_dc(
|
||||
dc,
|
||||
addrs,
|
||||
target_writers,
|
||||
rng_clone,
|
||||
connect_concurrency,
|
||||
true,
|
||||
)
|
||||
.await
|
||||
});
|
||||
}
|
||||
while join.join_next().await.is_some() {}
|
||||
|
||||
@@ -67,7 +84,7 @@ impl MePool {
|
||||
.iter()
|
||||
.map(|(ip, port)| SocketAddr::new(*ip, *port))
|
||||
.collect();
|
||||
if self.active_writer_count_for_endpoints(&endpoints).await == 0 {
|
||||
if self.active_writer_count_for_dc_endpoints(*dc, &endpoints).await == 0 {
|
||||
missing_dcs.push(*dc);
|
||||
}
|
||||
}
|
||||
@@ -77,47 +94,36 @@ impl MePool {
|
||||
)));
|
||||
}
|
||||
|
||||
// Warm reserve writers asynchronously so startup does not block after first working pool is ready.
|
||||
// Stage 2: continue saturating multi-endpoint DC groups in background.
|
||||
let pool = Arc::clone(self);
|
||||
let rng_clone = Arc::clone(rng);
|
||||
let dc_addrs_bg = dc_addrs.clone();
|
||||
tokio::spawn(async move {
|
||||
if pool.me_warmup_stagger_enabled {
|
||||
for (dc, addrs) in &dc_addrs_bg {
|
||||
for (ip, port) in addrs {
|
||||
if pool.connection_count() >= pool_size {
|
||||
break;
|
||||
}
|
||||
let addr = SocketAddr::new(*ip, *port);
|
||||
let jitter = rand::rng()
|
||||
.random_range(0..=pool.me_warmup_step_jitter.as_millis() as u64);
|
||||
let delay_ms = pool.me_warmup_step_delay.as_millis() as u64 + jitter;
|
||||
tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
|
||||
if let Err(e) = pool.connect_one(addr, rng_clone.as_ref()).await {
|
||||
debug!(%addr, dc = %dc, error = %e, "Extra ME connect failed (staggered)");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (dc, addrs) in &dc_addrs_bg {
|
||||
for (ip, port) in addrs {
|
||||
if pool.connection_count() >= pool_size {
|
||||
break;
|
||||
}
|
||||
let addr = SocketAddr::new(*ip, *port);
|
||||
if let Err(e) = pool.connect_one(addr, rng_clone.as_ref()).await {
|
||||
debug!(%addr, dc = %dc, error = %e, "Extra ME connect failed");
|
||||
}
|
||||
}
|
||||
if pool.connection_count() >= pool_size {
|
||||
break;
|
||||
}
|
||||
let mut join_bg = tokio::task::JoinSet::new();
|
||||
for (dc, addrs) in dc_addrs_bg {
|
||||
if addrs.len() <= 1 {
|
||||
continue;
|
||||
}
|
||||
let target_writers = pool.required_writers_for_dc_with_floor_mode(addrs.len(), false);
|
||||
let pool_clone = Arc::clone(&pool);
|
||||
let rng_clone_local = Arc::clone(&rng_clone);
|
||||
join_bg.spawn(async move {
|
||||
pool_clone
|
||||
.connect_primary_for_dc(
|
||||
dc,
|
||||
addrs,
|
||||
target_writers,
|
||||
rng_clone_local,
|
||||
connect_concurrency,
|
||||
false,
|
||||
)
|
||||
.await
|
||||
});
|
||||
}
|
||||
while join_bg.join_next().await.is_some() {}
|
||||
debug!(
|
||||
target_pool_size = pool_size,
|
||||
current_pool_size = pool.connection_count(),
|
||||
"Background ME reserve warmup finished"
|
||||
"Background ME saturation warmup finished"
|
||||
);
|
||||
});
|
||||
|
||||
@@ -140,62 +146,111 @@ impl MePool {
|
||||
self: Arc<Self>,
|
||||
dc: i32,
|
||||
mut addrs: Vec<(IpAddr, u16)>,
|
||||
target_writers: usize,
|
||||
rng: Arc<SecureRandom>,
|
||||
connect_concurrency: usize,
|
||||
allow_coverage_override: bool,
|
||||
) -> bool {
|
||||
if addrs.is_empty() {
|
||||
return false;
|
||||
}
|
||||
let target_writers = target_writers.max(1);
|
||||
addrs.shuffle(&mut rand::rng());
|
||||
if addrs.len() > 1 {
|
||||
let concurrency = 2usize;
|
||||
let endpoints: Vec<SocketAddr> = addrs
|
||||
.iter()
|
||||
.map(|(ip, port)| SocketAddr::new(*ip, *port))
|
||||
.collect();
|
||||
let endpoint_set: HashSet<SocketAddr> = endpoints.iter().copied().collect();
|
||||
|
||||
loop {
|
||||
let alive = self
|
||||
.active_writer_count_for_dc_endpoints(dc, &endpoint_set)
|
||||
.await;
|
||||
if alive >= target_writers {
|
||||
info!(
|
||||
dc = %dc,
|
||||
alive,
|
||||
target_writers,
|
||||
"ME connected"
|
||||
);
|
||||
return true;
|
||||
}
|
||||
|
||||
let missing = target_writers.saturating_sub(alive).max(1);
|
||||
let concurrency = connect_concurrency.max(1).min(missing);
|
||||
let mut join = tokio::task::JoinSet::new();
|
||||
let mut next_idx = 0usize;
|
||||
for _ in 0..concurrency {
|
||||
let pool = Arc::clone(&self);
|
||||
let rng_clone = Arc::clone(&rng);
|
||||
let endpoints_clone = endpoints.clone();
|
||||
let generation = self.current_generation();
|
||||
join.spawn(async move {
|
||||
pool.connect_endpoints_round_robin_with_generation_contour(
|
||||
dc,
|
||||
&endpoints_clone,
|
||||
rng_clone.as_ref(),
|
||||
generation,
|
||||
super::pool::WriterContour::Active,
|
||||
allow_coverage_override,
|
||||
)
|
||||
.await
|
||||
});
|
||||
}
|
||||
|
||||
while next_idx < addrs.len() || !join.is_empty() {
|
||||
while next_idx < addrs.len() && join.len() < concurrency {
|
||||
let (ip, port) = addrs[next_idx];
|
||||
next_idx += 1;
|
||||
let addr = SocketAddr::new(ip, port);
|
||||
let pool = Arc::clone(&self);
|
||||
let rng_clone = Arc::clone(&rng);
|
||||
join.spawn(async move {
|
||||
(addr, pool.connect_one(addr, rng_clone.as_ref()).await)
|
||||
});
|
||||
}
|
||||
|
||||
let Some(res) = join.join_next().await else {
|
||||
break;
|
||||
};
|
||||
let mut progress = false;
|
||||
while let Some(res) = join.join_next().await {
|
||||
match res {
|
||||
Ok((addr, Ok(()))) => {
|
||||
info!(%addr, dc = %dc, "ME connected");
|
||||
join.abort_all();
|
||||
while join.join_next().await.is_some() {}
|
||||
return true;
|
||||
}
|
||||
Ok((addr, Err(e))) => {
|
||||
warn!(%addr, dc = %dc, error = %e, "ME connect failed, trying next");
|
||||
Ok(true) => {
|
||||
progress = true;
|
||||
}
|
||||
Ok(false) => {}
|
||||
Err(e) => {
|
||||
warn!(dc = %dc, error = %e, "ME connect task failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
warn!(dc = %dc, "All ME servers for DC failed at init");
|
||||
return false;
|
||||
}
|
||||
|
||||
for (ip, port) in addrs {
|
||||
let addr = SocketAddr::new(ip, port);
|
||||
match self.connect_one(addr, rng.as_ref()).await {
|
||||
Ok(()) => {
|
||||
info!(%addr, dc = %dc, "ME connected");
|
||||
return true;
|
||||
let alive_after = self
|
||||
.active_writer_count_for_dc_endpoints(dc, &endpoint_set)
|
||||
.await;
|
||||
if alive_after >= target_writers {
|
||||
info!(
|
||||
dc = %dc,
|
||||
alive = alive_after,
|
||||
target_writers,
|
||||
"ME connected"
|
||||
);
|
||||
return true;
|
||||
}
|
||||
if !progress {
|
||||
let active_writers_current = self.active_contour_writer_count_total().await;
|
||||
let active_cap_configured = self.adaptive_floor_active_cap_configured_total();
|
||||
if !allow_coverage_override && active_writers_current >= active_cap_configured {
|
||||
info!(
|
||||
dc = %dc,
|
||||
alive = alive_after,
|
||||
target_writers,
|
||||
active_writers_current,
|
||||
active_cap_configured,
|
||||
"ME init saturation stopped by active writer cap"
|
||||
);
|
||||
} else {
|
||||
warn!(
|
||||
dc = %dc,
|
||||
alive = alive_after,
|
||||
target_writers,
|
||||
"All ME servers for DC failed at init"
|
||||
);
|
||||
}
|
||||
Err(e) => warn!(%addr, dc = %dc, error = %e, "ME connect failed, trying next"),
|
||||
return false;
|
||||
}
|
||||
|
||||
if self.me_warmup_stagger_enabled {
|
||||
let jitter = rand::rng()
|
||||
.random_range(0..=self.me_warmup_step_jitter.as_millis() as u64);
|
||||
let delay_ms = self.me_warmup_step_delay.as_millis() as u64 + jitter;
|
||||
tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
|
||||
}
|
||||
}
|
||||
warn!(dc = %dc, "All ME servers for DC failed at init");
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -248,6 +248,43 @@ impl MePool {
|
||||
}
|
||||
}
|
||||
|
||||
let _singleflight_guard = if use_shared_cache {
|
||||
Some(match family {
|
||||
IpFamily::V4 => self.nat_reflection_singleflight_v4.lock().await,
|
||||
IpFamily::V6 => self.nat_reflection_singleflight_v6.lock().await,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if use_shared_cache
|
||||
&& let Some(until) = *self.stun_backoff_until.read().await
|
||||
&& Instant::now() < until
|
||||
{
|
||||
if let Ok(cache) = self.nat_reflection_cache.try_lock() {
|
||||
let slot = match family {
|
||||
IpFamily::V4 => cache.v4,
|
||||
IpFamily::V6 => cache.v6,
|
||||
};
|
||||
return slot.map(|(_, addr)| addr);
|
||||
}
|
||||
return None;
|
||||
}
|
||||
|
||||
if use_shared_cache
|
||||
&& let Ok(mut cache) = self.nat_reflection_cache.try_lock()
|
||||
{
|
||||
let slot = match family {
|
||||
IpFamily::V4 => &mut cache.v4,
|
||||
IpFamily::V6 => &mut cache.v6,
|
||||
};
|
||||
if let Some((ts, addr)) = slot
|
||||
&& ts.elapsed() < STUN_CACHE_TTL
|
||||
{
|
||||
return Some(*addr);
|
||||
}
|
||||
}
|
||||
|
||||
let attempt = if use_shared_cache {
|
||||
self.nat_probe_attempts.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
|
||||
} else {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::collections::HashSet;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::Ordering;
|
||||
@@ -9,7 +9,7 @@ use tracing::{debug, info, warn};
|
||||
use crate::crypto::SecureRandom;
|
||||
use crate::network::IpFamily;
|
||||
|
||||
use super::pool::{MePool, RefillDcKey, WriterContour};
|
||||
use super::pool::{MePool, RefillDcKey, RefillEndpointKey, WriterContour};
|
||||
|
||||
const ME_FLAP_UPTIME_THRESHOLD_SECS: u64 = 20;
|
||||
const ME_FLAP_QUARANTINE_SECS: u64 = 25;
|
||||
@@ -82,91 +82,79 @@ impl MePool {
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
pub(super) async fn has_refill_inflight_for_endpoints(&self, endpoints: &[SocketAddr]) -> bool {
|
||||
if endpoints.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
{
|
||||
let guard = self.refill_inflight.lock().await;
|
||||
if endpoints.iter().any(|addr| guard.contains(addr)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
let dc_keys = self.resolve_refill_dc_keys_for_endpoints(endpoints).await;
|
||||
if dc_keys.is_empty() {
|
||||
return false;
|
||||
}
|
||||
pub(super) async fn has_refill_inflight_for_dc_key(&self, key: RefillDcKey) -> bool {
|
||||
let guard = self.refill_inflight_dc.lock().await;
|
||||
dc_keys.iter().any(|key| guard.contains(key))
|
||||
}
|
||||
|
||||
async fn resolve_refill_dc_key_for_addr(&self, addr: SocketAddr) -> Option<RefillDcKey> {
|
||||
let family = if addr.is_ipv4() {
|
||||
IpFamily::V4
|
||||
} else {
|
||||
IpFamily::V6
|
||||
};
|
||||
let map = self.proxy_map_for_family(family).await;
|
||||
for (dc, endpoints) in map {
|
||||
if endpoints
|
||||
.into_iter()
|
||||
.any(|(ip, port)| SocketAddr::new(ip, port) == addr)
|
||||
{
|
||||
return Some(RefillDcKey {
|
||||
dc: dc.abs(),
|
||||
family,
|
||||
});
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
async fn resolve_refill_dc_keys_for_endpoints(
|
||||
&self,
|
||||
endpoints: &[SocketAddr],
|
||||
) -> HashSet<RefillDcKey> {
|
||||
let mut out = HashSet::<RefillDcKey>::new();
|
||||
for addr in endpoints {
|
||||
if let Some(key) = self.resolve_refill_dc_key_for_addr(*addr).await {
|
||||
out.insert(key);
|
||||
}
|
||||
}
|
||||
out
|
||||
guard.contains(&key)
|
||||
}
|
||||
|
||||
pub(super) async fn connect_endpoints_round_robin(
|
||||
self: &Arc<Self>,
|
||||
dc: i32,
|
||||
endpoints: &[SocketAddr],
|
||||
rng: &SecureRandom,
|
||||
) -> bool {
|
||||
self.connect_endpoints_round_robin_with_generation_contour(
|
||||
dc,
|
||||
endpoints,
|
||||
rng,
|
||||
self.current_generation(),
|
||||
WriterContour::Active,
|
||||
false,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub(super) async fn connect_endpoints_round_robin_with_generation_contour(
|
||||
self: &Arc<Self>,
|
||||
dc: i32,
|
||||
endpoints: &[SocketAddr],
|
||||
rng: &SecureRandom,
|
||||
generation: u64,
|
||||
contour: WriterContour,
|
||||
allow_coverage_override: bool,
|
||||
) -> bool {
|
||||
let candidates = self.connectable_endpoints(endpoints).await;
|
||||
let mut candidates = self.connectable_endpoints(endpoints).await;
|
||||
if candidates.is_empty() {
|
||||
return false;
|
||||
}
|
||||
if candidates.len() > 1 {
|
||||
let mut active_by_endpoint = HashMap::<SocketAddr, usize>::new();
|
||||
let ws = self.writers.read().await;
|
||||
for writer in ws.iter() {
|
||||
if writer.draining.load(Ordering::Relaxed) {
|
||||
continue;
|
||||
}
|
||||
if writer.writer_dc != dc {
|
||||
continue;
|
||||
}
|
||||
if !matches!(
|
||||
super::pool::WriterContour::from_u8(
|
||||
writer.contour.load(Ordering::Relaxed),
|
||||
),
|
||||
super::pool::WriterContour::Active
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
if candidates.contains(&writer.addr) {
|
||||
*active_by_endpoint.entry(writer.addr).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
drop(ws);
|
||||
candidates.sort_by_key(|addr| (active_by_endpoint.get(addr).copied().unwrap_or(0), *addr));
|
||||
}
|
||||
let start = (self.rr.fetch_add(1, Ordering::Relaxed) as usize) % candidates.len();
|
||||
for offset in 0..candidates.len() {
|
||||
let idx = (start + offset) % candidates.len();
|
||||
let addr = candidates[idx];
|
||||
match self
|
||||
.connect_one_with_generation_contour(addr, rng, generation, contour)
|
||||
.connect_one_with_generation_contour_for_dc_with_cap_policy(
|
||||
addr,
|
||||
rng,
|
||||
generation,
|
||||
contour,
|
||||
dc,
|
||||
allow_coverage_override,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => return true,
|
||||
@@ -176,48 +164,23 @@ impl MePool {
|
||||
false
|
||||
}
|
||||
|
||||
async fn endpoints_for_same_dc(&self, addr: SocketAddr) -> Vec<SocketAddr> {
|
||||
let mut target_dc = HashSet::<i32>::new();
|
||||
async fn endpoints_for_dc(&self, target_dc: i32) -> Vec<SocketAddr> {
|
||||
let mut endpoints = HashSet::<SocketAddr>::new();
|
||||
|
||||
if self.decision.ipv4_me {
|
||||
let map = self.proxy_map_v4.read().await.clone();
|
||||
for (dc, addrs) in &map {
|
||||
if addrs
|
||||
.iter()
|
||||
.any(|(ip, port)| SocketAddr::new(*ip, *port) == addr)
|
||||
{
|
||||
target_dc.insert(dc.abs());
|
||||
}
|
||||
}
|
||||
for dc in &target_dc {
|
||||
for key in [*dc, -*dc] {
|
||||
if let Some(addrs) = map.get(&key) {
|
||||
for (ip, port) in addrs {
|
||||
endpoints.insert(SocketAddr::new(*ip, *port));
|
||||
}
|
||||
}
|
||||
let map = self.proxy_map_v4.read().await;
|
||||
if let Some(addrs) = map.get(&target_dc) {
|
||||
for (ip, port) in addrs {
|
||||
endpoints.insert(SocketAddr::new(*ip, *port));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.decision.ipv6_me {
|
||||
let map = self.proxy_map_v6.read().await.clone();
|
||||
for (dc, addrs) in &map {
|
||||
if addrs
|
||||
.iter()
|
||||
.any(|(ip, port)| SocketAddr::new(*ip, *port) == addr)
|
||||
{
|
||||
target_dc.insert(dc.abs());
|
||||
}
|
||||
}
|
||||
for dc in &target_dc {
|
||||
for key in [*dc, -*dc] {
|
||||
if let Some(addrs) = map.get(&key) {
|
||||
for (ip, port) in addrs {
|
||||
endpoints.insert(SocketAddr::new(*ip, *port));
|
||||
}
|
||||
}
|
||||
let map = self.proxy_map_v6.read().await;
|
||||
if let Some(addrs) = map.get(&target_dc) {
|
||||
for (ip, port) in addrs {
|
||||
endpoints.insert(SocketAddr::new(*ip, *port));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -227,14 +190,14 @@ impl MePool {
|
||||
sorted
|
||||
}
|
||||
|
||||
async fn refill_writer_after_loss(self: &Arc<Self>, addr: SocketAddr) -> bool {
|
||||
async fn refill_writer_after_loss(self: &Arc<Self>, addr: SocketAddr, writer_dc: i32) -> bool {
|
||||
let fast_retries = self.me_reconnect_fast_retry_count.max(1);
|
||||
let same_endpoint_quarantined = self.is_endpoint_quarantined(addr).await;
|
||||
|
||||
if !same_endpoint_quarantined {
|
||||
for attempt in 0..fast_retries {
|
||||
self.stats.increment_me_reconnect_attempt();
|
||||
match self.connect_one(addr, self.rng.as_ref()).await {
|
||||
match self.connect_one_for_dc(addr, writer_dc, self.rng.as_ref()).await {
|
||||
Ok(()) => {
|
||||
self.stats.increment_me_reconnect_success();
|
||||
self.stats.increment_me_writer_restored_same_endpoint_total();
|
||||
@@ -262,7 +225,7 @@ impl MePool {
|
||||
);
|
||||
}
|
||||
|
||||
let dc_endpoints = self.endpoints_for_same_dc(addr).await;
|
||||
let dc_endpoints = self.endpoints_for_dc(writer_dc).await;
|
||||
if dc_endpoints.is_empty() {
|
||||
self.stats.increment_me_refill_failed_total();
|
||||
return false;
|
||||
@@ -271,7 +234,7 @@ impl MePool {
|
||||
for attempt in 0..fast_retries {
|
||||
self.stats.increment_me_reconnect_attempt();
|
||||
if self
|
||||
.connect_endpoints_round_robin(&dc_endpoints, self.rng.as_ref())
|
||||
.connect_endpoints_round_robin(writer_dc, &dc_endpoints, self.rng.as_ref())
|
||||
.await
|
||||
{
|
||||
self.stats.increment_me_reconnect_success();
|
||||
@@ -289,48 +252,63 @@ impl MePool {
|
||||
false
|
||||
}
|
||||
|
||||
pub(crate) fn trigger_immediate_refill(self: &Arc<Self>, addr: SocketAddr) {
|
||||
pub(crate) fn trigger_immediate_refill_for_dc(self: &Arc<Self>, addr: SocketAddr, writer_dc: i32) {
|
||||
let endpoint_key = RefillEndpointKey {
|
||||
dc: writer_dc,
|
||||
addr,
|
||||
};
|
||||
let pre_inserted = if let Ok(mut guard) = self.refill_inflight.try_lock() {
|
||||
if !guard.insert(endpoint_key) {
|
||||
self.stats.increment_me_refill_skipped_inflight_total();
|
||||
return;
|
||||
}
|
||||
true
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
let pool = Arc::clone(self);
|
||||
tokio::spawn(async move {
|
||||
let dc_endpoints = pool.endpoints_for_same_dc(addr).await;
|
||||
let dc_keys = pool.resolve_refill_dc_keys_for_endpoints(&dc_endpoints).await;
|
||||
let dc_key = RefillDcKey {
|
||||
dc: writer_dc,
|
||||
family: if addr.is_ipv4() {
|
||||
IpFamily::V4
|
||||
} else {
|
||||
IpFamily::V6
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
if !pre_inserted {
|
||||
let mut guard = pool.refill_inflight.lock().await;
|
||||
if !guard.insert(addr) {
|
||||
if !guard.insert(endpoint_key) {
|
||||
pool.stats.increment_me_refill_skipped_inflight_total();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if !dc_keys.is_empty() {
|
||||
{
|
||||
let mut dc_guard = pool.refill_inflight_dc.lock().await;
|
||||
if dc_keys.iter().any(|key| dc_guard.contains(key)) {
|
||||
if dc_guard.contains(&dc_key) {
|
||||
pool.stats.increment_me_refill_skipped_inflight_total();
|
||||
drop(dc_guard);
|
||||
let mut guard = pool.refill_inflight.lock().await;
|
||||
guard.remove(&addr);
|
||||
guard.remove(&endpoint_key);
|
||||
return;
|
||||
}
|
||||
dc_guard.extend(dc_keys.iter().copied());
|
||||
dc_guard.insert(dc_key);
|
||||
}
|
||||
|
||||
pool.stats.increment_me_refill_triggered_total();
|
||||
|
||||
let restored = pool.refill_writer_after_loss(addr).await;
|
||||
let restored = pool.refill_writer_after_loss(addr, writer_dc).await;
|
||||
if !restored {
|
||||
warn!(%addr, "ME immediate refill failed");
|
||||
warn!(%addr, dc = writer_dc, "ME immediate refill failed");
|
||||
}
|
||||
|
||||
let mut guard = pool.refill_inflight.lock().await;
|
||||
guard.remove(&addr);
|
||||
guard.remove(&endpoint_key);
|
||||
drop(guard);
|
||||
if !dc_keys.is_empty() {
|
||||
let mut dc_guard = pool.refill_inflight_dc.lock().await;
|
||||
for key in &dc_keys {
|
||||
dc_guard.remove(key);
|
||||
}
|
||||
}
|
||||
let mut dc_guard = pool.refill_inflight_dc.lock().await;
|
||||
dc_guard.remove(&dc_key);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,7 +62,7 @@ impl MePool {
|
||||
|
||||
fn coverage_ratio(
|
||||
desired_by_dc: &HashMap<i32, HashSet<SocketAddr>>,
|
||||
active_writer_addrs: &HashSet<SocketAddr>,
|
||||
active_writer_addrs: &HashSet<(i32, SocketAddr)>,
|
||||
) -> (f32, Vec<i32>) {
|
||||
if desired_by_dc.is_empty() {
|
||||
return (1.0, Vec::new());
|
||||
@@ -76,7 +76,7 @@ impl MePool {
|
||||
}
|
||||
if endpoints
|
||||
.iter()
|
||||
.any(|addr| active_writer_addrs.contains(addr))
|
||||
.any(|addr| active_writer_addrs.contains(&(*dc, *addr)))
|
||||
{
|
||||
covered += 1;
|
||||
} else {
|
||||
@@ -91,32 +91,25 @@ impl MePool {
|
||||
}
|
||||
|
||||
pub async fn reconcile_connections(self: &Arc<Self>, rng: &SecureRandom) {
|
||||
let writers = self.writers.read().await;
|
||||
let current: HashSet<SocketAddr> = writers
|
||||
.iter()
|
||||
.filter(|w| !w.draining.load(Ordering::Relaxed))
|
||||
.map(|w| w.addr)
|
||||
.collect();
|
||||
drop(writers);
|
||||
|
||||
for family in self.family_order() {
|
||||
let map = self.proxy_map_for_family(family).await;
|
||||
for (_dc, addrs) in &map {
|
||||
for (dc, addrs) in &map {
|
||||
let dc_addrs: Vec<SocketAddr> = addrs
|
||||
.iter()
|
||||
.map(|(ip, port)| SocketAddr::new(*ip, *port))
|
||||
.collect();
|
||||
if !dc_addrs.iter().any(|a| current.contains(a)) {
|
||||
let dc_endpoints: HashSet<SocketAddr> = dc_addrs.iter().copied().collect();
|
||||
if self.active_writer_count_for_dc_endpoints(*dc, &dc_endpoints).await == 0 {
|
||||
let mut shuffled = dc_addrs.clone();
|
||||
shuffled.shuffle(&mut rand::rng());
|
||||
for addr in shuffled {
|
||||
if self.connect_one(addr, rng).await.is_ok() {
|
||||
if self.connect_one_for_dc(addr, *dc, rng).await.is_ok() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !self.decision.effective_multipath && !current.is_empty() {
|
||||
if !self.decision.effective_multipath && self.connection_count() > 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -128,7 +121,7 @@ impl MePool {
|
||||
if self.decision.ipv4_me {
|
||||
let map_v4 = self.proxy_map_v4.read().await.clone();
|
||||
for (dc, addrs) in map_v4 {
|
||||
let entry = out.entry(dc.abs()).or_default();
|
||||
let entry = out.entry(dc).or_default();
|
||||
for (ip, port) in addrs {
|
||||
entry.insert(SocketAddr::new(ip, port));
|
||||
}
|
||||
@@ -138,7 +131,7 @@ impl MePool {
|
||||
if self.decision.ipv6_me {
|
||||
let map_v6 = self.proxy_map_v6.read().await.clone();
|
||||
for (dc, addrs) in map_v6 {
|
||||
let entry = out.entry(dc.abs()).or_default();
|
||||
let entry = out.entry(dc).or_default();
|
||||
for (ip, port) in addrs {
|
||||
entry.insert(SocketAddr::new(ip, port));
|
||||
}
|
||||
@@ -174,26 +167,30 @@ impl MePool {
|
||||
core.saturating_add(rand::rng().random_range(0..=jitter))
|
||||
}
|
||||
|
||||
async fn fresh_writer_count_for_endpoints(
|
||||
async fn fresh_writer_count_for_dc_endpoints(
|
||||
&self,
|
||||
generation: u64,
|
||||
dc: i32,
|
||||
endpoints: &HashSet<SocketAddr>,
|
||||
) -> usize {
|
||||
let ws = self.writers.read().await;
|
||||
ws.iter()
|
||||
.filter(|w| !w.draining.load(Ordering::Relaxed))
|
||||
.filter(|w| w.generation == generation)
|
||||
.filter(|w| w.writer_dc == dc)
|
||||
.filter(|w| endpoints.contains(&w.addr))
|
||||
.count()
|
||||
}
|
||||
|
||||
pub(super) async fn active_writer_count_for_endpoints(
|
||||
pub(super) async fn active_writer_count_for_dc_endpoints(
|
||||
&self,
|
||||
dc: i32,
|
||||
endpoints: &HashSet<SocketAddr>,
|
||||
) -> usize {
|
||||
let ws = self.writers.read().await;
|
||||
ws.iter()
|
||||
.filter(|w| !w.draining.load(Ordering::Relaxed))
|
||||
.filter(|w| w.writer_dc == dc)
|
||||
.filter(|w| endpoints.contains(&w.addr))
|
||||
.count()
|
||||
}
|
||||
@@ -220,7 +217,7 @@ impl MePool {
|
||||
let required = self.required_writers_for_dc(endpoint_list.len());
|
||||
let mut completed = false;
|
||||
let mut last_fresh_count = self
|
||||
.fresh_writer_count_for_endpoints(generation, endpoints)
|
||||
.fresh_writer_count_for_dc_endpoints(generation, *dc, endpoints)
|
||||
.await;
|
||||
|
||||
for pass_idx in 0..total_passes {
|
||||
@@ -247,10 +244,12 @@ impl MePool {
|
||||
|
||||
let connected = self
|
||||
.connect_endpoints_round_robin_with_generation_contour(
|
||||
*dc,
|
||||
&endpoint_list,
|
||||
rng,
|
||||
generation,
|
||||
WriterContour::Warm,
|
||||
false,
|
||||
)
|
||||
.await;
|
||||
debug!(
|
||||
@@ -265,7 +264,7 @@ impl MePool {
|
||||
}
|
||||
|
||||
last_fresh_count = self
|
||||
.fresh_writer_count_for_endpoints(generation, endpoints)
|
||||
.fresh_writer_count_for_dc_endpoints(generation, *dc, endpoints)
|
||||
.await;
|
||||
if last_fresh_count >= required {
|
||||
completed = true;
|
||||
@@ -377,10 +376,10 @@ impl MePool {
|
||||
}
|
||||
|
||||
let writers = self.writers.read().await;
|
||||
let active_writer_addrs: HashSet<SocketAddr> = writers
|
||||
let active_writer_addrs: HashSet<(i32, SocketAddr)> = writers
|
||||
.iter()
|
||||
.filter(|w| !w.draining.load(Ordering::Relaxed))
|
||||
.map(|w| w.addr)
|
||||
.map(|w| (w.writer_dc, w.addr))
|
||||
.collect();
|
||||
let min_ratio = Self::permille_to_ratio(
|
||||
self.me_pool_min_fresh_ratio_permille
|
||||
@@ -410,6 +409,7 @@ impl MePool {
|
||||
.iter()
|
||||
.filter(|w| !w.draining.load(Ordering::Relaxed))
|
||||
.filter(|w| w.generation == generation)
|
||||
.filter(|w| w.writer_dc == *dc)
|
||||
.filter(|w| endpoints.contains(&w.addr))
|
||||
.count();
|
||||
if fresh_count < required {
|
||||
@@ -438,9 +438,9 @@ impl MePool {
|
||||
self.promote_warm_generation_to_active(generation).await;
|
||||
}
|
||||
|
||||
let desired_addrs: HashSet<SocketAddr> = desired_by_dc
|
||||
.values()
|
||||
.flat_map(|set| set.iter().copied())
|
||||
let desired_addrs: HashSet<(i32, SocketAddr)> = desired_by_dc
|
||||
.iter()
|
||||
.flat_map(|(dc, set)| set.iter().copied().map(|addr| (*dc, addr)))
|
||||
.collect();
|
||||
|
||||
let stale_writer_ids: Vec<u64> = writers
|
||||
@@ -450,7 +450,7 @@ impl MePool {
|
||||
if hardswap {
|
||||
w.generation < generation
|
||||
} else {
|
||||
!desired_addrs.contains(&w.addr)
|
||||
!desired_addrs.contains(&(w.writer_dc, w.addr))
|
||||
}
|
||||
})
|
||||
.map(|w| w.id)
|
||||
|
||||
128
src/transport/middle_proxy/pool_runtime_api.rs
Normal file
128
src/transport/middle_proxy/pool_runtime_api.rs
Normal file
@@ -0,0 +1,128 @@
|
||||
use std::collections::HashMap;
|
||||
use std::time::Instant;
|
||||
|
||||
use super::pool::{MePool, RefillDcKey};
|
||||
use crate::network::IpFamily;
|
||||
|
||||
/// One row of the refill report: how many in-flight refill keys exist for a
/// given DC / address-family pair.
#[derive(Clone, Debug)]
pub(crate) struct MeApiRefillDcSnapshot {
    /// DC identifier, narrowed from the refill key's DC value.
    pub dc: i16,
    /// Address family label; `MePool::api_refill_snapshot` emits `"v4"` or `"v6"`.
    pub family: &'static str,
    /// Number of in-flight refill keys that mapped to this `(dc, family)` pair.
    pub inflight: usize,
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct MeApiRefillSnapshot {
|
||||
pub inflight_endpoints_total: usize,
|
||||
pub inflight_dc_total: usize,
|
||||
pub by_dc: Vec<MeApiRefillDcSnapshot>,
|
||||
}
|
||||
|
||||
/// A cached NAT reflection entry together with its age.
#[derive(Clone, Debug)]
pub(crate) struct MeApiNatReflectionSnapshot {
    /// Socket address stored in the reflection cache.
    pub addr: std::net::SocketAddr,
    /// Whole seconds elapsed between the cache timestamp and the snapshot.
    pub age_secs: u64,
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct MeApiNatStunSnapshot {
|
||||
pub nat_probe_enabled: bool,
|
||||
pub nat_probe_disabled_runtime: bool,
|
||||
pub nat_probe_attempts: u8,
|
||||
pub configured_servers: Vec<String>,
|
||||
pub live_servers: Vec<String>,
|
||||
pub reflection_v4: Option<MeApiNatReflectionSnapshot>,
|
||||
pub reflection_v6: Option<MeApiNatReflectionSnapshot>,
|
||||
pub stun_backoff_remaining_ms: Option<u64>,
|
||||
}
|
||||
|
||||
impl MePool {
|
||||
pub(crate) async fn api_refill_snapshot(&self) -> MeApiRefillSnapshot {
|
||||
let inflight_endpoints_total = self.refill_inflight.lock().await.len();
|
||||
let inflight_dc_keys = self
|
||||
.refill_inflight_dc
|
||||
.lock()
|
||||
.await
|
||||
.iter()
|
||||
.copied()
|
||||
.collect::<Vec<RefillDcKey>>();
|
||||
|
||||
let mut by_dc_map = HashMap::<(i16, &'static str), usize>::new();
|
||||
for key in inflight_dc_keys {
|
||||
let family = match key.family {
|
||||
IpFamily::V4 => "v4",
|
||||
IpFamily::V6 => "v6",
|
||||
};
|
||||
let dc = key.dc as i16;
|
||||
*by_dc_map.entry((dc, family)).or_insert(0) += 1;
|
||||
}
|
||||
|
||||
let mut by_dc = by_dc_map
|
||||
.into_iter()
|
||||
.map(|((dc, family), inflight)| MeApiRefillDcSnapshot {
|
||||
dc,
|
||||
family,
|
||||
inflight,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
by_dc.sort_by_key(|entry| (entry.dc, entry.family));
|
||||
|
||||
MeApiRefillSnapshot {
|
||||
inflight_endpoints_total,
|
||||
inflight_dc_total: by_dc.len(),
|
||||
by_dc,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn api_nat_stun_snapshot(&self) -> MeApiNatStunSnapshot {
|
||||
let now = Instant::now();
|
||||
let mut configured_servers = if !self.nat_stun_servers.is_empty() {
|
||||
self.nat_stun_servers.clone()
|
||||
} else if let Some(stun) = &self.nat_stun {
|
||||
if stun.trim().is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
vec![stun.clone()]
|
||||
}
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
configured_servers.sort();
|
||||
configured_servers.dedup();
|
||||
|
||||
let mut live_servers = self.nat_stun_live_servers.read().await.clone();
|
||||
live_servers.sort();
|
||||
live_servers.dedup();
|
||||
|
||||
let reflection = self.nat_reflection_cache.lock().await;
|
||||
let reflection_v4 = reflection.v4.map(|(ts, addr)| MeApiNatReflectionSnapshot {
|
||||
addr,
|
||||
age_secs: now.saturating_duration_since(ts).as_secs(),
|
||||
});
|
||||
let reflection_v6 = reflection.v6.map(|(ts, addr)| MeApiNatReflectionSnapshot {
|
||||
addr,
|
||||
age_secs: now.saturating_duration_since(ts).as_secs(),
|
||||
});
|
||||
drop(reflection);
|
||||
|
||||
let backoff_until = *self.stun_backoff_until.read().await;
|
||||
let stun_backoff_remaining_ms = backoff_until.and_then(|until| {
|
||||
(until > now).then_some(until.duration_since(now).as_millis() as u64)
|
||||
});
|
||||
|
||||
MeApiNatStunSnapshot {
|
||||
nat_probe_enabled: self.nat_probe,
|
||||
nat_probe_disabled_runtime: self
|
||||
.nat_probe_disabled
|
||||
.load(std::sync::atomic::Ordering::Relaxed),
|
||||
nat_probe_attempts: self
|
||||
.nat_probe_attempts
|
||||
.load(std::sync::atomic::Ordering::Relaxed),
|
||||
configured_servers,
|
||||
live_servers,
|
||||
reflection_v4,
|
||||
reflection_v6,
|
||||
stun_backoff_remaining_ms,
|
||||
}
|
||||
}
|
||||
}
|
||||
636
src/transport/middle_proxy/pool_status.rs
Normal file
636
src/transport/middle_proxy/pool_status.rs
Normal file
@@ -0,0 +1,636 @@
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::Instant;
|
||||
|
||||
use super::pool::{MePool, WriterContour};
|
||||
use crate::config::{MeBindStaleMode, MeFloorMode, MeSocksKdfPolicy};
|
||||
use crate::transport::upstream::IpPreference;
|
||||
|
||||
/// Status row for a single writer, as emitted by `MePool::api_status_snapshot`.
#[derive(Clone, Debug)]
pub(crate) struct MeApiWriterStatusSnapshot {
    /// The writer's id.
    pub writer_id: u64,
    /// The writer's DC, or `None` when it does not fit in an `i16`.
    pub dc: Option<i16>,
    /// The writer's address.
    pub endpoint: SocketAddr,
    /// The writer's generation number.
    pub generation: u64,
    /// Contour label: `"warm"`, `"active"` or `"draining"`.
    pub state: &'static str,
    /// Value of the writer's `draining` flag at snapshot time.
    pub draining: bool,
    /// Value of the writer's `degraded` flag at snapshot time.
    pub degraded: bool,
    /// Bound-client count from the registry activity snapshot (0 if absent).
    pub bound_clients: usize,
    /// Seconds since the registry's idle timestamp for this writer, if one exists.
    pub idle_for_secs: Option<u64>,
    /// RTT EMA value recorded for this writer, if any.
    pub rtt_ema_ms: Option<f64>,
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct MeApiDcStatusSnapshot {
|
||||
pub dc: i16,
|
||||
pub endpoints: Vec<SocketAddr>,
|
||||
pub endpoint_writers: Vec<MeApiDcEndpointWriterSnapshot>,
|
||||
pub available_endpoints: usize,
|
||||
pub available_pct: f64,
|
||||
pub required_writers: usize,
|
||||
pub floor_min: usize,
|
||||
pub floor_target: usize,
|
||||
pub floor_max: usize,
|
||||
pub floor_capped: bool,
|
||||
pub alive_writers: usize,
|
||||
pub coverage_pct: f64,
|
||||
pub rtt_ms: Option<f64>,
|
||||
pub load: usize,
|
||||
}
|
||||
|
||||
/// Writer count for one configured endpoint of a DC.
#[derive(Clone, Debug)]
pub(crate) struct MeApiDcEndpointWriterSnapshot {
    /// The configured endpoint address.
    pub endpoint: SocketAddr,
    /// Number of non-draining writers connected to `endpoint` for that DC.
    pub active_writers: usize,
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct MeApiStatusSnapshot {
|
||||
pub generated_at_epoch_secs: u64,
|
||||
pub configured_dc_groups: usize,
|
||||
pub configured_endpoints: usize,
|
||||
pub available_endpoints: usize,
|
||||
pub available_pct: f64,
|
||||
pub required_writers: usize,
|
||||
pub alive_writers: usize,
|
||||
pub coverage_pct: f64,
|
||||
pub writers: Vec<MeApiWriterStatusSnapshot>,
|
||||
pub dcs: Vec<MeApiDcStatusSnapshot>,
|
||||
}
|
||||
|
||||
/// An endpoint whose quarantine has not yet expired.
#[derive(Clone, Debug)]
pub(crate) struct MeApiQuarantinedEndpointSnapshot {
    /// The quarantined endpoint address.
    pub endpoint: SocketAddr,
    /// Milliseconds left until the quarantine expires.
    pub remaining_ms: u64,
}
|
||||
|
||||
/// Upstream path information for one DC.
#[derive(Clone, Debug)]
pub(crate) struct MeApiDcPathSnapshot {
    /// DC identifier.
    pub dc: i16,
    /// Label of the upstream's IP preference for this DC, when one is reported.
    pub ip_preference: Option<&'static str>,
    /// Address the upstream reports for this DC over IPv4, if any.
    pub selected_addr_v4: Option<SocketAddr>,
    /// Address the upstream reports for this DC over IPv6, if any.
    pub selected_addr_v6: Option<SocketAddr>,
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct MeApiRuntimeSnapshot {
|
||||
pub active_generation: u64,
|
||||
pub warm_generation: u64,
|
||||
pub pending_hardswap_generation: u64,
|
||||
pub pending_hardswap_age_secs: Option<u64>,
|
||||
pub hardswap_enabled: bool,
|
||||
pub floor_mode: &'static str,
|
||||
pub adaptive_floor_idle_secs: u64,
|
||||
pub adaptive_floor_min_writers_single_endpoint: u8,
|
||||
pub adaptive_floor_min_writers_multi_endpoint: u8,
|
||||
pub adaptive_floor_recover_grace_secs: u64,
|
||||
pub adaptive_floor_writers_per_core_total: u16,
|
||||
pub adaptive_floor_cpu_cores_override: u16,
|
||||
pub adaptive_floor_max_extra_writers_single_per_core: u16,
|
||||
pub adaptive_floor_max_extra_writers_multi_per_core: u16,
|
||||
pub adaptive_floor_max_active_writers_per_core: u16,
|
||||
pub adaptive_floor_max_warm_writers_per_core: u16,
|
||||
pub adaptive_floor_max_active_writers_global: u32,
|
||||
pub adaptive_floor_max_warm_writers_global: u32,
|
||||
pub adaptive_floor_cpu_cores_detected: u32,
|
||||
pub adaptive_floor_cpu_cores_effective: u32,
|
||||
pub adaptive_floor_global_cap_raw: u64,
|
||||
pub adaptive_floor_global_cap_effective: u64,
|
||||
pub adaptive_floor_target_writers_total: u64,
|
||||
pub adaptive_floor_active_cap_configured: u64,
|
||||
pub adaptive_floor_active_cap_effective: u64,
|
||||
pub adaptive_floor_warm_cap_configured: u64,
|
||||
pub adaptive_floor_warm_cap_effective: u64,
|
||||
pub adaptive_floor_active_writers_current: u64,
|
||||
pub adaptive_floor_warm_writers_current: u64,
|
||||
pub me_keepalive_enabled: bool,
|
||||
pub me_keepalive_interval_secs: u64,
|
||||
pub me_keepalive_jitter_secs: u64,
|
||||
pub me_keepalive_payload_random: bool,
|
||||
pub rpc_proxy_req_every_secs: u64,
|
||||
pub me_reconnect_max_concurrent_per_dc: u32,
|
||||
pub me_reconnect_backoff_base_ms: u64,
|
||||
pub me_reconnect_backoff_cap_ms: u64,
|
||||
pub me_reconnect_fast_retry_count: u32,
|
||||
pub me_pool_drain_ttl_secs: u64,
|
||||
pub me_pool_force_close_secs: u64,
|
||||
pub me_pool_min_fresh_ratio: f32,
|
||||
pub me_bind_stale_mode: &'static str,
|
||||
pub me_bind_stale_ttl_secs: u64,
|
||||
pub me_single_endpoint_shadow_writers: u8,
|
||||
pub me_single_endpoint_outage_mode_enabled: bool,
|
||||
pub me_single_endpoint_outage_disable_quarantine: bool,
|
||||
pub me_single_endpoint_outage_backoff_min_ms: u64,
|
||||
pub me_single_endpoint_outage_backoff_max_ms: u64,
|
||||
pub me_single_endpoint_shadow_rotate_every_secs: u64,
|
||||
pub me_deterministic_writer_sort: bool,
|
||||
pub me_writer_pick_mode: &'static str,
|
||||
pub me_writer_pick_sample_size: u8,
|
||||
pub me_socks_kdf_policy: &'static str,
|
||||
pub quarantined_endpoints: Vec<MeApiQuarantinedEndpointSnapshot>,
|
||||
pub network_path: Vec<MeApiDcPathSnapshot>,
|
||||
}
|
||||
|
||||
impl MePool {
|
||||
pub(crate) async fn admission_ready_conditional_cast(&self) -> bool {
|
||||
let mut endpoints_by_dc = BTreeMap::<i16, BTreeSet<SocketAddr>>::new();
|
||||
if self.decision.ipv4_me {
|
||||
let map = self.proxy_map_v4.read().await.clone();
|
||||
extend_signed_endpoints(&mut endpoints_by_dc, map);
|
||||
}
|
||||
if self.decision.ipv6_me {
|
||||
let map = self.proxy_map_v6.read().await.clone();
|
||||
extend_signed_endpoints(&mut endpoints_by_dc, map);
|
||||
}
|
||||
|
||||
if endpoints_by_dc.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let writers = self.writers.read().await.clone();
|
||||
let mut live_writers_by_dc = HashMap::<i16, usize>::new();
|
||||
for writer in writers {
|
||||
if writer.draining.load(Ordering::Relaxed) {
|
||||
continue;
|
||||
}
|
||||
if let Ok(dc) = i16::try_from(writer.writer_dc) {
|
||||
*live_writers_by_dc.entry(dc).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
|
||||
for dc in endpoints_by_dc.keys() {
|
||||
let alive = live_writers_by_dc.get(dc).copied().unwrap_or(0);
|
||||
if alive == 0 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub(crate) async fn admission_ready_full_floor(&self) -> bool {
|
||||
let mut endpoints_by_dc = BTreeMap::<i16, BTreeSet<SocketAddr>>::new();
|
||||
if self.decision.ipv4_me {
|
||||
let map = self.proxy_map_v4.read().await.clone();
|
||||
extend_signed_endpoints(&mut endpoints_by_dc, map);
|
||||
}
|
||||
if self.decision.ipv6_me {
|
||||
let map = self.proxy_map_v6.read().await.clone();
|
||||
extend_signed_endpoints(&mut endpoints_by_dc, map);
|
||||
}
|
||||
|
||||
if endpoints_by_dc.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let writers = self.writers.read().await.clone();
|
||||
let mut live_writers_by_dc = HashMap::<i16, usize>::new();
|
||||
for writer in writers {
|
||||
if writer.draining.load(Ordering::Relaxed) {
|
||||
continue;
|
||||
}
|
||||
if let Ok(dc) = i16::try_from(writer.writer_dc) {
|
||||
*live_writers_by_dc.entry(dc).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (dc, endpoints) in endpoints_by_dc {
|
||||
let endpoint_count = endpoints.len();
|
||||
if endpoint_count == 0 {
|
||||
return false;
|
||||
}
|
||||
let required = self.required_writers_for_dc_with_floor_mode(endpoint_count, false);
|
||||
let alive = live_writers_by_dc.get(&dc).copied().unwrap_or(0);
|
||||
if alive < required {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
pub(crate) async fn api_status_snapshot(&self) -> MeApiStatusSnapshot {
|
||||
let now_epoch_secs = Self::now_epoch_secs();
|
||||
|
||||
let mut endpoints_by_dc = BTreeMap::<i16, BTreeSet<SocketAddr>>::new();
|
||||
if self.decision.ipv4_me {
|
||||
let map = self.proxy_map_v4.read().await.clone();
|
||||
extend_signed_endpoints(&mut endpoints_by_dc, map);
|
||||
}
|
||||
if self.decision.ipv6_me {
|
||||
let map = self.proxy_map_v6.read().await.clone();
|
||||
extend_signed_endpoints(&mut endpoints_by_dc, map);
|
||||
}
|
||||
|
||||
let configured_dc_groups = endpoints_by_dc.len();
|
||||
let configured_endpoints = endpoints_by_dc.values().map(BTreeSet::len).sum();
|
||||
|
||||
let required_writers = endpoints_by_dc
|
||||
.values()
|
||||
.map(|endpoints| self.required_writers_for_dc_with_floor_mode(endpoints.len(), false))
|
||||
.sum();
|
||||
|
||||
let idle_since = self.registry.writer_idle_since_snapshot().await;
|
||||
let activity = self.registry.writer_activity_snapshot().await;
|
||||
let rtt = self.rtt_stats.lock().await.clone();
|
||||
let writers = self.writers.read().await.clone();
|
||||
|
||||
let mut live_writers_by_dc_endpoint = HashMap::<(i16, SocketAddr), usize>::new();
|
||||
let mut live_writers_by_dc = HashMap::<i16, usize>::new();
|
||||
let mut dc_rtt_agg = HashMap::<i16, (f64, u64)>::new();
|
||||
let mut writer_rows = Vec::<MeApiWriterStatusSnapshot>::with_capacity(writers.len());
|
||||
|
||||
for writer in writers {
|
||||
let endpoint = writer.addr;
|
||||
let dc = i16::try_from(writer.writer_dc).ok();
|
||||
let draining = writer.draining.load(Ordering::Relaxed);
|
||||
let degraded = writer.degraded.load(Ordering::Relaxed);
|
||||
let bound_clients = activity
|
||||
.bound_clients_by_writer
|
||||
.get(&writer.id)
|
||||
.copied()
|
||||
.unwrap_or(0);
|
||||
let idle_for_secs = idle_since
|
||||
.get(&writer.id)
|
||||
.map(|idle_ts| now_epoch_secs.saturating_sub(*idle_ts));
|
||||
let rtt_ema_ms = rtt.get(&writer.id).map(|(_, ema)| *ema);
|
||||
let state = match WriterContour::from_u8(writer.contour.load(Ordering::Relaxed)) {
|
||||
WriterContour::Warm => "warm",
|
||||
WriterContour::Active => "active",
|
||||
WriterContour::Draining => "draining",
|
||||
};
|
||||
|
||||
if !draining {
|
||||
if let Some(dc_idx) = dc {
|
||||
*live_writers_by_dc_endpoint
|
||||
.entry((dc_idx, endpoint))
|
||||
.or_insert(0) += 1;
|
||||
*live_writers_by_dc.entry(dc_idx).or_insert(0) += 1;
|
||||
if let Some(ema_ms) = rtt_ema_ms {
|
||||
let entry = dc_rtt_agg.entry(dc_idx).or_insert((0.0, 0));
|
||||
entry.0 += ema_ms;
|
||||
entry.1 += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writer_rows.push(MeApiWriterStatusSnapshot {
|
||||
writer_id: writer.id,
|
||||
dc,
|
||||
endpoint,
|
||||
generation: writer.generation,
|
||||
state,
|
||||
draining,
|
||||
degraded,
|
||||
bound_clients,
|
||||
idle_for_secs,
|
||||
rtt_ema_ms,
|
||||
});
|
||||
}
|
||||
|
||||
writer_rows.sort_by_key(|row| (row.dc.unwrap_or(i16::MAX), row.endpoint, row.writer_id));
|
||||
|
||||
let mut dcs = Vec::<MeApiDcStatusSnapshot>::with_capacity(endpoints_by_dc.len());
|
||||
let mut available_endpoints = 0usize;
|
||||
let mut alive_writers = 0usize;
|
||||
let floor_mode = self.floor_mode();
|
||||
let adaptive_cpu_cores = (self
|
||||
.me_adaptive_floor_cpu_cores_effective
|
||||
.load(Ordering::Relaxed) as usize)
|
||||
.max(1);
|
||||
for (dc, endpoints) in endpoints_by_dc {
|
||||
let endpoint_count = endpoints.len();
|
||||
let dc_available_endpoints = endpoints
|
||||
.iter()
|
||||
.filter(|endpoint| live_writers_by_dc_endpoint.contains_key(&(dc, **endpoint)))
|
||||
.count();
|
||||
let base_required = self.required_writers_for_dc(endpoint_count);
|
||||
let dc_required_writers =
|
||||
self.required_writers_for_dc_with_floor_mode(endpoint_count, false);
|
||||
let floor_min = if endpoint_count <= 1 {
|
||||
(self
|
||||
.me_adaptive_floor_min_writers_single_endpoint
|
||||
.load(Ordering::Relaxed) as usize)
|
||||
.max(1)
|
||||
.min(base_required.max(1))
|
||||
} else {
|
||||
(self
|
||||
.me_adaptive_floor_min_writers_multi_endpoint
|
||||
.load(Ordering::Relaxed) as usize)
|
||||
.max(1)
|
||||
.min(base_required.max(1))
|
||||
};
|
||||
let extra_per_core = if endpoint_count <= 1 {
|
||||
self.me_adaptive_floor_max_extra_writers_single_per_core
|
||||
.load(Ordering::Relaxed) as usize
|
||||
} else {
|
||||
self.me_adaptive_floor_max_extra_writers_multi_per_core
|
||||
.load(Ordering::Relaxed) as usize
|
||||
};
|
||||
let floor_max = base_required.saturating_add(adaptive_cpu_cores.saturating_mul(extra_per_core));
|
||||
let floor_capped = matches!(floor_mode, MeFloorMode::Adaptive)
|
||||
&& dc_required_writers < base_required;
|
||||
let dc_alive_writers = live_writers_by_dc.get(&dc).copied().unwrap_or(0);
|
||||
let dc_load = activity
|
||||
.active_sessions_by_target_dc
|
||||
.get(&dc)
|
||||
.copied()
|
||||
.unwrap_or(0);
|
||||
let dc_rtt_ms = dc_rtt_agg
|
||||
.get(&dc)
|
||||
.and_then(|(sum, count)| (*count > 0).then_some(*sum / (*count as f64)));
|
||||
|
||||
available_endpoints += dc_available_endpoints;
|
||||
alive_writers += dc_alive_writers;
|
||||
|
||||
dcs.push(MeApiDcStatusSnapshot {
|
||||
dc,
|
||||
endpoint_writers: endpoints
|
||||
.iter()
|
||||
.map(|endpoint| MeApiDcEndpointWriterSnapshot {
|
||||
endpoint: *endpoint,
|
||||
active_writers: live_writers_by_dc_endpoint
|
||||
.get(&(dc, *endpoint))
|
||||
.copied()
|
||||
.unwrap_or(0),
|
||||
})
|
||||
.collect(),
|
||||
endpoints: endpoints.into_iter().collect(),
|
||||
available_endpoints: dc_available_endpoints,
|
||||
available_pct: ratio_pct(dc_available_endpoints, endpoint_count),
|
||||
required_writers: dc_required_writers,
|
||||
floor_min,
|
||||
floor_target: dc_required_writers,
|
||||
floor_max,
|
||||
floor_capped,
|
||||
alive_writers: dc_alive_writers,
|
||||
coverage_pct: ratio_pct(dc_alive_writers, dc_required_writers),
|
||||
rtt_ms: dc_rtt_ms,
|
||||
load: dc_load,
|
||||
});
|
||||
}
|
||||
|
||||
MeApiStatusSnapshot {
|
||||
generated_at_epoch_secs: now_epoch_secs,
|
||||
configured_dc_groups,
|
||||
configured_endpoints,
|
||||
available_endpoints,
|
||||
available_pct: ratio_pct(available_endpoints, configured_endpoints),
|
||||
required_writers,
|
||||
alive_writers,
|
||||
coverage_pct: ratio_pct(alive_writers, required_writers),
|
||||
writers: writer_rows,
|
||||
dcs,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn api_runtime_snapshot(&self) -> MeApiRuntimeSnapshot {
|
||||
let now = Instant::now();
|
||||
let now_epoch_secs = Self::now_epoch_secs();
|
||||
let pending_started_at = self
|
||||
.pending_hardswap_started_at_epoch_secs
|
||||
.load(Ordering::Relaxed);
|
||||
let pending_hardswap_age_secs = (pending_started_at > 0)
|
||||
.then_some(now_epoch_secs.saturating_sub(pending_started_at));
|
||||
|
||||
let mut quarantined_endpoints = Vec::<MeApiQuarantinedEndpointSnapshot>::new();
|
||||
{
|
||||
let guard = self.endpoint_quarantine.lock().await;
|
||||
for (endpoint, expires_at) in guard.iter() {
|
||||
if *expires_at <= now {
|
||||
continue;
|
||||
}
|
||||
let remaining_ms = expires_at.duration_since(now).as_millis() as u64;
|
||||
quarantined_endpoints.push(MeApiQuarantinedEndpointSnapshot {
|
||||
endpoint: *endpoint,
|
||||
remaining_ms,
|
||||
});
|
||||
}
|
||||
}
|
||||
quarantined_endpoints.sort_by_key(|entry| entry.endpoint);
|
||||
|
||||
let mut network_path = Vec::<MeApiDcPathSnapshot>::new();
|
||||
if let Some(upstream) = &self.upstream {
|
||||
for dc in 1..=5 {
|
||||
let dc_idx = dc as i16;
|
||||
let ip_preference = upstream
|
||||
.get_dc_ip_preference(dc_idx)
|
||||
.await
|
||||
.map(ip_preference_label);
|
||||
let selected_addr_v4 = upstream.get_dc_addr(dc_idx, false).await;
|
||||
let selected_addr_v6 = upstream.get_dc_addr(dc_idx, true).await;
|
||||
network_path.push(MeApiDcPathSnapshot {
|
||||
dc: dc_idx,
|
||||
ip_preference,
|
||||
selected_addr_v4,
|
||||
selected_addr_v6,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
MeApiRuntimeSnapshot {
|
||||
active_generation: self.active_generation.load(Ordering::Relaxed),
|
||||
warm_generation: self.warm_generation.load(Ordering::Relaxed),
|
||||
pending_hardswap_generation: self.pending_hardswap_generation.load(Ordering::Relaxed),
|
||||
pending_hardswap_age_secs,
|
||||
hardswap_enabled: self.hardswap.load(Ordering::Relaxed),
|
||||
floor_mode: floor_mode_label(self.floor_mode()),
|
||||
adaptive_floor_idle_secs: self.me_adaptive_floor_idle_secs.load(Ordering::Relaxed),
|
||||
adaptive_floor_min_writers_single_endpoint: self
|
||||
.me_adaptive_floor_min_writers_single_endpoint
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_min_writers_multi_endpoint: self
|
||||
.me_adaptive_floor_min_writers_multi_endpoint
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_recover_grace_secs: self
|
||||
.me_adaptive_floor_recover_grace_secs
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_writers_per_core_total: self
|
||||
.me_adaptive_floor_writers_per_core_total
|
||||
.load(Ordering::Relaxed) as u16,
|
||||
adaptive_floor_cpu_cores_override: self
|
||||
.me_adaptive_floor_cpu_cores_override
|
||||
.load(Ordering::Relaxed) as u16,
|
||||
adaptive_floor_max_extra_writers_single_per_core: self
|
||||
.me_adaptive_floor_max_extra_writers_single_per_core
|
||||
.load(Ordering::Relaxed) as u16,
|
||||
adaptive_floor_max_extra_writers_multi_per_core: self
|
||||
.me_adaptive_floor_max_extra_writers_multi_per_core
|
||||
.load(Ordering::Relaxed) as u16,
|
||||
adaptive_floor_max_active_writers_per_core: self
|
||||
.me_adaptive_floor_max_active_writers_per_core
|
||||
.load(Ordering::Relaxed) as u16,
|
||||
adaptive_floor_max_warm_writers_per_core: self
|
||||
.me_adaptive_floor_max_warm_writers_per_core
|
||||
.load(Ordering::Relaxed) as u16,
|
||||
adaptive_floor_max_active_writers_global: self
|
||||
.me_adaptive_floor_max_active_writers_global
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_max_warm_writers_global: self
|
||||
.me_adaptive_floor_max_warm_writers_global
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_cpu_cores_detected: self
|
||||
.me_adaptive_floor_cpu_cores_detected
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_cpu_cores_effective: self
|
||||
.me_adaptive_floor_cpu_cores_effective
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_global_cap_raw: self
|
||||
.me_adaptive_floor_global_cap_raw
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_global_cap_effective: self
|
||||
.me_adaptive_floor_global_cap_effective
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_target_writers_total: self
|
||||
.me_adaptive_floor_target_writers_total
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_active_cap_configured: self
|
||||
.me_adaptive_floor_active_cap_configured
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_active_cap_effective: self
|
||||
.me_adaptive_floor_active_cap_effective
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_warm_cap_configured: self
|
||||
.me_adaptive_floor_warm_cap_configured
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_warm_cap_effective: self
|
||||
.me_adaptive_floor_warm_cap_effective
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_active_writers_current: self
|
||||
.me_adaptive_floor_active_writers_current
|
||||
.load(Ordering::Relaxed),
|
||||
adaptive_floor_warm_writers_current: self
|
||||
.me_adaptive_floor_warm_writers_current
|
||||
.load(Ordering::Relaxed),
|
||||
me_keepalive_enabled: self.me_keepalive_enabled,
|
||||
me_keepalive_interval_secs: self.me_keepalive_interval.as_secs(),
|
||||
me_keepalive_jitter_secs: self.me_keepalive_jitter.as_secs(),
|
||||
me_keepalive_payload_random: self.me_keepalive_payload_random,
|
||||
rpc_proxy_req_every_secs: self.rpc_proxy_req_every_secs.load(Ordering::Relaxed),
|
||||
me_reconnect_max_concurrent_per_dc: self.me_reconnect_max_concurrent_per_dc,
|
||||
me_reconnect_backoff_base_ms: self.me_reconnect_backoff_base.as_millis() as u64,
|
||||
me_reconnect_backoff_cap_ms: self.me_reconnect_backoff_cap.as_millis() as u64,
|
||||
me_reconnect_fast_retry_count: self.me_reconnect_fast_retry_count,
|
||||
me_pool_drain_ttl_secs: self.me_pool_drain_ttl_secs.load(Ordering::Relaxed),
|
||||
me_pool_force_close_secs: self.me_pool_force_close_secs.load(Ordering::Relaxed),
|
||||
me_pool_min_fresh_ratio: Self::permille_to_ratio(
|
||||
self.me_pool_min_fresh_ratio_permille.load(Ordering::Relaxed),
|
||||
),
|
||||
me_bind_stale_mode: bind_stale_mode_label(self.bind_stale_mode()),
|
||||
me_bind_stale_ttl_secs: self.me_bind_stale_ttl_secs.load(Ordering::Relaxed),
|
||||
me_single_endpoint_shadow_writers: self
|
||||
.me_single_endpoint_shadow_writers
|
||||
.load(Ordering::Relaxed),
|
||||
me_single_endpoint_outage_mode_enabled: self
|
||||
.me_single_endpoint_outage_mode_enabled
|
||||
.load(Ordering::Relaxed),
|
||||
me_single_endpoint_outage_disable_quarantine: self
|
||||
.me_single_endpoint_outage_disable_quarantine
|
||||
.load(Ordering::Relaxed),
|
||||
me_single_endpoint_outage_backoff_min_ms: self
|
||||
.me_single_endpoint_outage_backoff_min_ms
|
||||
.load(Ordering::Relaxed),
|
||||
me_single_endpoint_outage_backoff_max_ms: self
|
||||
.me_single_endpoint_outage_backoff_max_ms
|
||||
.load(Ordering::Relaxed),
|
||||
me_single_endpoint_shadow_rotate_every_secs: self
|
||||
.me_single_endpoint_shadow_rotate_every_secs
|
||||
.load(Ordering::Relaxed),
|
||||
me_deterministic_writer_sort: self
|
||||
.me_deterministic_writer_sort
|
||||
.load(Ordering::Relaxed),
|
||||
me_writer_pick_mode: writer_pick_mode_label(self.writer_pick_mode()),
|
||||
me_writer_pick_sample_size: self.writer_pick_sample_size() as u8,
|
||||
me_socks_kdf_policy: socks_kdf_policy_label(self.socks_kdf_policy()),
|
||||
quarantined_endpoints,
|
||||
network_path,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Expresses `part` out of `total` as a percentage clamped to `0.0..=100.0`.
///
/// A `total` of zero yields `0.0` instead of dividing by zero.
fn ratio_pct(part: usize, total: usize) -> f64 {
    match total {
        0 => 0.0,
        denominator => {
            let fraction = (part as f64) / (denominator as f64);
            (fraction * 100.0).clamp(0.0, 100.0)
        }
    }
}
|
||||
|
||||
/// Merges `map` into `endpoints_by_dc`, keyed by the DC id narrowed to `i16`.
///
/// Entries whose DC id is zero or does not fit in `i16` are ignored. A DC with
/// an empty address list still gets an (empty) entry in the output map.
fn extend_signed_endpoints(
    endpoints_by_dc: &mut BTreeMap<i16, BTreeSet<SocketAddr>>,
    map: HashMap<i32, Vec<(IpAddr, u16)>>,
) {
    let usable = map
        .into_iter()
        .filter_map(|(dc, addrs)| match i16::try_from(dc) {
            Ok(dc_idx) if dc_idx != 0 => Some((dc_idx, addrs)),
            _ => None,
        });

    for (dc_idx, addrs) in usable {
        endpoints_by_dc
            .entry(dc_idx)
            .or_default()
            .extend(addrs.into_iter().map(|(ip, port)| SocketAddr::new(ip, port)));
    }
}
|
||||
|
||||
fn floor_mode_label(mode: MeFloorMode) -> &'static str {
|
||||
match mode {
|
||||
MeFloorMode::Static => "static",
|
||||
MeFloorMode::Adaptive => "adaptive",
|
||||
}
|
||||
}
|
||||
|
||||
fn bind_stale_mode_label(mode: MeBindStaleMode) -> &'static str {
|
||||
match mode {
|
||||
MeBindStaleMode::Never => "never",
|
||||
MeBindStaleMode::Ttl => "ttl",
|
||||
MeBindStaleMode::Always => "always",
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the snake_case label used for `mode` in the runtime snapshot.
fn writer_pick_mode_label(mode: crate::config::MeWriterPickMode) -> &'static str {
    use crate::config::MeWriterPickMode::{P2c, SortedRr};

    match mode {
        SortedRr => "sorted_rr",
        P2c => "p2c",
    }
}
|
||||
|
||||
fn socks_kdf_policy_label(policy: MeSocksKdfPolicy) -> &'static str {
|
||||
match policy {
|
||||
MeSocksKdfPolicy::Strict => "strict",
|
||||
MeSocksKdfPolicy::Compat => "compat",
|
||||
}
|
||||
}
|
||||
|
||||
fn ip_preference_label(preference: IpPreference) -> &'static str {
|
||||
match preference {
|
||||
IpPreference::Unknown => "unknown",
|
||||
IpPreference::PreferV6 => "prefer_v6",
|
||||
IpPreference::PreferV4 => "prefer_v4",
|
||||
IpPreference::BothWork => "both",
|
||||
IpPreference::Unavailable => "unavailable",
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::ratio_pct;

    /// A zero denominator reports 0% instead of dividing by zero.
    #[test]
    fn ratio_pct_is_zero_when_denominator_is_zero() {
        let pct = ratio_pct(1, 0);
        assert_eq!(pct, 0.0);
    }

    /// A numerator larger than the denominator is clamped to 100%.
    #[test]
    fn ratio_pct_is_capped_at_100() {
        let pct = ratio_pct(7, 3);
        assert_eq!(pct, 100.0);
    }

    /// An in-range ratio is reported as a plain percentage.
    #[test]
    fn ratio_pct_reports_expected_value() {
        let pct = ratio_pct(1, 4);
        assert_eq!(pct, 25.0);
    }
}
|
||||
@@ -1,9 +1,10 @@
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering};
|
||||
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, AtomicU64, Ordering};
|
||||
use std::time::{Duration, Instant};
|
||||
use std::io::ErrorKind;
|
||||
|
||||
use bytes::Bytes;
|
||||
use bytes::BytesMut;
|
||||
use rand::Rng;
|
||||
use tokio::sync::mpsc;
|
||||
@@ -49,12 +50,18 @@ impl MePool {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn connect_one(self: &Arc<Self>, addr: SocketAddr, rng: &SecureRandom) -> Result<()> {
|
||||
pub(crate) async fn connect_one_for_dc(
|
||||
self: &Arc<Self>,
|
||||
addr: SocketAddr,
|
||||
writer_dc: i32,
|
||||
rng: &SecureRandom,
|
||||
) -> Result<()> {
|
||||
self.connect_one_with_generation_contour(
|
||||
addr,
|
||||
rng,
|
||||
self.current_generation(),
|
||||
WriterContour::Active,
|
||||
writer_dc,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -65,23 +72,68 @@ impl MePool {
|
||||
rng: &SecureRandom,
|
||||
generation: u64,
|
||||
contour: WriterContour,
|
||||
writer_dc: i32,
|
||||
) -> Result<()> {
|
||||
self.connect_one_with_generation_contour_for_dc(addr, rng, generation, contour, writer_dc)
|
||||
.await
|
||||
}
|
||||
|
||||
pub(super) async fn connect_one_with_generation_contour_for_dc(
|
||||
self: &Arc<Self>,
|
||||
addr: SocketAddr,
|
||||
rng: &SecureRandom,
|
||||
generation: u64,
|
||||
contour: WriterContour,
|
||||
writer_dc: i32,
|
||||
) -> Result<()> {
|
||||
self.connect_one_with_generation_contour_for_dc_with_cap_policy(
|
||||
addr,
|
||||
rng,
|
||||
generation,
|
||||
contour,
|
||||
writer_dc,
|
||||
false,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub(super) async fn connect_one_with_generation_contour_for_dc_with_cap_policy(
|
||||
self: &Arc<Self>,
|
||||
addr: SocketAddr,
|
||||
rng: &SecureRandom,
|
||||
generation: u64,
|
||||
contour: WriterContour,
|
||||
writer_dc: i32,
|
||||
allow_coverage_override: bool,
|
||||
) -> Result<()> {
|
||||
if !self
|
||||
.can_open_writer_for_contour(contour, allow_coverage_override)
|
||||
.await
|
||||
{
|
||||
return Err(ProxyError::Proxy(format!(
|
||||
"ME {contour:?} writer cap reached"
|
||||
)));
|
||||
}
|
||||
|
||||
let secret_len = self.proxy_secret.read().await.secret.len();
|
||||
if secret_len < 32 {
|
||||
return Err(ProxyError::Proxy("proxy-secret too short for ME auth".into()));
|
||||
}
|
||||
|
||||
let (stream, _connect_ms, upstream_egress) = self.connect_tcp(addr).await?;
|
||||
let dc_idx = i16::try_from(writer_dc).ok();
|
||||
let (stream, _connect_ms, upstream_egress) = self.connect_tcp(addr, dc_idx).await?;
|
||||
let hs = self.handshake_only(stream, addr, upstream_egress, rng).await?;
|
||||
|
||||
let writer_id = self.next_writer_id.fetch_add(1, Ordering::Relaxed);
|
||||
let contour = Arc::new(AtomicU8::new(contour.as_u8()));
|
||||
let cancel = CancellationToken::new();
|
||||
let degraded = Arc::new(AtomicBool::new(false));
|
||||
let rtt_ema_ms_x10 = Arc::new(AtomicU32::new(0));
|
||||
let draining = Arc::new(AtomicBool::new(false));
|
||||
let draining_started_at_epoch_secs = Arc::new(AtomicU64::new(0));
|
||||
let drain_deadline_epoch_secs = Arc::new(AtomicU64::new(0));
|
||||
let allow_drain_fallback = Arc::new(AtomicBool::new(false));
|
||||
let (tx, mut rx) = mpsc::channel::<WriterCommand>(4096);
|
||||
let (tx, mut rx) = mpsc::channel::<WriterCommand>(self.writer_cmd_channel_capacity);
|
||||
let mut rpc_writer = RpcWriter {
|
||||
writer: hs.wr,
|
||||
key: hs.write_key,
|
||||
@@ -111,14 +163,17 @@ impl MePool {
|
||||
let writer = MeWriter {
|
||||
id: writer_id,
|
||||
addr,
|
||||
writer_dc,
|
||||
generation,
|
||||
contour: contour.clone(),
|
||||
created_at: Instant::now(),
|
||||
tx: tx.clone(),
|
||||
cancel: cancel.clone(),
|
||||
degraded: degraded.clone(),
|
||||
rtt_ema_ms_x10: rtt_ema_ms_x10.clone(),
|
||||
draining: draining.clone(),
|
||||
draining_started_at_epoch_secs: draining_started_at_epoch_secs.clone(),
|
||||
drain_deadline_epoch_secs: drain_deadline_epoch_secs.clone(),
|
||||
allow_drain_fallback: allow_drain_fallback.clone(),
|
||||
};
|
||||
self.writers.write().await.push(writer.clone());
|
||||
@@ -153,6 +208,7 @@ impl MePool {
|
||||
let keepalive_jitter_signal = self.me_keepalive_jitter;
|
||||
let cancel_reader_token = cancel.clone();
|
||||
let cancel_ping_token = cancel_ping.clone();
|
||||
let reader_route_data_wait_ms = self.me_reader_route_data_wait_ms.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let res = reader_loop(
|
||||
@@ -169,6 +225,8 @@ impl MePool {
|
||||
stats_reader,
|
||||
writer_id,
|
||||
degraded.clone(),
|
||||
rtt_ema_ms_x10.clone(),
|
||||
reader_route_data_wait_ms,
|
||||
cancel_reader_token.clone(),
|
||||
)
|
||||
.await;
|
||||
@@ -254,17 +312,47 @@ impl MePool {
|
||||
p.extend_from_slice(&sent_id.to_le_bytes());
|
||||
{
|
||||
let mut tracker = ping_tracker_ping.lock().await;
|
||||
let before = tracker.len();
|
||||
tracker.retain(|_, (ts, _)| ts.elapsed() < Duration::from_secs(120));
|
||||
let expired = before.saturating_sub(tracker.len());
|
||||
if expired > 0 {
|
||||
stats_ping.increment_me_keepalive_timeout_by(expired as u64);
|
||||
let now_epoch_ms = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_millis() as u64;
|
||||
let mut run_cleanup = false;
|
||||
if let Some(pool) = pool_ping.upgrade() {
|
||||
let last_cleanup_ms = pool
|
||||
.ping_tracker_last_cleanup_epoch_ms
|
||||
.load(Ordering::Relaxed);
|
||||
if now_epoch_ms.saturating_sub(last_cleanup_ms) >= 30_000
|
||||
&& pool
|
||||
.ping_tracker_last_cleanup_epoch_ms
|
||||
.compare_exchange(
|
||||
last_cleanup_ms,
|
||||
now_epoch_ms,
|
||||
Ordering::AcqRel,
|
||||
Ordering::Relaxed,
|
||||
)
|
||||
.is_ok()
|
||||
{
|
||||
run_cleanup = true;
|
||||
}
|
||||
}
|
||||
|
||||
if run_cleanup {
|
||||
let before = tracker.len();
|
||||
tracker.retain(|_, (ts, _)| ts.elapsed() < Duration::from_secs(120));
|
||||
let expired = before.saturating_sub(tracker.len());
|
||||
if expired > 0 {
|
||||
stats_ping.increment_me_keepalive_timeout_by(expired as u64);
|
||||
}
|
||||
}
|
||||
tracker.insert(sent_id, (std::time::Instant::now(), writer_id));
|
||||
}
|
||||
ping_id = ping_id.wrapping_add(1);
|
||||
stats_ping.increment_me_keepalive_sent();
|
||||
if tx_ping.send(WriterCommand::DataAndFlush(p)).await.is_err() {
|
||||
if tx_ping
|
||||
.send(WriterCommand::DataAndFlush(Bytes::from(p)))
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
stats_ping.increment_me_keepalive_failed();
|
||||
debug!("ME ping failed, removing dead writer");
|
||||
cancel_ping.cancel();
|
||||
@@ -338,7 +426,11 @@ impl MePool {
|
||||
meta.proto_flags,
|
||||
);
|
||||
|
||||
if tx_signal.send(WriterCommand::DataAndFlush(payload)).await.is_err() {
|
||||
if tx_signal
|
||||
.send(WriterCommand::DataAndFlush(payload))
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
|
||||
let _ = pool.registry.unregister(conn_id).await;
|
||||
cancel_signal.cancel();
|
||||
@@ -369,7 +461,7 @@ impl MePool {
|
||||
close_payload.extend_from_slice(&conn_id.to_le_bytes());
|
||||
|
||||
if tx_signal
|
||||
.send(WriterCommand::DataAndFlush(close_payload))
|
||||
.send(WriterCommand::DataAndFlush(Bytes::from(close_payload)))
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
@@ -404,6 +496,7 @@ impl MePool {
|
||||
async fn remove_writer_only(self: &Arc<Self>, writer_id: u64) -> Vec<BoundConn> {
|
||||
let mut close_tx: Option<mpsc::Sender<WriterCommand>> = None;
|
||||
let mut removed_addr: Option<SocketAddr> = None;
|
||||
let mut removed_dc: Option<i32> = None;
|
||||
let mut removed_uptime: Option<Duration> = None;
|
||||
let mut trigger_refill = false;
|
||||
{
|
||||
@@ -417,6 +510,7 @@ impl MePool {
|
||||
self.stats.increment_me_writer_removed_total();
|
||||
w.cancel.cancel();
|
||||
removed_addr = Some(w.addr);
|
||||
removed_dc = Some(w.writer_dc);
|
||||
removed_uptime = Some(w.created_at.elapsed());
|
||||
trigger_refill = !was_draining;
|
||||
if trigger_refill {
|
||||
@@ -431,11 +525,12 @@ impl MePool {
|
||||
}
|
||||
if trigger_refill
|
||||
&& let Some(addr) = removed_addr
|
||||
&& let Some(writer_dc) = removed_dc
|
||||
{
|
||||
if let Some(uptime) = removed_uptime {
|
||||
self.maybe_quarantine_flapping_endpoint(addr, uptime).await;
|
||||
}
|
||||
self.trigger_immediate_refill(addr);
|
||||
self.trigger_immediate_refill_for_dc(addr, writer_dc);
|
||||
}
|
||||
self.rtt_stats.lock().await.remove(&writer_id);
|
||||
self.registry.writer_lost(writer_id).await
|
||||
@@ -454,8 +549,14 @@ impl MePool {
|
||||
let already_draining = w.draining.swap(true, Ordering::Relaxed);
|
||||
w.allow_drain_fallback
|
||||
.store(allow_drain_fallback, Ordering::Relaxed);
|
||||
let now_epoch_secs = Self::now_epoch_secs();
|
||||
w.draining_started_at_epoch_secs
|
||||
.store(Self::now_epoch_secs(), Ordering::Relaxed);
|
||||
.store(now_epoch_secs, Ordering::Relaxed);
|
||||
let drain_deadline_epoch_secs = timeout
|
||||
.map(|duration| now_epoch_secs.saturating_add(duration.as_secs()))
|
||||
.unwrap_or(0);
|
||||
w.drain_deadline_epoch_secs
|
||||
.store(drain_deadline_epoch_secs, Ordering::Relaxed);
|
||||
if !already_draining {
|
||||
self.stats.increment_pool_drain_active();
|
||||
}
|
||||
@@ -479,26 +580,6 @@ impl MePool {
|
||||
allow_drain_fallback,
|
||||
"ME writer marked draining"
|
||||
);
|
||||
|
||||
let pool = Arc::downgrade(self);
|
||||
tokio::spawn(async move {
|
||||
let deadline = timeout.map(|t| Instant::now() + t);
|
||||
while let Some(p) = pool.upgrade() {
|
||||
if let Some(deadline_at) = deadline
|
||||
&& Instant::now() >= deadline_at
|
||||
{
|
||||
warn!(writer_id, "Drain timeout, force-closing");
|
||||
p.stats.increment_pool_force_close_total();
|
||||
let _ = p.remove_writer_and_close_clients(writer_id).await;
|
||||
break;
|
||||
}
|
||||
if p.registry.is_writer_empty(writer_id).await {
|
||||
let _ = p.remove_writer_only(writer_id).await;
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
pub(crate) async fn mark_writer_draining(self: &Arc<Self>, writer_id: u64) {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io::ErrorKind;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
|
||||
use std::time::Instant;
|
||||
|
||||
use bytes::{Bytes, BytesMut};
|
||||
@@ -34,6 +34,8 @@ pub(crate) async fn reader_loop(
|
||||
stats: Arc<Stats>,
|
||||
_writer_id: u64,
|
||||
degraded: Arc<AtomicBool>,
|
||||
writer_rtt_ema_ms_x10: Arc<AtomicU32>,
|
||||
reader_route_data_wait_ms: Arc<AtomicU64>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<()> {
|
||||
let mut raw = enc_leftover;
|
||||
@@ -56,17 +58,14 @@ pub(crate) async fn reader_loop(
|
||||
|
||||
let blocks = raw.len() / 16 * 16;
|
||||
if blocks > 0 {
|
||||
let mut chunk = raw.split_to(blocks);
|
||||
let mut new_iv = [0u8; 16];
|
||||
new_iv.copy_from_slice(&raw[blocks - 16..blocks]);
|
||||
|
||||
let mut chunk = vec![0u8; blocks];
|
||||
chunk.copy_from_slice(&raw[..blocks]);
|
||||
new_iv.copy_from_slice(&chunk[blocks - 16..blocks]);
|
||||
AesCbc::new(dk, div)
|
||||
.decrypt_in_place(&mut chunk)
|
||||
.decrypt_in_place(&mut chunk[..])
|
||||
.map_err(|e| ProxyError::Crypto(format!("{e}")))?;
|
||||
div = new_iv;
|
||||
dec.extend_from_slice(&chunk);
|
||||
let _ = raw.split_to(blocks);
|
||||
}
|
||||
|
||||
while dec.len() >= 12 {
|
||||
@@ -84,7 +83,7 @@ pub(crate) async fn reader_loop(
|
||||
break;
|
||||
}
|
||||
|
||||
let frame = dec.split_to(fl);
|
||||
let frame = dec.split_to(fl).freeze();
|
||||
let pe = fl - 4;
|
||||
let ec = u32::from_le_bytes(frame[pe..pe + 4].try_into().unwrap());
|
||||
let actual_crc = rpc_crc(crc_mode, &frame[..pe]);
|
||||
@@ -110,21 +109,27 @@ pub(crate) async fn reader_loop(
|
||||
}
|
||||
expected_seq = expected_seq.wrapping_add(1);
|
||||
|
||||
let payload = &frame[8..pe];
|
||||
let payload = frame.slice(8..pe);
|
||||
if payload.len() < 4 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let pt = u32::from_le_bytes(payload[0..4].try_into().unwrap());
|
||||
let body = &payload[4..];
|
||||
let body = payload.slice(4..);
|
||||
|
||||
if pt == RPC_PROXY_ANS_U32 && body.len() >= 12 {
|
||||
let flags = u32::from_le_bytes(body[0..4].try_into().unwrap());
|
||||
let cid = u64::from_le_bytes(body[4..12].try_into().unwrap());
|
||||
let data = Bytes::copy_from_slice(&body[12..]);
|
||||
let data = body.slice(12..);
|
||||
trace!(cid, flags, len = data.len(), "RPC_PROXY_ANS");
|
||||
|
||||
let routed = reg.route(cid, MeResponse::Data { flags, data }).await;
|
||||
let data_wait_ms = reader_route_data_wait_ms.load(Ordering::Relaxed);
|
||||
let routed = if data_wait_ms == 0 {
|
||||
reg.route_nowait(cid, MeResponse::Data { flags, data }).await
|
||||
} else {
|
||||
reg.route_with_timeout(cid, MeResponse::Data { flags, data }, data_wait_ms)
|
||||
.await
|
||||
};
|
||||
if !matches!(routed, RouteResult::Routed) {
|
||||
match routed {
|
||||
RouteResult::NoConn => stats.increment_me_route_drop_no_conn(),
|
||||
@@ -147,7 +152,7 @@ pub(crate) async fn reader_loop(
|
||||
let cfm = u32::from_le_bytes(body[8..12].try_into().unwrap());
|
||||
trace!(cid, cfm, "RPC_SIMPLE_ACK");
|
||||
|
||||
let routed = reg.route(cid, MeResponse::Ack(cfm)).await;
|
||||
let routed = reg.route_nowait(cid, MeResponse::Ack(cfm)).await;
|
||||
if !matches!(routed, RouteResult::Routed) {
|
||||
match routed {
|
||||
RouteResult::NoConn => stats.increment_me_route_drop_no_conn(),
|
||||
@@ -181,7 +186,11 @@ pub(crate) async fn reader_loop(
|
||||
let mut pong = Vec::with_capacity(12);
|
||||
pong.extend_from_slice(&RPC_PONG_U32.to_le_bytes());
|
||||
pong.extend_from_slice(&ping_id.to_le_bytes());
|
||||
if tx.send(WriterCommand::DataAndFlush(pong)).await.is_err() {
|
||||
if tx
|
||||
.send(WriterCommand::DataAndFlush(Bytes::from(pong)))
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
warn!("PONG send failed");
|
||||
break;
|
||||
}
|
||||
@@ -204,6 +213,8 @@ pub(crate) async fn reader_loop(
|
||||
}
|
||||
let degraded_now = entry.1 > entry.0 * 2.0;
|
||||
degraded.store(degraded_now, Ordering::Relaxed);
|
||||
writer_rtt_ema_ms_x10
|
||||
.store((entry.1 * 10.0).clamp(0.0, u32::MAX as f64) as u32, Ordering::Relaxed);
|
||||
trace!(writer_id = wid, rtt_ms = rtt, ema_ms = entry.1, base_ms = entry.0, degraded = degraded_now, "ME RTT sample");
|
||||
}
|
||||
} else {
|
||||
@@ -222,5 +233,5 @@ async fn send_close_conn(tx: &mpsc::Sender<WriterCommand>, conn_id: u64) {
|
||||
p.extend_from_slice(&RPC_CLOSE_CONN_U32.to_le_bytes());
|
||||
p.extend_from_slice(&conn_id.to_le_bytes());
|
||||
|
||||
let _ = tx.send(WriterCommand::DataAndFlush(p)).await;
|
||||
let _ = tx.send(WriterCommand::DataAndFlush(Bytes::from(p))).await;
|
||||
}
|
||||
|
||||
@@ -9,7 +9,6 @@ use tokio::sync::mpsc::error::TrySendError;
|
||||
use super::codec::WriterCommand;
|
||||
use super::MeResponse;
|
||||
|
||||
const ROUTE_CHANNEL_CAPACITY: usize = 4096;
|
||||
const ROUTE_BACKPRESSURE_BASE_TIMEOUT_MS: u64 = 25;
|
||||
const ROUTE_BACKPRESSURE_HIGH_TIMEOUT_MS: u64 = 120;
|
||||
const ROUTE_BACKPRESSURE_HIGH_WATERMARK_PCT: u8 = 80;
|
||||
@@ -45,6 +44,12 @@ pub struct ConnWriter {
|
||||
pub tx: mpsc::Sender<WriterCommand>,
|
||||
}
|
||||
|
||||
/// Aggregated registry activity as produced by `writer_activity_snapshot`.
#[derive(Clone, Debug, Default)]
|
||||
pub(super) struct WriterActivitySnapshot {
|
||||
/// Number of connections currently bound to each writer id.
pub bound_clients_by_writer: HashMap<u64, usize>,
|
||||
/// Number of tracked connections per target DC; a target DC of zero is not counted.
pub active_sessions_by_target_dc: HashMap<i16, usize>,
|
||||
}
|
||||
|
||||
struct RegistryInner {
|
||||
map: HashMap<u64, mpsc::Sender<MeResponse>>,
|
||||
writers: HashMap<u64, mpsc::Sender<WriterCommand>>,
|
||||
@@ -72,6 +77,7 @@ impl RegistryInner {
|
||||
pub struct ConnRegistry {
|
||||
inner: RwLock<RegistryInner>,
|
||||
next_id: AtomicU64,
|
||||
route_channel_capacity: usize,
|
||||
route_backpressure_base_timeout_ms: AtomicU64,
|
||||
route_backpressure_high_timeout_ms: AtomicU64,
|
||||
route_backpressure_high_watermark_pct: AtomicU8,
|
||||
@@ -85,11 +91,12 @@ impl ConnRegistry {
|
||||
.as_secs()
|
||||
}
|
||||
|
||||
pub fn new() -> Self {
|
||||
pub fn with_route_channel_capacity(route_channel_capacity: usize) -> Self {
|
||||
let start = rand::random::<u64>() | 1;
|
||||
Self {
|
||||
inner: RwLock::new(RegistryInner::new()),
|
||||
next_id: AtomicU64::new(start),
|
||||
route_channel_capacity: route_channel_capacity.max(1),
|
||||
route_backpressure_base_timeout_ms: AtomicU64::new(
|
||||
ROUTE_BACKPRESSURE_BASE_TIMEOUT_MS,
|
||||
),
|
||||
@@ -102,6 +109,11 @@ impl ConnRegistry {
|
||||
}
|
||||
}
|
||||
|
||||
/// Test-only constructor using a route channel capacity of 4096.
#[cfg(test)]
pub fn new() -> Self {
    const TEST_ROUTE_CHANNEL_CAPACITY: usize = 4096;
    Self::with_route_channel_capacity(TEST_ROUTE_CHANNEL_CAPACITY)
}
|
||||
|
||||
pub fn update_route_backpressure_policy(
|
||||
&self,
|
||||
base_timeout_ms: u64,
|
||||
@@ -121,7 +133,7 @@ impl ConnRegistry {
|
||||
|
||||
pub async fn register(&self) -> (u64, mpsc::Receiver<MeResponse>) {
|
||||
let id = self.next_id.fetch_add(1, Ordering::Relaxed);
|
||||
let (tx, rx) = mpsc::channel(ROUTE_CHANNEL_CAPACITY);
|
||||
let (tx, rx) = mpsc::channel(self.route_channel_capacity);
|
||||
self.inner.write().await.map.insert(id, tx);
|
||||
(id, rx)
|
||||
}
|
||||
@@ -173,11 +185,11 @@ impl ConnRegistry {
|
||||
.route_backpressure_high_watermark_pct
|
||||
.load(Ordering::Relaxed)
|
||||
.clamp(1, 100);
|
||||
let used = ROUTE_CHANNEL_CAPACITY.saturating_sub(tx.capacity());
|
||||
let used_pct = if ROUTE_CHANNEL_CAPACITY == 0 {
|
||||
let used = self.route_channel_capacity.saturating_sub(tx.capacity());
|
||||
let used_pct = if self.route_channel_capacity == 0 {
|
||||
100
|
||||
} else {
|
||||
(used.saturating_mul(100) / ROUTE_CHANNEL_CAPACITY) as u8
|
||||
(used.saturating_mul(100) / self.route_channel_capacity) as u8
|
||||
};
|
||||
let high_profile = used_pct >= high_watermark_pct;
|
||||
let timeout_ms = if high_profile {
|
||||
@@ -202,6 +214,74 @@ impl ConnRegistry {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn route_nowait(&self, id: u64, resp: MeResponse) -> RouteResult {
|
||||
let tx = {
|
||||
let inner = self.inner.read().await;
|
||||
inner.map.get(&id).cloned()
|
||||
};
|
||||
|
||||
let Some(tx) = tx else {
|
||||
return RouteResult::NoConn;
|
||||
};
|
||||
|
||||
match tx.try_send(resp) {
|
||||
Ok(()) => RouteResult::Routed,
|
||||
Err(TrySendError::Closed(_)) => RouteResult::ChannelClosed,
|
||||
Err(TrySendError::Full(_)) => RouteResult::QueueFullBase,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn route_with_timeout(
|
||||
&self,
|
||||
id: u64,
|
||||
resp: MeResponse,
|
||||
timeout_ms: u64,
|
||||
) -> RouteResult {
|
||||
if timeout_ms == 0 {
|
||||
return self.route_nowait(id, resp).await;
|
||||
}
|
||||
|
||||
let tx = {
|
||||
let inner = self.inner.read().await;
|
||||
inner.map.get(&id).cloned()
|
||||
};
|
||||
|
||||
let Some(tx) = tx else {
|
||||
return RouteResult::NoConn;
|
||||
};
|
||||
|
||||
match tx.try_send(resp) {
|
||||
Ok(()) => RouteResult::Routed,
|
||||
Err(TrySendError::Closed(_)) => RouteResult::ChannelClosed,
|
||||
Err(TrySendError::Full(resp)) => {
|
||||
let high_watermark_pct = self
|
||||
.route_backpressure_high_watermark_pct
|
||||
.load(Ordering::Relaxed)
|
||||
.clamp(1, 100);
|
||||
let used = self.route_channel_capacity.saturating_sub(tx.capacity());
|
||||
let used_pct = if self.route_channel_capacity == 0 {
|
||||
100
|
||||
} else {
|
||||
(used.saturating_mul(100) / self.route_channel_capacity) as u8
|
||||
};
|
||||
let high_profile = used_pct >= high_watermark_pct;
|
||||
let timeout_dur = Duration::from_millis(timeout_ms.max(1));
|
||||
|
||||
match tokio::time::timeout(timeout_dur, tx.send(resp)).await {
|
||||
Ok(Ok(())) => RouteResult::Routed,
|
||||
Ok(Err(_)) => RouteResult::ChannelClosed,
|
||||
Err(_) => {
|
||||
if high_profile {
|
||||
RouteResult::QueueFullHigh
|
||||
} else {
|
||||
RouteResult::QueueFullBase
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn bind_writer(
|
||||
&self,
|
||||
conn_id: u64,
|
||||
@@ -241,6 +321,43 @@ impl ConnRegistry {
|
||||
inner.writer_idle_since_epoch_secs.clone()
|
||||
}
|
||||
|
||||
pub async fn writer_idle_since_for_writer_ids(
|
||||
&self,
|
||||
writer_ids: &[u64],
|
||||
) -> HashMap<u64, u64> {
|
||||
let inner = self.inner.read().await;
|
||||
let mut out = HashMap::<u64, u64>::with_capacity(writer_ids.len());
|
||||
for writer_id in writer_ids {
|
||||
if let Some(idle_since) = inner.writer_idle_since_epoch_secs.get(writer_id).copied() {
|
||||
out.insert(*writer_id, idle_since);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
pub(super) async fn writer_activity_snapshot(&self) -> WriterActivitySnapshot {
|
||||
let inner = self.inner.read().await;
|
||||
let mut bound_clients_by_writer = HashMap::<u64, usize>::new();
|
||||
let mut active_sessions_by_target_dc = HashMap::<i16, usize>::new();
|
||||
|
||||
for (writer_id, conn_ids) in &inner.conns_for_writer {
|
||||
bound_clients_by_writer.insert(*writer_id, conn_ids.len());
|
||||
}
|
||||
for conn_meta in inner.meta.values() {
|
||||
if conn_meta.target_dc == 0 {
|
||||
continue;
|
||||
}
|
||||
*active_sessions_by_target_dc
|
||||
.entry(conn_meta.target_dc)
|
||||
.or_insert(0) += 1;
|
||||
}
|
||||
|
||||
WriterActivitySnapshot {
|
||||
bound_clients_by_writer,
|
||||
active_sessions_by_target_dc,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_writer(&self, conn_id: u64) -> Option<ConnWriter> {
|
||||
let inner = self.inner.read().await;
|
||||
let writer_id = inner.writer_for_conn.get(&conn_id).cloned()?;
|
||||
@@ -248,6 +365,11 @@ impl ConnRegistry {
|
||||
Some(ConnWriter { writer_id, tx: writer })
|
||||
}
|
||||
|
||||
pub async fn active_conn_ids(&self) -> Vec<u64> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.writer_for_conn.keys().copied().collect()
|
||||
}
|
||||
|
||||
pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.writers.remove(&writer_id);
|
||||
@@ -288,3 +410,70 @@ impl ConnRegistry {
|
||||
.unwrap_or(true)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::net::{IpAddr, Ipv4Addr, SocketAddr};

    use super::{ConnMeta, ConnRegistry};

    /// Two connections on writer 10 and one on writer 20, each targeting a
    /// different DC id (including a negative one), must be counted per writer
    /// and per target DC.
    #[tokio::test]
    async fn writer_activity_snapshot_tracks_writer_and_dc_load() {
        let registry = ConnRegistry::new();

        let (conn_a, _rx_a) = registry.register().await;
        let (conn_b, _rx_b) = registry.register().await;
        let (conn_c, _rx_c) = registry.register().await;
        let (writer_tx_a, _writer_rx_a) = tokio::sync::mpsc::channel(8);
        let (writer_tx_b, _writer_rx_b) = tokio::sync::mpsc::channel(8);

        let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 443);
        let meta_for = |target_dc: i16| ConnMeta {
            target_dc,
            client_addr: addr,
            our_addr: addr,
            proto_flags: 0,
        };

        registry
            .bind_writer(conn_a, 10, writer_tx_a.clone(), meta_for(2))
            .await;
        registry
            .bind_writer(conn_b, 10, writer_tx_a, meta_for(-2))
            .await;
        registry
            .bind_writer(conn_c, 20, writer_tx_b, meta_for(4))
            .await;

        let snapshot = registry.writer_activity_snapshot().await;

        let by_writer = &snapshot.bound_clients_by_writer;
        assert_eq!(by_writer.get(&10), Some(&2));
        assert_eq!(by_writer.get(&20), Some(&1));

        let by_dc = &snapshot.active_sessions_by_target_dc;
        assert_eq!(by_dc.get(&2), Some(&1));
        assert_eq!(by_dc.get(&-2), Some(&1));
        assert_eq!(by_dc.get(&4), Some(&1));
    }
}
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::time::SystemTime;
|
||||
use httpdate;
|
||||
|
||||
use crate::error::{ProxyError, Result};
|
||||
use super::selftest::record_timeskew_sample;
|
||||
|
||||
pub const PROXY_SECRET_MIN_LEN: usize = 32;
|
||||
|
||||
@@ -98,6 +99,7 @@ pub async fn download_proxy_secret_with_max_len(max_len: usize) -> Result<Vec<u8
|
||||
})
|
||||
{
|
||||
let skew_secs = skew.as_secs();
|
||||
record_timeskew_sample("proxy_secret_date_header", skew_secs);
|
||||
if skew_secs > 60 {
|
||||
warn!(skew_secs, "Time skew >60s detected from proxy-secret Date header");
|
||||
} else if skew_secs > 30 {
|
||||
|
||||
260
src/transport/middle_proxy/selftest.rs
Normal file
260
src/transport/middle_proxy/selftest.rs
Normal file
@@ -0,0 +1,260 @@
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::sync::{Mutex, OnceLock};
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
/// Self-test classification of the most recently observed bound address.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum BndAddrStatus {
    Ok,
    Bogon,
    Error,
}

impl BndAddrStatus {
    /// Returns the lowercase label used when this status is exported in
    /// snapshots: `"ok"`, `"bogon"` or `"error"`.
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            BndAddrStatus::Ok => "ok",
            BndAddrStatus::Bogon => "bogon",
            BndAddrStatus::Error => "error",
        }
    }
}
|
||||
|
||||
/// Self-test classification of the most recently observed bound port.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum BndPortStatus {
    Ok,
    Zero,
    Error,
}

impl BndPortStatus {
    /// Returns the lowercase label used when this status is exported in
    /// snapshots: `"ok"`, `"zero"` or `"error"`.
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            BndPortStatus::Ok => "ok",
            BndPortStatus::Zero => "zero",
            BndPortStatus::Error => "error",
        }
    }
}
|
||||
|
||||
/// Point-in-time view of the global bound-address self-test state, as
/// produced by `bnd_snapshot`.
#[derive(Debug, Clone)]
pub(crate) struct MeBndSnapshot {
    /// Label of the recorded address status (`BndAddrStatus::as_str`).
    pub addr_status: &'static str,
    /// Label of the recorded port status (`BndPortStatus::as_str`).
    pub port_status: &'static str,
    /// Last recorded bound address, if any.
    pub last_addr: Option<SocketAddr>,
    /// Seconds elapsed since the last record; `None` when nothing was recorded.
    pub last_seen_age_secs: Option<u64>,
}
|
||||
|
||||
/// Point-in-time view of the bound-address self-test state of one upstream,
/// as produced by `upstream_bnd_snapshots`.
#[derive(Debug, Clone)]
pub(crate) struct MeUpstreamBndSnapshot {
    /// Identifier the status was recorded under.
    pub upstream_id: usize,
    /// Label of the recorded address status (`BndAddrStatus::as_str`).
    pub addr_status: &'static str,
    /// Label of the recorded port status (`BndPortStatus::as_str`).
    pub port_status: &'static str,
    /// Last recorded bound socket address, if any.
    pub last_addr: Option<SocketAddr>,
    /// Last recorded IP address, if any.
    pub last_ip: Option<IpAddr>,
    /// Seconds elapsed since the last record; `None` when no timestamp exists.
    pub last_seen_age_secs: Option<u64>,
}
|
||||
|
||||
/// Summary of recorded time-skew samples, as produced by `timeskew_snapshot`.
///
/// The `Default` value (all `None`, zero samples) is what callers receive
/// when no data is available.
#[derive(Debug, Clone, Default)]
pub(crate) struct MeTimeskewSnapshot {
    /// Largest skew among samples inside the 15-minute window, if any.
    pub max_skew_secs_15m: Option<u64>,
    /// Number of samples inside the 15-minute window.
    pub samples_15m: usize,
    /// Skew of the most recent sample, regardless of its age.
    pub last_skew_secs: Option<u64>,
    /// Source label of the most recent sample.
    pub last_source: Option<&'static str>,
    /// Seconds elapsed since the most recent sample was recorded.
    pub last_seen_age_secs: Option<u64>,
}
|
||||
|
||||
/// One recorded time-skew observation kept in the rolling sample queue.
#[derive(Debug, Clone, Copy)]
struct MeTimeskewSample {
    /// Wall-clock time of recording, in seconds since the Unix epoch.
    ts_epoch_secs: u64,
    /// Observed skew in seconds.
    skew_secs: u64,
    /// Static label naming where the observation came from.
    source: &'static str,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct MeSelftestState {
|
||||
bnd_addr_status: BndAddrStatus,
|
||||
bnd_port_status: BndPortStatus,
|
||||
bnd_last_addr: Option<SocketAddr>,
|
||||
bnd_last_seen_epoch_secs: Option<u64>,
|
||||
upstream_bnd: HashMap<usize, UpstreamBndState>,
|
||||
timeskew_samples: VecDeque<MeTimeskewSample>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
struct UpstreamBndState {
|
||||
addr_status: BndAddrStatus,
|
||||
port_status: BndPortStatus,
|
||||
last_addr: Option<SocketAddr>,
|
||||
last_ip: Option<IpAddr>,
|
||||
last_seen_epoch_secs: Option<u64>,
|
||||
}
|
||||
|
||||
impl Default for MeSelftestState {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
bnd_addr_status: BndAddrStatus::Error,
|
||||
bnd_port_status: BndPortStatus::Error,
|
||||
bnd_last_addr: None,
|
||||
bnd_last_seen_epoch_secs: None,
|
||||
upstream_bnd: HashMap::new(),
|
||||
timeskew_samples: VecDeque::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Upper bound on the number of time-skew samples kept in the queue.
const MAX_TIMESKEW_SAMPLES: usize = 512;

/// Length of the reporting window in seconds (15 minutes). Samples are
/// retained for twice this long before being evicted.
const TIMESKEW_WINDOW_SECS: u64 = 15 * 60;
|
||||
|
||||
static ME_SELFTEST_STATE: OnceLock<Mutex<MeSelftestState>> = OnceLock::new();
|
||||
|
||||
fn state() -> &'static Mutex<MeSelftestState> {
|
||||
ME_SELFTEST_STATE.get_or_init(|| Mutex::new(MeSelftestState::default()))
|
||||
}
|
||||
|
||||
pub(crate) fn record_bnd_status(
|
||||
addr_status: BndAddrStatus,
|
||||
port_status: BndPortStatus,
|
||||
last_addr: Option<SocketAddr>,
|
||||
) {
|
||||
let now_epoch_secs = now_epoch_secs();
|
||||
let Ok(mut guard) = state().lock() else {
|
||||
return;
|
||||
};
|
||||
guard.bnd_addr_status = addr_status;
|
||||
guard.bnd_port_status = port_status;
|
||||
guard.bnd_last_addr = last_addr;
|
||||
guard.bnd_last_seen_epoch_secs = Some(now_epoch_secs);
|
||||
}
|
||||
|
||||
pub(crate) fn bnd_snapshot() -> MeBndSnapshot {
|
||||
let now_epoch_secs = now_epoch_secs();
|
||||
let Ok(guard) = state().lock() else {
|
||||
return MeBndSnapshot {
|
||||
addr_status: BndAddrStatus::Error.as_str(),
|
||||
port_status: BndPortStatus::Error.as_str(),
|
||||
last_addr: None,
|
||||
last_seen_age_secs: None,
|
||||
};
|
||||
};
|
||||
MeBndSnapshot {
|
||||
addr_status: guard.bnd_addr_status.as_str(),
|
||||
port_status: guard.bnd_port_status.as_str(),
|
||||
last_addr: guard.bnd_last_addr,
|
||||
last_seen_age_secs: guard
|
||||
.bnd_last_seen_epoch_secs
|
||||
.map(|value| now_epoch_secs.saturating_sub(value)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn record_upstream_bnd_status(
|
||||
upstream_id: usize,
|
||||
addr_status: BndAddrStatus,
|
||||
port_status: BndPortStatus,
|
||||
last_addr: Option<SocketAddr>,
|
||||
last_ip: Option<IpAddr>,
|
||||
) {
|
||||
let now_epoch_secs = now_epoch_secs();
|
||||
let Ok(mut guard) = state().lock() else {
|
||||
return;
|
||||
};
|
||||
guard.upstream_bnd.insert(
|
||||
upstream_id,
|
||||
UpstreamBndState {
|
||||
addr_status,
|
||||
port_status,
|
||||
last_addr,
|
||||
last_ip,
|
||||
last_seen_epoch_secs: Some(now_epoch_secs),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
pub(crate) fn upstream_bnd_snapshots() -> Vec<MeUpstreamBndSnapshot> {
|
||||
let now_epoch_secs = now_epoch_secs();
|
||||
let Ok(guard) = state().lock() else {
|
||||
return Vec::new();
|
||||
};
|
||||
let mut out = Vec::with_capacity(guard.upstream_bnd.len());
|
||||
for (upstream_id, entry) in &guard.upstream_bnd {
|
||||
out.push(MeUpstreamBndSnapshot {
|
||||
upstream_id: *upstream_id,
|
||||
addr_status: entry.addr_status.as_str(),
|
||||
port_status: entry.port_status.as_str(),
|
||||
last_addr: entry.last_addr,
|
||||
last_ip: entry.last_ip,
|
||||
last_seen_age_secs: entry
|
||||
.last_seen_epoch_secs
|
||||
.map(|value| now_epoch_secs.saturating_sub(value)),
|
||||
});
|
||||
}
|
||||
out.sort_by_key(|entry| entry.upstream_id);
|
||||
out
|
||||
}
|
||||
|
||||
pub(crate) fn record_timeskew_sample(source: &'static str, skew_secs: u64) {
|
||||
let now_epoch_secs = now_epoch_secs();
|
||||
let Ok(mut guard) = state().lock() else {
|
||||
return;
|
||||
};
|
||||
guard.timeskew_samples.push_back(MeTimeskewSample {
|
||||
ts_epoch_secs: now_epoch_secs,
|
||||
skew_secs,
|
||||
source,
|
||||
});
|
||||
while guard.timeskew_samples.len() > MAX_TIMESKEW_SAMPLES {
|
||||
guard.timeskew_samples.pop_front();
|
||||
}
|
||||
let cutoff = now_epoch_secs.saturating_sub(TIMESKEW_WINDOW_SECS * 2);
|
||||
while guard
|
||||
.timeskew_samples
|
||||
.front()
|
||||
.is_some_and(|sample| sample.ts_epoch_secs < cutoff)
|
||||
{
|
||||
guard.timeskew_samples.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn timeskew_snapshot() -> MeTimeskewSnapshot {
|
||||
let now_epoch_secs = now_epoch_secs();
|
||||
let Ok(guard) = state().lock() else {
|
||||
return MeTimeskewSnapshot::default();
|
||||
};
|
||||
|
||||
let mut max_skew_secs_15m = None;
|
||||
let mut samples_15m = 0usize;
|
||||
let window_start = now_epoch_secs.saturating_sub(TIMESKEW_WINDOW_SECS);
|
||||
for sample in &guard.timeskew_samples {
|
||||
if sample.ts_epoch_secs < window_start {
|
||||
continue;
|
||||
}
|
||||
samples_15m = samples_15m.saturating_add(1);
|
||||
max_skew_secs_15m = Some(max_skew_secs_15m.unwrap_or(0).max(sample.skew_secs));
|
||||
}
|
||||
|
||||
let (last_skew_secs, last_source, last_seen_age_secs) =
|
||||
if let Some(last) = guard.timeskew_samples.back() {
|
||||
(
|
||||
Some(last.skew_secs),
|
||||
Some(last.source),
|
||||
Some(now_epoch_secs.saturating_sub(last.ts_epoch_secs)),
|
||||
)
|
||||
} else {
|
||||
(None, None, None)
|
||||
};
|
||||
|
||||
MeTimeskewSnapshot {
|
||||
max_skew_secs_15m,
|
||||
samples_15m,
|
||||
last_skew_secs,
|
||||
last_source,
|
||||
last_seen_age_secs,
|
||||
}
|
||||
}
|
||||
|
||||
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Yields 0 if the system clock reports a time before the epoch.
fn now_epoch_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
|
||||
@@ -1,16 +1,18 @@
|
||||
use std::cmp::Reverse;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::Duration;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use bytes::Bytes;
|
||||
use tokio::sync::mpsc::error::TrySendError;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use crate::config::{MeRouteNoWriterMode, MeWriterPickMode};
|
||||
use crate::error::{ProxyError, Result};
|
||||
use crate::network::IpFamily;
|
||||
use crate::protocol::constants::RPC_CLOSE_EXT_U32;
|
||||
use crate::protocol::constants::{RPC_CLOSE_CONN_U32, RPC_CLOSE_EXT_U32};
|
||||
|
||||
use super::MePool;
|
||||
use super::codec::WriterCommand;
|
||||
@@ -21,6 +23,11 @@ use super::registry::ConnMeta;
|
||||
|
||||
const IDLE_WRITER_PENALTY_MID_SECS: u64 = 45;
|
||||
const IDLE_WRITER_PENALTY_HIGH_SECS: u64 = 55;
|
||||
const HYBRID_GLOBAL_BURST_PERIOD_ROUNDS: u32 = 4;
|
||||
const PICK_PENALTY_WARM: u64 = 200;
|
||||
const PICK_PENALTY_DRAINING: u64 = 600;
|
||||
const PICK_PENALTY_STALE: u64 = 300;
|
||||
const PICK_PENALTY_DEGRADED: u64 = 250;
|
||||
|
||||
impl MePool {
|
||||
/// Send RPC_PROXY_REQ. `tag_override`: per-user ad_tag (from access.user_ad_tags); if None, uses pool default.
|
||||
@@ -49,7 +56,18 @@ impl MePool {
|
||||
our_addr,
|
||||
proto_flags,
|
||||
};
|
||||
let mut emergency_attempts = 0;
|
||||
let no_writer_mode =
|
||||
MeRouteNoWriterMode::from_u8(self.me_route_no_writer_mode.load(Ordering::Relaxed));
|
||||
let (routed_dc, unknown_target_dc) = self
|
||||
.resolve_target_dc_for_routing(target_dc as i32)
|
||||
.await;
|
||||
let mut no_writer_deadline: Option<Instant> = None;
|
||||
let mut emergency_attempts = 0u32;
|
||||
let mut async_recovery_triggered = false;
|
||||
let mut hybrid_recovery_round = 0u32;
|
||||
let mut hybrid_last_recovery_at: Option<Instant> = None;
|
||||
let hybrid_wait_step = self.me_route_no_writer_wait.max(Duration::from_millis(50));
|
||||
let mut hybrid_wait_current = hybrid_wait_step;
|
||||
|
||||
loop {
|
||||
if let Some(current) = self.registry.get_writer(conn_id).await {
|
||||
@@ -74,153 +92,271 @@ impl MePool {
|
||||
let mut writers_snapshot = {
|
||||
let ws = self.writers.read().await;
|
||||
if ws.is_empty() {
|
||||
// Create waiter before recovery attempts so notify_one permits are not missed.
|
||||
let waiter = self.writer_available.notified();
|
||||
drop(ws);
|
||||
for family in self.family_order() {
|
||||
let map = match family {
|
||||
IpFamily::V4 => self.proxy_map_v4.read().await.clone(),
|
||||
IpFamily::V6 => self.proxy_map_v6.read().await.clone(),
|
||||
};
|
||||
for (_dc, addrs) in map.iter() {
|
||||
for (ip, port) in addrs {
|
||||
let addr = SocketAddr::new(*ip, *port);
|
||||
if self.connect_one(addr, self.rng.as_ref()).await.is_ok() {
|
||||
self.writer_available.notify_one();
|
||||
break;
|
||||
match no_writer_mode {
|
||||
MeRouteNoWriterMode::AsyncRecoveryFailfast => {
|
||||
let deadline = *no_writer_deadline.get_or_insert_with(|| {
|
||||
Instant::now() + self.me_route_no_writer_wait
|
||||
});
|
||||
if !async_recovery_triggered && !unknown_target_dc {
|
||||
let triggered =
|
||||
self.trigger_async_recovery_for_target_dc(routed_dc).await;
|
||||
if !triggered {
|
||||
self.trigger_async_recovery_global().await;
|
||||
}
|
||||
async_recovery_triggered = true;
|
||||
}
|
||||
if self.wait_for_writer_until(deadline).await {
|
||||
continue;
|
||||
}
|
||||
self.stats.increment_me_no_writer_failfast_total();
|
||||
return Err(ProxyError::Proxy(
|
||||
"No ME writer available in failfast window".into(),
|
||||
));
|
||||
}
|
||||
MeRouteNoWriterMode::InlineRecoveryLegacy => {
|
||||
self.stats.increment_me_inline_recovery_total();
|
||||
if !unknown_target_dc {
|
||||
for _ in 0..self.me_route_inline_recovery_attempts.max(1) {
|
||||
for family in self.family_order() {
|
||||
let map = match family {
|
||||
IpFamily::V4 => self.proxy_map_v4.read().await.clone(),
|
||||
IpFamily::V6 => self.proxy_map_v6.read().await.clone(),
|
||||
};
|
||||
for (dc, addrs) in &map {
|
||||
for (ip, port) in addrs {
|
||||
let addr = SocketAddr::new(*ip, *port);
|
||||
let _ = self
|
||||
.connect_one_for_dc(addr, *dc, self.rng.as_ref())
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
if !self.writers.read().await.is_empty() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !self.writers.read().await.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if tokio::time::timeout(Duration::from_secs(3), waiter).await.is_err() {
|
||||
if !self.writers.read().await.is_empty() {
|
||||
|
||||
if !self.writers.read().await.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let deadline = *no_writer_deadline
|
||||
.get_or_insert_with(|| Instant::now() + self.me_route_inline_recovery_wait);
|
||||
if !self.wait_for_writer_until(deadline).await {
|
||||
if !self.writers.read().await.is_empty() {
|
||||
continue;
|
||||
}
|
||||
self.stats.increment_me_no_writer_failfast_total();
|
||||
return Err(ProxyError::Proxy(
|
||||
"All ME connections dead (legacy wait timeout)".into(),
|
||||
));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
MeRouteNoWriterMode::HybridAsyncPersistent => {
|
||||
if !unknown_target_dc {
|
||||
self.maybe_trigger_hybrid_recovery(
|
||||
routed_dc,
|
||||
&mut hybrid_recovery_round,
|
||||
&mut hybrid_last_recovery_at,
|
||||
hybrid_wait_current,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
let deadline = Instant::now() + hybrid_wait_current;
|
||||
let _ = self.wait_for_writer_until(deadline).await;
|
||||
hybrid_wait_current =
|
||||
(hybrid_wait_current.saturating_mul(2))
|
||||
.min(Duration::from_millis(400));
|
||||
continue;
|
||||
}
|
||||
return Err(ProxyError::Proxy("All ME connections dead (waited 3s)".into()));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
ws.clone()
|
||||
};
|
||||
|
||||
let mut candidate_indices = self
|
||||
.candidate_indices_for_dc(&writers_snapshot, target_dc, false)
|
||||
.candidate_indices_for_dc(&writers_snapshot, routed_dc, false)
|
||||
.await;
|
||||
if candidate_indices.is_empty() {
|
||||
candidate_indices = self
|
||||
.candidate_indices_for_dc(&writers_snapshot, target_dc, true)
|
||||
.candidate_indices_for_dc(&writers_snapshot, routed_dc, true)
|
||||
.await;
|
||||
}
|
||||
if candidate_indices.is_empty() {
|
||||
// Emergency connect-on-demand
|
||||
if emergency_attempts >= 3 {
|
||||
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
|
||||
}
|
||||
emergency_attempts += 1;
|
||||
for family in self.family_order() {
|
||||
let map_guard = match family {
|
||||
IpFamily::V4 => self.proxy_map_v4.read().await,
|
||||
IpFamily::V6 => self.proxy_map_v6.read().await,
|
||||
};
|
||||
if let Some(addrs) = map_guard.get(&(target_dc as i32)) {
|
||||
let mut shuffled = addrs.clone();
|
||||
shuffled.shuffle(&mut rand::rng());
|
||||
drop(map_guard);
|
||||
for (ip, port) in shuffled {
|
||||
let addr = SocketAddr::new(ip, port);
|
||||
if self.connect_one(addr, self.rng.as_ref()).await.is_ok() {
|
||||
let pick_mode = self.writer_pick_mode();
|
||||
match no_writer_mode {
|
||||
MeRouteNoWriterMode::AsyncRecoveryFailfast => {
|
||||
let deadline = *no_writer_deadline.get_or_insert_with(|| {
|
||||
Instant::now() + self.me_route_no_writer_wait
|
||||
});
|
||||
if !async_recovery_triggered && !unknown_target_dc {
|
||||
let triggered = self.trigger_async_recovery_for_target_dc(routed_dc).await;
|
||||
if !triggered {
|
||||
self.trigger_async_recovery_global().await;
|
||||
}
|
||||
async_recovery_triggered = true;
|
||||
}
|
||||
if self.wait_for_candidate_until(routed_dc, deadline).await {
|
||||
continue;
|
||||
}
|
||||
self.stats.increment_me_writer_pick_no_candidate_total(pick_mode);
|
||||
self.stats.increment_me_no_writer_failfast_total();
|
||||
return Err(ProxyError::Proxy(
|
||||
"No ME writers available for target DC in failfast window".into(),
|
||||
));
|
||||
}
|
||||
MeRouteNoWriterMode::InlineRecoveryLegacy => {
|
||||
self.stats.increment_me_inline_recovery_total();
|
||||
if unknown_target_dc {
|
||||
let deadline = *no_writer_deadline
|
||||
.get_or_insert_with(|| Instant::now() + self.me_route_inline_recovery_wait);
|
||||
if self.wait_for_candidate_until(routed_dc, deadline).await {
|
||||
continue;
|
||||
}
|
||||
self.stats.increment_me_writer_pick_no_candidate_total(pick_mode);
|
||||
self.stats.increment_me_no_writer_failfast_total();
|
||||
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
|
||||
}
|
||||
if emergency_attempts >= self.me_route_inline_recovery_attempts.max(1) {
|
||||
self.stats.increment_me_writer_pick_no_candidate_total(pick_mode);
|
||||
self.stats.increment_me_no_writer_failfast_total();
|
||||
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
|
||||
}
|
||||
emergency_attempts += 1;
|
||||
let mut endpoints = self.endpoint_candidates_for_target_dc(routed_dc).await;
|
||||
endpoints.shuffle(&mut rand::rng());
|
||||
for addr in endpoints {
|
||||
if self.connect_one_for_dc(addr, routed_dc, self.rng.as_ref()).await.is_ok() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(100 * emergency_attempts)).await;
|
||||
tokio::time::sleep(Duration::from_millis(100 * emergency_attempts as u64)).await;
|
||||
let ws2 = self.writers.read().await;
|
||||
writers_snapshot = ws2.clone();
|
||||
drop(ws2);
|
||||
candidate_indices = self
|
||||
.candidate_indices_for_dc(&writers_snapshot, target_dc, false)
|
||||
.candidate_indices_for_dc(&writers_snapshot, routed_dc, false)
|
||||
.await;
|
||||
if candidate_indices.is_empty() {
|
||||
candidate_indices = self
|
||||
.candidate_indices_for_dc(&writers_snapshot, target_dc, true)
|
||||
.candidate_indices_for_dc(&writers_snapshot, routed_dc, true)
|
||||
.await;
|
||||
}
|
||||
if !candidate_indices.is_empty() {
|
||||
break;
|
||||
if candidate_indices.is_empty() {
|
||||
self.stats.increment_me_writer_pick_no_candidate_total(pick_mode);
|
||||
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
|
||||
}
|
||||
}
|
||||
}
|
||||
if candidate_indices.is_empty() {
|
||||
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
|
||||
MeRouteNoWriterMode::HybridAsyncPersistent => {
|
||||
if !unknown_target_dc {
|
||||
self.maybe_trigger_hybrid_recovery(
|
||||
routed_dc,
|
||||
&mut hybrid_recovery_round,
|
||||
&mut hybrid_last_recovery_at,
|
||||
hybrid_wait_current,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
let deadline = Instant::now() + hybrid_wait_current;
|
||||
let _ = self.wait_for_candidate_until(routed_dc, deadline).await;
|
||||
hybrid_wait_current = (hybrid_wait_current.saturating_mul(2))
|
||||
.min(Duration::from_millis(400));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
let writer_idle_since = self.registry.writer_idle_since_snapshot().await;
|
||||
hybrid_wait_current = hybrid_wait_step;
|
||||
let pick_mode = self.writer_pick_mode();
|
||||
let pick_sample_size = self.writer_pick_sample_size();
|
||||
let writer_ids: Vec<u64> = candidate_indices
|
||||
.iter()
|
||||
.map(|idx| writers_snapshot[*idx].id)
|
||||
.collect();
|
||||
let writer_idle_since = self
|
||||
.registry
|
||||
.writer_idle_since_for_writer_ids(&writer_ids)
|
||||
.await;
|
||||
let now_epoch_secs = Self::now_epoch_secs();
|
||||
|
||||
if self.me_deterministic_writer_sort.load(Ordering::Relaxed) {
|
||||
candidate_indices.sort_by(|lhs, rhs| {
|
||||
let left = &writers_snapshot[*lhs];
|
||||
let right = &writers_snapshot[*rhs];
|
||||
let left_key = (
|
||||
self.writer_contour_rank_for_selection(left),
|
||||
(left.generation < self.current_generation()) as usize,
|
||||
left.degraded.load(Ordering::Relaxed) as usize,
|
||||
self.writer_idle_rank_for_selection(
|
||||
left,
|
||||
&writer_idle_since,
|
||||
now_epoch_secs,
|
||||
),
|
||||
Reverse(left.tx.capacity()),
|
||||
left.addr,
|
||||
left.id,
|
||||
);
|
||||
let right_key = (
|
||||
self.writer_contour_rank_for_selection(right),
|
||||
(right.generation < self.current_generation()) as usize,
|
||||
right.degraded.load(Ordering::Relaxed) as usize,
|
||||
self.writer_idle_rank_for_selection(
|
||||
right,
|
||||
&writer_idle_since,
|
||||
now_epoch_secs,
|
||||
),
|
||||
Reverse(right.tx.capacity()),
|
||||
right.addr,
|
||||
right.id,
|
||||
);
|
||||
left_key.cmp(&right_key)
|
||||
});
|
||||
} else {
|
||||
candidate_indices.sort_by_key(|idx| {
|
||||
let w = &writers_snapshot[*idx];
|
||||
let degraded = w.degraded.load(Ordering::Relaxed);
|
||||
let stale = (w.generation < self.current_generation()) as usize;
|
||||
(
|
||||
self.writer_contour_rank_for_selection(w),
|
||||
stale,
|
||||
degraded as usize,
|
||||
self.writer_idle_rank_for_selection(
|
||||
w,
|
||||
&writer_idle_since,
|
||||
now_epoch_secs,
|
||||
),
|
||||
Reverse(w.tx.capacity()),
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
let start = self.rr.fetch_add(1, Ordering::Relaxed) as usize % candidate_indices.len();
|
||||
let ordered_candidate_indices = if pick_mode == MeWriterPickMode::P2c {
|
||||
self.p2c_ordered_candidate_indices(
|
||||
&candidate_indices,
|
||||
&writers_snapshot,
|
||||
&writer_idle_since,
|
||||
now_epoch_secs,
|
||||
start,
|
||||
pick_sample_size,
|
||||
)
|
||||
} else {
|
||||
if self.me_deterministic_writer_sort.load(Ordering::Relaxed) {
|
||||
candidate_indices.sort_by(|lhs, rhs| {
|
||||
let left = &writers_snapshot[*lhs];
|
||||
let right = &writers_snapshot[*rhs];
|
||||
let left_key = (
|
||||
self.writer_contour_rank_for_selection(left),
|
||||
(left.generation < self.current_generation()) as usize,
|
||||
left.degraded.load(Ordering::Relaxed) as usize,
|
||||
self.writer_idle_rank_for_selection(
|
||||
left,
|
||||
&writer_idle_since,
|
||||
now_epoch_secs,
|
||||
),
|
||||
Reverse(left.tx.capacity()),
|
||||
left.addr,
|
||||
left.id,
|
||||
);
|
||||
let right_key = (
|
||||
self.writer_contour_rank_for_selection(right),
|
||||
(right.generation < self.current_generation()) as usize,
|
||||
right.degraded.load(Ordering::Relaxed) as usize,
|
||||
self.writer_idle_rank_for_selection(
|
||||
right,
|
||||
&writer_idle_since,
|
||||
now_epoch_secs,
|
||||
),
|
||||
Reverse(right.tx.capacity()),
|
||||
right.addr,
|
||||
right.id,
|
||||
);
|
||||
left_key.cmp(&right_key)
|
||||
});
|
||||
} else {
|
||||
candidate_indices.sort_by_key(|idx| {
|
||||
let w = &writers_snapshot[*idx];
|
||||
let degraded = w.degraded.load(Ordering::Relaxed);
|
||||
let stale = (w.generation < self.current_generation()) as usize;
|
||||
(
|
||||
self.writer_contour_rank_for_selection(w),
|
||||
stale,
|
||||
degraded as usize,
|
||||
self.writer_idle_rank_for_selection(
|
||||
w,
|
||||
&writer_idle_since,
|
||||
now_epoch_secs,
|
||||
),
|
||||
Reverse(w.tx.capacity()),
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
let mut ordered = Vec::<usize>::with_capacity(candidate_indices.len());
|
||||
for offset in 0..candidate_indices.len() {
|
||||
ordered.push(candidate_indices[(start + offset) % candidate_indices.len()]);
|
||||
}
|
||||
ordered
|
||||
};
|
||||
let mut fallback_blocking_idx: Option<usize> = None;
|
||||
|
||||
for offset in 0..candidate_indices.len() {
|
||||
let idx = candidate_indices[(start + offset) % candidate_indices.len()];
|
||||
for idx in ordered_candidate_indices {
|
||||
let w = &writers_snapshot[idx];
|
||||
if !self.writer_accepts_new_binding(w) {
|
||||
continue;
|
||||
}
|
||||
match w.tx.try_send(WriterCommand::Data(payload.clone())) {
|
||||
Ok(()) => {
|
||||
self.stats.increment_me_writer_pick_success_try_total(pick_mode);
|
||||
self.registry
|
||||
.bind_writer(conn_id, w.id, w.tx.clone(), meta.clone())
|
||||
.await;
|
||||
@@ -242,6 +378,7 @@ impl MePool {
|
||||
}
|
||||
}
|
||||
Err(TrySendError::Closed(_)) => {
|
||||
self.stats.increment_me_writer_pick_closed_total(pick_mode);
|
||||
warn!(writer_id = w.id, "ME writer channel closed");
|
||||
self.remove_writer_and_close_clients(w.id).await;
|
||||
continue;
|
||||
@@ -250,15 +387,20 @@ impl MePool {
|
||||
}
|
||||
|
||||
let Some(blocking_idx) = fallback_blocking_idx else {
|
||||
self.stats.increment_me_writer_pick_full_total(pick_mode);
|
||||
continue;
|
||||
};
|
||||
|
||||
let w = writers_snapshot[blocking_idx].clone();
|
||||
if !self.writer_accepts_new_binding(&w) {
|
||||
self.stats.increment_me_writer_pick_full_total(pick_mode);
|
||||
continue;
|
||||
}
|
||||
self.stats.increment_me_writer_pick_blocking_fallback_total();
|
||||
match w.tx.send(WriterCommand::Data(payload.clone())).await {
|
||||
Ok(()) => {
|
||||
self.stats
|
||||
.increment_me_writer_pick_success_fallback_total(pick_mode);
|
||||
self.registry
|
||||
.bind_writer(conn_id, w.id, w.tx.clone(), meta.clone())
|
||||
.await;
|
||||
@@ -268,6 +410,7 @@ impl MePool {
|
||||
return Ok(());
|
||||
}
|
||||
Err(_) => {
|
||||
self.stats.increment_me_writer_pick_closed_total(pick_mode);
|
||||
warn!(writer_id = w.id, "ME writer channel closed (blocking)");
|
||||
self.remove_writer_and_close_clients(w.id).await;
|
||||
}
|
||||
@@ -275,12 +418,136 @@ impl MePool {
|
||||
}
|
||||
}
|
||||
|
||||
async fn wait_for_writer_until(&self, deadline: Instant) -> bool {
|
||||
let waiter = self.writer_available.notified();
|
||||
if !self.writers.read().await.is_empty() {
|
||||
return true;
|
||||
}
|
||||
let now = Instant::now();
|
||||
if now >= deadline {
|
||||
return !self.writers.read().await.is_empty();
|
||||
}
|
||||
let timeout = deadline.saturating_duration_since(now);
|
||||
if tokio::time::timeout(timeout, waiter).await.is_ok() {
|
||||
return true;
|
||||
}
|
||||
!self.writers.read().await.is_empty()
|
||||
}
|
||||
|
||||
async fn wait_for_candidate_until(&self, routed_dc: i32, deadline: Instant) -> bool {
|
||||
loop {
|
||||
if self.has_candidate_for_target_dc(routed_dc).await {
|
||||
return true;
|
||||
}
|
||||
|
||||
let now = Instant::now();
|
||||
if now >= deadline {
|
||||
return self.has_candidate_for_target_dc(routed_dc).await;
|
||||
}
|
||||
|
||||
let waiter = self.writer_available.notified();
|
||||
if self.has_candidate_for_target_dc(routed_dc).await {
|
||||
return true;
|
||||
}
|
||||
let remaining = deadline.saturating_duration_since(Instant::now());
|
||||
if remaining.is_zero() {
|
||||
return self.has_candidate_for_target_dc(routed_dc).await;
|
||||
}
|
||||
if tokio::time::timeout(remaining, waiter).await.is_err() {
|
||||
return self.has_candidate_for_target_dc(routed_dc).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn has_candidate_for_target_dc(&self, routed_dc: i32) -> bool {
|
||||
let writers_snapshot = {
|
||||
let ws = self.writers.read().await;
|
||||
if ws.is_empty() {
|
||||
return false;
|
||||
}
|
||||
ws.clone()
|
||||
};
|
||||
let mut candidate_indices = self
|
||||
.candidate_indices_for_dc(&writers_snapshot, routed_dc, false)
|
||||
.await;
|
||||
if candidate_indices.is_empty() {
|
||||
candidate_indices = self
|
||||
.candidate_indices_for_dc(&writers_snapshot, routed_dc, true)
|
||||
.await;
|
||||
}
|
||||
!candidate_indices.is_empty()
|
||||
}
|
||||
|
||||
/// Requests an immediate refill for up to eight endpoints of `routed_dc`.
///
/// Returns `false` without touching any counter when no endpoint is known
/// for the DC; otherwise bumps the async-recovery trigger counter, calls
/// `trigger_immediate_refill_for_dc` per endpoint and returns `true`.
async fn trigger_async_recovery_for_target_dc(self: &Arc<Self>, routed_dc: i32) -> bool {
    let endpoints = self.endpoint_candidates_for_target_dc(routed_dc).await;
    if endpoints.is_empty() {
        return false;
    }

    self.stats.increment_me_async_recovery_trigger_total();
    endpoints.into_iter().take(8).for_each(|addr| {
        self.trigger_immediate_refill_for_dc(addr, routed_dc);
    });
    true
}
|
||||
|
||||
async fn trigger_async_recovery_global(self: &Arc<Self>) {
|
||||
self.stats.increment_me_async_recovery_trigger_total();
|
||||
let mut seen = HashSet::<(i32, SocketAddr)>::new();
|
||||
for family in self.family_order() {
|
||||
let map_guard = match family {
|
||||
IpFamily::V4 => self.proxy_map_v4.read().await,
|
||||
IpFamily::V6 => self.proxy_map_v6.read().await,
|
||||
};
|
||||
for (dc, addrs) in map_guard.iter() {
|
||||
for (ip, port) in addrs {
|
||||
let addr = SocketAddr::new(*ip, *port);
|
||||
if seen.insert((*dc, addr)) {
|
||||
self.trigger_immediate_refill_for_dc(addr, *dc);
|
||||
}
|
||||
if seen.len() >= 8 {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn endpoint_candidates_for_target_dc(&self, routed_dc: i32) -> Vec<SocketAddr> {
|
||||
self.preferred_endpoints_for_dc(routed_dc).await
|
||||
}
|
||||
|
||||
async fn maybe_trigger_hybrid_recovery(
|
||||
self: &Arc<Self>,
|
||||
routed_dc: i32,
|
||||
hybrid_recovery_round: &mut u32,
|
||||
hybrid_last_recovery_at: &mut Option<Instant>,
|
||||
hybrid_wait_step: Duration,
|
||||
) {
|
||||
if let Some(last) = *hybrid_last_recovery_at
|
||||
&& last.elapsed() < hybrid_wait_step
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
let round = *hybrid_recovery_round;
|
||||
let target_triggered = self.trigger_async_recovery_for_target_dc(routed_dc).await;
|
||||
if !target_triggered || round % HYBRID_GLOBAL_BURST_PERIOD_ROUNDS == 0 {
|
||||
self.trigger_async_recovery_global().await;
|
||||
}
|
||||
*hybrid_recovery_round = round.saturating_add(1);
|
||||
*hybrid_last_recovery_at = Some(Instant::now());
|
||||
}
|
||||
|
||||
pub async fn send_close(self: &Arc<Self>, conn_id: u64) -> Result<()> {
|
||||
if let Some(w) = self.registry.get_writer(conn_id).await {
|
||||
let mut p = Vec::with_capacity(12);
|
||||
p.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
|
||||
p.extend_from_slice(&conn_id.to_le_bytes());
|
||||
if w.tx.send(WriterCommand::DataAndFlush(p)).await.is_err() {
|
||||
if w.tx
|
||||
.send(WriterCommand::DataAndFlush(Bytes::from(p)))
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
debug!("ME close write failed");
|
||||
self.remove_writer_and_close_clients(w.writer_id).await;
|
||||
}
|
||||
@@ -292,6 +559,37 @@ impl MePool {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn send_close_conn(self: &Arc<Self>, conn_id: u64) -> Result<()> {
|
||||
if let Some(w) = self.registry.get_writer(conn_id).await {
|
||||
let mut p = Vec::with_capacity(12);
|
||||
p.extend_from_slice(&RPC_CLOSE_CONN_U32.to_le_bytes());
|
||||
p.extend_from_slice(&conn_id.to_le_bytes());
|
||||
match w.tx.try_send(WriterCommand::DataAndFlush(Bytes::from(p))) {
|
||||
Ok(()) => {}
|
||||
Err(TrySendError::Full(cmd)) => {
|
||||
let _ = tokio::time::timeout(Duration::from_millis(50), w.tx.send(cmd)).await;
|
||||
}
|
||||
Err(TrySendError::Closed(_)) => {
|
||||
debug!(conn_id, "ME close_conn skipped: writer channel closed");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
debug!(conn_id, "ME close_conn skipped (writer missing)");
|
||||
}
|
||||
|
||||
self.registry.unregister(conn_id).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Calls `send_close_conn` for every connection id reported by the
/// registry, one after another, ignoring individual results.
///
/// Returns the number of connection ids that were processed.
pub async fn shutdown_send_close_conn_all(self: &Arc<Self>) -> usize {
    let conn_ids = self.registry.active_conn_ids().await;
    for &conn_id in &conn_ids {
        let _ = self.send_close_conn(conn_id).await;
    }
    conn_ids.len()
}
|
||||
|
||||
pub fn connection_count(&self) -> usize {
|
||||
self.conn_count.load(Ordering::Relaxed)
|
||||
}
|
||||
@@ -299,53 +597,12 @@ impl MePool {
|
||||
pub(super) async fn candidate_indices_for_dc(
|
||||
&self,
|
||||
writers: &[super::pool::MeWriter],
|
||||
target_dc: i16,
|
||||
routed_dc: i32,
|
||||
include_warm: bool,
|
||||
) -> Vec<usize> {
|
||||
let key = target_dc as i32;
|
||||
let mut preferred = Vec::<SocketAddr>::new();
|
||||
|
||||
for family in self.family_order() {
|
||||
let map_guard = match family {
|
||||
IpFamily::V4 => self.proxy_map_v4.read().await,
|
||||
IpFamily::V6 => self.proxy_map_v6.read().await,
|
||||
};
|
||||
|
||||
if let Some(v) = map_guard.get(&key) {
|
||||
preferred.extend(v.iter().map(|(ip, port)| SocketAddr::new(*ip, *port)));
|
||||
}
|
||||
if preferred.is_empty() {
|
||||
let abs = key.abs();
|
||||
if let Some(v) = map_guard.get(&abs) {
|
||||
preferred.extend(v.iter().map(|(ip, port)| SocketAddr::new(*ip, *port)));
|
||||
}
|
||||
}
|
||||
if preferred.is_empty() {
|
||||
let abs = key.abs();
|
||||
if let Some(v) = map_guard.get(&-abs) {
|
||||
preferred.extend(v.iter().map(|(ip, port)| SocketAddr::new(*ip, *port)));
|
||||
}
|
||||
}
|
||||
if preferred.is_empty() {
|
||||
let def = self.default_dc.load(Ordering::Relaxed);
|
||||
if def != 0
|
||||
&& let Some(v) = map_guard.get(&def)
|
||||
{
|
||||
preferred.extend(v.iter().map(|(ip, port)| SocketAddr::new(*ip, *port)));
|
||||
}
|
||||
}
|
||||
|
||||
drop(map_guard);
|
||||
|
||||
if !preferred.is_empty() && !self.decision.effective_multipath {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let preferred = self.preferred_endpoints_for_dc(routed_dc).await;
|
||||
if preferred.is_empty() {
|
||||
return (0..writers.len())
|
||||
.filter(|i| self.writer_eligible_for_selection(&writers[*i], include_warm))
|
||||
.collect();
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let mut out = Vec::new();
|
||||
@@ -353,15 +610,10 @@ impl MePool {
|
||||
if !self.writer_eligible_for_selection(w, include_warm) {
|
||||
continue;
|
||||
}
|
||||
if preferred.contains(&w.addr) {
|
||||
if w.writer_dc == routed_dc && preferred.iter().any(|endpoint| *endpoint == w.addr) {
|
||||
out.push(idx);
|
||||
}
|
||||
}
|
||||
if out.is_empty() {
|
||||
return (0..writers.len())
|
||||
.filter(|i| self.writer_eligible_for_selection(&writers[*i], include_warm))
|
||||
.collect();
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
@@ -407,4 +659,87 @@ impl MePool {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
fn writer_pick_score(
|
||||
&self,
|
||||
writer: &super::pool::MeWriter,
|
||||
idle_since_by_writer: &HashMap<u64, u64>,
|
||||
now_epoch_secs: u64,
|
||||
) -> u64 {
|
||||
let contour_penalty = match WriterContour::from_u8(writer.contour.load(Ordering::Relaxed)) {
|
||||
WriterContour::Active => 0,
|
||||
WriterContour::Warm => PICK_PENALTY_WARM,
|
||||
WriterContour::Draining => PICK_PENALTY_DRAINING,
|
||||
};
|
||||
let stale_penalty = if writer.generation < self.current_generation() {
|
||||
PICK_PENALTY_STALE
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let degraded_penalty = if writer.degraded.load(Ordering::Relaxed) {
|
||||
PICK_PENALTY_DEGRADED
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let idle_penalty =
|
||||
(self.writer_idle_rank_for_selection(writer, idle_since_by_writer, now_epoch_secs) as u64)
|
||||
* 100;
|
||||
let queue_cap = self.writer_cmd_channel_capacity.max(1) as u64;
|
||||
let queue_remaining = writer.tx.capacity() as u64;
|
||||
let queue_used = queue_cap.saturating_sub(queue_remaining.min(queue_cap));
|
||||
let queue_util_pct = queue_used.saturating_mul(100) / queue_cap;
|
||||
let queue_penalty = queue_util_pct.saturating_mul(4);
|
||||
let rtt_penalty = ((writer.rtt_ema_ms_x10.load(Ordering::Relaxed) as u64).saturating_add(5) / 10)
|
||||
.min(400);
|
||||
|
||||
contour_penalty
|
||||
.saturating_add(stale_penalty)
|
||||
.saturating_add(degraded_penalty)
|
||||
.saturating_add(idle_penalty)
|
||||
.saturating_add(queue_penalty)
|
||||
.saturating_add(rtt_penalty)
|
||||
}
|
||||
|
||||
fn p2c_ordered_candidate_indices(
|
||||
&self,
|
||||
candidate_indices: &[usize],
|
||||
writers_snapshot: &[super::pool::MeWriter],
|
||||
idle_since_by_writer: &HashMap<u64, u64>,
|
||||
now_epoch_secs: u64,
|
||||
start: usize,
|
||||
sample_size: usize,
|
||||
) -> Vec<usize> {
|
||||
let total = candidate_indices.len();
|
||||
if total == 0 {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let mut sampled = Vec::<usize>::with_capacity(sample_size.min(total));
|
||||
let mut seen = HashSet::<usize>::with_capacity(total);
|
||||
for offset in 0..sample_size.min(total) {
|
||||
let idx = candidate_indices[(start + offset) % total];
|
||||
if seen.insert(idx) {
|
||||
sampled.push(idx);
|
||||
}
|
||||
}
|
||||
|
||||
sampled.sort_by_key(|idx| {
|
||||
let writer = &writers_snapshot[*idx];
|
||||
(
|
||||
self.writer_pick_score(writer, idle_since_by_writer, now_epoch_secs),
|
||||
writer.addr,
|
||||
writer.id,
|
||||
)
|
||||
});
|
||||
|
||||
let mut ordered = Vec::<usize>::with_capacity(total);
|
||||
ordered.extend(sampled.iter().copied());
|
||||
for offset in 0..total {
|
||||
let idx = candidate_indices[(start + offset) % total];
|
||||
if seen.insert(idx) {
|
||||
ordered.push(idx);
|
||||
}
|
||||
}
|
||||
ordered
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
use bytes::Bytes;
|
||||
|
||||
use crate::protocol::constants::*;
|
||||
|
||||
@@ -48,7 +49,7 @@ pub(crate) fn build_proxy_req_payload(
|
||||
data: &[u8],
|
||||
proxy_tag: Option<&[u8]>,
|
||||
proto_flags: u32,
|
||||
) -> Vec<u8> {
|
||||
) -> Bytes {
|
||||
let mut b = Vec::with_capacity(128 + data.len());
|
||||
|
||||
b.extend_from_slice(&RPC_PROXY_REQ_U32.to_le_bytes());
|
||||
@@ -85,7 +86,7 @@ pub(crate) fn build_proxy_req_payload(
|
||||
}
|
||||
|
||||
b.extend_from_slice(data);
|
||||
b
|
||||
Bytes::from(b)
|
||||
}
|
||||
|
||||
pub fn proto_flags_for_tag(tag: crate::protocol::constants::ProtoTag, has_proxy_tag: bool) -> u32 {
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
//! TCP Socket Configuration
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
use std::collections::HashSet;
|
||||
#[cfg(target_os = "linux")]
|
||||
use std::fs;
|
||||
use std::io::Result;
|
||||
use std::net::{SocketAddr, IpAddr};
|
||||
@@ -44,6 +46,7 @@ pub fn configure_tcp_socket(
|
||||
pub fn configure_client_socket(
|
||||
stream: &TcpStream,
|
||||
keepalive_secs: u64,
|
||||
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))]
|
||||
ack_timeout_secs: u64,
|
||||
) -> Result<()> {
|
||||
let socket = socket2::SockRef::from(stream);
|
||||
@@ -65,17 +68,27 @@ pub fn configure_client_socket(
|
||||
// is implemented in relay_bidirectional instead
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
use std::io::{Error, ErrorKind};
|
||||
use std::os::unix::io::AsRawFd;
|
||||
|
||||
let fd = stream.as_raw_fd();
|
||||
let timeout_ms = (ack_timeout_secs * 1000) as libc::c_int;
|
||||
unsafe {
|
||||
let timeout_ms_u64 = ack_timeout_secs
|
||||
.checked_mul(1000)
|
||||
.ok_or_else(|| Error::new(ErrorKind::InvalidInput, "ack_timeout_secs is too large"))?;
|
||||
let timeout_ms = i32::try_from(timeout_ms_u64)
|
||||
.map_err(|_| Error::new(ErrorKind::InvalidInput, "ack_timeout_secs exceeds TCP_USER_TIMEOUT range"))?;
|
||||
|
||||
let rc = unsafe {
|
||||
libc::setsockopt(
|
||||
fd,
|
||||
libc::IPPROTO_TCP,
|
||||
libc::TCP_USER_TIMEOUT,
|
||||
&timeout_ms as *const _ as *const libc::c_void,
|
||||
&timeout_ms as *const libc::c_int as *const libc::c_void,
|
||||
std::mem::size_of::<libc::c_int>() as libc::socklen_t,
|
||||
);
|
||||
)
|
||||
};
|
||||
if rc != 0 {
|
||||
return Err(Error::last_os_error());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -373,6 +386,7 @@ fn listening_inodes_for_port(addr: SocketAddr) -> HashSet<u64> {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::io::ErrorKind;
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::TcpListener;
|
||||
|
||||
#[tokio::test]
|
||||
@@ -396,6 +410,142 @@ mod tests {
|
||||
panic!("configure_tcp_socket failed: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_configure_client_socket() {
|
||||
let listener = match TcpListener::bind("127.0.0.1:0").await {
|
||||
Ok(l) => l,
|
||||
Err(e) if e.kind() == ErrorKind::PermissionDenied => return,
|
||||
Err(e) => panic!("bind failed: {e}"),
|
||||
};
|
||||
let addr = match listener.local_addr() {
|
||||
Ok(addr) => addr,
|
||||
Err(e) => panic!("local_addr failed: {e}"),
|
||||
};
|
||||
|
||||
let stream = match TcpStream::connect(addr).await {
|
||||
Ok(s) => s,
|
||||
Err(e) if e.kind() == ErrorKind::PermissionDenied => return,
|
||||
Err(e) => panic!("connect failed: {e}"),
|
||||
};
|
||||
|
||||
if let Err(e) = configure_client_socket(&stream, 30, 30) {
|
||||
if e.kind() == ErrorKind::PermissionDenied {
|
||||
return;
|
||||
}
|
||||
panic!("configure_client_socket failed: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_configure_client_socket_zero_ack_timeout() {
|
||||
let listener = match TcpListener::bind("127.0.0.1:0").await {
|
||||
Ok(l) => l,
|
||||
Err(e) if e.kind() == ErrorKind::PermissionDenied => return,
|
||||
Err(e) => panic!("bind failed: {e}"),
|
||||
};
|
||||
let addr = match listener.local_addr() {
|
||||
Ok(addr) => addr,
|
||||
Err(e) => panic!("local_addr failed: {e}"),
|
||||
};
|
||||
|
||||
let stream = match TcpStream::connect(addr).await {
|
||||
Ok(s) => s,
|
||||
Err(e) if e.kind() == ErrorKind::PermissionDenied => return,
|
||||
Err(e) => panic!("connect failed: {e}"),
|
||||
};
|
||||
|
||||
if let Err(e) = configure_client_socket(&stream, 30, 0) {
|
||||
if e.kind() == ErrorKind::PermissionDenied {
|
||||
return;
|
||||
}
|
||||
panic!("configure_client_socket with zero ack timeout failed: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_configure_client_socket_roundtrip_io() {
|
||||
let listener = match TcpListener::bind("127.0.0.1:0").await {
|
||||
Ok(l) => l,
|
||||
Err(e) if e.kind() == ErrorKind::PermissionDenied => return,
|
||||
Err(e) => panic!("bind failed: {e}"),
|
||||
};
|
||||
let addr = match listener.local_addr() {
|
||||
Ok(addr) => addr,
|
||||
Err(e) => panic!("local_addr failed: {e}"),
|
||||
};
|
||||
|
||||
let server_task = tokio::spawn(async move {
|
||||
let (mut accepted, _) = match listener.accept().await {
|
||||
Ok(v) => v,
|
||||
Err(e) => panic!("accept failed: {e}"),
|
||||
};
|
||||
let mut payload = [0u8; 4];
|
||||
if let Err(e) = accepted.read_exact(&mut payload).await {
|
||||
panic!("server read_exact failed: {e}");
|
||||
}
|
||||
if let Err(e) = accepted.write_all(b"pong").await {
|
||||
panic!("server write_all failed: {e}");
|
||||
}
|
||||
payload
|
||||
});
|
||||
|
||||
let mut stream = match TcpStream::connect(addr).await {
|
||||
Ok(s) => s,
|
||||
Err(e) if e.kind() == ErrorKind::PermissionDenied => return,
|
||||
Err(e) => panic!("connect failed: {e}"),
|
||||
};
|
||||
|
||||
if let Err(e) = configure_client_socket(&stream, 30, 30) {
|
||||
if e.kind() == ErrorKind::PermissionDenied {
|
||||
return;
|
||||
}
|
||||
panic!("configure_client_socket failed: {e}");
|
||||
}
|
||||
|
||||
if let Err(e) = stream.write_all(b"ping").await {
|
||||
panic!("client write_all failed: {e}");
|
||||
}
|
||||
|
||||
let mut reply = [0u8; 4];
|
||||
if let Err(e) = stream.read_exact(&mut reply).await {
|
||||
panic!("client read_exact failed: {e}");
|
||||
}
|
||||
assert_eq!(&reply, b"pong");
|
||||
|
||||
let server_seen = match server_task.await {
|
||||
Ok(value) => value,
|
||||
Err(e) => panic!("server task join failed: {e}"),
|
||||
};
|
||||
assert_eq!(&server_seen, b"ping");
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[tokio::test]
|
||||
async fn test_configure_client_socket_ack_timeout_overflow_rejected() {
|
||||
let listener = match TcpListener::bind("127.0.0.1:0").await {
|
||||
Ok(l) => l,
|
||||
Err(e) if e.kind() == ErrorKind::PermissionDenied => return,
|
||||
Err(e) => panic!("bind failed: {e}"),
|
||||
};
|
||||
let addr = match listener.local_addr() {
|
||||
Ok(addr) => addr,
|
||||
Err(e) => panic!("local_addr failed: {e}"),
|
||||
};
|
||||
|
||||
let stream = match TcpStream::connect(addr).await {
|
||||
Ok(s) => s,
|
||||
Err(e) if e.kind() == ErrorKind::PermissionDenied => return,
|
||||
Err(e) => panic!("connect failed: {e}"),
|
||||
};
|
||||
|
||||
let too_large_secs = (i32::MAX as u64 / 1000) + 1;
|
||||
let err = match configure_client_socket(&stream, 30, too_large_secs) {
|
||||
Ok(()) => panic!("expected overflow validation error"),
|
||||
Err(e) => e,
|
||||
};
|
||||
assert_eq!(err.kind(), ErrorKind::InvalidInput);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_normalize_ip() {
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::net::{SocketAddr, IpAddr};
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
|
||||
use std::time::Duration;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::sync::RwLock;
|
||||
@@ -165,8 +165,55 @@ pub enum UpstreamRouteKind {
|
||||
Socks5,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct UpstreamApiDcSnapshot {
|
||||
pub dc: i16,
|
||||
pub latency_ema_ms: Option<f64>,
|
||||
pub ip_preference: IpPreference,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct UpstreamApiItemSnapshot {
|
||||
pub upstream_id: usize,
|
||||
pub route_kind: UpstreamRouteKind,
|
||||
pub address: String,
|
||||
pub weight: u16,
|
||||
pub scopes: String,
|
||||
pub healthy: bool,
|
||||
pub fails: u32,
|
||||
pub last_check_age_secs: u64,
|
||||
pub effective_latency_ms: Option<f64>,
|
||||
pub dc: Vec<UpstreamApiDcSnapshot>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct UpstreamApiSummarySnapshot {
|
||||
pub configured_total: usize,
|
||||
pub healthy_total: usize,
|
||||
pub unhealthy_total: usize,
|
||||
pub direct_total: usize,
|
||||
pub socks4_total: usize,
|
||||
pub socks5_total: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct UpstreamApiSnapshot {
|
||||
pub summary: UpstreamApiSummarySnapshot,
|
||||
pub upstreams: Vec<UpstreamApiItemSnapshot>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct UpstreamApiPolicySnapshot {
|
||||
pub connect_retry_attempts: u32,
|
||||
pub connect_retry_backoff_ms: u64,
|
||||
pub connect_budget_ms: u64,
|
||||
pub unhealthy_fail_threshold: u32,
|
||||
pub connect_failfast_hard_errors: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub struct UpstreamEgressInfo {
|
||||
pub upstream_id: usize,
|
||||
pub route_kind: UpstreamRouteKind,
|
||||
pub local_addr: Option<SocketAddr>,
|
||||
pub direct_bind_ip: Option<IpAddr>,
|
||||
@@ -188,8 +235,11 @@ pub struct UpstreamManager {
|
||||
upstreams: Arc<RwLock<Vec<UpstreamState>>>,
|
||||
connect_retry_attempts: u32,
|
||||
connect_retry_backoff: Duration,
|
||||
connect_budget: Duration,
|
||||
unhealthy_fail_threshold: u32,
|
||||
connect_failfast_hard_errors: bool,
|
||||
no_upstreams_warn_epoch_ms: Arc<AtomicU64>,
|
||||
no_healthy_warn_epoch_ms: Arc<AtomicU64>,
|
||||
stats: Arc<Stats>,
|
||||
}
|
||||
|
||||
@@ -198,6 +248,7 @@ impl UpstreamManager {
|
||||
configs: Vec<UpstreamConfig>,
|
||||
connect_retry_attempts: u32,
|
||||
connect_retry_backoff_ms: u64,
|
||||
connect_budget_ms: u64,
|
||||
unhealthy_fail_threshold: u32,
|
||||
connect_failfast_hard_errors: bool,
|
||||
stats: Arc<Stats>,
|
||||
@@ -211,12 +262,106 @@ impl UpstreamManager {
|
||||
upstreams: Arc::new(RwLock::new(states)),
|
||||
connect_retry_attempts: connect_retry_attempts.max(1),
|
||||
connect_retry_backoff: Duration::from_millis(connect_retry_backoff_ms),
|
||||
connect_budget: Duration::from_millis(connect_budget_ms.max(1)),
|
||||
unhealthy_fail_threshold: unhealthy_fail_threshold.max(1),
|
||||
connect_failfast_hard_errors,
|
||||
no_upstreams_warn_epoch_ms: Arc::new(AtomicU64::new(0)),
|
||||
no_healthy_warn_epoch_ms: Arc::new(AtomicU64::new(0)),
|
||||
stats,
|
||||
}
|
||||
}
|
||||
|
||||
fn now_epoch_ms() -> u64 {
|
||||
std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_millis() as u64
|
||||
}
|
||||
|
||||
fn should_emit_warn(last_epoch_ms: &AtomicU64, cooldown_ms: u64) -> bool {
|
||||
let now_epoch_ms = Self::now_epoch_ms();
|
||||
let previous_epoch_ms = last_epoch_ms.load(Ordering::Relaxed);
|
||||
if now_epoch_ms.saturating_sub(previous_epoch_ms) < cooldown_ms {
|
||||
return false;
|
||||
}
|
||||
last_epoch_ms
|
||||
.compare_exchange(
|
||||
previous_epoch_ms,
|
||||
now_epoch_ms,
|
||||
Ordering::AcqRel,
|
||||
Ordering::Relaxed,
|
||||
)
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
pub fn try_api_snapshot(&self) -> Option<UpstreamApiSnapshot> {
|
||||
let guard = self.upstreams.try_read().ok()?;
|
||||
let now = std::time::Instant::now();
|
||||
|
||||
let mut summary = UpstreamApiSummarySnapshot {
|
||||
configured_total: guard.len(),
|
||||
..UpstreamApiSummarySnapshot::default()
|
||||
};
|
||||
let mut upstreams = Vec::with_capacity(guard.len());
|
||||
|
||||
for (idx, upstream) in guard.iter().enumerate() {
|
||||
if upstream.healthy {
|
||||
summary.healthy_total += 1;
|
||||
} else {
|
||||
summary.unhealthy_total += 1;
|
||||
}
|
||||
|
||||
let (route_kind, address) = match &upstream.config.upstream_type {
|
||||
UpstreamType::Direct { .. } => {
|
||||
summary.direct_total += 1;
|
||||
(UpstreamRouteKind::Direct, "direct".to_string())
|
||||
}
|
||||
UpstreamType::Socks4 { address, .. } => {
|
||||
summary.socks4_total += 1;
|
||||
(UpstreamRouteKind::Socks4, address.clone())
|
||||
}
|
||||
UpstreamType::Socks5 { address, .. } => {
|
||||
summary.socks5_total += 1;
|
||||
(UpstreamRouteKind::Socks5, address.clone())
|
||||
}
|
||||
};
|
||||
|
||||
let mut dc = Vec::with_capacity(NUM_DCS);
|
||||
for dc_idx in 0..NUM_DCS {
|
||||
dc.push(UpstreamApiDcSnapshot {
|
||||
dc: (dc_idx + 1) as i16,
|
||||
latency_ema_ms: upstream.dc_latency[dc_idx].get(),
|
||||
ip_preference: upstream.dc_ip_pref[dc_idx],
|
||||
});
|
||||
}
|
||||
|
||||
upstreams.push(UpstreamApiItemSnapshot {
|
||||
upstream_id: idx,
|
||||
route_kind,
|
||||
address,
|
||||
weight: upstream.config.weight,
|
||||
scopes: upstream.config.scopes.clone(),
|
||||
healthy: upstream.healthy,
|
||||
fails: upstream.fails,
|
||||
last_check_age_secs: now.saturating_duration_since(upstream.last_check).as_secs(),
|
||||
effective_latency_ms: upstream.effective_latency(None),
|
||||
dc,
|
||||
});
|
||||
}
|
||||
|
||||
Some(UpstreamApiSnapshot { summary, upstreams })
|
||||
}
|
||||
|
||||
pub fn api_policy_snapshot(&self) -> UpstreamApiPolicySnapshot {
|
||||
UpstreamApiPolicySnapshot {
|
||||
connect_retry_attempts: self.connect_retry_attempts,
|
||||
connect_retry_backoff_ms: self.connect_retry_backoff.as_millis() as u64,
|
||||
connect_budget_ms: self.connect_budget.as_millis() as u64,
|
||||
unhealthy_fail_threshold: self.unhealthy_fail_threshold,
|
||||
connect_failfast_hard_errors: self.connect_failfast_hard_errors,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn resolve_interface_addrs(name: &str, want_ipv6: bool) -> Vec<IpAddr> {
|
||||
use nix::ifaddrs::getifaddrs;
|
||||
@@ -416,12 +561,22 @@ impl UpstreamManager {
|
||||
.collect();
|
||||
|
||||
if filtered_upstreams.is_empty() {
|
||||
warn!(scope = scope, "No upstreams available! Using first (direct?)");
|
||||
if Self::should_emit_warn(
|
||||
self.no_upstreams_warn_epoch_ms.as_ref(),
|
||||
5_000,
|
||||
) {
|
||||
warn!(scope = scope, "No upstreams available! Using first (direct?)");
|
||||
}
|
||||
return None;
|
||||
}
|
||||
|
||||
if healthy.is_empty() {
|
||||
warn!(scope = scope, "No healthy upstreams available! Using random.");
|
||||
if Self::should_emit_warn(
|
||||
self.no_healthy_warn_epoch_ms.as_ref(),
|
||||
5_000,
|
||||
) {
|
||||
warn!(scope = scope, "No healthy upstreams available! Using random.");
|
||||
}
|
||||
return Some(filtered_upstreams[rand::rng().gen_range(0..filtered_upstreams.len())]);
|
||||
}
|
||||
|
||||
@@ -498,11 +653,27 @@ impl UpstreamManager {
|
||||
let mut last_error: Option<ProxyError> = None;
|
||||
let mut attempts_used = 0u32;
|
||||
for attempt in 1..=self.connect_retry_attempts {
|
||||
let elapsed = connect_started_at.elapsed();
|
||||
if elapsed >= self.connect_budget {
|
||||
last_error = Some(ProxyError::ConnectionTimeout {
|
||||
addr: target.to_string(),
|
||||
});
|
||||
break;
|
||||
}
|
||||
let remaining_budget = self.connect_budget.saturating_sub(elapsed);
|
||||
let attempt_timeout = Duration::from_secs(DIRECT_CONNECT_TIMEOUT_SECS)
|
||||
.min(remaining_budget);
|
||||
if attempt_timeout.is_zero() {
|
||||
last_error = Some(ProxyError::ConnectionTimeout {
|
||||
addr: target.to_string(),
|
||||
});
|
||||
break;
|
||||
}
|
||||
attempts_used = attempt;
|
||||
self.stats.increment_upstream_connect_attempt_total();
|
||||
let start = Instant::now();
|
||||
match self
|
||||
.connect_via_upstream(&upstream, target, bind_rr.clone())
|
||||
.connect_via_upstream(idx, &upstream, target, bind_rr.clone(), attempt_timeout)
|
||||
.await
|
||||
{
|
||||
Ok((stream, egress)) => {
|
||||
@@ -609,9 +780,11 @@ impl UpstreamManager {
|
||||
|
||||
async fn connect_via_upstream(
|
||||
&self,
|
||||
upstream_id: usize,
|
||||
config: &UpstreamConfig,
|
||||
target: SocketAddr,
|
||||
bind_rr: Option<Arc<AtomicUsize>>,
|
||||
connect_timeout: Duration,
|
||||
) -> Result<(TcpStream, UpstreamEgressInfo)> {
|
||||
match &config.upstream_type {
|
||||
UpstreamType::Direct { interface, bind_addresses } => {
|
||||
@@ -640,7 +813,6 @@ impl UpstreamManager {
|
||||
let std_stream: std::net::TcpStream = socket.into();
|
||||
let stream = TcpStream::from_std(std_stream)?;
|
||||
|
||||
let connect_timeout = Duration::from_secs(DIRECT_CONNECT_TIMEOUT_SECS);
|
||||
match tokio::time::timeout(connect_timeout, stream.writable()).await {
|
||||
Ok(Ok(())) => {}
|
||||
Ok(Err(e)) => return Err(ProxyError::Io(e)),
|
||||
@@ -658,6 +830,7 @@ impl UpstreamManager {
|
||||
Ok((
|
||||
stream,
|
||||
UpstreamEgressInfo {
|
||||
upstream_id,
|
||||
route_kind: UpstreamRouteKind::Direct,
|
||||
local_addr,
|
||||
direct_bind_ip: bind_ip,
|
||||
@@ -667,7 +840,6 @@ impl UpstreamManager {
|
||||
))
|
||||
},
|
||||
UpstreamType::Socks4 { address, interface, user_id } => {
|
||||
let connect_timeout = Duration::from_secs(DIRECT_CONNECT_TIMEOUT_SECS);
|
||||
// Try to parse as SocketAddr first (IP:port), otherwise treat as hostname:port
|
||||
let mut stream = if let Ok(proxy_addr) = address.parse::<SocketAddr>() {
|
||||
// IP:port format - use socket with optional interface binding
|
||||
@@ -737,6 +909,7 @@ impl UpstreamManager {
|
||||
Ok((
|
||||
stream,
|
||||
UpstreamEgressInfo {
|
||||
upstream_id,
|
||||
route_kind: UpstreamRouteKind::Socks4,
|
||||
local_addr,
|
||||
direct_bind_ip: None,
|
||||
@@ -746,7 +919,6 @@ impl UpstreamManager {
|
||||
))
|
||||
},
|
||||
UpstreamType::Socks5 { address, interface, username, password } => {
|
||||
let connect_timeout = Duration::from_secs(DIRECT_CONNECT_TIMEOUT_SECS);
|
||||
// Try to parse as SocketAddr first (IP:port), otherwise treat as hostname:port
|
||||
let mut stream = if let Ok(proxy_addr) = address.parse::<SocketAddr>() {
|
||||
// IP:port format - use socket with optional interface binding
|
||||
@@ -818,6 +990,7 @@ impl UpstreamManager {
|
||||
Ok((
|
||||
stream,
|
||||
UpstreamEgressInfo {
|
||||
upstream_id,
|
||||
route_kind: UpstreamRouteKind::Socks5,
|
||||
local_addr,
|
||||
direct_bind_ip: None,
|
||||
@@ -880,7 +1053,7 @@ impl UpstreamManager {
|
||||
|
||||
let result = tokio::time::timeout(
|
||||
Duration::from_secs(DC_PING_TIMEOUT_SECS),
|
||||
self.ping_single_dc(upstream_config, Some(bind_rr.clone()), addr_v6)
|
||||
self.ping_single_dc(*upstream_idx, upstream_config, Some(bind_rr.clone()), addr_v6)
|
||||
).await;
|
||||
|
||||
let ping_result = match result {
|
||||
@@ -931,7 +1104,7 @@ impl UpstreamManager {
|
||||
|
||||
let result = tokio::time::timeout(
|
||||
Duration::from_secs(DC_PING_TIMEOUT_SECS),
|
||||
self.ping_single_dc(upstream_config, Some(bind_rr.clone()), addr_v4)
|
||||
self.ping_single_dc(*upstream_idx, upstream_config, Some(bind_rr.clone()), addr_v4)
|
||||
).await;
|
||||
|
||||
let ping_result = match result {
|
||||
@@ -994,7 +1167,7 @@ impl UpstreamManager {
|
||||
}
|
||||
let result = tokio::time::timeout(
|
||||
Duration::from_secs(DC_PING_TIMEOUT_SECS),
|
||||
self.ping_single_dc(upstream_config, Some(bind_rr.clone()), addr)
|
||||
self.ping_single_dc(*upstream_idx, upstream_config, Some(bind_rr.clone()), addr)
|
||||
).await;
|
||||
|
||||
let ping_result = match result {
|
||||
@@ -1065,12 +1238,21 @@ impl UpstreamManager {
|
||||
|
||||
async fn ping_single_dc(
|
||||
&self,
|
||||
upstream_id: usize,
|
||||
config: &UpstreamConfig,
|
||||
bind_rr: Option<Arc<AtomicUsize>>,
|
||||
target: SocketAddr,
|
||||
) -> Result<f64> {
|
||||
let start = Instant::now();
|
||||
let _ = self.connect_via_upstream(config, target, bind_rr).await?;
|
||||
let _ = self
|
||||
.connect_via_upstream(
|
||||
upstream_id,
|
||||
config,
|
||||
target,
|
||||
bind_rr,
|
||||
Duration::from_secs(DC_PING_TIMEOUT_SECS),
|
||||
)
|
||||
.await?;
|
||||
Ok(start.elapsed().as_secs_f64() * 1000.0)
|
||||
}
|
||||
|
||||
@@ -1242,7 +1424,13 @@ impl UpstreamManager {
|
||||
let start = Instant::now();
|
||||
let result = tokio::time::timeout(
|
||||
Duration::from_secs(HEALTH_CHECK_CONNECT_TIMEOUT_SECS),
|
||||
self.connect_via_upstream(&config, endpoint, Some(bind_rr.clone())),
|
||||
self.connect_via_upstream(
|
||||
i,
|
||||
&config,
|
||||
endpoint,
|
||||
Some(bind_rr.clone()),
|
||||
Duration::from_secs(HEALTH_CHECK_CONNECT_TIMEOUT_SECS),
|
||||
),
|
||||
)
|
||||
.await;
|
||||
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
[Unit]
|
||||
Description=Telemt
|
||||
After=network.target
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/bin
|
||||
WorkingDirectory=/etc/telemt
|
||||
ExecStart=/bin/telemt /etc/telemt.toml
|
||||
Restart=on-failure
|
||||
LimitNOFILE=65536
|
||||
LimitNOFILE=262144
|
||||
TasksMax=8192
|
||||
MemoryAccounting=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
403
tools/aesdiag.py
Normal file
403
tools/aesdiag.py
Normal file
@@ -0,0 +1,403 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
AES-CBC validation tool for telemt middle proxy logs with support for noop padding.
|
||||
|
||||
Parses log lines containing:
|
||||
- "ME diag: derived keys and handshake plaintext" (provides write_key, write_iv, hs_plain)
|
||||
- "ME diag: handshake ciphertext" (provides hs_cipher)
|
||||
|
||||
For each pair it:
|
||||
- Decrypts the ciphertext using the provided key and IV.
|
||||
- Compares the beginning of the decrypted data with hs_plain.
|
||||
- Attempts to identify the actual padding scheme (PKCS#7, zero padding, noop padding).
|
||||
- Re-encrypts with different paddings and reports mismatches block by block.
|
||||
- Accumulates statistics for final summary.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from Crypto.Cipher import AES
|
||||
|
||||
# Constants
|
||||
NOOP_FRAME = bytes([0x04, 0x00, 0x00, 0x00]) # noop frame used for padding
|
||||
|
||||
def hex_str_to_bytes(hex_str):
    """Decode a hex dump such as 'aa bb cc' (or 'aabbcc') into bytes.

    Space characters between digits are dropped before decoding, so both the
    spaced form found in the logs and a compact form are accepted.
    """
    compact = ''.join(hex_str.split(' '))
    return bytes.fromhex(compact)
|
||||
|
||||
def parse_params(line):
    """Collect `key=value` fields whose value is a space-separated hex string.

    A value is one or more two-digit lowercase hex bytes separated by single
    spaces. Returns a dict mapping each key to its raw hex text; if a key
    occurs more than once in the line, the last occurrence wins.
    """
    hex_field = r'(\w+)=((?:[0-9a-f]{2} )*[0-9a-f]{2})'
    # findall yields (key, value) tuples, which dict() consumes directly.
    return dict(re.findall(hex_field, line))
|
||||
|
||||
def pkcs7_pad(data, block_size=16):
    """Apply PKCS#7 padding to `data`.

    Appends N bytes, each of value N, where N is the distance to the next
    multiple of `block_size`. Input that is already block-aligned receives a
    full extra block, as PKCS#7 requires.
    """
    # len(data) % block_size lies in [0, block_size), so pad_len is always in
    # [1, block_size]; it can never be 0 and needs no special case.
    pad_len = block_size - (len(data) % block_size)
    return data + bytes([pad_len]) * pad_len
|
||||
|
||||
def zero_pad(data, block_size=16):
    """Extend `data` with zero bytes up to the next block boundary.

    Input whose length is already a multiple of `block_size` is returned
    unchanged (no extra block is added).
    """
    remainder = len(data) % block_size
    if remainder == 0:
        return data
    return data + bytes(block_size - remainder)
|
||||
|
||||
def noop_pad(data):
    """Append the fewest NOOP_FRAME copies that cover the gap to a 16-byte boundary.

    Block-aligned input is returned unchanged. Because each frame is 4 bytes,
    the result is block-aligned only when len(data) is a multiple of 4
    (remainders 4, 8 or 12 need 3, 2 or 1 frames). For any other length the
    gap is rounded up to whole frames, so the output overshoots the boundary
    and is NOT a multiple of 16.
    """
    block_size = 16
    frame_len = len(NOOP_FRAME)  # 4
    remainder = len(data) % block_size
    if remainder == 0:
        return data  # no padding needed
    gap = block_size - remainder
    # Ceiling division: equals gap // frame_len when the gap is a whole number
    # of frames, and rounds up otherwise.
    frame_count = (gap + frame_len - 1) // frame_len
    return data + NOOP_FRAME * frame_count
|
||||
|
||||
def unpad_pkcs7(data):
    """Strip PKCS#7 padding from `data` if it is present and well-formed.

    The last byte is read as the padding length N. The padding is removed only
    when 1 <= N <= 16, N does not exceed len(data), and the final N bytes all
    equal N. In every other case `data` is returned unchanged.
    """
    if not data:
        return data
    pad_len = data[-1]
    # Reject lengths outside the 16-byte block range, and lengths longer than
    # the buffer itself: slicing would silently clamp and the whole input would
    # be treated as padding.
    if pad_len < 1 or pad_len > 16 or pad_len > len(data):
        return data  # not valid PKCS#7, return as is
    # Every padding byte must equal pad_len.
    if all(b == pad_len for b in data[-pad_len:]):
        return data[:-pad_len]
    return data
|
||||
|
||||
def is_noop_padded(decrypted, plain_log):
    """Report whether the tail of `decrypted` is made purely of NOOP_FRAMEs.

    The tail is everything in `decrypted` past the first len(plain_log) bytes;
    only the length of `plain_log` is used, its contents are not compared.
    Returns True when the tail is non-empty, a multiple of 4 bytes long, and
    every 4-byte chunk equals NOOP_FRAME. Returns False otherwise.
    """
    tail = decrypted[len(plain_log):]
    if not tail or len(tail) % 4:
        return False
    return all(tail[pos:pos + 4] == NOOP_FRAME for pos in range(0, len(tail), 4))
|
||||
|
||||
def _print_first_block_diff(expected, computed, block_size=16):
    """Print the first ``block_size``-byte block where ``computed`` differs from ``expected``.

    Blocks are enumerated over ``expected``. If every such block matches but
    the two buffers have different lengths (for example PKCS#7 appending a
    whole extra block), the length mismatch is reported instead, so a failed
    comparison never goes unexplained.
    """
    for blk in range(len(expected) // block_size):
        start = blk * block_size
        exp = expected[start:start + block_size]
        comp = computed[start:start + block_size]
        if exp != comp:
            print(f" First difference in block {blk}:")
            print(f" expected : {exp.hex()}")
            print(f" computed : {comp.hex()}")
            return

    if len(expected) != len(computed):
        print(f" Blocks match but lengths differ: expected {len(expected)} bytes, "
              f"computed {len(computed)} bytes.")


def main():
    """Verify logged ME handshake encryption against the logged ciphertext.

    Reads log lines from standard input and collects two kinds of entries via
    ``parse_params``:

    * lines containing ``'ME diag: derived keys and handshake plaintext'``
      that provide ``write_key``, ``write_iv`` and ``hs_plain``;
    * lines containing ``'ME diag: handshake ciphertext'`` that provide
      ``hs_cipher``.

    Entries are paired by order of appearance. For each pair the function
    decrypts the ciphertext with AES-CBC, compares the result with the logged
    plaintext, inspects the trailing bytes (PKCS#7 / zeros / noop frames) and
    re-encrypts the plaintext under each padding scheme to see which one
    reproduces the ciphertext. A statistics summary and a heuristic conclusion
    are printed at the end.

    If no pair can be formed, a warning is printed and the function returns
    without printing a summary; with zero tests every "all tests passed"
    comparison would be vacuously true and the verdict would be misleading.
    """
    derived_list = []  # entries from "derived keys and handshake plaintext"
    cipher_list = []   # entries from "handshake ciphertext"

    for line in sys.stdin:
        if 'ME diag: derived keys and handshake plaintext' in line:
            params = parse_params(line)
            if all(k in params for k in ('write_key', 'write_iv', 'hs_plain')):
                derived_list.append(params)
        elif 'ME diag: handshake ciphertext' in line:
            params = parse_params(line)
            if 'hs_cipher' in params:
                cipher_list.append(params)

    # Warn about count mismatch but process as many pairs as possible
    n_pairs = min(len(derived_list), len(cipher_list))
    if len(derived_list) != len(cipher_list):
        print(f"\n[WARN] Number of derived entries ({len(derived_list)}) "
              f"differs from cipher entries ({len(cipher_list)}). "
              f"Processing first {n_pairs} pairs.\n")

    # Nothing to check: bail out instead of reporting a vacuous success.
    if n_pairs == 0:
        print("[WARN] No complete derived/cipher diagnostic pairs found in input - nothing to verify.")
        return

    # Statistics accumulators
    stats = {
        'total': n_pairs,
        'key_length_ok': 0,
        'iv_length_ok': 0,
        'cipher_aligned': 0,
        'decryption_match_start': 0,     # first bytes equal hs_plain
        'pkcs7_after_unpad_matches': 0,  # after removing PKCS7, equals hs_plain
        'extra_bytes_all_zero': 0,       # extra bytes after hs_plain are zero
        'extra_bytes_noop': 0,           # extra bytes are noop frames
        'pkcs7_encrypt_ok': 0,           # re-encryption with PKCS7 matches ciphertext
        'zero_encrypt_ok': 0,            # re-encryption with zero padding matches
        'noop_encrypt_ok': 0,            # re-encryption with noop padding matches
        'no_padding_encrypt_ok': 0,      # only if plaintext multiple of 16 and matches
        'no_padding_applicable': 0,      # number of tests where plaintext len %16 ==0
    }

    detailed_results = []  # store per-test summary for final heuristic

    for idx, (der, ciph) in enumerate(zip(derived_list, cipher_list), 1):
        print(f"\n{'='*60}")
        print(f"Test #{idx}")
        print(f"{'='*60}")

        # Local stats for this test
        test_stats = defaultdict(bool)

        try:
            key = hex_str_to_bytes(der['write_key'])
            iv = hex_str_to_bytes(der['write_iv'])
            plain_log = hex_str_to_bytes(der['hs_plain'])
            ciphertext = hex_str_to_bytes(ciph['hs_cipher'])

            # Basic sanity checks
            print(f"[INFO] Key length : {len(key)} bytes (expected 32)")
            print(f"[INFO] IV length : {len(iv)} bytes (expected 16)")
            print(f"[INFO] hs_plain length : {len(plain_log)} bytes")
            print(f"[INFO] hs_cipher length : {len(ciphertext)} bytes")

            if len(key) == 32:
                stats['key_length_ok'] += 1
                test_stats['key_ok'] = True
            else:
                print("[WARN] Key length is not 32 bytes – AES-256 requires 32-byte key.")

            if len(iv) == 16:
                stats['iv_length_ok'] += 1
                test_stats['iv_ok'] = True
            else:
                print("[WARN] IV length is not 16 bytes – AES-CBC requires 16-byte IV.")

            if len(ciphertext) % 16 == 0:
                stats['cipher_aligned'] += 1
                test_stats['cipher_aligned'] = True
            else:
                print("[ERROR] Ciphertext length is not a multiple of 16 – invalid AES-CBC block alignment.")
                # Skip further processing for this test
                detailed_results.append(test_stats)
                continue

            # --- Decryption test ---
            cipher_dec = AES.new(key, AES.MODE_CBC, iv)
            decrypted = cipher_dec.decrypt(ciphertext)
            print(f"[INFO] Decrypted ({len(decrypted)} bytes): {decrypted.hex()}")

            # Compare beginning with hs_plain
            match_len = min(len(plain_log), len(decrypted))
            if decrypted[:match_len] == plain_log[:match_len]:
                print(f"[OK] First {match_len} bytes match hs_plain.")
                stats['decryption_match_start'] += 1
                test_stats['decrypt_start_ok'] = True
            else:
                print("[FAIL] First bytes do NOT match hs_plain.")
                for i in range(match_len):
                    if decrypted[i] != plain_log[i]:
                        print(f" First mismatch at byte {i}: hs_plain={plain_log[i]:02x}, decrypted={decrypted[i]:02x}")
                        break
                test_stats['decrypt_start_ok'] = False

            # --- Try to identify actual padding ---
            # Remove possible PKCS#7 padding from decrypted data
            decrypted_unpadded = unpad_pkcs7(decrypted)
            if decrypted_unpadded != decrypted:
                print(f"[INFO] After removing PKCS#7 padding: {len(decrypted_unpadded)} bytes left.")
                if decrypted_unpadded == plain_log:
                    print("[OK] Decrypted data with PKCS#7 removed exactly matches hs_plain.")
                    stats['pkcs7_after_unpad_matches'] += 1
                    test_stats['pkcs7_unpad_matches'] = True
                else:
                    print("[INFO] Decrypted (PKCS#7 removed) does NOT match hs_plain.")
                    test_stats['pkcs7_unpad_matches'] = False
            else:
                print("[INFO] No valid PKCS#7 padding detected in decrypted data.")
                test_stats['pkcs7_unpad_matches'] = False

            # Check if the extra bytes after hs_plain in decrypted are all zero (zero padding)
            extra = decrypted[len(plain_log):]
            if extra and all(b == 0 for b in extra):
                print("[INFO] Extra bytes after hs_plain are all zeros – likely zero padding.")
                stats['extra_bytes_all_zero'] += 1
                test_stats['extra_zero'] = True
            else:
                test_stats['extra_zero'] = False

            # Check for noop padding in extra bytes
            if is_noop_padded(decrypted, plain_log):
                print(f"[OK] Extra bytes after hs_plain consist of noop frames ({NOOP_FRAME.hex()}).")
                stats['extra_bytes_noop'] += 1
                test_stats['extra_noop'] = True
            else:
                test_stats['extra_noop'] = False
                if extra:
                    print(f"[INFO] Extra bytes after hs_plain (hex): {extra.hex()}")

            # --- Re-encryption tests ---
            # A fresh cipher object is created for every scheme because a CBC
            # cipher instance carries chaining state between calls.

            # PKCS#7
            padded_pkcs7 = pkcs7_pad(plain_log)
            cipher_enc = AES.new(key, AES.MODE_CBC, iv)
            computed_pkcs7 = cipher_enc.encrypt(padded_pkcs7)
            if computed_pkcs7 == ciphertext:
                print("[OK] PKCS#7 padding produces the expected ciphertext.")
                stats['pkcs7_encrypt_ok'] += 1
                test_stats['pkcs7_enc_ok'] = True
            else:
                print("[FAIL] PKCS#7 padding does NOT match the ciphertext.")
                test_stats['pkcs7_enc_ok'] = False
                # Show block where first difference occurs
                _print_first_block_diff(ciphertext, computed_pkcs7)

            # Zero padding
            padded_zero = zero_pad(plain_log)
            # Ensure multiple of 16
            if len(padded_zero) % 16 != 0:
                padded_zero += bytes(16 - (len(padded_zero) % 16))
            cipher_enc_zero = AES.new(key, AES.MODE_CBC, iv)
            computed_zero = cipher_enc_zero.encrypt(padded_zero)
            if computed_zero == ciphertext:
                print("[OK] Zero padding produces the expected ciphertext.")
                stats['zero_encrypt_ok'] += 1
                test_stats['zero_enc_ok'] = True
            else:
                print("[INFO] Zero padding does NOT match (expected, unless log used PKCS#7).")
                test_stats['zero_enc_ok'] = False

            # Noop padding
            padded_noop = noop_pad(plain_log)
            cipher_enc_noop = AES.new(key, AES.MODE_CBC, iv)
            computed_noop = cipher_enc_noop.encrypt(padded_noop)
            if computed_noop == ciphertext:
                print("[OK] Noop padding produces the expected ciphertext.")
                stats['noop_encrypt_ok'] += 1
                test_stats['noop_enc_ok'] = True
            else:
                print("[FAIL] Noop padding does NOT match the ciphertext.")
                test_stats['noop_enc_ok'] = False
                # Show block difference if needed
                _print_first_block_diff(ciphertext, computed_noop)

            # No padding (only possible if plaintext is already multiple of 16)
            if len(plain_log) % 16 == 0:
                stats['no_padding_applicable'] += 1
                cipher_enc_nopad = AES.new(key, AES.MODE_CBC, iv)
                computed_nopad = cipher_enc_nopad.encrypt(plain_log)
                if computed_nopad == ciphertext:
                    print("[OK] No padding (plaintext multiple of 16) matches.")
                    stats['no_padding_encrypt_ok'] += 1
                    test_stats['no_pad_enc_ok'] = True
                else:
                    print("[INFO] No padding does NOT match.")
                    test_stats['no_pad_enc_ok'] = False
            else:
                print("[INFO] Skipping no‑padding test because plaintext length is not a multiple of 16.")

        except Exception as e:
            # Best-effort diagnostics: one broken entry must not abort the run.
            print(f"[EXCEPTION] {e}")
            test_stats['exception'] = True

        detailed_results.append(test_stats)

    # --- Final statistics and heuristic summary ---
    total = stats['total']

    print("\n" + "="*60)
    print("STATISTICS SUMMARY")
    print("="*60)
    print(f"Total tests processed : {total}")
    print(f"Key length OK (32) : {stats['key_length_ok']}/{total}")
    print(f"IV length OK (16) : {stats['iv_length_ok']}/{total}")
    print(f"Ciphertext 16-byte aligned : {stats['cipher_aligned']}/{total}")
    print(f"Decryption starts with hs_plain : {stats['decryption_match_start']}/{total}")
    print(f"After PKCS#7 removal matches : {stats['pkcs7_after_unpad_matches']}/{total}")
    print(f"Extra bytes after hs_plain are 0 : {stats['extra_bytes_all_zero']}/{total}")
    print(f"Extra bytes are noop frames : {stats['extra_bytes_noop']}/{total}")
    print(f"PKCS#7 re-encryption OK : {stats['pkcs7_encrypt_ok']}/{total}")
    print(f"Zero padding re-encryption OK : {stats['zero_encrypt_ok']}/{total}")
    print(f"Noop padding re-encryption OK : {stats['noop_encrypt_ok']}/{total}")
    if stats['no_padding_applicable'] > 0:
        print(f"No-padding applicable tests : {stats['no_padding_applicable']}")
        print(f"No-padding re-encryption OK : {stats['no_padding_encrypt_ok']}/{stats['no_padding_applicable']}")

    # Heuristic: determine most likely padding
    print("\n" + "="*60)
    print("HEURISTIC CONCLUSION")
    print("="*60)

    if stats['decryption_match_start'] == total:
        print("✓ All tests: first bytes of decrypted data match hs_plain → keys and IV are correct.")
    else:
        print("✗ Some tests: first bytes mismatch → possible key/IV issues or corrupted ciphertext.")

    # Guess padding based on re-encryption success and extra bytes
    no_padding_all_ok = (stats['no_padding_applicable'] == total
                         and stats['no_padding_encrypt_ok'] == total)

    candidates = []
    if stats['pkcs7_encrypt_ok'] == total:
        candidates.append("PKCS#7")
    if stats['zero_encrypt_ok'] == total:
        candidates.append("zero padding")
    if stats['noop_encrypt_ok'] == total:
        candidates.append("noop padding")
    if no_padding_all_ok:
        candidates.append("no padding")

    if len(candidates) == 1:
        print(f"✓ All tests consistent with padding scheme: {candidates[0]}.")
    elif len(candidates) > 1:
        print(f"⚠ Multiple padding schemes succeed in all tests: {', '.join(candidates)}. This is unusual.")
    else:
        # No scheme succeeded in all tests – look at ratios
        # (total is non-zero here thanks to the early return above).
        print("Mixed padding results:")
        pkcs7_ratio = stats['pkcs7_encrypt_ok'] / total
        zero_ratio = stats['zero_encrypt_ok'] / total
        noop_ratio = stats['noop_encrypt_ok'] / total
        print(f" PKCS#7 success = {stats['pkcs7_encrypt_ok']}/{total} ({pkcs7_ratio*100:.1f}%)")
        print(f" Zero success = {stats['zero_encrypt_ok']}/{total} ({zero_ratio*100:.1f}%)")
        print(f" Noop success = {stats['noop_encrypt_ok']}/{total} ({noop_ratio*100:.1f}%)")

        if noop_ratio > max(pkcs7_ratio, zero_ratio):
            print("→ Noop padding is most frequent. Check if extra bytes are indeed noop frames.")
        elif pkcs7_ratio > zero_ratio:
            print("→ PKCS#7 is most frequent, but fails in some tests.")
        elif zero_ratio > pkcs7_ratio:
            print("→ Zero padding is most frequent, but fails in some tests.")
        else:
            print("→ No clear winner; possibly a different padding scheme or random data.")

    # Additional heuristics based on extra bytes
    if stats['extra_bytes_noop'] == total:
        print("✓ All tests: extra bytes after hs_plain are noop frames → strongly indicates noop padding.")
    if stats['extra_bytes_all_zero'] == total:
        print("✓ All tests: extra bytes are zeros → suggests zero padding.")

    # Final health check
    if stats['decryption_match_start'] == total and candidates:
        print("\n✅ OVERALL: All tests consistent. The encryption parameters and padding are correct.")
    else:
        print("\n⚠️ OVERALL: Inconsistencies detected. Review the detailed output for failing tests.")


if __name__ == '__main__':
    main()
|
||||
@@ -47,6 +47,54 @@ zabbix_export:
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Server connections'
|
||||
- uuid: 2af8ff0f27e4408db3f9798dc3141457
|
||||
name: 'Full forensic desync logs emitted'
|
||||
type: DEPENDENT
|
||||
key: telemt.desync_full_logged_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_desync_full_logged_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: f4439948a49f4b1d85c3eeee963259bc
|
||||
name: 'Suppressed desync forensic events'
|
||||
type: DEPENDENT
|
||||
key: telemt.desync_suppressed_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_desync_suppressed_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 721627b8c10a414a82be1e08873604c1
|
||||
name: 'Total crypto-desync detections'
|
||||
type: DEPENDENT
|
||||
key: telemt.desync_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_desync_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 1618272cf68e44509425f5fab029db7b
|
||||
name: 'Handshake timeouts total'
|
||||
type: DEPENDENT
|
||||
@@ -64,6 +112,152 @@ zabbix_export:
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Server connections'
|
||||
- uuid: 4e5c0d10a4494c959445b4cd7a2e696e
|
||||
name: 'ME CRC mismatches'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_crc_mismatch_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_crc_mismatch_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Middle-End connections'
|
||||
- uuid: 21a4a48b6e98457d87c56c3ae7b56c55
|
||||
name: 'ME endpoint quarantines due to rapid flaps'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_endpoint_quarantine_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_endpoint_quarantine_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: c8ffc30dc3d94a6d9085ac79413fbdd6
|
||||
name: 'Runtime ME writer floor policy mode'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_floor_mode
|
||||
delay: '0'
|
||||
value_type: TEXT
|
||||
trends: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- 'telemt_me_floor_mode == 1'
|
||||
- label
|
||||
- mode
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 4814b52d5d184f63b64654e7635bdf6a
|
||||
name: 'ME handshake rejects from upstream'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_handshake_reject_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_handshake_reject_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 72d11caecefb4472b6c3e07f1ee90053
|
||||
name: 'Hardswap cycles that reused an existing pending generation'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_hardswap_pending_reuse_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_hardswap_pending_reuse_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 447030854e8840a393874f54e25861d5
|
||||
name: 'Pending hardswap generations reset by TTL expiration'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_hardswap_pending_ttl_expired_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_hardswap_pending_ttl_expired_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 47f55dd7d9394405b1c0eba6e6eb3e5c
|
||||
name: 'ME idle writers closed by peer'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_idle_close_by_peer_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_idle_close_by_peer_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 9e4598efbfe246fab9360270002b0cfa
|
||||
name: 'ME KDF input drift detections'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_kdf_drift_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_kdf_drift_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 565cc9780c5541bfb7acbb1f4973b5fc
|
||||
name: 'ME KDF client-port changes with stable non-port material'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_kdf_port_only_drift_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_kdf_port_only_drift_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: fb95391c7f894e3eb6984b92885813d2
|
||||
name: 'ME keepalive send failures'
|
||||
type: DEPENDENT
|
||||
@@ -81,6 +275,22 @@ zabbix_export:
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Middle-End connections'
|
||||
- uuid: 7b5995401195430e9f9e02e5dd8c3313
|
||||
name: 'ME keepalive pong replies'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_keepalive_pong_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_keepalive_pong_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Middle-End connections'
|
||||
- uuid: fb95391c7f894e3eb6984b92885813c2
|
||||
name: 'ME keepalive frames sent'
|
||||
type: DEPENDENT
|
||||
@@ -98,6 +308,38 @@ zabbix_export:
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Middle-End connections'
|
||||
- uuid: da5af5fd691d4f40bc6cad78b4758eac
|
||||
name: 'ME keepalive ping timeouts'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_keepalive_timeout_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_keepalive_timeout_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Middle-End connections'
|
||||
- uuid: 50b45e494d584a7b86fca8b80c727411
|
||||
name: 'ME reader EOF terminations'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_reader_eof_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_reader_eof_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: fb95391c7f894e3eb6984b92885811a2
|
||||
name: 'ME reconnect attempts'
|
||||
type: DEPENDENT
|
||||
@@ -132,6 +374,470 @@ zabbix_export:
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Middle-End connections'
|
||||
- uuid: 6288b537b7964aadb8a483abd716855a
|
||||
name: 'Immediate ME refill failures'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_refill_failed_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_refill_failed_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 8450bdb48f9b4505beb8fdfc665b37c5
|
||||
name: 'Immediate ME refill skips due to inflight dedup'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_refill_skipped_inflight_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_refill_skipped_inflight_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: cb192264c03a40578140863970333515
|
||||
name: 'Immediate ME refill runs started'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_refill_triggered_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_refill_triggered_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 8f46b374332848fba0daba72e17eaad0
|
||||
name: 'ME route drops: channel closed'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_route_drop_channel_closed_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_route_drop_channel_closed_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Middle-End connections'
|
||||
- uuid: de5fa7a316554d099bcf5e000b33bfed
|
||||
name: 'ME route drops: no conn'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_route_drop_no_conn_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_route_drop_no_conn_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Middle-End connections'
|
||||
- uuid: d9e1630ce38946f7a8d179187793f12c
  name: 'ME route drops: queue full by adaptive profile'
  type: DEPENDENT
  key: telemt.me_route_drop_queue_full_profile_total
  delay: '0'
  # The preprocessing step extracts the `profile` label, i.e. a text value,
  # so the item must be stored as TEXT without trends, as the other
  # label-extracting items in this template do.
  value_type: TEXT
  trends: '0'
  preprocessing:
    - type: PROMETHEUS_PATTERN
      parameters:
        # NOTE(review): `== 1` on a `_total` counter only matches while the
        # counter equals exactly 1 - confirm whether a per-profile numeric
        # item (`value` output with a `{profile="..."}` filter) was intended.
        - 'telemt_me_route_drop_queue_full_profile_total == 1'
        - label
        - profile
  master_item:
    key: telemt.prom_metrics
  tags:
    - tag: Application
      value: 'Telemt other'
|
||||
- uuid: d5caefb8978e4f3eac4dcdecd4655c46
|
||||
name: 'ME route drops: queue full'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_route_drop_queue_full_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_route_drop_queue_full_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: f682298c2dfc46dda45771a58faa9ffa
|
||||
name: 'Service RPC_CLOSE_EXT sent after activity signals'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_rpc_proxy_req_signal_close_sent_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_rpc_proxy_req_signal_close_sent_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 5db4bdc93959473eade9281c221e34b6
|
||||
name: 'Service RPC_PROXY_REQ activity signal failures'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_rpc_proxy_req_signal_failed_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_rpc_proxy_req_signal_failed_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 4e75611bc3854415b63a1863e9bf176f
|
||||
name: 'Service RPC_PROXY_REQ responses observed'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_rpc_proxy_req_signal_response_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_rpc_proxy_req_signal_response_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: ecbffb29f2784839bea0ce2a38393438
|
||||
name: 'Service RPC_PROXY_REQ activity signals sent'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_rpc_proxy_req_signal_sent_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_rpc_proxy_req_signal_sent_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 078eff3deeec435597f0c531457bb906
|
||||
name: 'Service RPC_PROXY_REQ skipped due to missing writer metadata'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_rpc_proxy_req_signal_skipped_no_meta_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_rpc_proxy_req_signal_skipped_no_meta_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 7429ffbd94a340d7a600bc1690eb57e7
|
||||
name: 'ME sequence mismatches'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_seq_mismatch_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_seq_mismatch_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 0f1f77ae34df4a48b36ad263359b5ad3
|
||||
name: 'Single-endpoint DC outage transitions to active state'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_single_endpoint_outage_enter_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_single_endpoint_outage_enter_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 63d44ef672ff4df288914eb98f6fa72c
|
||||
name: 'Single-endpoint DC outage recovery transitions'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_single_endpoint_outage_exit_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_single_endpoint_outage_exit_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 1b72ff95f1ba4fb2924aa3a129b22f4d
|
||||
name: 'Reconnect attempts performed during single-endpoint outages'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_single_endpoint_outage_reconnect_attempt_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_single_endpoint_outage_reconnect_attempt_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 466bb352d55946a0bb78efc63e1ed71e
|
||||
name: 'Successful reconnect attempts during single-endpoint outages'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_single_endpoint_outage_reconnect_success_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_single_endpoint_outage_reconnect_success_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 295b4a519a4d46f7b1ddbdf5b5268751
|
||||
name: 'Outage reconnect attempts that bypassed quarantine'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_single_endpoint_quarantine_bypass_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_single_endpoint_quarantine_bypass_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: bffa4861f83f4445bb0b2259e100e04c
|
||||
name: 'Shadow rotations skipped because endpoint is quarantined'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_single_endpoint_shadow_rotate_skipped_quarantine_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_single_endpoint_shadow_rotate_skipped_quarantine_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: f80ce02b50824f8ea0ddabac9ff97757
|
||||
name: 'Successful periodic shadow rotations for single-endpoint DC groups'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_single_endpoint_shadow_rotate_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_single_endpoint_shadow_rotate_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: bf2a0ff89c314f78904aa43351601111
|
||||
name: 'Total ME writer removals'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_writer_removed_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_writer_removed_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 0d12ea02187745eba55498dfb16daa5c
|
||||
name: 'Unexpected writer removals not yet compensated by restore'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_writer_removed_unexpected_minus_restored_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_writer_removed_unexpected_minus_restored_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 644278e7f87947e1a49483ba4487e32b
|
||||
name: 'Unexpected ME writer removals that triggered refill'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_writer_removed_unexpected_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_writer_removed_unexpected_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: a6c24dfc85d643dab1c81fc1e63fe3cc
|
||||
name: 'Refilled ME writer restored via fallback endpoint'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_writer_restored_fallback_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_writer_restored_fallback_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: d7d0a78ca6da4bb9b4a0991fd83149cf
|
||||
name: 'Refilled ME writer restored on the same endpoint'
|
||||
type: DEPENDENT
|
||||
key: telemt.me_writer_restored_same_endpoint_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_me_writer_restored_same_endpoint_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: beb906ab89564cf9adfbb7b1d4553c44
|
||||
name: 'Active draining ME writers'
|
||||
type: DEPENDENT
|
||||
key: telemt.pool_drain_active
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_pool_drain_active
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 2f0926e00d7a4e5aa1783cb33b1192ea
|
||||
name: 'Forced close events for draining writers'
|
||||
type: DEPENDENT
|
||||
key: telemt.pool_force_close_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_pool_force_close_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 70d0b4da6079435ebe978e99bda8f1d3
|
||||
name: 'Stale writer fallback picks for new binds'
|
||||
type: DEPENDENT
|
||||
key: telemt.pool_stale_pick_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_pool_stale_pick_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 8a1d240b9b554905a8add9bf730bf1f4
|
||||
name: 'Successful ME pool swaps'
|
||||
type: DEPENDENT
|
||||
key: telemt.pool_swap_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_pool_swap_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 991b1858e3f94b3098ff0f84859efc41
|
||||
name: 'Prometheus metrics'
|
||||
type: HTTP_AGENT
|
||||
@@ -139,11 +845,158 @@ zabbix_export:
|
||||
value_type: TEXT
|
||||
trends: '0'
|
||||
url: '{$TELEMT_URL}'
|
||||
- uuid: cef2547bb9464d10b11b6c19beac089d
|
||||
name: 'Invalid secure frame lengths'
|
||||
type: DEPENDENT
|
||||
key: telemt.secure_padding_invalid_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_secure_padding_invalid_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: c164d7b59bdc4429a23b908558de8cf4
|
||||
name: 'Runtime core telemetry switch'
|
||||
type: DEPENDENT
|
||||
key: telemt.telemetry_core_enabled
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_telemetry_core_enabled
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: ff16438417d842178d26033d13520833
|
||||
name: 'Runtime ME telemetry level flag'
|
||||
type: DEPENDENT
|
||||
key: telemt.telemetry_me_level
|
||||
delay: '0'
|
||||
value_type: TEXT
|
||||
trends: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- 'telemt_telemetry_me_level == 1'
|
||||
- label
|
||||
- level
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 9fec0bb7c3c84ada96668b74d5849556
|
||||
name: 'Runtime per-user telemetry switch'
|
||||
type: DEPENDENT
|
||||
key: telemt.telemetry_user_enabled
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_telemetry_user_enabled
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 378b765aa7bc4a4ea87d3bc876c50d12
|
||||
name: 'User-labeled metric series suppression flag'
|
||||
type: DEPENDENT
|
||||
key: telemt.telemetry_user_series_suppressed
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_telemetry_user_series_suppressed
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 17972d992fa84fc1b53fdefed123ccd8
|
||||
name: 'Upstream connect attempts across all requests'
|
||||
type: DEPENDENT
|
||||
key: telemt.upstream_connect_attempt_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_upstream_connect_attempt_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 38627dd1cb7145e180d111bdee1d2c23
|
||||
name: 'Hard errors that triggered upstream connect failfast'
|
||||
type: DEPENDENT
|
||||
key: telemt.upstream_connect_failfast_hard_error_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_upstream_connect_failfast_hard_error_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 0ffd4c35b6734c83bd77c59f30bf3246
|
||||
name: 'Failed upstream connect request cycles'
|
||||
type: DEPENDENT
|
||||
key: telemt.upstream_connect_fail_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_upstream_connect_fail_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: 7da255f4f38c4095921bc876d16d3586
|
||||
name: 'Successful upstream connect request cycles'
|
||||
type: DEPENDENT
|
||||
key: telemt.upstream_connect_success_total
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- telemt_upstream_connect_success_total
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Telemt other'
|
||||
- uuid: fb95391c7f894e3eb6984b92885813b2
|
||||
name: 'Telemt Uptime'
|
||||
type: DEPENDENT
|
||||
key: telemt.uptime
|
||||
delay: '0'
|
||||
value_type: FLOAT
|
||||
trends: '0'
|
||||
units: s
|
||||
preprocessing:
|
||||
@@ -180,6 +1033,56 @@ zabbix_export:
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Users connections'
|
||||
- uuid: f7ad02d1635542b584bba5941375ae41
|
||||
name: 'Current number of unique active IPs by {#TELEMT_USER}'
|
||||
type: DEPENDENT
|
||||
key: 'telemt.ips_current_[{#TELEMT_USER}]'
|
||||
delay: '0'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- 'telemt_user_unique_ips_current{user="{#TELEMT_USER}"}'
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Users IPs'
|
||||
- uuid: 100b09bf1cff420495c5c105bdb0af6c
|
||||
name: 'Configured unique IP limit to {#TELEMT_USER}'
|
||||
type: DEPENDENT
|
||||
key: 'telemt.ips_limit_[{#TELEMT_USER}]'
|
||||
delay: '0'
|
||||
description: '0 means unlimited'
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- 'telemt_user_unique_ips_limit{user="{#TELEMT_USER}"}'
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Users IPs'
|
||||
- uuid: ef3ac8f5c5d746bbaa4b0b698ba0d9f6
|
||||
name: 'Unique IP usage ratio by {#TELEMT_USER}'
|
||||
type: DEPENDENT
|
||||
key: 'telemt.ips_utilization_[{#TELEMT_USER}]'
|
||||
delay: '0'
|
||||
value_type: FLOAT
|
||||
preprocessing:
|
||||
- type: PROMETHEUS_PATTERN
|
||||
parameters:
|
||||
- 'telemt_user_unique_ips_utilization{user="{#TELEMT_USER}"}'
|
||||
- value
|
||||
- ''
|
||||
master_item:
|
||||
key: telemt.prom_metrics
|
||||
tags:
|
||||
- tag: Application
|
||||
value: 'Users IPs'
|
||||
- uuid: 3ccce91ab5d54b4d972280c7b7bda910
|
||||
name: 'Messages received from {#TELEMT_USER}'
|
||||
type: DEPENDENT
|
||||
|
||||
Reference in New Issue
Block a user