mirror of
https://github.com/telemt/telemt.git
synced 2026-04-20 12:04:10 +03:00
Improve ME downstream retries for queued fairness backlog
This commit is contained in:
@@ -7,7 +7,6 @@ mod model;
|
|||||||
mod pressure;
|
mod pressure;
|
||||||
mod scheduler;
|
mod scheduler;
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
pub(crate) use model::PressureState;
|
pub(crate) use model::PressureState;
|
||||||
pub(crate) use model::{AdmissionDecision, DispatchAction, DispatchFeedback, SchedulerDecision};
|
pub(crate) use model::{AdmissionDecision, DispatchAction, DispatchFeedback, SchedulerDecision};
|
||||||
pub(crate) use scheduler::{WorkerFairnessConfig, WorkerFairnessSnapshot, WorkerFairnessState};
|
pub(crate) use scheduler::{WorkerFairnessConfig, WorkerFairnessSnapshot, WorkerFairnessState};
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use std::collections::HashMap;
|
|||||||
use std::io::ErrorKind;
|
use std::io::ErrorKind;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
|
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
|
||||||
use std::time::Instant;
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
use bytes::{Bytes, BytesMut};
|
use bytes::{Bytes, BytesMut};
|
||||||
use tokio::io::AsyncReadExt;
|
use tokio::io::AsyncReadExt;
|
||||||
@@ -21,8 +21,8 @@ use crate::stats::Stats;
|
|||||||
|
|
||||||
use super::codec::{RpcChecksumMode, WriterCommand, rpc_crc};
|
use super::codec::{RpcChecksumMode, WriterCommand, rpc_crc};
|
||||||
use super::fairness::{
|
use super::fairness::{
|
||||||
AdmissionDecision, DispatchAction, DispatchFeedback, SchedulerDecision, WorkerFairnessConfig,
|
AdmissionDecision, DispatchAction, DispatchFeedback, PressureState, SchedulerDecision,
|
||||||
WorkerFairnessSnapshot, WorkerFairnessState,
|
WorkerFairnessConfig, WorkerFairnessSnapshot, WorkerFairnessState,
|
||||||
};
|
};
|
||||||
use super::registry::RouteResult;
|
use super::registry::RouteResult;
|
||||||
use super::{ConnRegistry, MeResponse};
|
use super::{ConnRegistry, MeResponse};
|
||||||
@@ -45,10 +45,22 @@ fn is_data_route_queue_full(result: RouteResult) -> bool {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn should_close_on_queue_full_streak(streak: u8) -> bool {
|
fn should_close_on_queue_full_streak(streak: u8, pressure_state: PressureState) -> bool {
|
||||||
|
if pressure_state < PressureState::Shedding {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
streak >= DATA_ROUTE_QUEUE_FULL_STARVATION_THRESHOLD
|
streak >= DATA_ROUTE_QUEUE_FULL_STARVATION_THRESHOLD
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn should_schedule_fairness_retry(snapshot: &WorkerFairnessSnapshot) -> bool {
|
||||||
|
snapshot.total_queued_bytes > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fairness_retry_delay(route_wait_ms: u64) -> Duration {
|
||||||
|
Duration::from_millis(route_wait_ms.max(1))
|
||||||
|
}
|
||||||
|
|
||||||
async fn route_data_with_retry(
|
async fn route_data_with_retry(
|
||||||
reg: &ConnRegistry,
|
reg: &ConnRegistry,
|
||||||
conn_id: u64,
|
conn_id: u64,
|
||||||
@@ -157,7 +169,7 @@ async fn drain_fairness_scheduler(
|
|||||||
break;
|
break;
|
||||||
};
|
};
|
||||||
let cid = candidate.frame.conn_id;
|
let cid = candidate.frame.conn_id;
|
||||||
let _pressure_state = candidate.pressure_state;
|
let pressure_state = candidate.pressure_state;
|
||||||
let _flow_class = candidate.flow_class;
|
let _flow_class = candidate.flow_class;
|
||||||
let routed = route_data_with_retry(
|
let routed = route_data_with_retry(
|
||||||
reg,
|
reg,
|
||||||
@@ -176,7 +188,7 @@ async fn drain_fairness_scheduler(
|
|||||||
if is_data_route_queue_full(routed) {
|
if is_data_route_queue_full(routed) {
|
||||||
let streak = data_route_queue_full_streak.entry(cid).or_insert(0);
|
let streak = data_route_queue_full_streak.entry(cid).or_insert(0);
|
||||||
*streak = streak.saturating_add(1);
|
*streak = streak.saturating_add(1);
|
||||||
if should_close_on_queue_full_streak(*streak) {
|
if should_close_on_queue_full_streak(*streak, pressure_state) {
|
||||||
fairness.remove_flow(cid);
|
fairness.remove_flow(cid);
|
||||||
data_route_queue_full_streak.remove(&cid);
|
data_route_queue_full_streak.remove(&cid);
|
||||||
reg.unregister(cid).await;
|
reg.unregister(cid).await;
|
||||||
@@ -231,10 +243,33 @@ pub(crate) async fn reader_loop(
|
|||||||
let mut fairness_snapshot = fairness.snapshot();
|
let mut fairness_snapshot = fairness.snapshot();
|
||||||
loop {
|
loop {
|
||||||
let mut tmp = [0u8; 65_536];
|
let mut tmp = [0u8; 65_536];
|
||||||
|
let backlog_retry_enabled = should_schedule_fairness_retry(&fairness_snapshot);
|
||||||
|
let backlog_retry_delay =
|
||||||
|
fairness_retry_delay(reader_route_data_wait_ms.load(Ordering::Relaxed));
|
||||||
|
let mut retry_only = false;
|
||||||
let n = tokio::select! {
|
let n = tokio::select! {
|
||||||
res = rd.read(&mut tmp) => res.map_err(ProxyError::Io)?,
|
res = rd.read(&mut tmp) => res.map_err(ProxyError::Io)?,
|
||||||
|
_ = tokio::time::sleep(backlog_retry_delay), if backlog_retry_enabled => {
|
||||||
|
retry_only = true;
|
||||||
|
0usize
|
||||||
|
},
|
||||||
_ = cancel.cancelled() => return Ok(()),
|
_ = cancel.cancelled() => return Ok(()),
|
||||||
};
|
};
|
||||||
|
if retry_only {
|
||||||
|
let route_wait_ms = reader_route_data_wait_ms.load(Ordering::Relaxed);
|
||||||
|
drain_fairness_scheduler(
|
||||||
|
&mut fairness,
|
||||||
|
reg.as_ref(),
|
||||||
|
&tx,
|
||||||
|
&mut data_route_queue_full_streak,
|
||||||
|
route_wait_ms,
|
||||||
|
stats.as_ref(),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let current_snapshot = fairness.snapshot();
|
||||||
|
apply_fairness_metrics_delta(stats.as_ref(), &mut fairness_snapshot, current_snapshot);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
stats.increment_me_reader_eof_total();
|
stats.increment_me_reader_eof_total();
|
||||||
return Err(ProxyError::Io(std::io::Error::new(
|
return Err(ProxyError::Io(std::io::Error::new(
|
||||||
@@ -317,12 +352,9 @@ pub(crate) async fn reader_loop(
|
|||||||
stats.increment_me_route_drop_queue_full_high();
|
stats.increment_me_route_drop_queue_full_high();
|
||||||
let streak = data_route_queue_full_streak.entry(cid).or_insert(0);
|
let streak = data_route_queue_full_streak.entry(cid).or_insert(0);
|
||||||
*streak = streak.saturating_add(1);
|
*streak = streak.saturating_add(1);
|
||||||
if should_close_on_queue_full_streak(*streak)
|
let pressure_state = fairness.pressure_state();
|
||||||
|| matches!(
|
if should_close_on_queue_full_streak(*streak, pressure_state)
|
||||||
admission,
|
|| matches!(admission, AdmissionDecision::RejectSaturated)
|
||||||
AdmissionDecision::RejectSaturated
|
|
||||||
| AdmissionDecision::RejectStandingFlow
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
fairness.remove_flow(cid);
|
fairness.remove_flow(cid);
|
||||||
data_route_queue_full_streak.remove(&cid);
|
data_route_queue_full_streak.remove(&cid);
|
||||||
@@ -445,14 +477,18 @@ pub(crate) async fn reader_loop(
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
|
|
||||||
|
use super::PressureState;
|
||||||
use crate::transport::middle_proxy::ConnRegistry;
|
use crate::transport::middle_proxy::ConnRegistry;
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
MeResponse, RouteResult, is_data_route_queue_full, route_data_with_retry,
|
MeResponse, RouteResult, WorkerFairnessSnapshot, fairness_retry_delay,
|
||||||
should_close_on_queue_full_streak, should_close_on_route_result_for_ack,
|
is_data_route_queue_full, route_data_with_retry, should_close_on_queue_full_streak,
|
||||||
should_close_on_route_result_for_data,
|
should_close_on_route_result_for_ack, should_close_on_route_result_for_data,
|
||||||
|
should_schedule_fairness_retry,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -475,10 +511,29 @@ mod tests {
|
|||||||
assert!(is_data_route_queue_full(RouteResult::QueueFullBase));
|
assert!(is_data_route_queue_full(RouteResult::QueueFullBase));
|
||||||
assert!(is_data_route_queue_full(RouteResult::QueueFullHigh));
|
assert!(is_data_route_queue_full(RouteResult::QueueFullHigh));
|
||||||
assert!(!is_data_route_queue_full(RouteResult::NoConn));
|
assert!(!is_data_route_queue_full(RouteResult::NoConn));
|
||||||
assert!(!should_close_on_queue_full_streak(1));
|
assert!(!should_close_on_queue_full_streak(1, PressureState::Normal));
|
||||||
assert!(!should_close_on_queue_full_streak(2));
|
assert!(!should_close_on_queue_full_streak(2, PressureState::Pressured));
|
||||||
assert!(should_close_on_queue_full_streak(3));
|
assert!(!should_close_on_queue_full_streak(3, PressureState::Pressured));
|
||||||
assert!(should_close_on_queue_full_streak(u8::MAX));
|
assert!(should_close_on_queue_full_streak(3, PressureState::Shedding));
|
||||||
|
assert!(should_close_on_queue_full_streak(
|
||||||
|
u8::MAX,
|
||||||
|
PressureState::Saturated
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn fairness_retry_is_scheduled_only_when_queue_has_pending_bytes() {
|
||||||
|
let mut snapshot = WorkerFairnessSnapshot::default();
|
||||||
|
assert!(!should_schedule_fairness_retry(&snapshot));
|
||||||
|
|
||||||
|
snapshot.total_queued_bytes = 1;
|
||||||
|
assert!(should_schedule_fairness_retry(&snapshot));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn fairness_retry_delay_never_drops_below_one_millisecond() {
|
||||||
|
assert_eq!(fairness_retry_delay(0), Duration::from_millis(1));
|
||||||
|
assert_eq!(fairness_retry_delay(2), Duration::from_millis(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
Reference in New Issue
Block a user