benches: expose preemption knobs + sweep runner
Config API changes (src/preempt.rs, src/runtime.rs):
- preempt: promote ALLOC_INTERVAL and TIMESLICE_CYCLES from bare consts to
DEFAULT_ALLOC_INTERVAL / DEFAULT_TIMESLICE_CYCLES; store active values in
thread-locals set on each actor resume so multiple runtimes can use
different settings concurrently.
- runtime: add alloc_interval / timeslice_cycles fields to Config; add
Config::alloc_interval(n) and Config::timeslice_cycles(c) builder methods;
thread the values through RuntimeInner to the reset_timeslice() call in
schedule_loop.
Bench changes:
- Add bench_cfg(threads) helper to general/tokio_favored/smarm_favored that
wraps Config::exact and reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES
env vars, so the sweep script can vary knobs without recompiling.
Sweep tooling (benches/sweep.py):
- 'run': run the 3-file bench suite once; --save-baseline persists JSON
- 'regress': compare current run against baseline.json, exit 1 on any bench
that regresses >10% vs stored medians
- 'sweep': run the full SWEEP_GRID (10 points), print comparison table,
optional --save-csv; binaries pre-built so no recompile per point
Sweep results (10-point grid, 1-CPU sandbox):
- The preemption knobs have very little effect on this single-CPU machine.
Most benches move <5% across the entire grid.
- Longer timeslices (tc=600k, tc=1200k) reliably hurt spawn_storm_busy
(+11-15%) and catch_unwind_panics (+10-12%) because actors hold the
scheduler mutex longer per timeslice, stalling the storm of joinable tasks.
- Shorter timeslices (tc=150k) give a small improvement on many_timers
(-3-4%) and a wash everywhere else.
- yield_in_hot_loop and uncontended_channel are essentially flat across all
knobs — both are scheduling-dominated and call yield_now explicitly, so
the RDTSC-driven preemption path is irrelevant.
- Conclusion: the knobs are expected to matter primarily under contention
(multi-core), which this 1-CPU sandbox cannot exercise. Re-run the sweep on a
multi-core machine before drawing tuning conclusions.
This commit is contained in:
@@ -83,7 +83,7 @@ fn bench_storm_smarm(threads: usize) -> (u64, u128) {
|
||||
let s2 = stop.clone();
|
||||
|
||||
let start = Instant::now();
|
||||
smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || {
|
||||
smarm::runtime::init(bench_cfg(threads)).run(move || {
|
||||
// Background actors: yield in a tight loop until told to stop.
|
||||
let mut bg_handles = Vec::new();
|
||||
for _ in 0..STORM_BACKGROUND {
|
||||
@@ -189,7 +189,7 @@ const MPSC_PER_PRODUCER: u64 = 10_000;
|
||||
|
||||
fn bench_mpsc_smarm(threads: usize) -> (u64, u128) {
|
||||
let start = Instant::now();
|
||||
smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(|| {
|
||||
smarm::runtime::init(bench_cfg(threads)).run(|| {
|
||||
let (tx, rx) = smarm::channel::<u64>();
|
||||
let mut prod_handles = Vec::new();
|
||||
for p in 0..MPSC_PRODUCERS {
|
||||
@@ -289,7 +289,7 @@ fn timer_delay_ms(i: u64) -> u64 {
|
||||
|
||||
fn bench_timers_smarm(threads: usize) -> (u64, u128) {
|
||||
let start = Instant::now();
|
||||
smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(|| {
|
||||
smarm::runtime::init(bench_cfg(threads)).run(|| {
|
||||
let mut handles = Vec::new();
|
||||
for i in 0..TIMER_ACTORS {
|
||||
let ms = timer_delay_ms(i);
|
||||
@@ -373,7 +373,7 @@ fn bench_scaling_smarm(threads: usize) -> (u64, u128) {
|
||||
let total = Arc::new(AtomicU64::new(0));
|
||||
let t2 = total.clone();
|
||||
let start = Instant::now();
|
||||
smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || {
|
||||
smarm::runtime::init(bench_cfg(threads)).run(move || {
|
||||
let mut handles = Vec::new();
|
||||
for w in 0..SCALING_WORKERS {
|
||||
let (lo, hi) = scaling_slice(w);
|
||||
@@ -413,6 +413,23 @@ fn bench_scaling_tokio_multi(threads: usize) -> (u64, u128) {
|
||||
// main
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Knob helper — reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES env vars
|
||||
// so the sweep script can override the preemption knobs without recompiling.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Reads the environment variable `name` and parses it as a `T`.
///
/// Returns `None` when the variable is unset. When it is set but is not valid
/// Unicode, or does not parse as a `T`, a warning is written to stderr and
/// `None` is returned so the caller keeps its default. Surrounding whitespace
/// in the value is ignored.
fn env_knob<T: std::str::FromStr>(name: &str) -> Option<T> {
    match std::env::var(name) {
        Ok(raw) => {
            let parsed = raw.trim().parse().ok();
            if parsed.is_none() {
                // A mistyped override must not masquerade as a real sweep
                // point: say so instead of silently using the default.
                eprintln!("warning: ignoring {}={:?}: not a valid value", name, raw);
            }
            parsed
        }
        Err(std::env::VarError::NotPresent) => None,
        Err(std::env::VarError::NotUnicode(raw)) => {
            eprintln!("warning: ignoring {}={:?}: not valid Unicode", name, raw);
            None
        }
    }
}

/// Builds the runtime configuration used by the smarm benchmarks.
///
/// Starts from `Config::exact(threads)` and applies two optional overrides
/// taken from the environment:
///
/// - `SMARM_ALLOC_INTERVAL` (parsed as `u32`) is passed to `alloc_interval`.
/// - `SMARM_TIMESLICE_CYCLES` (parsed as `u64`) is passed to `timeslice_cycles`.
///
/// An unset variable leaves the corresponding setting untouched. A set but
/// invalid value is reported on stderr and otherwise ignored.
fn bench_cfg(threads: usize) -> smarm::runtime::Config {
    let mut cfg = smarm::runtime::Config::exact(threads);
    if let Some(n) = env_knob::<u32>("SMARM_ALLOC_INTERVAL") {
        cfg = cfg.alloc_interval(n);
    }
    if let Some(c) = env_knob::<u64>("SMARM_TIMESLICE_CYCLES") {
        cfg = cfg.timeslice_cycles(c);
    }
    cfg
}
|
||||
|
||||
fn main() {
|
||||
let n = available_threads();
|
||||
println!("smarm tokio-favored benchmarks");
|
||||
|
||||
Reference in New Issue
Block a user