benches: expose preemption knobs + sweep runner

Config API changes (src/preempt.rs, src/runtime.rs):
- preempt: promote ALLOC_INTERVAL and TIMESLICE_CYCLES from bare consts to DEFAULT_ALLOC_INTERVAL / DEFAULT_TIMESLICE_CYCLES; store active values in thread-locals set on each actor resume so multiple runtimes can use different settings concurrently.
- runtime: add alloc_interval / timeslice_cycles fields to Config; add Config::alloc_interval(n) and Config::timeslice_cycles(c) builder methods; thread the values through RuntimeInner to the reset_timeslice() call in schedule_loop.

Bench changes:
- Add bench_cfg(threads) helper to general/tokio_favored/smarm_favored that wraps Config::exact and reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES env vars, so the sweep script can vary knobs without recompiling.

Sweep tooling (benches/sweep.py):
- 'run': run the 3-file bench suite once; --save-baseline persists JSON
- 'regress': compare current run against baseline.json, exit 1 on any bench that regresses >10% vs stored medians
- 'sweep': run the full SWEEP_GRID (10 points), print comparison table, optional --save-csv; binaries pre-built so no recompile per point

Sweep results (10-point grid, 1-CPU sandbox):
- The preemption knobs have very little effect on this single-CPU machine. Most benches move <5% across the entire grid.
- Longer timeslices (tc=600k, tc=1200k) reliably hurt spawn_storm_busy (+11-15%) and catch_unwind_panics (+10-12%) because actors hold the scheduler mutex longer per timeslice, stalling the storm of joinable tasks.
- Shorter timeslices (tc=150k) give a small improvement on many_timers (-3-4%) and a wash everywhere else.
- yield_in_hot_loop and uncontended_channel are essentially flat across all knobs — both are scheduling-dominated and call yield_now explicitly, so the RDTSC-driven preemption path is irrelevant.
- Conclusion: the knobs matter primarily under contention (multi-core). Re-run sweep on a multi-core machine before drawing tuning conclusions.
2026-05-25 13:04:58 +00:00
parent 6d1c59fb99
commit 3da6ffaa77
15 changed files with 2315 additions and 8 deletions
@@ -84,7 +84,7 @@ fn bench_recurse_smarm(threads: usize) -> (u64, u128) {
    let total = Arc::new(AtomicU64::new(0));
    let t2 = total.clone();
    let start = Instant::now();
-    smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || {
+    smarm::runtime::init(bench_cfg(threads)).run(move || {
        // Plain Rust recursion on the actor's own (growable) stack.
        fn recurse(c: &AtomicU64, n: u64) -> u64 {
            if n == 0 {
@@ -170,7 +170,7 @@ const HOT_YIELDS: u64 = 500_000;

 fn bench_hot_smarm() -> (u64, u128) {
    let start = Instant::now();
-    smarm::runtime::init(smarm::runtime::Config::exact(1)).run(|| {
+    smarm::runtime::init(bench_cfg(1)).run(|| {
        let ha = smarm::spawn(|| {
            for _ in 0..HOT_YIELDS {
                smarm::yield_now();
@@ -216,7 +216,7 @@ const UNCONT_MSGS: u64 = 1_000_000;

 fn bench_unc_smarm() -> (u64, u128) {
    let start = Instant::now();
-    smarm::runtime::init(smarm::runtime::Config::exact(1)).run(|| {
+    smarm::runtime::init(bench_cfg(1)).run(|| {
        let (tx, rx) = smarm::channel::<u64>();
        let consumer = smarm::spawn(move || {
            let mut count = 0u64;
@@ -273,7 +273,7 @@ fn bench_panic_smarm(threads: usize) -> (u64, u128) {
    let ok2 = ok.clone();
    let err2 = err.clone();
    let start = Instant::now();
-    smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || {
+    smarm::runtime::init(bench_cfg(threads)).run(move || {
        let mut handles = Vec::new();
        for i in 0..PANIC_TASKS {
            handles.push(smarm::spawn(move || {
@@ -355,6 +355,23 @@ fn bench_panic_tokio_multi() -> (u64, u128) {
 // main
 // ---------------------------------------------------------------------------

+
+// ---------------------------------------------------------------------------
+// Knob helper — reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES env vars
+// so the sweep script can override the preemption knobs without recompiling.
+// ---------------------------------------------------------------------------
+
+/// Builds the smarm runtime config used by the benchmarks in this file.
+///
+/// Starts from `Config::exact(threads)` and then applies two optional
+/// overrides read from the environment, so the sweep script can vary the
+/// preemption knobs without recompiling:
+///
+/// * `SMARM_ALLOC_INTERVAL`   — parsed as `u32`, passed to `Config::alloc_interval`
+/// * `SMARM_TIMESLICE_CYCLES` — parsed as `u64`, passed to `Config::timeslice_cycles`
+///
+/// A variable that is unset (or not valid Unicode) leaves the corresponding
+/// knob untouched. A variable that is set but does not parse also leaves the
+/// knob untouched, but a warning is printed to stderr so that a mistyped
+/// sweep point is not silently measured with the unmodified settings.
+/// Surrounding whitespace in the value is ignored.
+fn bench_cfg(threads: usize) -> smarm::runtime::Config {
+    let mut cfg = smarm::runtime::Config::exact(threads);
+    if let Ok(raw) = std::env::var("SMARM_ALLOC_INTERVAL") {
+        match raw.trim().parse::<u32>() {
+            Ok(n) => cfg = cfg.alloc_interval(n),
+            // Keep the best-effort fallback, but make the bad value visible.
+            Err(e) => eprintln!("warning: ignoring SMARM_ALLOC_INTERVAL={:?}: {}", raw, e),
+        }
+    }
+    if let Ok(raw) = std::env::var("SMARM_TIMESLICE_CYCLES") {
+        match raw.trim().parse::<u64>() {
+            Ok(n) => cfg = cfg.timeslice_cycles(n),
+            // Keep the best-effort fallback, but make the bad value visible.
+            Err(e) => eprintln!("warning: ignoring SMARM_TIMESLICE_CYCLES={:?}: {}", raw, e),
+        }
+    }
+    cfg
+}
+
 fn main() {
    let n = available_threads();
    println!("smarm smarm-favored benchmarks");