From 3da6ffaa77c652d4a42cd1c60c892d93dbccb50f Mon Sep 17 00:00:00 2001
From: Bench <bench@smarm>
Date: Sun, 24 May 2026 11:48:15 +0000
Subject: [PATCH] benches: expose preemption knobs + sweep runner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Config API changes (src/preempt.rs, src/runtime.rs):
- preempt: promote ALLOC_INTERVAL and TIMESLICE_CYCLES from bare consts to
  DEFAULT_ALLOC_INTERVAL / DEFAULT_TIMESLICE_CYCLES; store active values in
  thread-locals set on each actor resume so multiple runtimes can use
  different settings concurrently.
- runtime: add alloc_interval / timeslice_cycles fields to Config; add
  Config::alloc_interval(n) and Config::timeslice_cycles(c) builder methods;
  thread the values through RuntimeInner to the reset_timeslice() call in
  schedule_loop.

Bench changes:
- Add bench_cfg(threads) helper to general/tokio_favored/smarm_favored that
  wraps Config::exact and reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES
  env vars, so the sweep script can vary knobs without recompiling.

Sweep tooling (benches/sweep.py):
- 'run':     run the 3-file bench suite once; --save-baseline persists JSON
- 'regress': compare current run against baseline.json, exit 1 on any bench
             that regresses >10% vs stored medians
- 'sweep':   run the full SWEEP_GRID (10 points), print comparison table,
             optional --save-csv; binaries pre-built so no recompile per point

Sweep results (10-point grid, 1-CPU sandbox):
- The preemption knobs have very little effect on this single-CPU machine.
  Most benches move <5% across the entire grid.
- Longer timeslices (tc=600k, tc=1200k) reliably hurt spawn_storm_busy
  (+11-15%) and catch_unwind_panics (+10-12%), likely because actors hold
  the scheduler mutex longer per timeslice, stalling the storm of joinable
  tasks.
- Shorter timeslices (tc=150k) give a small improvement on many_timers
  (3-4% faster) and are a wash everywhere else.
- yield_in_hot_loop and uncontended_channel are essentially flat across all
  knobs — both are scheduling-dominated and call yield_now explicitly, so
  the RDTSC-driven preemption path is irrelevant.
- Conclusion: on a single CPU the knobs barely matter. They are expected
  to matter primarily under multi-core contention, which this sandbox
  cannot exercise. Re-run the sweep on a multi-core machine before
  drawing tuning conclusions.
---
 .../baseline-output/sweep/ai128_tc1200k.txt   | 126 +++++
 .../baseline-output/sweep/ai128_tc150k.txt    | 126 +++++
 .../baseline-output/sweep/ai128_tc300k.txt    | 126 +++++
 .../baseline-output/sweep/ai128_tc600k.txt    | 126 +++++
 .../baseline-output/sweep/ai256_tc300k.txt    | 126 +++++
 benches/baseline-output/sweep/ai32_tc150k.txt | 126 +++++
 benches/baseline-output/sweep/ai32_tc300k.txt | 126 +++++
 .../baseline-output/sweep/ai512_tc300k.txt    | 126 +++++
 benches/baseline-output/sweep/ai64_tc150k.txt | 126 +++++
 benches/baseline-output/sweep/ai64_tc300k.txt | 126 +++++
 benches/baseline.json                         | 224 +++++++++
 benches/general.rs                            | 442 ++++++++++++++++++
 benches/smarm_favored.rs                      |  25 +-
 benches/sweep.py                              | 347 ++++++++++++++
 benches/tokio_favored.rs                      |  25 +-
 15 files changed, 2315 insertions(+), 8 deletions(-)
 create mode 100644 benches/baseline-output/sweep/ai128_tc1200k.txt
 create mode 100644 benches/baseline-output/sweep/ai128_tc150k.txt
 create mode 100644 benches/baseline-output/sweep/ai128_tc300k.txt
 create mode 100644 benches/baseline-output/sweep/ai128_tc600k.txt
 create mode 100644 benches/baseline-output/sweep/ai256_tc300k.txt
 create mode 100644 benches/baseline-output/sweep/ai32_tc150k.txt
 create mode 100644 benches/baseline-output/sweep/ai32_tc300k.txt
 create mode 100644 benches/baseline-output/sweep/ai512_tc300k.txt
 create mode 100644 benches/baseline-output/sweep/ai64_tc150k.txt
 create mode 100644 benches/baseline-output/sweep/ai64_tc300k.txt
 create mode 100644 benches/baseline.json
 create mode 100644 benches/general.rs
 create mode 100755 benches/sweep.py

diff --git a/benches/baseline-output/sweep/ai128_tc1200k.txt b/benches/baseline-output/sweep/ai128_tc1200k.txt
new file mode 100644
index 0000000..b268001
--- /dev/null
+++ b/benches/baseline-output/sweep/ai128_tc1200k.txt
@@ -0,0 +1,126 @@
+smarm general benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000
+
+================================================================================
+  chained_spawn: depth 1000
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |       8720 |       8526 |       9319
+            smarm 1-thread |         1000 |       8662 |       8571 |       8991
+      tokio current_thread |         1000 |        123 |        123 |        152
+        tokio multi-thread |         1000 |        188 |        184 |        230
+
+================================================================================
+  yield_many: 200 tasks × 1000 yields
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       200000 |      41530 |      41242 |      43501
+            smarm 1-thread |       200000 |      41575 |      41187 |      43323
+      tokio current_thread |       200000 |      15098 |      15020 |      15348
+        tokio multi-thread |       200000 |      15900 |      15827 |      16012
+
+================================================================================
+  fan_out_compute: primes in [2, 400000) across 64
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      29573 |      29435 |      31647
+            smarm 1-thread |        33860 |      29521 |      29453 |      29847
+      tokio current_thread |        33860 |      28495 |      28441 |      30150
+        tokio multi-thread |        33860 |      34384 |      34297 |      34745
+
+================================================================================
+  ping_pong_oneshot: 1000 rounds
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |      17190 |      16994 |      17541
+            smarm 1-thread |         1000 |      17078 |      16916 |      19139
+      tokio current_thread |         1000 |        899 |        896 |       1000
+        tokio multi-thread |         1000 |       4198 |       4116 |       4573
+smarm tokio-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64
+
+================================================================================
+  spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     138556 |     136165 |     140947
+            smarm 1-thread |        10000 |     140223 |     136325 |     146781
+      tokio current_thread |        10000 |       2671 |       2622 |       2913
+        tokio multi-thread |        10000 |       6004 |       4360 |      12576
+
+================================================================================
+  mpsc_contention: 32 producers × 10000 msgs → 1 consumer
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       320000 |       9051 |       8967 |      11152
+            smarm 1-thread |       320000 |       9058 |       9008 |       9998
+      tokio current_thread |       320000 |      17375 |      17131 |      18514
+        tokio multi-thread |       320000 |      17955 |      17452 |      18508
+
+================================================================================
+  many_timers: 10000 actors sleeping 1–10 ms
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     156969 |     153124 |     167711
+            smarm 1-thread |        10000 |     150638 |     146070 |     168286
+      tokio current_thread |        10000 |      13823 |      13482 |      14796
+        tokio multi-thread |        10000 |      15034 |      14425 |      15320
+
+================================================================================
+  multi_thread_scaling: primes in [2, 400000) across 64 workers
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      30075 |      29707 |      30720
+      tokio multi 1-thread |        33860 |      29060 |      28835 |      44378
+smarm smarm-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000
+
+================================================================================
+  deep_recursion: depth 500
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |            1 |         86 |         79 |        130
+            smarm 1-thread |            1 |         83 |         78 |        146
+      tokio current_thread |            1 |         25 |         25 |         31
+        tokio multi-thread |            1 |         49 |         46 |         85
+
+================================================================================
+  yield_in_hot_loop: 2 actors × 500000 yields (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |     190902 |     187600 |     194333
+      tokio current_thread |      1000000 |     150279 |     148175 |     188184
+
+================================================================================
+  uncontended_channel: 1→1, 1000000 msgs (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |      27687 |      27198 |      29555
+      tokio current_thread |      1000000 |      54465 |      54048 |      55954
+
+================================================================================
+  catch_unwind_panics: 10000 tasks, 50% panic
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     160308 |     154365 |     167009
+            smarm 1-thread |        10000 |     158662 |     155458 |     168896
+      tokio current_thread |        10000 |     267762 |     260876 |     294092
+        tokio multi-thread |        10000 |     275097 |     269344 |     287681
diff --git a/benches/baseline-output/sweep/ai128_tc150k.txt b/benches/baseline-output/sweep/ai128_tc150k.txt
new file mode 100644
index 0000000..f5a95b0
--- /dev/null
+++ b/benches/baseline-output/sweep/ai128_tc150k.txt
@@ -0,0 +1,126 @@
+smarm general benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000
+
+================================================================================
+  chained_spawn: depth 1000
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |       8596 |       8491 |       8805
+            smarm 1-thread |         1000 |       8552 |       8461 |       9003
+      tokio current_thread |         1000 |        125 |        125 |        260
+        tokio multi-thread |         1000 |        190 |        184 |        338
+
+================================================================================
+  yield_many: 200 tasks × 1000 yields
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       200000 |      41885 |      41112 |      43292
+            smarm 1-thread |       200000 |      42174 |      41063 |      43145
+      tokio current_thread |       200000 |      15195 |      15010 |      15589
+        tokio multi-thread |       200000 |      16037 |      15869 |      17057
+
+================================================================================
+  fan_out_compute: primes in [2, 400000) across 64
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      29872 |      29629 |      31596
+            smarm 1-thread |        33860 |      29776 |      29528 |      30003
+      tokio current_thread |        33860 |      28705 |      28605 |      30287
+        tokio multi-thread |        33860 |      34655 |      34503 |      36596
+
+================================================================================
+  ping_pong_oneshot: 1000 rounds
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |      16898 |      16574 |      17386
+            smarm 1-thread |         1000 |      16871 |      16677 |      18467
+      tokio current_thread |         1000 |        897 |        857 |        991
+        tokio multi-thread |         1000 |       4325 |       4228 |       4458
+smarm tokio-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64
+
+================================================================================
+  spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     133462 |     129526 |     138685
+            smarm 1-thread |        10000 |     130118 |     127633 |     142344
+      tokio current_thread |        10000 |       2713 |       2608 |       2831
+        tokio multi-thread |        10000 |       7367 |       4345 |      11741
+
+================================================================================
+  mpsc_contention: 32 producers × 10000 msgs → 1 consumer
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       320000 |       9077 |       8944 |       9287
+            smarm 1-thread |       320000 |       9100 |       9033 |      10604
+      tokio current_thread |       320000 |      17310 |      17122 |      18616
+        tokio multi-thread |       320000 |      17484 |      17413 |      17748
+
+================================================================================
+  many_timers: 10000 actors sleeping 1–10 ms
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     140039 |     135577 |     145123
+            smarm 1-thread |        10000 |     139931 |     135513 |     143841
+      tokio current_thread |        10000 |      14524 |      14378 |      14564
+        tokio multi-thread |        10000 |      15066 |      14677 |      15336
+
+================================================================================
+  multi_thread_scaling: primes in [2, 400000) across 64 workers
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      29620 |      29511 |      31347
+      tokio multi 1-thread |        33860 |      29046 |      28817 |      29687
+smarm smarm-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000
+
+================================================================================
+  deep_recursion: depth 500
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |            1 |         94 |         79 |        371
+            smarm 1-thread |            1 |        183 |         83 |        317
+      tokio current_thread |            1 |         25 |         25 |         31
+        tokio multi-thread |            1 |         54 |         41 |         71
+
+================================================================================
+  yield_in_hot_loop: 2 actors × 500000 yields (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |     189034 |     187674 |     192204
+      tokio current_thread |      1000000 |     151106 |     149564 |     155601
+
+================================================================================
+  uncontended_channel: 1→1, 1000000 msgs (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |      26949 |      26838 |      30868
+      tokio current_thread |      1000000 |      52984 |      52149 |      55141
+
+================================================================================
+  catch_unwind_panics: 10000 tasks, 50% panic
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     145860 |     143015 |     152734
+            smarm 1-thread |        10000 |     144550 |     141592 |     149247
+      tokio current_thread |        10000 |     267500 |     265301 |     278751
+        tokio multi-thread |        10000 |     275320 |     268986 |     286891
diff --git a/benches/baseline-output/sweep/ai128_tc300k.txt b/benches/baseline-output/sweep/ai128_tc300k.txt
new file mode 100644
index 0000000..b62387c
--- /dev/null
+++ b/benches/baseline-output/sweep/ai128_tc300k.txt
@@ -0,0 +1,126 @@
+smarm general benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000
+
+================================================================================
+  chained_spawn: depth 1000
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |       8469 |       8414 |       8717
+            smarm 1-thread |         1000 |       8625 |       8479 |      10212
+      tokio current_thread |         1000 |        124 |        123 |        175
+        tokio multi-thread |         1000 |        194 |        184 |        317
+
+================================================================================
+  yield_many: 200 tasks × 1000 yields
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       200000 |      41949 |      41419 |      43784
+            smarm 1-thread |       200000 |      42005 |      41491 |      45224
+      tokio current_thread |       200000 |      15139 |      15049 |      16352
+        tokio multi-thread |       200000 |      15985 |      15931 |      16306
+
+================================================================================
+  fan_out_compute: primes in [2, 400000) across 64
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      29640 |      29515 |      31229
+            smarm 1-thread |        33860 |      29777 |      29642 |      30056
+      tokio current_thread |        33860 |      28704 |      28584 |      30317
+        tokio multi-thread |        33860 |      34870 |      34569 |      35876
+
+================================================================================
+  ping_pong_oneshot: 1000 rounds
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |      17098 |      16968 |      18688
+            smarm 1-thread |         1000 |      16918 |      16736 |      17326
+      tokio current_thread |         1000 |        915 |        882 |       1000
+        tokio multi-thread |         1000 |       4371 |       4265 |       4834
+smarm tokio-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64
+
+================================================================================
+  spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     127075 |     124760 |     130259
+            smarm 1-thread |        10000 |     125976 |     125121 |     128728
+      tokio current_thread |        10000 |       2703 |       2646 |       2807
+        tokio multi-thread |        10000 |       7201 |       4267 |      12853
+
+================================================================================
+  mpsc_contention: 32 producers × 10000 msgs → 1 consumer
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       320000 |       9116 |       8985 |       9237
+            smarm 1-thread |       320000 |       9062 |       8947 |      10648
+      tokio current_thread |       320000 |      17380 |      17192 |      18363
+        tokio multi-thread |       320000 |      17854 |      17554 |      18219
+
+================================================================================
+  many_timers: 10000 actors sleeping 1–10 ms
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     137944 |     132081 |     141862
+            smarm 1-thread |        10000 |     143773 |     137448 |     153703
+      tokio current_thread |        10000 |      14174 |      13751 |      15079
+        tokio multi-thread |        10000 |      15244 |      14625 |      16700
+
+================================================================================
+  multi_thread_scaling: primes in [2, 400000) across 64 workers
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      30832 |      30082 |      33360
+      tokio multi 1-thread |        33860 |      29736 |      29321 |      29958
+smarm smarm-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000
+
+================================================================================
+  deep_recursion: depth 500
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |            1 |         84 |         78 |        122
+            smarm 1-thread |            1 |         90 |         79 |        157
+      tokio current_thread |            1 |         25 |         25 |         31
+        tokio multi-thread |            1 |         48 |         47 |         62
+
+================================================================================
+  yield_in_hot_loop: 2 actors × 500000 yields (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |     190830 |     188562 |     196621
+      tokio current_thread |      1000000 |     151537 |     150038 |     165825
+
+================================================================================
+  uncontended_channel: 1→1, 1000000 msgs (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |      27265 |      26969 |      29317
+      tokio current_thread |      1000000 |      53894 |      53380 |      56189
+
+================================================================================
+  catch_unwind_panics: 10000 tasks, 50% panic
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     145006 |     144092 |     149002
+            smarm 1-thread |        10000 |     144417 |     142000 |     148224
+      tokio current_thread |        10000 |     265376 |     260227 |     272279
+        tokio multi-thread |        10000 |     277432 |     270860 |     283266
diff --git a/benches/baseline-output/sweep/ai128_tc600k.txt b/benches/baseline-output/sweep/ai128_tc600k.txt
new file mode 100644
index 0000000..a1e563f
--- /dev/null
+++ b/benches/baseline-output/sweep/ai128_tc600k.txt
@@ -0,0 +1,126 @@
+smarm general benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000
+
+================================================================================
+  chained_spawn: depth 1000
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |       8721 |       8398 |       8994
+            smarm 1-thread |         1000 |       8587 |       8440 |       8810
+      tokio current_thread |         1000 |        124 |        124 |        294
+        tokio multi-thread |         1000 |        188 |        184 |        299
+
+================================================================================
+  yield_many: 200 tasks × 1000 yields
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       200000 |      42588 |      42084 |      45080
+            smarm 1-thread |       200000 |      42252 |      41963 |      43615
+      tokio current_thread |       200000 |      15101 |      14994 |      15573
+        tokio multi-thread |       200000 |      15979 |      15890 |      16356
+
+================================================================================
+  fan_out_compute: primes in [2, 400000) across 64
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      29686 |      29491 |      31263
+            smarm 1-thread |        33860 |      29841 |      29586 |      30570
+      tokio current_thread |        33860 |      28652 |      28510 |      30359
+        tokio multi-thread |        33860 |      34677 |      34461 |      35318
+
+================================================================================
+  ping_pong_oneshot: 1000 rounds
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |      16909 |      16579 |      20782
+            smarm 1-thread |         1000 |      16888 |      16537 |      20808
+      tokio current_thread |         1000 |        925 |        911 |       1021
+        tokio multi-thread |         1000 |       4192 |       4079 |       4531
+smarm tokio-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64
+
+================================================================================
+  spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     145813 |     142042 |     152501
+            smarm 1-thread |        10000 |     145119 |     141282 |     161294
+      tokio current_thread |        10000 |       2968 |       2899 |       3231
+        tokio multi-thread |        10000 |       6288 |       4289 |      12226
+
+================================================================================
+  mpsc_contention: 32 producers × 10000 msgs → 1 consumer
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       320000 |       9662 |       9254 |      11370
+            smarm 1-thread |       320000 |       9673 |       9331 |       9989
+      tokio current_thread |       320000 |      18015 |      17334 |      21096
+        tokio multi-thread |       320000 |      18384 |      17837 |      19534
+
+================================================================================
+  many_timers: 10000 actors sleeping 1–10 ms
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     160492 |     154795 |     180307
+            smarm 1-thread |        10000 |     161716 |     156498 |     191986
+      tokio current_thread |        10000 |      13895 |      13576 |      14913
+        tokio multi-thread |        10000 |      15074 |      14665 |      16070
+
+================================================================================
+  multi_thread_scaling: primes in [2, 400000) across 64 workers
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      30001 |      29600 |      38039
+      tokio multi 1-thread |        33860 |      29419 |      28906 |      30079
+smarm smarm-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000
+
+================================================================================
+  deep_recursion: depth 500
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |            1 |         91 |         79 |        186
+            smarm 1-thread |            1 |         87 |         81 |        131
+      tokio current_thread |            1 |         25 |         25 |        103
+        tokio multi-thread |            1 |         56 |         47 |         64
+
+================================================================================
+  yield_in_hot_loop: 2 actors × 500000 yields (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |     190023 |     188250 |     193824
+      tokio current_thread |      1000000 |     154681 |     152074 |     187328
+
+================================================================================
+  uncontended_channel: 1→1, 1000000 msgs (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |      27264 |      26772 |      29512
+      tokio current_thread |      1000000 |      53324 |      51744 |      59282
+
+================================================================================
+  catch_unwind_panics: 10000 tasks, 50% panic
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     155983 |     152595 |     161438
+            smarm 1-thread |        10000 |     162122 |     156170 |     200357
+      tokio current_thread |        10000 |     276303 |     264291 |     296266
+        tokio multi-thread |        10000 |     271350 |     267654 |     285897
diff --git a/benches/baseline-output/sweep/ai256_tc300k.txt b/benches/baseline-output/sweep/ai256_tc300k.txt
new file mode 100644
index 0000000..4bcd02d
--- /dev/null
+++ b/benches/baseline-output/sweep/ai256_tc300k.txt
@@ -0,0 +1,126 @@
+smarm general benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000
+
+================================================================================
+  chained_spawn: depth 1000
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |       9130 |       8720 |      10611
+            smarm 1-thread |         1000 |       8808 |       8617 |       9659
+      tokio current_thread |         1000 |        126 |        125 |        164
+        tokio multi-thread |         1000 |        190 |        184 |        329
+
+================================================================================
+  yield_many: 200 tasks × 1000 yields
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       200000 |      42270 |      41814 |      44737
+            smarm 1-thread |       200000 |      42999 |      42104 |      45424
+      tokio current_thread |       200000 |      15441 |      15196 |      16096
+        tokio multi-thread |       200000 |      16249 |      16070 |      17620
+
+================================================================================
+  fan_out_compute: primes in [2, 400000) across 64
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      29813 |      29627 |      30176
+            smarm 1-thread |        33860 |      29613 |      29440 |      31205
+      tokio current_thread |        33860 |      28637 |      28406 |      29179
+        tokio multi-thread |        33860 |      34472 |      34389 |      36092
+
+================================================================================
+  ping_pong_oneshot: 1000 rounds
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |      16899 |      16804 |      17017
+            smarm 1-thread |         1000 |      17001 |      16704 |      19533
+      tokio current_thread |         1000 |        914 |        893 |       1021
+        tokio multi-thread |         1000 |       4198 |       4136 |       4297
+smarm tokio-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64
+
+================================================================================
+  spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     128621 |     126503 |     132268
+            smarm 1-thread |        10000 |     131316 |     128354 |     133964
+      tokio current_thread |        10000 |       2763 |       2696 |       2996
+        tokio multi-thread |        10000 |       6023 |       4300 |      12908
+
+================================================================================
+  mpsc_contention: 32 producers × 10000 msgs → 1 consumer
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       320000 |       9225 |       9071 |      11272
+            smarm 1-thread |       320000 |       9174 |       9028 |       9335
+      tokio current_thread |       320000 |      17210 |      17100 |      18404
+        tokio multi-thread |       320000 |      17550 |      17413 |      18080
+
+================================================================================
+  many_timers: 10000 actors sleeping 1–10 ms
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     136396 |     133330 |     142485
+            smarm 1-thread |        10000 |     137374 |     134345 |     141168
+      tokio current_thread |        10000 |      13789 |      13499 |      14621
+        tokio multi-thread |        10000 |      15036 |      14729 |      15359
+
+================================================================================
+  multi_thread_scaling: primes in [2, 400000) across 64 workers
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      30065 |      29819 |      32418
+      tokio multi 1-thread |        33860 |      29501 |      28916 |      30057
+smarm smarm-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000
+
+================================================================================
+  deep_recursion: depth 500
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |            1 |         94 |         81 |        257
+            smarm 1-thread |            1 |         83 |         80 |        134
+      tokio current_thread |            1 |         25 |         25 |         33
+        tokio multi-thread |            1 |         57 |         48 |        109
+
+================================================================================
+  yield_in_hot_loop: 2 actors × 500000 yields (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |     188506 |     187971 |     190121
+      tokio current_thread |      1000000 |     149663 |     148978 |     150733
+
+================================================================================
+  uncontended_channel: 1→1, 1000000 msgs (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |      26945 |      26703 |      29430
+      tokio current_thread |      1000000 |      52332 |      51838 |      54062
+
+================================================================================
+  catch_unwind_panics: 10000 tasks, 50% panic
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     146192 |     143776 |     150609
+            smarm 1-thread |        10000 |     144012 |     140604 |     153892
+      tokio current_thread |        10000 |     268341 |     260941 |     275404
+        tokio multi-thread |        10000 |     272691 |     268094 |     307084
diff --git a/benches/baseline-output/sweep/ai32_tc150k.txt b/benches/baseline-output/sweep/ai32_tc150k.txt
new file mode 100644
index 0000000..0aa66c6
--- /dev/null
+++ b/benches/baseline-output/sweep/ai32_tc150k.txt
@@ -0,0 +1,126 @@
+smarm general benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000
+
+================================================================================
+  chained_spawn: depth 1000
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |       8653 |       8522 |       9163
+            smarm 1-thread |         1000 |       8908 |       8660 |      10606
+      tokio current_thread |         1000 |        124 |        123 |        175
+        tokio multi-thread |         1000 |        244 |        184 |        340
+
+================================================================================
+  yield_many: 200 tasks × 1000 yields
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       200000 |      42597 |      41857 |      43492
+            smarm 1-thread |       200000 |      42621 |      42097 |      44386
+      tokio current_thread |       200000 |      15368 |      15144 |      16484
+        tokio multi-thread |       200000 |      16120 |      16012 |      19222
+
+================================================================================
+  fan_out_compute: primes in [2, 400000) across 64
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      30499 |      29657 |      33910
+            smarm 1-thread |        33860 |      31190 |      30105 |      32675
+      tokio current_thread |        33860 |      28748 |      28643 |      29398
+        tokio multi-thread |        33860 |      34714 |      34499 |      36338
+
+================================================================================
+  ping_pong_oneshot: 1000 rounds
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |      16990 |      16853 |      17540
+            smarm 1-thread |         1000 |      16944 |      16740 |      18603
+      tokio current_thread |         1000 |        937 |        921 |       1056
+        tokio multi-thread |         1000 |       4342 |       4205 |       4549
+smarm tokio-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64
+
+================================================================================
+  spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     130032 |     128075 |     153842
+            smarm 1-thread |        10000 |     126396 |     125101 |     131406
+      tokio current_thread |        10000 |       2685 |       2629 |       2841
+        tokio multi-thread |        10000 |       6014 |       4126 |      11484
+
+================================================================================
+  mpsc_contention: 32 producers × 10000 msgs → 1 consumer
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       320000 |       9122 |       8987 |       9334
+            smarm 1-thread |       320000 |       9073 |       8956 |      10151
+      tokio current_thread |       320000 |      17259 |      17163 |      17673
+        tokio multi-thread |       320000 |      22771 |      17709 |      24514
+
+================================================================================
+  many_timers: 10000 actors sleeping 1–10 ms
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     137844 |     134570 |     157034
+            smarm 1-thread |        10000 |     141200 |     137494 |     156214
+      tokio current_thread |        10000 |      14809 |      14024 |      16518
+        tokio multi-thread |        10000 |      15089 |      14704 |      15331
+
+================================================================================
+  multi_thread_scaling: primes in [2, 400000) across 64 workers
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      30880 |      29931 |      32667
+      tokio multi 1-thread |        33860 |      29862 |      29116 |      31310
+smarm smarm-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000
+
+================================================================================
+  deep_recursion: depth 500
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |            1 |         90 |         80 |        196
+            smarm 1-thread |            1 |         87 |         79 |        126
+      tokio current_thread |            1 |         25 |         25 |         53
+        tokio multi-thread |            1 |         52 |         47 |         88
+
+================================================================================
+  yield_in_hot_loop: 2 actors × 500000 yields (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |     191187 |     187194 |     198269
+      tokio current_thread |      1000000 |     152531 |     151113 |     154462
+
+================================================================================
+  uncontended_channel: 1→1, 1000000 msgs (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |      27413 |      27312 |      29463
+      tokio current_thread |      1000000 |      53620 |      52594 |      55332
+
+================================================================================
+  catch_unwind_panics: 10000 tasks, 50% panic
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     144199 |     141893 |     157984
+            smarm 1-thread |        10000 |     144857 |     142722 |     152275
+      tokio current_thread |        10000 |     268006 |     264666 |     274542
+        tokio multi-thread |        10000 |     271827 |     268740 |     290301
diff --git a/benches/baseline-output/sweep/ai32_tc300k.txt b/benches/baseline-output/sweep/ai32_tc300k.txt
new file mode 100644
index 0000000..1ec63b1
--- /dev/null
+++ b/benches/baseline-output/sweep/ai32_tc300k.txt
@@ -0,0 +1,126 @@
+smarm general benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000
+
+================================================================================
+  chained_spawn: depth 1000
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |       8950 |       8591 |      10655
+            smarm 1-thread |         1000 |       9688 |       8657 |      11720
+      tokio current_thread |         1000 |        123 |        123 |        256
+        tokio multi-thread |         1000 |        192 |        177 |        314
+
+================================================================================
+  yield_many: 200 tasks × 1000 yields
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       200000 |      42965 |      41667 |      44850
+            smarm 1-thread |       200000 |      42881 |      41634 |      48864
+      tokio current_thread |       200000 |      15112 |      14986 |      15484
+        tokio multi-thread |       200000 |      16006 |      15915 |      16647
+
+================================================================================
+  fan_out_compute: primes in [2, 400000) across 64
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      29931 |      29750 |      31707
+            smarm 1-thread |        33860 |      29977 |      29670 |      30996
+      tokio current_thread |        33860 |      28615 |      28441 |      30188
+        tokio multi-thread |        33860 |      34371 |      34330 |      35176
+
+================================================================================
+  ping_pong_oneshot: 1000 rounds
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |      16753 |      16498 |      18516
+            smarm 1-thread |         1000 |      16728 |      16599 |      16874
+      tokio current_thread |         1000 |        940 |        933 |       1037
+        tokio multi-thread |         1000 |       4317 |       4236 |       4427
+smarm tokio-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64
+
+================================================================================
+  spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     132575 |     128629 |     136999
+            smarm 1-thread |        10000 |     130313 |     127372 |     157234
+      tokio current_thread |        10000 |       2689 |       2611 |       2833
+        tokio multi-thread |        10000 |      11337 |       4288 |      12635
+
+================================================================================
+  mpsc_contention: 32 producers × 10000 msgs → 1 consumer
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       320000 |       9122 |       9000 |      11033
+            smarm 1-thread |       320000 |       9143 |       9015 |       9333
+      tokio current_thread |       320000 |      17705 |      17250 |      18111
+        tokio multi-thread |       320000 |      18044 |      17621 |      19484
+
+================================================================================
+  many_timers: 10000 actors sleeping 1–10 ms
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     141925 |     135531 |     188381
+            smarm 1-thread |        10000 |     139655 |     134291 |     146458
+      tokio current_thread |        10000 |      13837 |      13621 |      14877
+        tokio multi-thread |        10000 |      14992 |      14542 |      15237
+
+================================================================================
+  multi_thread_scaling: primes in [2, 400000) across 64 workers
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      29687 |      29554 |      31408
+      tokio multi 1-thread |        33860 |      28963 |      28742 |      30236
+smarm smarm-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000
+
+================================================================================
+  deep_recursion: depth 500
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |            1 |         83 |         80 |        128
+            smarm 1-thread |            1 |         86 |         77 |        149
+      tokio current_thread |            1 |         25 |         25 |         50
+        tokio multi-thread |            1 |         53 |         47 |         84
+
+================================================================================
+  yield_in_hot_loop: 2 actors × 500000 yields (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |     197474 |     194313 |     201690
+      tokio current_thread |      1000000 |     149289 |     148575 |     154319
+
+================================================================================
+  uncontended_channel: 1→1, 1000000 msgs (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |      26884 |      26675 |      29436
+      tokio current_thread |      1000000 |      52594 |      51941 |      54495
+
+================================================================================
+  catch_unwind_panics: 10000 tasks, 50% panic
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     148321 |     146050 |     152943
+            smarm 1-thread |        10000 |     147961 |     144521 |     152158
+      tokio current_thread |        10000 |     264487 |     260848 |     274838
+        tokio multi-thread |        10000 |     272103 |     265687 |     285209
diff --git a/benches/baseline-output/sweep/ai512_tc300k.txt b/benches/baseline-output/sweep/ai512_tc300k.txt
new file mode 100644
index 0000000..7bd894b
--- /dev/null
+++ b/benches/baseline-output/sweep/ai512_tc300k.txt
@@ -0,0 +1,126 @@
+smarm general benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000
+
+================================================================================
+  chained_spawn: depth 1000
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |       8574 |       8421 |       8729
+            smarm 1-thread |         1000 |       8675 |       8401 |      12686
+      tokio current_thread |         1000 |        125 |        125 |        148
+        tokio multi-thread |         1000 |        188 |        184 |        291
+
+================================================================================
+  yield_many: 200 tasks × 1000 yields
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       200000 |      42389 |      41316 |      46466
+            smarm 1-thread |       200000 |      41776 |      41342 |      48940
+      tokio current_thread |       200000 |      15168 |      15094 |      15658
+        tokio multi-thread |       200000 |      15953 |      15862 |      17408
+
+================================================================================
+  fan_out_compute: primes in [2, 400000) across 64
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      29680 |      29572 |      30661
+            smarm 1-thread |        33860 |      29816 |      29597 |      30401
+      tokio current_thread |        33860 |      28657 |      28581 |      29488
+        tokio multi-thread |        33860 |      34837 |      34529 |      37270
+
+================================================================================
+  ping_pong_oneshot: 1000 rounds
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |      16735 |      16601 |      17444
+            smarm 1-thread |         1000 |      16702 |      16500 |      17184
+      tokio current_thread |         1000 |        898 |        873 |        994
+        tokio multi-thread |         1000 |       4343 |       4241 |       4448
+smarm tokio-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64
+
+================================================================================
+  spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     128408 |     126199 |     133268
+            smarm 1-thread |        10000 |     131599 |     129387 |     135080
+      tokio current_thread |        10000 |       2718 |       2661 |       2981
+        tokio multi-thread |        10000 |       7264 |       4608 |      11583
+
+================================================================================
+  mpsc_contention: 32 producers × 10000 msgs → 1 consumer
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       320000 |       9289 |       9039 |       9751
+            smarm 1-thread |       320000 |       9510 |       9157 |       9677
+      tokio current_thread |       320000 |      17550 |      17290 |      18578
+        tokio multi-thread |       320000 |      18336 |      17527 |      18989
+
+================================================================================
+  many_timers: 10000 actors sleeping 1–10 ms
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     139111 |     136105 |     146606
+            smarm 1-thread |        10000 |     137302 |     133316 |     141350
+      tokio current_thread |        10000 |      13720 |      13455 |      14607
+        tokio multi-thread |        10000 |      14964 |      14546 |      15400
+
+================================================================================
+  multi_thread_scaling: primes in [2, 400000) across 64 workers
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      30048 |      29705 |      31530
+      tokio multi 1-thread |        33860 |      28894 |      28682 |      30094
+smarm smarm-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000
+
+================================================================================
+  deep_recursion: depth 500
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |            1 |         93 |         81 |        161
+            smarm 1-thread |            1 |        103 |         80 |        178
+      tokio current_thread |            1 |         25 |         25 |         28
+        tokio multi-thread |            1 |         53 |         47 |         74
+
+================================================================================
+  yield_in_hot_loop: 2 actors × 500000 yields (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |     188726 |     187640 |     192658
+      tokio current_thread |      1000000 |     149332 |     148133 |     155745
+
+================================================================================
+  uncontended_channel: 1→1, 1000000 msgs (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |      27630 |      27086 |      29749
+      tokio current_thread |      1000000 |      54225 |      53355 |      56307
+
+================================================================================
+  catch_unwind_panics: 10000 tasks, 50% panic
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     144934 |     143038 |     163552
+            smarm 1-thread |        10000 |     146614 |     143653 |     151325
+      tokio current_thread |        10000 |     266330 |     263523 |     271639
+        tokio multi-thread |        10000 |     274729 |     266323 |     285114
diff --git a/benches/baseline-output/sweep/ai64_tc150k.txt b/benches/baseline-output/sweep/ai64_tc150k.txt
new file mode 100644
index 0000000..144c39f
--- /dev/null
+++ b/benches/baseline-output/sweep/ai64_tc150k.txt
@@ -0,0 +1,126 @@
+smarm general benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000
+
+================================================================================
+  chained_spawn: depth 1000
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |       8849 |       8486 |       9224
+            smarm 1-thread |         1000 |       8841 |       8477 |       9108
+      tokio current_thread |         1000 |        124 |        124 |        219
+        tokio multi-thread |         1000 |        187 |        184 |        283
+
+================================================================================
+  yield_many: 200 tasks × 1000 yields
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       200000 |      41681 |      41278 |      43685
+            smarm 1-thread |       200000 |      41721 |      41218 |      42261
+      tokio current_thread |       200000 |      14969 |      14940 |      15051
+        tokio multi-thread |       200000 |      16004 |      15868 |      17569
+
+================================================================================
+  fan_out_compute: primes in [2, 400000) across 64
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      29679 |      29516 |      30105
+            smarm 1-thread |        33860 |      29677 |      29594 |      31365
+      tokio current_thread |        33860 |      28656 |      28572 |      29239
+        tokio multi-thread |        33860 |      34783 |      34617 |      36531
+
+================================================================================
+  ping_pong_oneshot: 1000 rounds
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |      17009 |      16822 |      17418
+            smarm 1-thread |         1000 |      16866 |      16723 |      17315
+      tokio current_thread |         1000 |        880 |        871 |       1035
+        tokio multi-thread |         1000 |       4263 |       4178 |       4391
+smarm tokio-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64
+
+================================================================================
+  spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     126566 |     124995 |     130402
+            smarm 1-thread |        10000 |     128278 |     126209 |     135156
+      tokio current_thread |        10000 |       2680 |       2640 |       2787
+        tokio multi-thread |        10000 |       7411 |       4393 |      12421
+
+================================================================================
+  mpsc_contention: 32 producers × 10000 msgs → 1 consumer
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       320000 |       9073 |       8937 |       9324
+            smarm 1-thread |       320000 |       9120 |       9018 |       9263
+      tokio current_thread |       320000 |      17245 |      17180 |      17574
+        tokio multi-thread |       320000 |      18518 |      17685 |      19621
+
+================================================================================
+  many_timers: 10000 actors sleeping 1–10 ms
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     141855 |     135415 |     145810
+            smarm 1-thread |        10000 |     138265 |     135535 |     142346
+      tokio current_thread |        10000 |      14441 |      13453 |      14650
+        tokio multi-thread |        10000 |      14956 |      14529 |      15451
+
+================================================================================
+  multi_thread_scaling: primes in [2, 400000) across 64 workers
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      30033 |      29659 |      31803
+      tokio multi 1-thread |        33860 |      29078 |      28963 |      30231
+smarm smarm-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000
+
+================================================================================
+  deep_recursion: depth 500
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |            1 |         83 |         79 |        132
+            smarm 1-thread |            1 |         85 |         78 |        146
+      tokio current_thread |            1 |         25 |         25 |         73
+        tokio multi-thread |            1 |         51 |         47 |         64
+
+================================================================================
+  yield_in_hot_loop: 2 actors × 500000 yields (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |     191352 |     188830 |     196235
+      tokio current_thread |      1000000 |     152382 |     150674 |     187815
+
+================================================================================
+  uncontended_channel: 1→1, 1000000 msgs (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |      27552 |      27099 |      30612
+      tokio current_thread |      1000000 |      53160 |      52436 |      55255
+
+================================================================================
+  catch_unwind_panics: 10000 tasks, 50% panic
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     145243 |     143291 |     173727
+            smarm 1-thread |        10000 |     145242 |     142819 |     148457
+      tokio current_thread |        10000 |     266471 |     262904 |     269145
+        tokio multi-thread |        10000 |     274195 |     269312 |     286111
diff --git a/benches/baseline-output/sweep/ai64_tc300k.txt b/benches/baseline-output/sweep/ai64_tc300k.txt
new file mode 100644
index 0000000..691f170
--- /dev/null
+++ b/benches/baseline-output/sweep/ai64_tc300k.txt
@@ -0,0 +1,126 @@
+smarm general benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000
+
+================================================================================
+  chained_spawn: depth 1000
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |       8735 |       8508 |       9314
+            smarm 1-thread |         1000 |       8808 |       8506 |      10346
+      tokio current_thread |         1000 |        123 |        123 |        172
+        tokio multi-thread |         1000 |        190 |        184 |        273
+
+================================================================================
+  yield_many: 200 tasks × 1000 yields
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       200000 |      41619 |      41255 |      43489
+            smarm 1-thread |       200000 |      41544 |      41196 |      43259
+      tokio current_thread |       200000 |      15382 |      15233 |      16007
+        tokio multi-thread |       200000 |      16095 |      15999 |      16296
+
+================================================================================
+  fan_out_compute: primes in [2, 400000) across 64
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      30032 |      29838 |      31744
+            smarm 1-thread |        33860 |      29782 |      29653 |      30601
+      tokio current_thread |        33860 |      28754 |      28614 |      30700
+        tokio multi-thread |        33860 |      34988 |      34570 |      36871
+
+================================================================================
+  ping_pong_oneshot: 1000 rounds
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |         1000 |      17088 |      16868 |      18654
+            smarm 1-thread |         1000 |      16951 |      16797 |      17783
+      tokio current_thread |         1000 |        932 |        899 |       1019
+        tokio multi-thread |         1000 |       4340 |       4273 |       5245
+smarm tokio-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64
+
+================================================================================
+  spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     129009 |     127353 |     132990
+            smarm 1-thread |        10000 |     128009 |     126554 |     140472
+      tokio current_thread |        10000 |       2666 |       2624 |       2794
+        tokio multi-thread |        10000 |       5974 |       4368 |      11517
+
+================================================================================
+  mpsc_contention: 32 producers × 10000 msgs → 1 consumer
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |       320000 |       9044 |       8970 |      10788
+            smarm 1-thread |       320000 |       9087 |       8995 |      12500
+      tokio current_thread |       320000 |      17185 |      17072 |      18440
+        tokio multi-thread |       320000 |      17720 |      17394 |      19182
+
+================================================================================
+  many_timers: 10000 actors sleeping 1–10 ms
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     145819 |     140671 |     150512
+            smarm 1-thread |        10000 |     139046 |     135846 |     146127
+      tokio current_thread |        10000 |      13866 |      13522 |      14670
+        tokio multi-thread |        10000 |      14900 |      14471 |      16378
+
+================================================================================
+  multi_thread_scaling: primes in [2, 400000) across 64 workers
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        33860 |      30695 |      29720 |      33196
+      tokio multi 1-thread |        33860 |      29261 |      28895 |      31013
+smarm smarm-favored benchmarks
+available parallelism: 1 threads
+ITERS=15 (+1 warmup, discarded)
+RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000
+
+================================================================================
+  deep_recursion: depth 500
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |            1 |         82 |         79 |        113
+            smarm 1-thread |            1 |         85 |         78 |        143
+      tokio current_thread |            1 |         25 |         25 |         56
+        tokio multi-thread |            1 |         50 |         47 |         63
+
+================================================================================
+  yield_in_hot_loop: 2 actors × 500000 yields (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |     188698 |     187922 |     192263
+      tokio current_thread |      1000000 |     150231 |     148746 |     151723
+
+================================================================================
+  uncontended_channel: 1→1, 1000000 msgs (single thread)
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |      1000000 |      28461 |      27638 |      30283
+      tokio current_thread |      1000000 |      52224 |      51880 |      54732
+
+================================================================================
+  catch_unwind_panics: 10000 tasks, 50% panic
+================================================================================
+                   runtime |       result |  median µs |     min µs |     max µs
+--------------------------------------------------------------------------------
+            smarm 1-thread |        10000 |     144604 |     143246 |     145585
+            smarm 1-thread |        10000 |     148208 |     142691 |     151076
+      tokio current_thread |        10000 |     265255 |     260637 |     271065
+        tokio multi-thread |        10000 |     273131 |     271313 |     300420
diff --git a/benches/baseline.json b/benches/baseline.json
new file mode 100644
index 0000000..0958bba
--- /dev/null
+++ b/benches/baseline.json
@@ -0,0 +1,224 @@
+{
+  "chained_spawn": {
+    "smarm 1-thread": {
+      "result": 1000,
+      "median": 8637,
+      "min": 8553,
+      "max": 8933
+    },
+    "tokio current_thread": {
+      "result": 1000,
+      "median": 124,
+      "min": 124,
+      "max": 153
+    },
+    "tokio multi-thread": {
+      "result": 1000,
+      "median": 188,
+      "min": 183,
+      "max": 229
+    }
+  },
+  "yield_many": {
+    "smarm 1-thread": {
+      "result": 200000,
+      "median": 41622,
+      "min": 41063,
+      "max": 44973
+    },
+    "tokio current_thread": {
+      "result": 200000,
+      "median": 15085,
+      "min": 15013,
+      "max": 15274
+    },
+    "tokio multi-thread": {
+      "result": 200000,
+      "median": 15964,
+      "min": 15880,
+      "max": 17959
+    }
+  },
+  "fan_out_compute": {
+    "smarm 1-thread": {
+      "result": 33860,
+      "median": 29727,
+      "min": 29491,
+      "max": 31634
+    },
+    "tokio current_thread": {
+      "result": 33860,
+      "median": 28503,
+      "min": 28391,
+      "max": 28866
+    },
+    "tokio multi-thread": {
+      "result": 33860,
+      "median": 34542,
+      "min": 34396,
+      "max": 36111
+    }
+  },
+  "ping_pong_oneshot": {
+    "smarm 1-thread": {
+      "result": 1000,
+      "median": 16848,
+      "min": 16633,
+      "max": 17301
+    },
+    "tokio current_thread": {
+      "result": 1000,
+      "median": 879,
+      "min": 868,
+      "max": 973
+    },
+    "tokio multi-thread": {
+      "result": 1000,
+      "median": 4328,
+      "min": 4223,
+      "max": 4461
+    }
+  },
+  "spawn_storm_busy": {
+    "smarm 1-thread": {
+      "result": 10000,
+      "median": 130058,
+      "min": 126790,
+      "max": 134475
+    },
+    "tokio current_thread": {
+      "result": 10000,
+      "median": 2772,
+      "min": 2641,
+      "max": 4367
+    },
+    "tokio multi-thread": {
+      "result": 10000,
+      "median": 7462,
+      "min": 4469,
+      "max": 12892
+    }
+  },
+  "mpsc_contention": {
+    "smarm 1-thread": {
+      "result": 320000,
+      "median": 9260,
+      "min": 9095,
+      "max": 10081
+    },
+    "tokio current_thread": {
+      "result": 320000,
+      "median": 17570,
+      "min": 17213,
+      "max": 18276
+    },
+    "tokio multi-thread": {
+      "result": 320000,
+      "median": 17593,
+      "min": 17452,
+      "max": 19564
+    }
+  },
+  "many_timers": {
+    "smarm 1-thread": {
+      "result": 10000,
+      "median": 135806,
+      "min": 132573,
+      "max": 141651
+    },
+    "tokio current_thread": {
+      "result": 10000,
+      "median": 14462,
+      "min": 13555,
+      "max": 15457
+    },
+    "tokio multi-thread": {
+      "result": 10000,
+      "median": 15011,
+      "min": 14655,
+      "max": 15368
+    }
+  },
+  "multi_thread_scaling": {
+    "smarm 1-thread": {
+      "result": 33860,
+      "median": 30029,
+      "min": 29720,
+      "max": 31351
+    },
+    "tokio multi 1-thread": {
+      "result": 33860,
+      "median": 28983,
+      "min": 28908,
+      "max": 29323
+    }
+  },
+  "deep_recursion": {
+    "smarm 1-thread": {
+      "result": 1,
+      "median": 83,
+      "min": 78,
+      "max": 587
+    },
+    "tokio current_thread": {
+      "result": 1,
+      "median": 25,
+      "min": 25,
+      "max": 33
+    },
+    "tokio multi-thread": {
+      "result": 1,
+      "median": 59,
+      "min": 47,
+      "max": 205
+    }
+  },
+  "yield_in_hot_loop": {
+    "smarm 1-thread": {
+      "result": 1000000,
+      "median": 188753,
+      "min": 187007,
+      "max": 194366
+    },
+    "tokio current_thread": {
+      "result": 1000000,
+      "median": 153929,
+      "min": 152712,
+      "max": 158749
+    }
+  },
+  "uncontended_channel": {
+    "smarm 1-thread": {
+      "result": 1000000,
+      "median": 26811,
+      "min": 26498,
+      "max": 29069
+    },
+    "tokio current_thread": {
+      "result": 1000000,
+      "median": 51888,
+      "min": 51530,
+      "max": 52708
+    }
+  },
+  "catch_unwind_panics": {
+    "smarm 1-thread": {
+      "result": 10000,
+      "median": 142215,
+      "min": 140189,
+      "max": 143570
+    },
+    "tokio current_thread": {
+      "result": 10000,
+      "median": 682295,
+      "min": 670281,
+      "max": 700774
+    },
+    "tokio multi-thread": {
+      "result": 10000,
+      "median": 662688,
+      "min": 641453,
+      "max": 681868
+    }
+  }
+}
\ No newline at end of file
diff --git a/benches/general.rs b/benches/general.rs
new file mode 100644
index 0000000..96fa9e7
--- /dev/null
+++ b/benches/general.rs
@@ -0,0 +1,442 @@
+//! General benchmarks — workloads where neither runtime has a structural
+//! advantage. Both should be competitive; large gaps here indicate a real
+//! difference in per-task or per-yield overhead.
+//!
+//! Workloads:
+//!   1. chained_spawn  — task N spawns N+1, depth 1000. Spawn+exit overhead in
+//!                       a serial chain. Adapted from tokio's bench of the same
+//!                       name.
+//!   2. yield_many     — 200 actors × 1000 yields. Pure scheduling throughput
+//!                       with no allocation, no IO. Adapted from tokio.
+//!   3. fan_out_compute— count primes in [2, 400_000) across 64 workers. Same
+//!                       shape as multi_scheduler::primes but lives here for
+//!                       completeness.
+//!   4. ping_pong_oneshot — N rounds of (spawn pair, send oneshot, await).
+//!                       Closer to a request/response workload than channel
+//!                       ping-pong.
+
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::Arc;
+use std::time::Instant;
+
+// ---------------------------------------------------------------------------
+// Shared harness
+// ---------------------------------------------------------------------------
+
+const ITERS: u32 = 15;
+
+fn available_threads() -> usize {
+    std::thread::available_parallelism().map_or(1, std::num::NonZeroUsize::get) // 1 if unknown
+}
+
+fn print_header(title: &str) {
+    // Section banner (title between two 80-column `=` rules), then the column headings.
+    let rule = "=".repeat(80);
+    println!("\n{rule}\n  {title}\n{rule}");
+    println!(
+        "{:>26} | {:>12} | {:>10} | {:>10} | {:>10}",
+        "runtime", "result", "median µs", "min µs", "max µs"
+    );
+    println!("{}", "-".repeat(80));
+}
+
+/// Calls `f` once as a discarded warmup, then `n` more times, and prints one table row:
+/// the value from the last call plus the median / min / max of the reported times.
+/// For even `n` the median is the upper of the two middle samples. With `n == 0` there
+/// is nothing to summarise, so no row is printed instead of indexing an empty `Vec`.
+fn run_n<F: FnMut() -> (u64, u128)>(name: &str, n: u32, mut f: F) {
+    let _ = f(); // warmup, discarded
+    let mut last = 0u64;
+    let mut times = Vec::with_capacity(n as usize);
+    for _ in 0..n {
+        let (v, t) = f();
+        times.push(t);
+        last = v;
+    }
+    if times.is_empty() { return; }
+    times.sort_unstable();
+    // Sorted ascending: the extremes are the two ends, no extra min/max scans needed.
+    let (min, median, max) = (times[0], times[times.len() / 2], times[times.len() - 1]);
+    println!("{:>26} | {:>12} | {:>10} | {:>10} | {:>10}", name, last, median, min, max);
+}
+
+// ---------------------------------------------------------------------------
+// 1. chained_spawn — depth 1000
+// ---------------------------------------------------------------------------
+
+const CHAIN_DEPTH: u64 = 1_000;
+
+fn bench_chained_smarm(threads: usize) -> (u64, u128) {
+    let counter = Arc::new(AtomicU64::new(0));
+    let c2 = counter.clone();
+    let start = Instant::now(); // taken before runtime::init, so init is part of the timing
+    smarm::runtime::init(bench_cfg(threads)).run(move || {
+        // Fire-and-forget chain, matching tokio's bench shape: each link
+        // spawns the next link and exits immediately; depth 0 signals done
+        // via a channel. Crucially this does *not* nest joins on the
+        // spawner's stack — important because smarm actor stacks are a
+        // fixed 64 KiB.
+        let (tx, rx) = smarm::channel::<()>();
+        fn iter(c: Arc<AtomicU64>, tx: smarm::Sender<()>, n: u64) {
+            if n == 0 {
+                tx.send(()).unwrap();
+            } else {
+                // Move the single owned `Arc` along the chain (no per-link clones), like the tokio variants.
+                smarm::spawn(move || {
+                    c.fetch_add(1, Ordering::Relaxed);
+                    iter(c, tx, n - 1);
+                });
+                // Caller exits; JoinHandle dropped, no parking.
+            }
+        }
+        iter(c2, tx, CHAIN_DEPTH);
+        rx.recv().unwrap();
+    });
+    (counter.load(Ordering::Relaxed), start.elapsed().as_micros()) // (links run, elapsed µs)
+}
+
+fn bench_chained_tokio_current() -> (u64, u128) {
+    let counter = Arc::new(AtomicU64::new(0));
+    let c2 = counter.clone();
+    let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
+    let start = Instant::now(); // started after the runtime is built: construction is not timed
+    let local = tokio::task::LocalSet::new();
+    local.block_on(&rt, async move {
+        // Use a oneshot done channel like tokio's own chained_spawn bench.
+        let (done_tx, done_rx) = tokio::sync::oneshot::channel();
+        fn iter(
+            c: Arc<AtomicU64>,
+            done: tokio::sync::oneshot::Sender<()>,
+            n: u64,
+        ) {
+            if n == 0 {
+                let _ = done.send(()); // end of the chain: signal the awaiting root future
+            } else {
+                tokio::task::spawn_local(async move {
+                    c.fetch_add(1, Ordering::Relaxed);
+                    iter(c, done, n - 1); // each link spawns its successor, then finishes
+                });
+            }
+        }
+        iter(c2, done_tx, CHAIN_DEPTH);
+        let _ = done_rx.await;
+    });
+    (counter.load(Ordering::Relaxed), start.elapsed().as_micros()) // (links run, elapsed µs)
+}
+
+fn bench_chained_tokio_multi() -> (u64, u128) {
+    let counter = Arc::new(AtomicU64::new(0));
+    let c2 = counter.clone();
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .worker_threads(available_threads()) // worker count follows available_threads()
+        .build()
+        .unwrap();
+    let start = Instant::now(); // started after the runtime is built: construction is not timed
+    rt.block_on(async move {
+        let (done_tx, done_rx) = tokio::sync::oneshot::channel();
+        fn iter(c: Arc<AtomicU64>, done: tokio::sync::oneshot::Sender<()>, n: u64) {
+            if n == 0 {
+                let _ = done.send(()); // end of the chain: signal the awaiting root future
+            } else {
+                tokio::spawn(async move {
+                    c.fetch_add(1, Ordering::Relaxed);
+                    iter(c, done, n - 1); // each link spawns its successor, then finishes
+                });
+            }
+        }
+        iter(c2, done_tx, CHAIN_DEPTH);
+        let _ = done_rx.await;
+    });
+    (counter.load(Ordering::Relaxed), start.elapsed().as_micros()) // (links run, elapsed µs)
+}
+
+// ---------------------------------------------------------------------------
+// 2. yield_many — 200 actors × 1000 yields
+// ---------------------------------------------------------------------------
+
+const YIELD_TASKS: u64 = 200;
+const YIELD_ROUNDS: u64 = 1_000;
+
+fn bench_yield_smarm(threads: usize) -> (u64, u128) {
+    let start = Instant::now(); // taken before runtime::init, so init is part of the timing
+    smarm::runtime::init(bench_cfg(threads)).run(|| {
+        let mut handles = Vec::new();
+        for _ in 0..YIELD_TASKS {
+            handles.push(smarm::spawn(|| {
+                for _ in 0..YIELD_ROUNDS {
+                    smarm::yield_now();
+                }
+            }));
+        }
+        for h in handles {
+            h.join().unwrap(); // panics if join reports an error
+        }
+    });
+    (YIELD_TASKS * YIELD_ROUNDS, start.elapsed().as_micros()) // result is the configured yield count, not a measured one
+}
+
+fn bench_yield_tokio_current() -> (u64, u128) {
+    let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
+    let start = Instant::now(); // started after the runtime is built: construction is not timed
+    let local = tokio::task::LocalSet::new();
+    local.block_on(&rt, async move {
+        let mut handles = Vec::new();
+        for _ in 0..YIELD_TASKS {
+            handles.push(tokio::task::spawn_local(async move {
+                for _ in 0..YIELD_ROUNDS {
+                    tokio::task::yield_now().await;
+                }
+            }));
+        }
+        for h in handles {
+            let _ = h.await; // join result is deliberately ignored
+        }
+    });
+    (YIELD_TASKS * YIELD_ROUNDS, start.elapsed().as_micros()) // result is the configured yield count, not a measured one
+}
+
+fn bench_yield_tokio_multi() -> (u64, u128) {
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .worker_threads(available_threads()) // worker count follows available_threads()
+        .build()
+        .unwrap();
+    let start = Instant::now(); // started after the runtime is built: construction is not timed
+    rt.block_on(async move {
+        let mut handles = Vec::new();
+        for _ in 0..YIELD_TASKS {
+            handles.push(tokio::spawn(async move {
+                for _ in 0..YIELD_ROUNDS {
+                    tokio::task::yield_now().await;
+                }
+            }));
+        }
+        for h in handles {
+            let _ = h.await; // join result is deliberately ignored
+        }
+    });
+    (YIELD_TASKS * YIELD_ROUNDS, start.elapsed().as_micros()) // result is the configured yield count, not a measured one
+}
+
+// ---------------------------------------------------------------------------
+// 3. fan_out_compute — primes, same shape as multi_scheduler::primes
+// ---------------------------------------------------------------------------
+
+const PRIME_N: u64 = 400_000;
+const PRIME_WORKERS: u64 = 64;
+
+fn is_prime(n: u64) -> bool {
+    if n < 2 { return false; } // 0 and 1 are not prime
+    if n < 4 { return true; } // 2 and 3
+    if n % 2 == 0 { return false; } // every other even number is composite
+    let mut i = 3u64; // trial-divide by odd candidates only
+    while i * i <= n { if n % i == 0 { return false; } i += 2; } // stop once i exceeds sqrt(n)
+    true
+}
+
+fn count_primes(lo: u64, hi: u64) -> u64 {
+    (lo..hi).filter(|&n| is_prime(n)).count() as u64 // number of primes in the half-open range [lo, hi)
+}
+
+fn primes_slice(w: u64) -> (u64, u64) {
+    // Equal-width half-open chunks; the final worker also takes the division remainder.
+    let width = PRIME_N / PRIME_WORKERS;
+    let is_last = w + 1 == PRIME_WORKERS;
+    (w * width, if is_last { PRIME_N } else { w * width + width })
+}
+
+fn bench_primes_smarm(threads: usize) -> (u64, u128) {
+    let total = Arc::new(AtomicU64::new(0));
+    let t2 = total.clone();
+    let start = Instant::now(); // taken before runtime::init, so init is part of the timing
+    smarm::runtime::init(bench_cfg(threads)).run(move || {
+        let mut handles = Vec::new();
+        for w in 0..PRIME_WORKERS {
+            let (lo, hi) = primes_slice(w); // this worker's half-open sub-range
+            let tc = t2.clone();
+            handles.push(smarm::spawn(move || {
+                tc.fetch_add(count_primes(lo, hi), Ordering::Relaxed); // one atomic add per worker
+            }));
+        }
+        for h in handles { h.join().unwrap(); }
+    });
+    (total.load(Ordering::Relaxed), start.elapsed().as_micros()) // (primes found, elapsed µs)
+}
+
+fn bench_primes_tokio_current() -> (u64, u128) {
+    let total = Arc::new(AtomicU64::new(0));
+    let t2 = total.clone();
+    let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
+    let start = Instant::now(); // started after the runtime is built: construction is not timed
+    let local = tokio::task::LocalSet::new();
+    local.block_on(&rt, async move {
+        let mut handles = Vec::new();
+        for w in 0..PRIME_WORKERS {
+            let (lo, hi) = primes_slice(w); // this worker's half-open sub-range
+            let tc = t2.clone();
+            handles.push(tokio::task::spawn_local(async move {
+                tc.fetch_add(count_primes(lo, hi), Ordering::Relaxed); // one atomic add per worker
+            }));
+        }
+        for h in handles { let _ = h.await; }
+    });
+    (total.load(Ordering::Relaxed), start.elapsed().as_micros()) // (primes found, elapsed µs)
+}
+
+fn bench_primes_tokio_multi() -> (u64, u128) {
+    let total = Arc::new(AtomicU64::new(0));
+    let t2 = total.clone();
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .worker_threads(available_threads()) // worker count follows available_threads()
+        .build()
+        .unwrap();
+    let start = Instant::now(); // started after the runtime is built: construction is not timed
+    rt.block_on(async move {
+        let mut handles = Vec::new();
+        for w in 0..PRIME_WORKERS {
+            let (lo, hi) = primes_slice(w); // this worker's half-open sub-range
+            let tc = t2.clone();
+            handles.push(tokio::spawn(async move {
+                tc.fetch_add(count_primes(lo, hi), Ordering::Relaxed); // one atomic add per worker
+            }));
+        }
+        for h in handles { let _ = h.await; }
+    });
+    (total.load(Ordering::Relaxed), start.elapsed().as_micros()) // (primes found, elapsed µs)
+}
+
+// ---------------------------------------------------------------------------
+// 4. ping_pong_oneshot — 1000 rounds of spawn-pair-await
+// ---------------------------------------------------------------------------
+
+const PP_ROUNDS: u64 = 1_000;
+
+fn bench_pp_smarm(threads: usize) -> (u64, u128) {
+    let start = Instant::now(); // taken before runtime::init, so init is part of the timing
+    smarm::runtime::init(bench_cfg(threads)).run(|| {
+        for _ in 0..PP_ROUNDS {
+            // smarm has no oneshot, so use a channel<()> per round — both
+            // sides spawn, A sends ping, B replies pong, A joins B.
+            let (tx_ping, rx_ping) = smarm::channel::<()>();
+            let (tx_pong, rx_pong) = smarm::channel::<()>();
+            let hb = smarm::spawn(move || {
+                rx_ping.recv().unwrap();
+                tx_pong.send(()).unwrap();
+            });
+            let ha = smarm::spawn(move || {
+                tx_ping.send(()).unwrap();
+                rx_pong.recv().unwrap();
+            });
+            ha.join().unwrap(); // rounds are serial: both actors finish before the next round starts
+            hb.join().unwrap();
+        }
+    });
+    (PP_ROUNDS, start.elapsed().as_micros()) // result is the configured round count, not a measured one
+}
+
+fn bench_pp_tokio_current() -> (u64, u128) {
+    let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
+    let start = Instant::now(); // started after the runtime is built: construction is not timed
+    let local = tokio::task::LocalSet::new();
+    local.block_on(&rt, async move {
+        for _ in 0..PP_ROUNDS {
+            let (tx1, rx1) = tokio::sync::oneshot::channel::<()>(); // ping: A -> B
+            let (tx2, rx2) = tokio::sync::oneshot::channel::<()>(); // pong: B -> A
+            let hb = tokio::task::spawn_local(async move {
+                rx1.await.unwrap();
+                tx2.send(()).unwrap();
+            });
+            let ha = tokio::task::spawn_local(async move {
+                tx1.send(()).unwrap();
+                rx2.await.unwrap();
+            });
+            let _ = ha.await; // join results are ignored, unlike the smarm variant's join().unwrap()
+            let _ = hb.await;
+        }
+    });
+    (PP_ROUNDS, start.elapsed().as_micros()) // result is the configured round count, not a measured one
+}
+
+fn bench_pp_tokio_multi() -> (u64, u128) {
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .worker_threads(available_threads()) // worker count follows available_threads()
+        .build()
+        .unwrap();
+    let start = Instant::now(); // started after the runtime is built: construction is not timed
+    rt.block_on(async move {
+        for _ in 0..PP_ROUNDS {
+            let (tx1, rx1) = tokio::sync::oneshot::channel::<()>(); // ping: A -> B
+            let (tx2, rx2) = tokio::sync::oneshot::channel::<()>(); // pong: B -> A
+            let hb = tokio::spawn(async move {
+                rx1.await.unwrap();
+                tx2.send(()).unwrap();
+            });
+            let ha = tokio::spawn(async move {
+                tx1.send(()).unwrap();
+                rx2.await.unwrap();
+            });
+            let _ = ha.await; // join results are ignored, unlike the smarm variant's join().unwrap()
+            let _ = hb.await;
+        }
+    });
+    (PP_ROUNDS, start.elapsed().as_micros()) // result is the configured round count, not a measured one
+}
+
+// ---------------------------------------------------------------------------
+// main
+// ---------------------------------------------------------------------------
+
+
+// ---------------------------------------------------------------------------
+// Knob helper — reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES env vars
+// so the sweep script can override the preemption knobs without recompiling.
+// ---------------------------------------------------------------------------
+
+fn bench_cfg(threads: usize) -> smarm::runtime::Config {
+    // Unset knob => keep the default; set but unparsable => panic, so a sweep typo is never timed as defaults.
+    fn knob<T: std::str::FromStr>(name: &str) -> Option<T> {
+        let raw = std::env::var(name).ok()?;
+        Some(raw.parse().unwrap_or_else(|_| panic!("{name}: invalid knob value {raw:?}")))
+    }
+    let cfg = smarm::runtime::Config::exact(threads);
+    let cfg = match knob::<u32>("SMARM_ALLOC_INTERVAL") { Some(n) => cfg.alloc_interval(n), None => cfg };
+    match knob::<u64>("SMARM_TIMESLICE_CYCLES") { Some(c) => cfg.timeslice_cycles(c), None => cfg }
+}
+
+fn main() {
+    let n = available_threads();
+    println!("smarm general benchmarks");
+    println!("available parallelism: {n} threads");
+    println!("ITERS={ITERS} (+1 warmup, discarded)");
+    println!(
+        "CHAIN_DEPTH={CHAIN_DEPTH}, YIELD_TASKS={YIELD_TASKS}×{YIELD_ROUNDS}, \
+         PRIME_N={PRIME_N}/{PRIME_WORKERS} workers, PP_ROUNDS={PP_ROUNDS}"
+    );
+    // When n == 1 the n-thread row would repeat the "smarm 1-thread" label and config, so it is skipped.
+    // ---- 1. chained_spawn ----
+    print_header(&format!("chained_spawn: depth {CHAIN_DEPTH}"));
+    run_n("smarm 1-thread", ITERS, || bench_chained_smarm(1));
+    if n > 1 { run_n(&format!("smarm {n}-thread"), ITERS, || bench_chained_smarm(n)); }
+    run_n("tokio current_thread", ITERS, bench_chained_tokio_current);
+    run_n("tokio multi-thread", ITERS, bench_chained_tokio_multi);
+
+    // ---- 2. yield_many ----
+    print_header(&format!("yield_many: {YIELD_TASKS} tasks × {YIELD_ROUNDS} yields"));
+    run_n("smarm 1-thread", ITERS, || bench_yield_smarm(1));
+    if n > 1 { run_n(&format!("smarm {n}-thread"), ITERS, || bench_yield_smarm(n)); }
+    run_n("tokio current_thread", ITERS, bench_yield_tokio_current);
+    run_n("tokio multi-thread", ITERS, bench_yield_tokio_multi);
+
+    // ---- 3. fan_out_compute ----
+    print_header(&format!("fan_out_compute: primes in [2, {PRIME_N}) across {PRIME_WORKERS}"));
+    run_n("smarm 1-thread", ITERS, || bench_primes_smarm(1));
+    if n > 1 { run_n(&format!("smarm {n}-thread"), ITERS, || bench_primes_smarm(n)); }
+    run_n("tokio current_thread", ITERS, bench_primes_tokio_current);
+    run_n("tokio multi-thread", ITERS, bench_primes_tokio_multi);
+
+    // ---- 4. ping_pong_oneshot ----
+    print_header(&format!("ping_pong_oneshot: {PP_ROUNDS} rounds"));
+    run_n("smarm 1-thread", ITERS, || bench_pp_smarm(1));
+    if n > 1 { run_n(&format!("smarm {n}-thread"), ITERS, || bench_pp_smarm(n)); }
+    run_n("tokio current_thread", ITERS, bench_pp_tokio_current);
+    run_n("tokio multi-thread", ITERS, bench_pp_tokio_multi);
+}
diff --git a/benches/smarm_favored.rs b/benches/smarm_favored.rs
index 2139de5..2659558 100644
--- a/benches/smarm_favored.rs
+++ b/benches/smarm_favored.rs
@@ -84,7 +84,7 @@ fn bench_recurse_smarm(threads: usize) -> (u64, u128) {
     let total = Arc::new(AtomicU64::new(0));
     let t2 = total.clone();
     let start = Instant::now();
-    smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || {
+    smarm::runtime::init(bench_cfg(threads)).run(move || {
         // Plain Rust recursion on the actor's own (growable) stack.
         fn recurse(c: &AtomicU64, n: u64) -> u64 {
             if n == 0 {
@@ -170,7 +170,7 @@ const HOT_YIELDS: u64 = 500_000;
 
 fn bench_hot_smarm() -> (u64, u128) {
     let start = Instant::now();
-    smarm::runtime::init(smarm::runtime::Config::exact(1)).run(|| {
+    smarm::runtime::init(bench_cfg(1)).run(|| {
         let ha = smarm::spawn(|| {
             for _ in 0..HOT_YIELDS {
                 smarm::yield_now();
@@ -216,7 +216,7 @@ const UNCONT_MSGS: u64 = 1_000_000;
 
 fn bench_unc_smarm() -> (u64, u128) {
     let start = Instant::now();
-    smarm::runtime::init(smarm::runtime::Config::exact(1)).run(|| {
+    smarm::runtime::init(bench_cfg(1)).run(|| {
         let (tx, rx) = smarm::channel::<u64>();
         let consumer = smarm::spawn(move || {
             let mut count = 0u64;
@@ -273,7 +273,7 @@ fn bench_panic_smarm(threads: usize) -> (u64, u128) {
     let ok2 = ok.clone();
     let err2 = err.clone();
     let start = Instant::now();
-    smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || {
+    smarm::runtime::init(bench_cfg(threads)).run(move || {
         let mut handles = Vec::new();
         for i in 0..PANIC_TASKS {
             handles.push(smarm::spawn(move || {
@@ -355,6 +355,23 @@ fn bench_panic_tokio_multi() -> (u64, u128) {
 // main
 // ---------------------------------------------------------------------------
 
+
+// ---------------------------------------------------------------------------
+// Knob helper — reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES env vars
+// so the sweep script can override the preemption knobs without recompiling.
+// ---------------------------------------------------------------------------
+
+fn bench_cfg(threads: usize) -> smarm::runtime::Config {
+    // Unset knob => keep the default; set but unparsable => panic, so a sweep typo is never timed as defaults.
+    fn knob<T: std::str::FromStr>(name: &str) -> Option<T> {
+        let raw = std::env::var(name).ok()?;
+        Some(raw.parse().unwrap_or_else(|_| panic!("{name}: invalid knob value {raw:?}")))
+    }
+    let cfg = smarm::runtime::Config::exact(threads);
+    let cfg = match knob::<u32>("SMARM_ALLOC_INTERVAL") { Some(n) => cfg.alloc_interval(n), None => cfg };
+    match knob::<u64>("SMARM_TIMESLICE_CYCLES") { Some(c) => cfg.timeslice_cycles(c), None => cfg }
+}
+
 fn main() {
     let n = available_threads();
     println!("smarm smarm-favored benchmarks");
diff --git a/benches/sweep.py b/benches/sweep.py
new file mode 100755
index 0000000..a226a62
--- /dev/null
+++ b/benches/sweep.py
@@ -0,0 +1,347 @@
+#!/usr/bin/env python3
+"""
+smarm bench sweep + regression checker.
+
+Usage:
+    # Run a full knob sweep and print a comparison table:
+    python3 benches/sweep.py sweep
+
+    # Check the current build against the committed baseline:
+    python3 benches/sweep.py regress
+
+    # Run all benches once (default knobs) and print results:
+    python3 benches/sweep.py run
+
+The sweep grid is defined in SWEEP_GRID below.
+The regression baseline is loaded from benches/baseline.json.
+"""
+
+import argparse
+import json
+import os
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+
+REPO = Path(__file__).resolve().parent.parent
+
+# Bench files to run (primes + multi_scheduler omitted — legacy harness,
+# not part of the 12-bench suite, and insensitive to the preemption knobs).
+BENCHES = ["general", "tokio_favored", "smarm_favored"]
+
+# Knob sweep grid: (alloc_interval, timeslice_cycles)
+# alloc_interval: lower = check RDTSC more often = finer preemption
+# timeslice_cycles: lower = shorter timeslice = actors are preempted sooner
+SWEEP_GRID = [
+    (32,  150_000),
+    (64,  150_000),
+    (128, 150_000),   # default interval, shorter slice
+    (32,  300_000),
+    (64,  300_000),
+    (128, 300_000),   # <<< baseline (defaults)
+    (256, 300_000),
+    (512, 300_000),
+    (128, 600_000),
+    (128, 1_200_000),
+]
+
+# Regression threshold: warn if median is more than this % worse than baseline.
+REGRESSION_THRESHOLD_PCT = 10
+
+# ---------------------------------------------------------------------------
+# Parsing
+# ---------------------------------------------------------------------------
+
+# Match lines like:
+#   "          smarm 1-thread |      1000000 |      31473 |      28719 |      33113"
+ROW_RE = re.compile(
+    r"^\s*(?P<name>[^|]+?)\s*\|\s*(?P<result>\d+)\s*\|\s*(?P<median>\d+)\s*\|\s*(?P<min>\d+)\s*\|\s*(?P<max>\d+)\s*$"
+)
+
+# Match section headers like:
+#   "  chained_spawn: depth 1000"
+HEADER_RE = re.compile(r"^\s{2}(?P<bench>[a-z_]+)[:—]")
+
+
+def parse_output(text: str) -> dict[str, dict[str, dict]]:
+    """
+    Returns {bench_name: {runtime_label: {median, min, max, result}}}.
+    bench_name is the snake_case name extracted from the section header.
+    """
+    results: dict[str, dict[str, dict]] = {}
+    current_bench = None
+
+    for line in text.splitlines():
+        hm = HEADER_RE.match(line)
+        if hm:
+            current_bench = hm.group("bench")
+            results.setdefault(current_bench, {})
+            continue
+
+        if current_bench is None:
+            continue
+
+        rm = ROW_RE.match(line)
+        if rm:
+            label = rm.group("name").strip()
+            results[current_bench][label] = {
+                "result": int(rm.group("result")),
+                "median": int(rm.group("median")),
+                "min":    int(rm.group("min")),
+                "max":    int(rm.group("max")),
+            }
+
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Running
+# ---------------------------------------------------------------------------
+
+def run_benches(env_extra: dict[str, str] | None = None) -> dict[str, dict[str, dict]]:
+    """Run all BENCHES and return merged parsed results."""
+    env = os.environ.copy()
+    if env_extra:
+        env.update(env_extra)
+
+    all_results: dict[str, dict[str, dict]] = {}
+
+    for bench in BENCHES:
+        cmd = ["cargo", "bench", "--bench", bench]
+        proc = subprocess.run(
+            cmd,
+            cwd=REPO,
+            env=env,
+            capture_output=True,
+            text=True,
+        )
+        if proc.returncode != 0:
+            print(f"  ERROR running {bench}:\n{proc.stderr[-800:]}", file=sys.stderr)
+            continue
+        parsed = parse_output(proc.stdout)
+        all_results.update(parsed)
+
+    return all_results
+
+
+# ---------------------------------------------------------------------------
+# Baseline JSON
+# ---------------------------------------------------------------------------
+
+BASELINE_PATH = REPO / "benches" / "baseline.json"
+
+
+def load_baseline() -> dict:
+    if not BASELINE_PATH.exists():
+        sys.exit(
+            f"No baseline found at {BASELINE_PATH}.\n"
+            "Run:  python3 benches/sweep.py run  then save the output manually,\n"
+            "or use --save-baseline with the run subcommand."
+        )
+    return json.loads(BASELINE_PATH.read_text())
+
+
+def save_baseline(results: dict) -> None:
+    BASELINE_PATH.write_text(json.dumps(results, indent=2))
+    print(f"Baseline saved to {BASELINE_PATH}")
+
+
+# ---------------------------------------------------------------------------
+# Regression check
+# ---------------------------------------------------------------------------
+
+def check_regressions(current: dict, baseline: dict) -> bool:
+    """
+    Compare current results to baseline. Print warnings for regressions.
+    Returns True if any regression found.
+    """
+    any_regression = False
+
+    for bench, runtimes in baseline.items():
+        cur_bench = current.get(bench, {})
+        for label, base_data in runtimes.items():
+            cur_data = cur_bench.get(label)
+            if cur_data is None:
+                print(f"  MISSING  {bench}/{label} — not present in current run")
+                any_regression = True
+                continue
+
+            base_med = base_data["median"]
+            cur_med  = cur_data["median"]
+            if base_med == 0:
+                continue
+
+            pct = (cur_med - base_med) / base_med * 100
+            if pct > REGRESSION_THRESHOLD_PCT:
+                print(
+                    f"  REGRESSION  {bench}/{label}: "
+                    f"{base_med} → {cur_med} µs  ({pct:+.1f}%)"
+                )
+                any_regression = True
+            elif pct < -REGRESSION_THRESHOLD_PCT:
+                print(
+                    f"  IMPROVEMENT {bench}/{label}: "
+                    f"{base_med} → {cur_med} µs  ({pct:+.1f}%)"
+                )
+
+    return any_regression
+
+
+# ---------------------------------------------------------------------------
+# Pretty print
+# ---------------------------------------------------------------------------
+
+def print_results(results: dict, label: str = "") -> None:
+    if label:
+        print(f"\n{'='*70}")
+        print(f"  {label}")
+        print(f"{'='*70}")
+    for bench, runtimes in sorted(results.items()):
+        print(f"\n  [{bench}]")
+        print(f"  {'runtime':>28} | {'result':>10} | {'median µs':>10} | {'min':>8} | {'max':>8}")
+        print(f"  {'-'*75}")
+        for rt_label, data in runtimes.items():
+            print(
+                f"  {rt_label:>28} | {data['result']:>10} | "
+                f"{data['median']:>10} | {data['min']:>8} | {data['max']:>8}"
+            )
+
+
+def print_sweep_table(sweep_results: list[tuple[int, int, dict]]) -> None:
+    """Print a compact comparison across sweep points for each bench/runtime."""
+    # Collect all bench/label pairs
+    all_keys: list[tuple[str, str]] = []
+    for _, _, results in sweep_results:
+        for bench, runtimes in results.items():
+            for label in runtimes:
+                key = (bench, label)
+                if key not in all_keys:
+                    all_keys.append(key)
+
+    # Header
+    col_w = 12
+    print(f"\n{'bench/runtime':<45}", end="")
+    for interval, cycles, _ in sweep_results:
+        tag = f"ai={interval}/tc={cycles//1000}k"
+        print(f"  {tag:>{col_w}}", end="")
+    print()
+    print("-" * (45 + (col_w + 2) * len(sweep_results)))
+
+    for bench, label in all_keys:
+        key_str = f"{bench}/{label}"
+        print(f"  {key_str:<43}", end="")
+        for _, _, results in sweep_results:
+            val = results.get(bench, {}).get(label, {}).get("median")
+            cell = str(val) if val is not None else "—"
+            print(f"  {cell:>{col_w}}", end="")
+        print()
+
+
+# ---------------------------------------------------------------------------
+# Subcommands
+# ---------------------------------------------------------------------------
+
+def cmd_run(args) -> None:
+    print("Building release binaries…")
+    subprocess.run(
+        ["cargo", "build", "--release", "--benches"],
+        cwd=REPO, check=True, capture_output=True,
+    )
+    print("Running benches…")
+    results = run_benches()
+    print_results(results, "Results (default knobs)")
+    if args.save_baseline:
+        save_baseline(results)
+
+
+def cmd_regress(args) -> None:
+    baseline = load_baseline()
+    print("Building release binaries…")
+    subprocess.run(
+        ["cargo", "build", "--release", "--benches"],
+        cwd=REPO, check=True, capture_output=True,
+    )
+    print("Running benches…")
+    current = run_benches()
+    print_results(current, "Current results")
+    print(f"\nRegression check (threshold: >{REGRESSION_THRESHOLD_PCT}% slower than baseline)")
+    print("-" * 60)
+    found = check_regressions(current, baseline)
+    if not found:
+        print("  No regressions detected.")
+    sys.exit(1 if found else 0)
+
+
+def cmd_sweep(args) -> None:
+    print("Building release binaries (once)…")
+    subprocess.run(
+        ["cargo", "build", "--release", "--benches"],
+        cwd=REPO, check=True, capture_output=True,
+    )
+    # Benches are pre-built; env vars change runtime behaviour, no recompile needed.
+    sweep_results: list[tuple[int, int, dict]] = []
+
+    for interval, cycles in SWEEP_GRID:
+        tag = f"alloc_interval={interval}, timeslice_cycles={cycles}"
+        print(f"  Running: {tag} …", flush=True)
+        env_extra = {
+            "SMARM_ALLOC_INTERVAL":    str(interval),
+            "SMARM_TIMESLICE_CYCLES":  str(cycles),
+        }
+        results = run_benches(env_extra)
+        sweep_results.append((interval, cycles, results))
+
+    print_sweep_table(sweep_results)
+
+    if args.save_csv:
+        import csv
+        rows = []
+        for interval, cycles, results in sweep_results:
+            for bench, runtimes in results.items():
+                for label, data in runtimes.items():
+                    rows.append({
+                        "alloc_interval": interval,
+                        "timeslice_cycles": cycles,
+                        "bench": bench,
+                        "runtime": label,
+                        **data,
+                    })
+        with open(args.save_csv, "w", newline="") as f:
+            writer = csv.DictWriter(f, fieldnames=rows[0].keys())
+            writer.writeheader()
+            writer.writerows(rows)
+        print(f"\nCSV saved to {args.save_csv}")
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    sub = parser.add_subparsers(dest="cmd", required=True)
+
+    p_run = sub.add_parser("run", help="Run benches once with default knobs")
+    p_run.add_argument("--save-baseline", action="store_true",
+                       help="Save results as the regression baseline")
+    p_run.set_defaults(func=cmd_run)
+
+    p_reg = sub.add_parser("regress", help="Check current results against baseline")
+    p_reg.set_defaults(func=cmd_regress)
+
+    p_sw = sub.add_parser("sweep", help="Sweep preemption knobs and compare")
+    p_sw.add_argument("--save-csv", metavar="FILE",
+                      help="Write full sweep results to a CSV file")
+    p_sw.set_defaults(func=cmd_sweep)
+
+    args = parser.parse_args()
+    args.func(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benches/tokio_favored.rs b/benches/tokio_favored.rs
index 8082c15..dd96a00 100644
--- a/benches/tokio_favored.rs
+++ b/benches/tokio_favored.rs
@@ -83,7 +83,7 @@ fn bench_storm_smarm(threads: usize) -> (u64, u128) {
     let s2 = stop.clone();
 
     let start = Instant::now();
-    smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || {
+    smarm::runtime::init(bench_cfg(threads)).run(move || {
         // Background actors: yield in a tight loop until told to stop.
         let mut bg_handles = Vec::new();
         for _ in 0..STORM_BACKGROUND {
@@ -189,7 +189,7 @@ const MPSC_PER_PRODUCER: u64 = 10_000;
 
 fn bench_mpsc_smarm(threads: usize) -> (u64, u128) {
     let start = Instant::now();
-    smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(|| {
+    smarm::runtime::init(bench_cfg(threads)).run(|| {
         let (tx, rx) = smarm::channel::<u64>();
         let mut prod_handles = Vec::new();
         for p in 0..MPSC_PRODUCERS {
@@ -289,7 +289,7 @@ fn timer_delay_ms(i: u64) -> u64 {
 
 fn bench_timers_smarm(threads: usize) -> (u64, u128) {
     let start = Instant::now();
-    smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(|| {
+    smarm::runtime::init(bench_cfg(threads)).run(|| {
         let mut handles = Vec::new();
         for i in 0..TIMER_ACTORS {
             let ms = timer_delay_ms(i);
@@ -373,7 +373,7 @@ fn bench_scaling_smarm(threads: usize) -> (u64, u128) {
     let total = Arc::new(AtomicU64::new(0));
     let t2 = total.clone();
     let start = Instant::now();
-    smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || {
+    smarm::runtime::init(bench_cfg(threads)).run(move || {
         let mut handles = Vec::new();
         for w in 0..SCALING_WORKERS {
             let (lo, hi) = scaling_slice(w);
@@ -413,6 +413,23 @@ fn bench_scaling_tokio_multi(threads: usize) -> (u64, u128) {
 // main
 // ---------------------------------------------------------------------------
 
+
+// ---------------------------------------------------------------------------
+// Knob helper — reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES env vars
+// so the sweep script can override the preemption knobs without recompiling.
+// ---------------------------------------------------------------------------
+
+fn bench_cfg(threads: usize) -> smarm::runtime::Config {
+    // Look up env var `name` and parse it; unset, non-Unicode or malformed values give None.
+    fn knob<T: std::str::FromStr>(name: &str) -> Option<T> {
+        std::env::var(name).ok()?.parse().ok()
+    }
+    let mut cfg = smarm::runtime::Config::exact(threads);
+    if let Some(interval) = knob::<u32>("SMARM_ALLOC_INTERVAL") { cfg = cfg.alloc_interval(interval); }
+    if let Some(cycles) = knob::<u64>("SMARM_TIMESLICE_CYCLES") { cfg = cfg.timeslice_cycles(cycles); }
+    cfg
+}
+
 fn main() {
     let n = available_threads();
     println!("smarm tokio-favored benchmarks");