From 3da6ffaa77c652d4a42cd1c60c892d93dbccb50f Mon Sep 17 00:00:00 2001 From: Bench Date: Sun, 24 May 2026 11:48:15 +0000 Subject: [PATCH] benches: expose preemption knobs + sweep runner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Config API changes (src/preempt.rs, src/runtime.rs): - preempt: promote ALLOC_INTERVAL and TIMESLICE_CYCLES from bare consts to DEFAULT_ALLOC_INTERVAL / DEFAULT_TIMESLICE_CYCLES; store active values in thread-locals set on each actor resume so multiple runtimes can use different settings concurrently. - runtime: add alloc_interval / timeslice_cycles fields to Config; add Config::alloc_interval(n) and Config::timeslice_cycles(c) builder methods; thread the values through RuntimeInner to the reset_timeslice() call in schedule_loop. Bench changes: - Add bench_cfg(threads) helper to general/tokio_favored/smarm_favored that wraps Config::exact and reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES env vars, so the sweep script can vary knobs without recompiling. Sweep tooling (benches/sweep.py): - 'run': run the 3-file bench suite once; --save-baseline persists JSON - 'regress': compare current run against baseline.json, exit 1 on any bench that regresses >10% vs stored medians - 'sweep': run the full SWEEP_GRID (10 points), print comparison table, optional --save-csv; binaries pre-built so no recompile per point Sweep results (10-point grid, 1-CPU sandbox): - The preemption knobs have very little effect on this single-CPU machine. Most benches move <5% across the entire grid. - Longer timeslices (tc=600k, tc=1200k) reliably hurt spawn_storm_busy (+11-15%) and catch_unwind_panics (+10-12%) because actors hold the scheduler mutex longer per timeslice, stalling the storm of joinable tasks. many_timers also slows at these settings (roughly +5-16% vs. ai128_tc300k in the recorded sweep outputs). - Shorter timeslices (tc=150k) give a small improvement on many_timers (3-4% faster) and are a wash everywhere else. 
- yield_in_hot_loop and uncontended_channel are essentially flat across all knobs — both are scheduling-dominated and call yield_now explicitly, so the RDTSC-driven preemption path is irrelevant. - Conclusion: the knobs matter primarily under contention (multi-core). Re-run sweep on a multi-core machine before drawing tuning conclusions. --- .../baseline-output/sweep/ai128_tc1200k.txt | 126 +++++ .../baseline-output/sweep/ai128_tc150k.txt | 126 +++++ .../baseline-output/sweep/ai128_tc300k.txt | 126 +++++ .../baseline-output/sweep/ai128_tc600k.txt | 126 +++++ .../baseline-output/sweep/ai256_tc300k.txt | 126 +++++ benches/baseline-output/sweep/ai32_tc150k.txt | 126 +++++ benches/baseline-output/sweep/ai32_tc300k.txt | 126 +++++ .../baseline-output/sweep/ai512_tc300k.txt | 126 +++++ benches/baseline-output/sweep/ai64_tc150k.txt | 126 +++++ benches/baseline-output/sweep/ai64_tc300k.txt | 126 +++++ benches/baseline.json | 224 +++++++++ benches/general.rs | 442 ++++++++++++++++++ benches/smarm_favored.rs | 25 +- benches/sweep.py | 347 ++++++++++++++ benches/tokio_favored.rs | 25 +- 15 files changed, 2315 insertions(+), 8 deletions(-) create mode 100644 benches/baseline-output/sweep/ai128_tc1200k.txt create mode 100644 benches/baseline-output/sweep/ai128_tc150k.txt create mode 100644 benches/baseline-output/sweep/ai128_tc300k.txt create mode 100644 benches/baseline-output/sweep/ai128_tc600k.txt create mode 100644 benches/baseline-output/sweep/ai256_tc300k.txt create mode 100644 benches/baseline-output/sweep/ai32_tc150k.txt create mode 100644 benches/baseline-output/sweep/ai32_tc300k.txt create mode 100644 benches/baseline-output/sweep/ai512_tc300k.txt create mode 100644 benches/baseline-output/sweep/ai64_tc150k.txt create mode 100644 benches/baseline-output/sweep/ai64_tc300k.txt create mode 100644 benches/baseline.json create mode 100644 benches/general.rs create mode 100755 benches/sweep.py diff --git a/benches/baseline-output/sweep/ai128_tc1200k.txt 
b/benches/baseline-output/sweep/ai128_tc1200k.txt new file mode 100644 index 0000000..b268001 --- /dev/null +++ b/benches/baseline-output/sweep/ai128_tc1200k.txt @@ -0,0 +1,126 @@ +smarm general benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000 + +================================================================================ + chained_spawn: depth 1000 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 8720 | 8526 | 9319 + smarm 1-thread | 1000 | 8662 | 8571 | 8991 + tokio current_thread | 1000 | 123 | 123 | 152 + tokio multi-thread | 1000 | 188 | 184 | 230 + +================================================================================ + yield_many: 200 tasks × 1000 yields +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 200000 | 41530 | 41242 | 43501 + smarm 1-thread | 200000 | 41575 | 41187 | 43323 + tokio current_thread | 200000 | 15098 | 15020 | 15348 + tokio multi-thread | 200000 | 15900 | 15827 | 16012 + +================================================================================ + fan_out_compute: primes in [2, 400000) across 64 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 29573 | 29435 | 31647 + smarm 1-thread | 33860 | 29521 | 29453 | 29847 + tokio current_thread | 33860 | 28495 | 28441 | 30150 + tokio multi-thread | 33860 | 34384 | 34297 | 34745 + 
+================================================================================ + ping_pong_oneshot: 1000 rounds +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 17190 | 16994 | 17541 + smarm 1-thread | 1000 | 17078 | 16916 | 19139 + tokio current_thread | 1000 | 899 | 896 | 1000 + tokio multi-thread | 1000 | 4198 | 4116 | 4573 +smarm tokio-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64 + +================================================================================ + spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 138556 | 136165 | 140947 + smarm 1-thread | 10000 | 140223 | 136325 | 146781 + tokio current_thread | 10000 | 2671 | 2622 | 2913 + tokio multi-thread | 10000 | 6004 | 4360 | 12576 + +================================================================================ + mpsc_contention: 32 producers × 10000 msgs → 1 consumer +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 320000 | 9051 | 8967 | 11152 + smarm 1-thread | 320000 | 9058 | 9008 | 9998 + tokio current_thread | 320000 | 17375 | 17131 | 18514 + tokio multi-thread | 320000 | 17955 | 17452 | 18508 + +================================================================================ + many_timers: 10000 actors sleeping 1–10 ms 
+================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 156969 | 153124 | 167711 + smarm 1-thread | 10000 | 150638 | 146070 | 168286 + tokio current_thread | 10000 | 13823 | 13482 | 14796 + tokio multi-thread | 10000 | 15034 | 14425 | 15320 + +================================================================================ + multi_thread_scaling: primes in [2, 400000) across 64 workers +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 30075 | 29707 | 30720 + tokio multi 1-thread | 33860 | 29060 | 28835 | 44378 +smarm smarm-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000 + +================================================================================ + deep_recursion: depth 500 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1 | 86 | 79 | 130 + smarm 1-thread | 1 | 83 | 78 | 146 + tokio current_thread | 1 | 25 | 25 | 31 + tokio multi-thread | 1 | 49 | 46 | 85 + +================================================================================ + yield_in_hot_loop: 2 actors × 500000 yields (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 190902 | 187600 | 194333 + tokio current_thread | 1000000 | 
150279 | 148175 | 188184 + +================================================================================ + uncontended_channel: 1→1, 1000000 msgs (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 27687 | 27198 | 29555 + tokio current_thread | 1000000 | 54465 | 54048 | 55954 + +================================================================================ + catch_unwind_panics: 10000 tasks, 50% panic +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 160308 | 154365 | 167009 + smarm 1-thread | 10000 | 158662 | 155458 | 168896 + tokio current_thread | 10000 | 267762 | 260876 | 294092 + tokio multi-thread | 10000 | 275097 | 269344 | 287681 diff --git a/benches/baseline-output/sweep/ai128_tc150k.txt b/benches/baseline-output/sweep/ai128_tc150k.txt new file mode 100644 index 0000000..f5a95b0 --- /dev/null +++ b/benches/baseline-output/sweep/ai128_tc150k.txt @@ -0,0 +1,126 @@ +smarm general benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000 + +================================================================================ + chained_spawn: depth 1000 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 8596 | 8491 | 8805 + smarm 1-thread | 1000 | 8552 | 8461 | 9003 + tokio current_thread | 1000 | 125 | 125 | 260 + tokio multi-thread | 1000 | 190 | 184 | 338 + 
+================================================================================ + yield_many: 200 tasks × 1000 yields +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 200000 | 41885 | 41112 | 43292 + smarm 1-thread | 200000 | 42174 | 41063 | 43145 + tokio current_thread | 200000 | 15195 | 15010 | 15589 + tokio multi-thread | 200000 | 16037 | 15869 | 17057 + +================================================================================ + fan_out_compute: primes in [2, 400000) across 64 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 29872 | 29629 | 31596 + smarm 1-thread | 33860 | 29776 | 29528 | 30003 + tokio current_thread | 33860 | 28705 | 28605 | 30287 + tokio multi-thread | 33860 | 34655 | 34503 | 36596 + +================================================================================ + ping_pong_oneshot: 1000 rounds +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 16898 | 16574 | 17386 + smarm 1-thread | 1000 | 16871 | 16677 | 18467 + tokio current_thread | 1000 | 897 | 857 | 991 + tokio multi-thread | 1000 | 4325 | 4228 | 4458 +smarm tokio-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64 + +================================================================================ + spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns 
+================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 133462 | 129526 | 138685 + smarm 1-thread | 10000 | 130118 | 127633 | 142344 + tokio current_thread | 10000 | 2713 | 2608 | 2831 + tokio multi-thread | 10000 | 7367 | 4345 | 11741 + +================================================================================ + mpsc_contention: 32 producers × 10000 msgs → 1 consumer +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 320000 | 9077 | 8944 | 9287 + smarm 1-thread | 320000 | 9100 | 9033 | 10604 + tokio current_thread | 320000 | 17310 | 17122 | 18616 + tokio multi-thread | 320000 | 17484 | 17413 | 17748 + +================================================================================ + many_timers: 10000 actors sleeping 1–10 ms +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 140039 | 135577 | 145123 + smarm 1-thread | 10000 | 139931 | 135513 | 143841 + tokio current_thread | 10000 | 14524 | 14378 | 14564 + tokio multi-thread | 10000 | 15066 | 14677 | 15336 + +================================================================================ + multi_thread_scaling: primes in [2, 400000) across 64 workers +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 29620 | 29511 | 31347 + tokio multi 1-thread | 33860 | 29046 | 28817 | 29687 
+smarm smarm-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000 + +================================================================================ + deep_recursion: depth 500 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1 | 94 | 79 | 371 + smarm 1-thread | 1 | 183 | 83 | 317 + tokio current_thread | 1 | 25 | 25 | 31 + tokio multi-thread | 1 | 54 | 41 | 71 + +================================================================================ + yield_in_hot_loop: 2 actors × 500000 yields (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 189034 | 187674 | 192204 + tokio current_thread | 1000000 | 151106 | 149564 | 155601 + +================================================================================ + uncontended_channel: 1→1, 1000000 msgs (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 26949 | 26838 | 30868 + tokio current_thread | 1000000 | 52984 | 52149 | 55141 + +================================================================================ + catch_unwind_panics: 10000 tasks, 50% panic +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 145860 | 143015 | 152734 + smarm 
1-thread | 10000 | 144550 | 141592 | 149247 + tokio current_thread | 10000 | 267500 | 265301 | 278751 + tokio multi-thread | 10000 | 275320 | 268986 | 286891 diff --git a/benches/baseline-output/sweep/ai128_tc300k.txt b/benches/baseline-output/sweep/ai128_tc300k.txt new file mode 100644 index 0000000..b62387c --- /dev/null +++ b/benches/baseline-output/sweep/ai128_tc300k.txt @@ -0,0 +1,126 @@ +smarm general benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000 + +================================================================================ + chained_spawn: depth 1000 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 8469 | 8414 | 8717 + smarm 1-thread | 1000 | 8625 | 8479 | 10212 + tokio current_thread | 1000 | 124 | 123 | 175 + tokio multi-thread | 1000 | 194 | 184 | 317 + +================================================================================ + yield_many: 200 tasks × 1000 yields +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 200000 | 41949 | 41419 | 43784 + smarm 1-thread | 200000 | 42005 | 41491 | 45224 + tokio current_thread | 200000 | 15139 | 15049 | 16352 + tokio multi-thread | 200000 | 15985 | 15931 | 16306 + +================================================================================ + fan_out_compute: primes in [2, 400000) across 64 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 
| 29640 | 29515 | 31229 + smarm 1-thread | 33860 | 29777 | 29642 | 30056 + tokio current_thread | 33860 | 28704 | 28584 | 30317 + tokio multi-thread | 33860 | 34870 | 34569 | 35876 + +================================================================================ + ping_pong_oneshot: 1000 rounds +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 17098 | 16968 | 18688 + smarm 1-thread | 1000 | 16918 | 16736 | 17326 + tokio current_thread | 1000 | 915 | 882 | 1000 + tokio multi-thread | 1000 | 4371 | 4265 | 4834 +smarm tokio-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64 + +================================================================================ + spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 127075 | 124760 | 130259 + smarm 1-thread | 10000 | 125976 | 125121 | 128728 + tokio current_thread | 10000 | 2703 | 2646 | 2807 + tokio multi-thread | 10000 | 7201 | 4267 | 12853 + +================================================================================ + mpsc_contention: 32 producers × 10000 msgs → 1 consumer +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 320000 | 9116 | 8985 | 9237 + smarm 1-thread | 320000 | 9062 | 8947 | 10648 + tokio current_thread | 320000 | 17380 | 17192 | 18363 + tokio multi-thread 
| 320000 | 17854 | 17554 | 18219 + +================================================================================ + many_timers: 10000 actors sleeping 1–10 ms +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 137944 | 132081 | 141862 + smarm 1-thread | 10000 | 143773 | 137448 | 153703 + tokio current_thread | 10000 | 14174 | 13751 | 15079 + tokio multi-thread | 10000 | 15244 | 14625 | 16700 + +================================================================================ + multi_thread_scaling: primes in [2, 400000) across 64 workers +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 30832 | 30082 | 33360 + tokio multi 1-thread | 33860 | 29736 | 29321 | 29958 +smarm smarm-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000 + +================================================================================ + deep_recursion: depth 500 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1 | 84 | 78 | 122 + smarm 1-thread | 1 | 90 | 79 | 157 + tokio current_thread | 1 | 25 | 25 | 31 + tokio multi-thread | 1 | 48 | 47 | 62 + +================================================================================ + yield_in_hot_loop: 2 actors × 500000 yields (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs 
+-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 190830 | 188562 | 196621 + tokio current_thread | 1000000 | 151537 | 150038 | 165825 + +================================================================================ + uncontended_channel: 1→1, 1000000 msgs (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 27265 | 26969 | 29317 + tokio current_thread | 1000000 | 53894 | 53380 | 56189 + +================================================================================ + catch_unwind_panics: 10000 tasks, 50% panic +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 145006 | 144092 | 149002 + smarm 1-thread | 10000 | 144417 | 142000 | 148224 + tokio current_thread | 10000 | 265376 | 260227 | 272279 + tokio multi-thread | 10000 | 277432 | 270860 | 283266 diff --git a/benches/baseline-output/sweep/ai128_tc600k.txt b/benches/baseline-output/sweep/ai128_tc600k.txt new file mode 100644 index 0000000..a1e563f --- /dev/null +++ b/benches/baseline-output/sweep/ai128_tc600k.txt @@ -0,0 +1,126 @@ +smarm general benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000 + +================================================================================ + chained_spawn: depth 1000 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 8721 | 8398 | 
8994 + smarm 1-thread | 1000 | 8587 | 8440 | 8810 + tokio current_thread | 1000 | 124 | 124 | 294 + tokio multi-thread | 1000 | 188 | 184 | 299 + +================================================================================ + yield_many: 200 tasks × 1000 yields +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 200000 | 42588 | 42084 | 45080 + smarm 1-thread | 200000 | 42252 | 41963 | 43615 + tokio current_thread | 200000 | 15101 | 14994 | 15573 + tokio multi-thread | 200000 | 15979 | 15890 | 16356 + +================================================================================ + fan_out_compute: primes in [2, 400000) across 64 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 29686 | 29491 | 31263 + smarm 1-thread | 33860 | 29841 | 29586 | 30570 + tokio current_thread | 33860 | 28652 | 28510 | 30359 + tokio multi-thread | 33860 | 34677 | 34461 | 35318 + +================================================================================ + ping_pong_oneshot: 1000 rounds +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 16909 | 16579 | 20782 + smarm 1-thread | 1000 | 16888 | 16537 | 20808 + tokio current_thread | 1000 | 925 | 911 | 1021 + tokio multi-thread | 1000 | 4192 | 4079 | 4531 +smarm tokio-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64 + 
+================================================================================ + spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 145813 | 142042 | 152501 + smarm 1-thread | 10000 | 145119 | 141282 | 161294 + tokio current_thread | 10000 | 2968 | 2899 | 3231 + tokio multi-thread | 10000 | 6288 | 4289 | 12226 + +================================================================================ + mpsc_contention: 32 producers × 10000 msgs → 1 consumer +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 320000 | 9662 | 9254 | 11370 + smarm 1-thread | 320000 | 9673 | 9331 | 9989 + tokio current_thread | 320000 | 18015 | 17334 | 21096 + tokio multi-thread | 320000 | 18384 | 17837 | 19534 + +================================================================================ + many_timers: 10000 actors sleeping 1–10 ms +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 160492 | 154795 | 180307 + smarm 1-thread | 10000 | 161716 | 156498 | 191986 + tokio current_thread | 10000 | 13895 | 13576 | 14913 + tokio multi-thread | 10000 | 15074 | 14665 | 16070 + +================================================================================ + multi_thread_scaling: primes in [2, 400000) across 64 workers +================================================================================ + runtime | result | median µs | min µs | max µs 
+-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 30001 | 29600 | 38039 + tokio multi 1-thread | 33860 | 29419 | 28906 | 30079 +smarm smarm-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000 + +================================================================================ + deep_recursion: depth 500 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1 | 91 | 79 | 186 + smarm 1-thread | 1 | 87 | 81 | 131 + tokio current_thread | 1 | 25 | 25 | 103 + tokio multi-thread | 1 | 56 | 47 | 64 + +================================================================================ + yield_in_hot_loop: 2 actors × 500000 yields (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 190023 | 188250 | 193824 + tokio current_thread | 1000000 | 154681 | 152074 | 187328 + +================================================================================ + uncontended_channel: 1→1, 1000000 msgs (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 27264 | 26772 | 29512 + tokio current_thread | 1000000 | 53324 | 51744 | 59282 + +================================================================================ + catch_unwind_panics: 10000 tasks, 50% panic +================================================================================ + 
runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 155983 | 152595 | 161438 + smarm 1-thread | 10000 | 162122 | 156170 | 200357 + tokio current_thread | 10000 | 276303 | 264291 | 296266 + tokio multi-thread | 10000 | 271350 | 267654 | 285897 diff --git a/benches/baseline-output/sweep/ai256_tc300k.txt b/benches/baseline-output/sweep/ai256_tc300k.txt new file mode 100644 index 0000000..4bcd02d --- /dev/null +++ b/benches/baseline-output/sweep/ai256_tc300k.txt @@ -0,0 +1,126 @@ +smarm general benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000 + +================================================================================ + chained_spawn: depth 1000 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 9130 | 8720 | 10611 + smarm 1-thread | 1000 | 8808 | 8617 | 9659 + tokio current_thread | 1000 | 126 | 125 | 164 + tokio multi-thread | 1000 | 190 | 184 | 329 + +================================================================================ + yield_many: 200 tasks × 1000 yields +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 200000 | 42270 | 41814 | 44737 + smarm 1-thread | 200000 | 42999 | 42104 | 45424 + tokio current_thread | 200000 | 15441 | 15196 | 16096 + tokio multi-thread | 200000 | 16249 | 16070 | 17620 + +================================================================================ + fan_out_compute: primes in [2, 400000) across 64 
+================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 29813 | 29627 | 30176 + smarm 1-thread | 33860 | 29613 | 29440 | 31205 + tokio current_thread | 33860 | 28637 | 28406 | 29179 + tokio multi-thread | 33860 | 34472 | 34389 | 36092 + +================================================================================ + ping_pong_oneshot: 1000 rounds +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 16899 | 16804 | 17017 + smarm 1-thread | 1000 | 17001 | 16704 | 19533 + tokio current_thread | 1000 | 914 | 893 | 1021 + tokio multi-thread | 1000 | 4198 | 4136 | 4297 +smarm tokio-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64 + +================================================================================ + spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 128621 | 126503 | 132268 + smarm 1-thread | 10000 | 131316 | 128354 | 133964 + tokio current_thread | 10000 | 2763 | 2696 | 2996 + tokio multi-thread | 10000 | 6023 | 4300 | 12908 + +================================================================================ + mpsc_contention: 32 producers × 10000 msgs → 1 consumer +================================================================================ + runtime | result | median µs | min µs | max µs 
+-------------------------------------------------------------------------------- + smarm 1-thread | 320000 | 9225 | 9071 | 11272 + smarm 1-thread | 320000 | 9174 | 9028 | 9335 + tokio current_thread | 320000 | 17210 | 17100 | 18404 + tokio multi-thread | 320000 | 17550 | 17413 | 18080 + +================================================================================ + many_timers: 10000 actors sleeping 1–10 ms +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 136396 | 133330 | 142485 + smarm 1-thread | 10000 | 137374 | 134345 | 141168 + tokio current_thread | 10000 | 13789 | 13499 | 14621 + tokio multi-thread | 10000 | 15036 | 14729 | 15359 + +================================================================================ + multi_thread_scaling: primes in [2, 400000) across 64 workers +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 30065 | 29819 | 32418 + tokio multi 1-thread | 33860 | 29501 | 28916 | 30057 +smarm smarm-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000 + +================================================================================ + deep_recursion: depth 500 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1 | 94 | 81 | 257 + smarm 1-thread | 1 | 83 | 80 | 134 + tokio current_thread | 1 | 25 | 25 | 33 + tokio multi-thread | 1 | 57 | 48 | 109 + 
+================================================================================ + yield_in_hot_loop: 2 actors × 500000 yields (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 188506 | 187971 | 190121 + tokio current_thread | 1000000 | 149663 | 148978 | 150733 + +================================================================================ + uncontended_channel: 1→1, 1000000 msgs (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 26945 | 26703 | 29430 + tokio current_thread | 1000000 | 52332 | 51838 | 54062 + +================================================================================ + catch_unwind_panics: 10000 tasks, 50% panic +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 146192 | 143776 | 150609 + smarm 1-thread | 10000 | 144012 | 140604 | 153892 + tokio current_thread | 10000 | 268341 | 260941 | 275404 + tokio multi-thread | 10000 | 272691 | 268094 | 307084 diff --git a/benches/baseline-output/sweep/ai32_tc150k.txt b/benches/baseline-output/sweep/ai32_tc150k.txt new file mode 100644 index 0000000..0aa66c6 --- /dev/null +++ b/benches/baseline-output/sweep/ai32_tc150k.txt @@ -0,0 +1,126 @@ +smarm general benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000 + +================================================================================ + 
chained_spawn: depth 1000 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 8653 | 8522 | 9163 + smarm 1-thread | 1000 | 8908 | 8660 | 10606 + tokio current_thread | 1000 | 124 | 123 | 175 + tokio multi-thread | 1000 | 244 | 184 | 340 + +================================================================================ + yield_many: 200 tasks × 1000 yields +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 200000 | 42597 | 41857 | 43492 + smarm 1-thread | 200000 | 42621 | 42097 | 44386 + tokio current_thread | 200000 | 15368 | 15144 | 16484 + tokio multi-thread | 200000 | 16120 | 16012 | 19222 + +================================================================================ + fan_out_compute: primes in [2, 400000) across 64 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 30499 | 29657 | 33910 + smarm 1-thread | 33860 | 31190 | 30105 | 32675 + tokio current_thread | 33860 | 28748 | 28643 | 29398 + tokio multi-thread | 33860 | 34714 | 34499 | 36338 + +================================================================================ + ping_pong_oneshot: 1000 rounds +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 16990 | 16853 | 17540 + smarm 1-thread | 1000 | 16944 | 16740 | 18603 + tokio current_thread | 1000 | 937 | 921 | 1056 + 
tokio multi-thread | 1000 | 4342 | 4205 | 4549 +smarm tokio-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64 + +================================================================================ + spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 130032 | 128075 | 153842 + smarm 1-thread | 10000 | 126396 | 125101 | 131406 + tokio current_thread | 10000 | 2685 | 2629 | 2841 + tokio multi-thread | 10000 | 6014 | 4126 | 11484 + +================================================================================ + mpsc_contention: 32 producers × 10000 msgs → 1 consumer +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 320000 | 9122 | 8987 | 9334 + smarm 1-thread | 320000 | 9073 | 8956 | 10151 + tokio current_thread | 320000 | 17259 | 17163 | 17673 + tokio multi-thread | 320000 | 22771 | 17709 | 24514 + +================================================================================ + many_timers: 10000 actors sleeping 1–10 ms +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 137844 | 134570 | 157034 + smarm 1-thread | 10000 | 141200 | 137494 | 156214 + tokio current_thread | 10000 | 14809 | 14024 | 16518 + tokio multi-thread | 10000 | 15089 | 14704 | 15331 + 
+================================================================================ + multi_thread_scaling: primes in [2, 400000) across 64 workers +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 30880 | 29931 | 32667 + tokio multi 1-thread | 33860 | 29862 | 29116 | 31310 +smarm smarm-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000 + +================================================================================ + deep_recursion: depth 500 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1 | 90 | 80 | 196 + smarm 1-thread | 1 | 87 | 79 | 126 + tokio current_thread | 1 | 25 | 25 | 53 + tokio multi-thread | 1 | 52 | 47 | 88 + +================================================================================ + yield_in_hot_loop: 2 actors × 500000 yields (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 191187 | 187194 | 198269 + tokio current_thread | 1000000 | 152531 | 151113 | 154462 + +================================================================================ + uncontended_channel: 1→1, 1000000 msgs (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 27413 | 27312 | 
29463 + tokio current_thread | 1000000 | 53620 | 52594 | 55332 + +================================================================================ + catch_unwind_panics: 10000 tasks, 50% panic +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 144199 | 141893 | 157984 + smarm 1-thread | 10000 | 144857 | 142722 | 152275 + tokio current_thread | 10000 | 268006 | 264666 | 274542 + tokio multi-thread | 10000 | 271827 | 268740 | 290301 diff --git a/benches/baseline-output/sweep/ai32_tc300k.txt b/benches/baseline-output/sweep/ai32_tc300k.txt new file mode 100644 index 0000000..1ec63b1 --- /dev/null +++ b/benches/baseline-output/sweep/ai32_tc300k.txt @@ -0,0 +1,126 @@ +smarm general benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000 + +================================================================================ + chained_spawn: depth 1000 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 8950 | 8591 | 10655 + smarm 1-thread | 1000 | 9688 | 8657 | 11720 + tokio current_thread | 1000 | 123 | 123 | 256 + tokio multi-thread | 1000 | 192 | 177 | 314 + +================================================================================ + yield_many: 200 tasks × 1000 yields +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 200000 | 42965 | 41667 | 44850 + smarm 1-thread | 200000 | 42881 | 41634 | 48864 + tokio 
current_thread | 200000 | 15112 | 14986 | 15484 + tokio multi-thread | 200000 | 16006 | 15915 | 16647 + +================================================================================ + fan_out_compute: primes in [2, 400000) across 64 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 29931 | 29750 | 31707 + smarm 1-thread | 33860 | 29977 | 29670 | 30996 + tokio current_thread | 33860 | 28615 | 28441 | 30188 + tokio multi-thread | 33860 | 34371 | 34330 | 35176 + +================================================================================ + ping_pong_oneshot: 1000 rounds +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 16753 | 16498 | 18516 + smarm 1-thread | 1000 | 16728 | 16599 | 16874 + tokio current_thread | 1000 | 940 | 933 | 1037 + tokio multi-thread | 1000 | 4317 | 4236 | 4427 +smarm tokio-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64 + +================================================================================ + spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 132575 | 128629 | 136999 + smarm 1-thread | 10000 | 130313 | 127372 | 157234 + tokio current_thread | 10000 | 2689 | 2611 | 2833 + tokio multi-thread | 10000 | 11337 | 4288 | 12635 + 
+================================================================================ + mpsc_contention: 32 producers × 10000 msgs → 1 consumer +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 320000 | 9122 | 9000 | 11033 + smarm 1-thread | 320000 | 9143 | 9015 | 9333 + tokio current_thread | 320000 | 17705 | 17250 | 18111 + tokio multi-thread | 320000 | 18044 | 17621 | 19484 + +================================================================================ + many_timers: 10000 actors sleeping 1–10 ms +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 141925 | 135531 | 188381 + smarm 1-thread | 10000 | 139655 | 134291 | 146458 + tokio current_thread | 10000 | 13837 | 13621 | 14877 + tokio multi-thread | 10000 | 14992 | 14542 | 15237 + +================================================================================ + multi_thread_scaling: primes in [2, 400000) across 64 workers +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 29687 | 29554 | 31408 + tokio multi 1-thread | 33860 | 28963 | 28742 | 30236 +smarm smarm-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000 + +================================================================================ + deep_recursion: depth 500 +================================================================================ + runtime | result | median µs | min µs | max µs 
+-------------------------------------------------------------------------------- + smarm 1-thread | 1 | 83 | 80 | 128 + smarm 1-thread | 1 | 86 | 77 | 149 + tokio current_thread | 1 | 25 | 25 | 50 + tokio multi-thread | 1 | 53 | 47 | 84 + +================================================================================ + yield_in_hot_loop: 2 actors × 500000 yields (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 197474 | 194313 | 201690 + tokio current_thread | 1000000 | 149289 | 148575 | 154319 + +================================================================================ + uncontended_channel: 1→1, 1000000 msgs (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 26884 | 26675 | 29436 + tokio current_thread | 1000000 | 52594 | 51941 | 54495 + +================================================================================ + catch_unwind_panics: 10000 tasks, 50% panic +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 148321 | 146050 | 152943 + smarm 1-thread | 10000 | 147961 | 144521 | 152158 + tokio current_thread | 10000 | 264487 | 260848 | 274838 + tokio multi-thread | 10000 | 272103 | 265687 | 285209 diff --git a/benches/baseline-output/sweep/ai512_tc300k.txt b/benches/baseline-output/sweep/ai512_tc300k.txt new file mode 100644 index 0000000..7bd894b --- /dev/null +++ b/benches/baseline-output/sweep/ai512_tc300k.txt @@ -0,0 +1,126 @@ +smarm general benchmarks 
+available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000 + +================================================================================ + chained_spawn: depth 1000 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 8574 | 8421 | 8729 + smarm 1-thread | 1000 | 8675 | 8401 | 12686 + tokio current_thread | 1000 | 125 | 125 | 148 + tokio multi-thread | 1000 | 188 | 184 | 291 + +================================================================================ + yield_many: 200 tasks × 1000 yields +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 200000 | 42389 | 41316 | 46466 + smarm 1-thread | 200000 | 41776 | 41342 | 48940 + tokio current_thread | 200000 | 15168 | 15094 | 15658 + tokio multi-thread | 200000 | 15953 | 15862 | 17408 + +================================================================================ + fan_out_compute: primes in [2, 400000) across 64 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 29680 | 29572 | 30661 + smarm 1-thread | 33860 | 29816 | 29597 | 30401 + tokio current_thread | 33860 | 28657 | 28581 | 29488 + tokio multi-thread | 33860 | 34837 | 34529 | 37270 + +================================================================================ + ping_pong_oneshot: 1000 rounds +================================================================================ + runtime | result | median µs | min µs | 
max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 16735 | 16601 | 17444 + smarm 1-thread | 1000 | 16702 | 16500 | 17184 + tokio current_thread | 1000 | 898 | 873 | 994 + tokio multi-thread | 1000 | 4343 | 4241 | 4448 +smarm tokio-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64 + +================================================================================ + spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 128408 | 126199 | 133268 + smarm 1-thread | 10000 | 131599 | 129387 | 135080 + tokio current_thread | 10000 | 2718 | 2661 | 2981 + tokio multi-thread | 10000 | 7264 | 4608 | 11583 + +================================================================================ + mpsc_contention: 32 producers × 10000 msgs → 1 consumer +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 320000 | 9289 | 9039 | 9751 + smarm 1-thread | 320000 | 9510 | 9157 | 9677 + tokio current_thread | 320000 | 17550 | 17290 | 18578 + tokio multi-thread | 320000 | 18336 | 17527 | 18989 + +================================================================================ + many_timers: 10000 actors sleeping 1–10 ms +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 139111 | 136105 | 
146606 + smarm 1-thread | 10000 | 137302 | 133316 | 141350 + tokio current_thread | 10000 | 13720 | 13455 | 14607 + tokio multi-thread | 10000 | 14964 | 14546 | 15400 + +================================================================================ + multi_thread_scaling: primes in [2, 400000) across 64 workers +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 30048 | 29705 | 31530 + tokio multi 1-thread | 33860 | 28894 | 28682 | 30094 +smarm smarm-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000 + +================================================================================ + deep_recursion: depth 500 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1 | 93 | 81 | 161 + smarm 1-thread | 1 | 103 | 80 | 178 + tokio current_thread | 1 | 25 | 25 | 28 + tokio multi-thread | 1 | 53 | 47 | 74 + +================================================================================ + yield_in_hot_loop: 2 actors × 500000 yields (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 188726 | 187640 | 192658 + tokio current_thread | 1000000 | 149332 | 148133 | 155745 + +================================================================================ + uncontended_channel: 1→1, 1000000 msgs (single thread) +================================================================================ + runtime 
| result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 27630 | 27086 | 29749 + tokio current_thread | 1000000 | 54225 | 53355 | 56307 + +================================================================================ + catch_unwind_panics: 10000 tasks, 50% panic +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 144934 | 143038 | 163552 + smarm 1-thread | 10000 | 146614 | 143653 | 151325 + tokio current_thread | 10000 | 266330 | 263523 | 271639 + tokio multi-thread | 10000 | 274729 | 266323 | 285114 diff --git a/benches/baseline-output/sweep/ai64_tc150k.txt b/benches/baseline-output/sweep/ai64_tc150k.txt new file mode 100644 index 0000000..144c39f --- /dev/null +++ b/benches/baseline-output/sweep/ai64_tc150k.txt @@ -0,0 +1,126 @@ +smarm general benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000 + +================================================================================ + chained_spawn: depth 1000 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 8849 | 8486 | 9224 + smarm 1-thread | 1000 | 8841 | 8477 | 9108 + tokio current_thread | 1000 | 124 | 124 | 219 + tokio multi-thread | 1000 | 187 | 184 | 283 + +================================================================================ + yield_many: 200 tasks × 1000 yields +================================================================================ + runtime | result | median µs | min µs | max µs 
+-------------------------------------------------------------------------------- + smarm 1-thread | 200000 | 41681 | 41278 | 43685 + smarm 1-thread | 200000 | 41721 | 41218 | 42261 + tokio current_thread | 200000 | 14969 | 14940 | 15051 + tokio multi-thread | 200000 | 16004 | 15868 | 17569 + +================================================================================ + fan_out_compute: primes in [2, 400000) across 64 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 29679 | 29516 | 30105 + smarm 1-thread | 33860 | 29677 | 29594 | 31365 + tokio current_thread | 33860 | 28656 | 28572 | 29239 + tokio multi-thread | 33860 | 34783 | 34617 | 36531 + +================================================================================ + ping_pong_oneshot: 1000 rounds +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 17009 | 16822 | 17418 + smarm 1-thread | 1000 | 16866 | 16723 | 17315 + tokio current_thread | 1000 | 880 | 871 | 1035 + tokio multi-thread | 1000 | 4263 | 4178 | 4391 +smarm tokio-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64 + +================================================================================ + spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 126566 | 124995 | 130402 + smarm 1-thread 
| 10000 | 128278 | 126209 | 135156 + tokio current_thread | 10000 | 2680 | 2640 | 2787 + tokio multi-thread | 10000 | 7411 | 4393 | 12421 + +================================================================================ + mpsc_contention: 32 producers × 10000 msgs → 1 consumer +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 320000 | 9073 | 8937 | 9324 + smarm 1-thread | 320000 | 9120 | 9018 | 9263 + tokio current_thread | 320000 | 17245 | 17180 | 17574 + tokio multi-thread | 320000 | 18518 | 17685 | 19621 + +================================================================================ + many_timers: 10000 actors sleeping 1–10 ms +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 141855 | 135415 | 145810 + smarm 1-thread | 10000 | 138265 | 135535 | 142346 + tokio current_thread | 10000 | 14441 | 13453 | 14650 + tokio multi-thread | 10000 | 14956 | 14529 | 15451 + +================================================================================ + multi_thread_scaling: primes in [2, 400000) across 64 workers +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 30033 | 29659 | 31803 + tokio multi 1-thread | 33860 | 29078 | 28963 | 30231 +smarm smarm-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000 + +================================================================================ + deep_recursion: 
depth 500 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1 | 83 | 79 | 132 + smarm 1-thread | 1 | 85 | 78 | 146 + tokio current_thread | 1 | 25 | 25 | 73 + tokio multi-thread | 1 | 51 | 47 | 64 + +================================================================================ + yield_in_hot_loop: 2 actors × 500000 yields (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 191352 | 188830 | 196235 + tokio current_thread | 1000000 | 152382 | 150674 | 187815 + +================================================================================ + uncontended_channel: 1→1, 1000000 msgs (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 27552 | 27099 | 30612 + tokio current_thread | 1000000 | 53160 | 52436 | 55255 + +================================================================================ + catch_unwind_panics: 10000 tasks, 50% panic +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 145243 | 143291 | 173727 + smarm 1-thread | 10000 | 145242 | 142819 | 148457 + tokio current_thread | 10000 | 266471 | 262904 | 269145 + tokio multi-thread | 10000 | 274195 | 269312 | 286111 diff --git a/benches/baseline-output/sweep/ai64_tc300k.txt b/benches/baseline-output/sweep/ai64_tc300k.txt new file mode 
100644 index 0000000..691f170 --- /dev/null +++ b/benches/baseline-output/sweep/ai64_tc300k.txt @@ -0,0 +1,126 @@ +smarm general benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +CHAIN_DEPTH=1000, YIELD_TASKS=200×1000, PRIME_N=400000/64 workers, PP_ROUNDS=1000 + +================================================================================ + chained_spawn: depth 1000 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 8735 | 8508 | 9314 + smarm 1-thread | 1000 | 8808 | 8506 | 10346 + tokio current_thread | 1000 | 123 | 123 | 172 + tokio multi-thread | 1000 | 190 | 184 | 273 + +================================================================================ + yield_many: 200 tasks × 1000 yields +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 200000 | 41619 | 41255 | 43489 + smarm 1-thread | 200000 | 41544 | 41196 | 43259 + tokio current_thread | 200000 | 15382 | 15233 | 16007 + tokio multi-thread | 200000 | 16095 | 15999 | 16296 + +================================================================================ + fan_out_compute: primes in [2, 400000) across 64 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 30032 | 29838 | 31744 + smarm 1-thread | 33860 | 29782 | 29653 | 30601 + tokio current_thread | 33860 | 28754 | 28614 | 30700 + tokio multi-thread | 33860 | 34988 | 34570 | 36871 + +================================================================================ + 
ping_pong_oneshot: 1000 rounds +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000 | 17088 | 16868 | 18654 + smarm 1-thread | 1000 | 16951 | 16797 | 17783 + tokio current_thread | 1000 | 932 | 899 | 1019 + tokio multi-thread | 1000 | 4340 | 4273 | 5245 +smarm tokio-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +STORM_BACKGROUND=8, STORM_SPAWN=10000, MPSC=32×10000, TIMER_ACTORS=10000 (1–10 ms), SCALING_N=400000/64 + +================================================================================ + spawn_storm_busy: 8 bg yielders + 10000 zero-work spawns +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 129009 | 127353 | 132990 + smarm 1-thread | 10000 | 128009 | 126554 | 140472 + tokio current_thread | 10000 | 2666 | 2624 | 2794 + tokio multi-thread | 10000 | 5974 | 4368 | 11517 + +================================================================================ + mpsc_contention: 32 producers × 10000 msgs → 1 consumer +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 320000 | 9044 | 8970 | 10788 + smarm 1-thread | 320000 | 9087 | 8995 | 12500 + tokio current_thread | 320000 | 17185 | 17072 | 18440 + tokio multi-thread | 320000 | 17720 | 17394 | 19182 + +================================================================================ + many_timers: 10000 actors sleeping 1–10 ms +================================================================================ + runtime | result | 
median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 145819 | 140671 | 150512 + smarm 1-thread | 10000 | 139046 | 135846 | 146127 + tokio current_thread | 10000 | 13866 | 13522 | 14670 + tokio multi-thread | 10000 | 14900 | 14471 | 16378 + +================================================================================ + multi_thread_scaling: primes in [2, 400000) across 64 workers +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 33860 | 30695 | 29720 | 33196 + tokio multi 1-thread | 33860 | 29261 | 28895 | 31013 +smarm smarm-favored benchmarks +available parallelism: 1 threads +ITERS=15 (+1 warmup, discarded) +RECURSE_DEPTH=500, HOT_YIELDS=500000×2, UNCONT_MSGS=1000000, PANIC_TASKS=10000 + +================================================================================ + deep_recursion: depth 500 +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1 | 82 | 79 | 113 + smarm 1-thread | 1 | 85 | 78 | 143 + tokio current_thread | 1 | 25 | 25 | 56 + tokio multi-thread | 1 | 50 | 47 | 63 + +================================================================================ + yield_in_hot_loop: 2 actors × 500000 yields (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 188698 | 187922 | 192263 + tokio current_thread | 1000000 | 150231 | 148746 | 151723 + 
+================================================================================ + uncontended_channel: 1→1, 1000000 msgs (single thread) +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 1000000 | 28461 | 27638 | 30283 + tokio current_thread | 1000000 | 52224 | 51880 | 54732 + +================================================================================ + catch_unwind_panics: 10000 tasks, 50% panic +================================================================================ + runtime | result | median µs | min µs | max µs +-------------------------------------------------------------------------------- + smarm 1-thread | 10000 | 144604 | 143246 | 145585 + smarm 1-thread | 10000 | 148208 | 142691 | 151076 + tokio current_thread | 10000 | 265255 | 260637 | 271065 + tokio multi-thread | 10000 | 273131 | 271313 | 300420 diff --git a/benches/baseline.json b/benches/baseline.json new file mode 100644 index 0000000..0958bba --- /dev/null +++ b/benches/baseline.json @@ -0,0 +1,224 @@ +{ + "chained_spawn": { + "smarm 1-thread": { + "result": 1000, + "median": 8637, + "min": 8553, + "max": 8933 + }, + "tokio current_thread": { + "result": 1000, + "median": 124, + "min": 124, + "max": 153 + }, + "tokio multi-thread": { + "result": 1000, + "median": 188, + "min": 183, + "max": 229 + } + }, + "yield_many": { + "smarm 1-thread": { + "result": 200000, + "median": 41622, + "min": 41063, + "max": 44973 + }, + "tokio current_thread": { + "result": 200000, + "median": 15085, + "min": 15013, + "max": 15274 + }, + "tokio multi-thread": { + "result": 200000, + "median": 15964, + "min": 15880, + "max": 17959 + } + }, + "fan_out_compute": { + "smarm 1-thread": { + "result": 33860, + "median": 29727, + "min": 29491, + "max": 31634 + }, + "tokio current_thread": { + "result": 33860, + "median": 28503, 
+ "min": 28391, + "max": 28866 + }, + "tokio multi-thread": { + "result": 33860, + "median": 34542, + "min": 34396, + "max": 36111 + } + }, + "ping_pong_oneshot": { + "smarm 1-thread": { + "result": 1000, + "median": 16848, + "min": 16633, + "max": 17301 + }, + "tokio current_thread": { + "result": 1000, + "median": 879, + "min": 868, + "max": 973 + }, + "tokio multi-thread": { + "result": 1000, + "median": 4328, + "min": 4223, + "max": 4461 + } + }, + "spawn_storm_busy": { + "smarm 1-thread": { + "result": 10000, + "median": 130058, + "min": 126790, + "max": 134475 + }, + "tokio current_thread": { + "result": 10000, + "median": 2772, + "min": 2641, + "max": 4367 + }, + "tokio multi-thread": { + "result": 10000, + "median": 7462, + "min": 4469, + "max": 12892 + } + }, + "mpsc_contention": { + "smarm 1-thread": { + "result": 320000, + "median": 9260, + "min": 9095, + "max": 10081 + }, + "tokio current_thread": { + "result": 320000, + "median": 17570, + "min": 17213, + "max": 18276 + }, + "tokio multi-thread": { + "result": 320000, + "median": 17593, + "min": 17452, + "max": 19564 + } + }, + "many_timers": { + "smarm 1-thread": { + "result": 10000, + "median": 135806, + "min": 132573, + "max": 141651 + }, + "tokio current_thread": { + "result": 10000, + "median": 14462, + "min": 13555, + "max": 15457 + }, + "tokio multi-thread": { + "result": 10000, + "median": 15011, + "min": 14655, + "max": 15368 + } + }, + "multi_thread_scaling": { + "smarm 1-thread": { + "result": 33860, + "median": 30029, + "min": 29720, + "max": 31351 + }, + "tokio multi 1-thread": { + "result": 33860, + "median": 28983, + "min": 28908, + "max": 29323 + } + }, + "deep_recursion": { + "smarm 1-thread": { + "result": 1, + "median": 83, + "min": 78, + "max": 587 + }, + "tokio current_thread": { + "result": 1, + "median": 25, + "min": 25, + "max": 33 + }, + "tokio multi-thread": { + "result": 1, + "median": 59, + "min": 47, + "max": 205 + } + }, + "yield_in_hot_loop": { + "smarm 1-thread": { + 
"result": 1000000, + "median": 188753, + "min": 187007, + "max": 194366 + }, + "tokio current_thread": { + "result": 1000000, + "median": 153929, + "min": 152712, + "max": 158749 + } + }, + "uncontended_channel": { + "smarm 1-thread": { + "result": 1000000, + "median": 26811, + "min": 26498, + "max": 29069 + }, + "tokio current_thread": { + "result": 1000000, + "median": 51888, + "min": 51530, + "max": 52708 + } + }, + "catch_unwind_panics": { + "smarm 1-thread": { + "result": 10000, + "median": 142215, + "min": 140189, + "max": 143570 + }, + "tokio current_thread": { + "result": 10000, + "median": 682295, + "min": 670281, + "max": 700774 + }, + "tokio multi-thread": { + "result": 10000, + "median": 662688, + "min": 641453, + "max": 681868 + } + } +} \ No newline at end of file diff --git a/benches/general.rs b/benches/general.rs new file mode 100644 index 0000000..96fa9e7 --- /dev/null +++ b/benches/general.rs @@ -0,0 +1,442 @@ +//! General benchmarks — workloads where neither runtime has a structural +//! advantage. Both should be competitive; large gaps here indicate a real +//! difference in per-task or per-yield overhead. +//! +//! Workloads: +//! 1. chained_spawn — task N spawns N+1, depth 1000. Spawn+exit overhead in +//! a serial chain. Adapted from tokio's bench of the same +//! name. +//! 2. yield_many — 200 actors × 1000 yields. Pure scheduling throughput +//! with no allocation, no IO. Adapted from tokio. +//! 3. fan_out_compute— count primes in [2, 400_000) across 64 workers. Same +//! shape as multi_scheduler::primes but lives here for +//! completeness. +//! 4. ping_pong_oneshot — N rounds of (spawn pair, send oneshot, await). +//! Closer to a request/response workload than channel +//! ping-pong. 
+ +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; +use std::time::Instant; + +// --------------------------------------------------------------------------- +// Shared harness +// --------------------------------------------------------------------------- + +const ITERS: u32 = 15; + +fn available_threads() -> usize { + std::thread::available_parallelism().map(|n| n.get()).unwrap_or(1) +} + +fn print_header(title: &str) { + println!("\n{}", "=".repeat(80)); + println!(" {title}"); + println!("{}", "=".repeat(80)); + println!( + "{:>26} | {:>12} | {:>10} | {:>10} | {:>10}", + "runtime", "result", "median µs", "min µs", "max µs" + ); + println!("{}", "-".repeat(80)); +} + +fn run_n (u64, u128)>(name: &str, n: u32, mut f: F) { + let mut times = Vec::new(); + let mut last = 0u64; + // One warmup iteration, discarded. + let _ = f(); + for _ in 0..n { + let (v, t) = f(); + times.push(t); + last = v; + } + times.sort_unstable(); + let median = times[times.len() / 2]; + let min = *times.iter().min().unwrap(); + let max = *times.iter().max().unwrap(); + println!( + "{:>26} | {:>12} | {:>10} | {:>10} | {:>10}", + name, last, median, min, max + ); +} + +// --------------------------------------------------------------------------- +// 1. chained_spawn — depth 1000 +// --------------------------------------------------------------------------- + +const CHAIN_DEPTH: u64 = 1_000; + +fn bench_chained_smarm(threads: usize) -> (u64, u128) { + let counter = Arc::new(AtomicU64::new(0)); + let c2 = counter.clone(); + let start = Instant::now(); + smarm::runtime::init(bench_cfg(threads)).run(move || { + // Fire-and-forget chain, matching tokio's bench shape: each link + // spawns the next link and exits immediately; depth 0 signals done + // via a channel. Crucially this does *not* nest joins on the + // spawner's stack — important because smarm actor stacks are a + // fixed 64 KiB. 
+ let (tx, rx) = smarm::channel::<()>(); + fn iter(c: Arc, tx: smarm::Sender<()>, n: u64) { + if n == 0 { + tx.send(()).unwrap(); + } else { + let cc = c.clone(); + smarm::spawn(move || { + cc.fetch_add(1, Ordering::Relaxed); + iter(cc.clone(), tx, n - 1); + }); + // Caller exits; JoinHandle dropped, no parking. + } + } + iter(c2, tx, CHAIN_DEPTH); + rx.recv().unwrap(); + }); + (counter.load(Ordering::Relaxed), start.elapsed().as_micros()) +} + +fn bench_chained_tokio_current() -> (u64, u128) { + let counter = Arc::new(AtomicU64::new(0)); + let c2 = counter.clone(); + let rt = tokio::runtime::Builder::new_current_thread().build().unwrap(); + let start = Instant::now(); + let local = tokio::task::LocalSet::new(); + local.block_on(&rt, async move { + // Use a oneshot done channel like tokio's own chained_spawn bench. + let (done_tx, done_rx) = tokio::sync::oneshot::channel(); + fn iter( + c: Arc, + done: tokio::sync::oneshot::Sender<()>, + n: u64, + ) { + if n == 0 { + let _ = done.send(()); + } else { + tokio::task::spawn_local(async move { + c.fetch_add(1, Ordering::Relaxed); + iter(c, done, n - 1); + }); + } + } + iter(c2, done_tx, CHAIN_DEPTH); + let _ = done_rx.await; + }); + (counter.load(Ordering::Relaxed), start.elapsed().as_micros()) +} + +fn bench_chained_tokio_multi() -> (u64, u128) { + let counter = Arc::new(AtomicU64::new(0)); + let c2 = counter.clone(); + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(available_threads()) + .build() + .unwrap(); + let start = Instant::now(); + rt.block_on(async move { + let (done_tx, done_rx) = tokio::sync::oneshot::channel(); + fn iter(c: Arc, done: tokio::sync::oneshot::Sender<()>, n: u64) { + if n == 0 { + let _ = done.send(()); + } else { + tokio::spawn(async move { + c.fetch_add(1, Ordering::Relaxed); + iter(c, done, n - 1); + }); + } + } + iter(c2, done_tx, CHAIN_DEPTH); + let _ = done_rx.await; + }); + (counter.load(Ordering::Relaxed), start.elapsed().as_micros()) +} + +// 
--------------------------------------------------------------------------- +// 2. yield_many — 200 actors × 1000 yields +// --------------------------------------------------------------------------- + +const YIELD_TASKS: u64 = 200; +const YIELD_ROUNDS: u64 = 1_000; + +fn bench_yield_smarm(threads: usize) -> (u64, u128) { + let start = Instant::now(); + smarm::runtime::init(bench_cfg(threads)).run(|| { + let mut handles = Vec::new(); + for _ in 0..YIELD_TASKS { + handles.push(smarm::spawn(|| { + for _ in 0..YIELD_ROUNDS { + smarm::yield_now(); + } + })); + } + for h in handles { + h.join().unwrap(); + } + }); + (YIELD_TASKS * YIELD_ROUNDS, start.elapsed().as_micros()) +} + +fn bench_yield_tokio_current() -> (u64, u128) { + let rt = tokio::runtime::Builder::new_current_thread().build().unwrap(); + let start = Instant::now(); + let local = tokio::task::LocalSet::new(); + local.block_on(&rt, async move { + let mut handles = Vec::new(); + for _ in 0..YIELD_TASKS { + handles.push(tokio::task::spawn_local(async move { + for _ in 0..YIELD_ROUNDS { + tokio::task::yield_now().await; + } + })); + } + for h in handles { + let _ = h.await; + } + }); + (YIELD_TASKS * YIELD_ROUNDS, start.elapsed().as_micros()) +} + +fn bench_yield_tokio_multi() -> (u64, u128) { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(available_threads()) + .build() + .unwrap(); + let start = Instant::now(); + rt.block_on(async move { + let mut handles = Vec::new(); + for _ in 0..YIELD_TASKS { + handles.push(tokio::spawn(async move { + for _ in 0..YIELD_ROUNDS { + tokio::task::yield_now().await; + } + })); + } + for h in handles { + let _ = h.await; + } + }); + (YIELD_TASKS * YIELD_ROUNDS, start.elapsed().as_micros()) +} + +// --------------------------------------------------------------------------- +// 3. 
fan_out_compute — primes, same shape as multi_scheduler::primes +// --------------------------------------------------------------------------- + +const PRIME_N: u64 = 400_000; +const PRIME_WORKERS: u64 = 64; + +fn is_prime(n: u64) -> bool { + if n < 2 { return false; } + if n < 4 { return true; } + if n % 2 == 0 { return false; } + let mut i = 3u64; + while i * i <= n { if n % i == 0 { return false; } i += 2; } + true +} + +fn count_primes(lo: u64, hi: u64) -> u64 { + (lo..hi).filter(|&n| is_prime(n)).count() as u64 +} + +fn primes_slice(w: u64) -> (u64, u64) { + let per = PRIME_N / PRIME_WORKERS; + let lo = w * per; + let hi = if w + 1 == PRIME_WORKERS { PRIME_N } else { lo + per }; + (lo, hi) +} + +fn bench_primes_smarm(threads: usize) -> (u64, u128) { + let total = Arc::new(AtomicU64::new(0)); + let t2 = total.clone(); + let start = Instant::now(); + smarm::runtime::init(bench_cfg(threads)).run(move || { + let mut handles = Vec::new(); + for w in 0..PRIME_WORKERS { + let (lo, hi) = primes_slice(w); + let tc = t2.clone(); + handles.push(smarm::spawn(move || { + tc.fetch_add(count_primes(lo, hi), Ordering::Relaxed); + })); + } + for h in handles { h.join().unwrap(); } + }); + (total.load(Ordering::Relaxed), start.elapsed().as_micros()) +} + +fn bench_primes_tokio_current() -> (u64, u128) { + let total = Arc::new(AtomicU64::new(0)); + let t2 = total.clone(); + let rt = tokio::runtime::Builder::new_current_thread().build().unwrap(); + let start = Instant::now(); + let local = tokio::task::LocalSet::new(); + local.block_on(&rt, async move { + let mut handles = Vec::new(); + for w in 0..PRIME_WORKERS { + let (lo, hi) = primes_slice(w); + let tc = t2.clone(); + handles.push(tokio::task::spawn_local(async move { + tc.fetch_add(count_primes(lo, hi), Ordering::Relaxed); + })); + } + for h in handles { let _ = h.await; } + }); + (total.load(Ordering::Relaxed), start.elapsed().as_micros()) +} + +fn bench_primes_tokio_multi() -> (u64, u128) { + let total = 
Arc::new(AtomicU64::new(0)); + let t2 = total.clone(); + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(available_threads()) + .build() + .unwrap(); + let start = Instant::now(); + rt.block_on(async move { + let mut handles = Vec::new(); + for w in 0..PRIME_WORKERS { + let (lo, hi) = primes_slice(w); + let tc = t2.clone(); + handles.push(tokio::spawn(async move { + tc.fetch_add(count_primes(lo, hi), Ordering::Relaxed); + })); + } + for h in handles { let _ = h.await; } + }); + (total.load(Ordering::Relaxed), start.elapsed().as_micros()) +} + +// --------------------------------------------------------------------------- +// 4. ping_pong_oneshot — 1000 rounds of spawn-pair-await +// --------------------------------------------------------------------------- + +const PP_ROUNDS: u64 = 1_000; + +fn bench_pp_smarm(threads: usize) -> (u64, u128) { + let start = Instant::now(); + smarm::runtime::init(bench_cfg(threads)).run(|| { + for _ in 0..PP_ROUNDS { + // smarm has no oneshot, so use a channel<()> per round — both + // sides spawn, A sends ping, B replies pong, A joins B. 
+ let (tx_ping, rx_ping) = smarm::channel::<()>(); + let (tx_pong, rx_pong) = smarm::channel::<()>(); + let hb = smarm::spawn(move || { + rx_ping.recv().unwrap(); + tx_pong.send(()).unwrap(); + }); + let ha = smarm::spawn(move || { + tx_ping.send(()).unwrap(); + rx_pong.recv().unwrap(); + }); + ha.join().unwrap(); + hb.join().unwrap(); + } + }); + (PP_ROUNDS, start.elapsed().as_micros()) +} + +fn bench_pp_tokio_current() -> (u64, u128) { + let rt = tokio::runtime::Builder::new_current_thread().build().unwrap(); + let start = Instant::now(); + let local = tokio::task::LocalSet::new(); + local.block_on(&rt, async move { + for _ in 0..PP_ROUNDS { + let (tx1, rx1) = tokio::sync::oneshot::channel::<()>(); + let (tx2, rx2) = tokio::sync::oneshot::channel::<()>(); + let hb = tokio::task::spawn_local(async move { + rx1.await.unwrap(); + tx2.send(()).unwrap(); + }); + let ha = tokio::task::spawn_local(async move { + tx1.send(()).unwrap(); + rx2.await.unwrap(); + }); + let _ = ha.await; + let _ = hb.await; + } + }); + (PP_ROUNDS, start.elapsed().as_micros()) +} + +fn bench_pp_tokio_multi() -> (u64, u128) { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(available_threads()) + .build() + .unwrap(); + let start = Instant::now(); + rt.block_on(async move { + for _ in 0..PP_ROUNDS { + let (tx1, rx1) = tokio::sync::oneshot::channel::<()>(); + let (tx2, rx2) = tokio::sync::oneshot::channel::<()>(); + let hb = tokio::spawn(async move { + rx1.await.unwrap(); + tx2.send(()).unwrap(); + }); + let ha = tokio::spawn(async move { + tx1.send(()).unwrap(); + rx2.await.unwrap(); + }); + let _ = ha.await; + let _ = hb.await; + } + }); + (PP_ROUNDS, start.elapsed().as_micros()) +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- + + +// --------------------------------------------------------------------------- +// Knob helper — reads 
SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES env vars +// so the sweep script can override the preemption knobs without recompiling. +// --------------------------------------------------------------------------- + +fn bench_cfg(threads: usize) -> smarm::runtime::Config { + let mut cfg = smarm::runtime::Config::exact(threads); + if let Ok(v) = std::env::var("SMARM_ALLOC_INTERVAL") { + if let Ok(n) = v.parse::() { cfg = cfg.alloc_interval(n); } + } + if let Ok(v) = std::env::var("SMARM_TIMESLICE_CYCLES") { + if let Ok(n) = v.parse::() { cfg = cfg.timeslice_cycles(n); } + } + cfg +} + +fn main() { + let n = available_threads(); + println!("smarm general benchmarks"); + println!("available parallelism: {n} threads"); + println!("ITERS={ITERS} (+1 warmup, discarded)"); + println!( + "CHAIN_DEPTH={CHAIN_DEPTH}, YIELD_TASKS={YIELD_TASKS}×{YIELD_ROUNDS}, \ + PRIME_N={PRIME_N}/{PRIME_WORKERS} workers, PP_ROUNDS={PP_ROUNDS}" + ); + + // ---- 1. chained_spawn ---- + print_header(&format!("chained_spawn: depth {CHAIN_DEPTH}")); + run_n("smarm 1-thread", ITERS, || bench_chained_smarm(1)); + run_n(&format!("smarm {n}-thread"), ITERS, || bench_chained_smarm(n)); + run_n("tokio current_thread", ITERS, bench_chained_tokio_current); + run_n("tokio multi-thread", ITERS, bench_chained_tokio_multi); + + // ---- 2. yield_many ---- + print_header(&format!("yield_many: {YIELD_TASKS} tasks × {YIELD_ROUNDS} yields")); + run_n("smarm 1-thread", ITERS, || bench_yield_smarm(1)); + run_n(&format!("smarm {n}-thread"), ITERS, || bench_yield_smarm(n)); + run_n("tokio current_thread", ITERS, bench_yield_tokio_current); + run_n("tokio multi-thread", ITERS, bench_yield_tokio_multi); + + // ---- 3. 
fan_out_compute ---- + print_header(&format!("fan_out_compute: primes in [2, {PRIME_N}) across {PRIME_WORKERS}")); + run_n("smarm 1-thread", ITERS, || bench_primes_smarm(1)); + run_n(&format!("smarm {n}-thread"), ITERS, || bench_primes_smarm(n)); + run_n("tokio current_thread", ITERS, bench_primes_tokio_current); + run_n("tokio multi-thread", ITERS, bench_primes_tokio_multi); + + // ---- 4. ping_pong_oneshot ---- + print_header(&format!("ping_pong_oneshot: {PP_ROUNDS} rounds")); + run_n("smarm 1-thread", ITERS, || bench_pp_smarm(1)); + run_n(&format!("smarm {n}-thread"), ITERS, || bench_pp_smarm(n)); + run_n("tokio current_thread", ITERS, bench_pp_tokio_current); + run_n("tokio multi-thread", ITERS, bench_pp_tokio_multi); +} diff --git a/benches/smarm_favored.rs b/benches/smarm_favored.rs index 2139de5..2659558 100644 --- a/benches/smarm_favored.rs +++ b/benches/smarm_favored.rs @@ -84,7 +84,7 @@ fn bench_recurse_smarm(threads: usize) -> (u64, u128) { let total = Arc::new(AtomicU64::new(0)); let t2 = total.clone(); let start = Instant::now(); - smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || { + smarm::runtime::init(bench_cfg(threads)).run(move || { // Plain Rust recursion on the actor's own (growable) stack. 
fn recurse(c: &AtomicU64, n: u64) -> u64 { if n == 0 { @@ -170,7 +170,7 @@ const HOT_YIELDS: u64 = 500_000; fn bench_hot_smarm() -> (u64, u128) { let start = Instant::now(); - smarm::runtime::init(smarm::runtime::Config::exact(1)).run(|| { + smarm::runtime::init(bench_cfg(1)).run(|| { let ha = smarm::spawn(|| { for _ in 0..HOT_YIELDS { smarm::yield_now(); @@ -216,7 +216,7 @@ const UNCONT_MSGS: u64 = 1_000_000; fn bench_unc_smarm() -> (u64, u128) { let start = Instant::now(); - smarm::runtime::init(smarm::runtime::Config::exact(1)).run(|| { + smarm::runtime::init(bench_cfg(1)).run(|| { let (tx, rx) = smarm::channel::(); let consumer = smarm::spawn(move || { let mut count = 0u64; @@ -273,7 +273,7 @@ fn bench_panic_smarm(threads: usize) -> (u64, u128) { let ok2 = ok.clone(); let err2 = err.clone(); let start = Instant::now(); - smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || { + smarm::runtime::init(bench_cfg(threads)).run(move || { let mut handles = Vec::new(); for i in 0..PANIC_TASKS { handles.push(smarm::spawn(move || { @@ -355,6 +355,23 @@ fn bench_panic_tokio_multi() -> (u64, u128) { // main // --------------------------------------------------------------------------- + +// --------------------------------------------------------------------------- +// Knob helper — reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES env vars +// so the sweep script can override the preemption knobs without recompiling. 
+// --------------------------------------------------------------------------- + +fn bench_cfg(threads: usize) -> smarm::runtime::Config { + let mut cfg = smarm::runtime::Config::exact(threads); + if let Ok(v) = std::env::var("SMARM_ALLOC_INTERVAL") { + if let Ok(n) = v.parse::() { cfg = cfg.alloc_interval(n); } + } + if let Ok(v) = std::env::var("SMARM_TIMESLICE_CYCLES") { + if let Ok(n) = v.parse::() { cfg = cfg.timeslice_cycles(n); } + } + cfg +} + fn main() { let n = available_threads(); println!("smarm smarm-favored benchmarks"); diff --git a/benches/sweep.py b/benches/sweep.py new file mode 100755 index 0000000..a226a62 --- /dev/null +++ b/benches/sweep.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python3 +""" +smarm bench sweep + regression checker. + +Usage: + # Run a full knob sweep and print a comparison table: + python3 benches/sweep.py sweep + + # Check the current build against the committed baseline: + python3 benches/sweep.py regress + + # Run all benches once (default knobs) and print results: + python3 benches/sweep.py run + +The sweep grid is defined in SWEEP_GRID below. +The regression baseline is loaded from benches/baseline.json. +""" + +import argparse +import json +import os +import re +import subprocess +import sys +from pathlib import Path + +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +REPO = Path(__file__).resolve().parent.parent + +# Bench files to run (primes + multi_scheduler omitted — legacy harness, +# not part of the 12-bench suite, and insensitive to the preemption knobs). 
+BENCHES = ["general", "tokio_favored", "smarm_favored"] + +# Knob sweep grid: (alloc_interval, timeslice_cycles) +# alloc_interval: lower = check RDTSC more often = finer preemption +# timeslice_cycles: lower = shorter timeslice = more cooperative +SWEEP_GRID = [ + (32, 150_000), + (64, 150_000), + (128, 150_000), # default interval, shorter slice + (32, 300_000), + (64, 300_000), + (128, 300_000), # <<< baseline (defaults) + (256, 300_000), + (512, 300_000), + (128, 600_000), + (128, 1_200_000), +] + +# Regression threshold: warn if median is more than this % worse than baseline. +REGRESSION_THRESHOLD_PCT = 10 + +# --------------------------------------------------------------------------- +# Parsing +# --------------------------------------------------------------------------- + +# Match lines like: +# " smarm 1-thread | 1000000 | 31473 | 28719 | 33113" +ROW_RE = re.compile( + r"^\s*(?P[^|]+?)\s*\|\s*(?P\d+)\s*\|\s*(?P\d+)\s*\|\s*(?P\d+)\s*\|\s*(?P\d+)\s*$" +) + +# Match section headers like: +# " chained_spawn: depth 1000" +HEADER_RE = re.compile(r"^\s{2}(?P[a-z_]+)[:—]") + + +def parse_output(text: str) -> dict[str, dict[str, dict]]: + """ + Returns {bench_name: {runtime_label: {median, min, max, result}}}. + bench_name is the snake_case name extracted from the section header. 
+ """ + results: dict[str, dict[str, dict]] = {} + current_bench = None + + for line in text.splitlines(): + hm = HEADER_RE.match(line) + if hm: + current_bench = hm.group("bench") + results.setdefault(current_bench, {}) + continue + + if current_bench is None: + continue + + rm = ROW_RE.match(line) + if rm: + label = rm.group("name").strip() + results[current_bench][label] = { + "result": int(rm.group("result")), + "median": int(rm.group("median")), + "min": int(rm.group("min")), + "max": int(rm.group("max")), + } + + return results + + +# --------------------------------------------------------------------------- +# Running +# --------------------------------------------------------------------------- + +def run_benches(env_extra: dict[str, str] | None = None) -> dict[str, dict[str, dict]]: + """Run all BENCHES and return merged parsed results.""" + env = os.environ.copy() + if env_extra: + env.update(env_extra) + + all_results: dict[str, dict[str, dict]] = {} + + for bench in BENCHES: + cmd = ["cargo", "bench", "--bench", bench] + proc = subprocess.run( + cmd, + cwd=REPO, + env=env, + capture_output=True, + text=True, + ) + if proc.returncode != 0: + print(f" ERROR running {bench}:\n{proc.stderr[-800:]}", file=sys.stderr) + continue + parsed = parse_output(proc.stdout) + all_results.update(parsed) + + return all_results + + +# --------------------------------------------------------------------------- +# Baseline JSON +# --------------------------------------------------------------------------- + +BASELINE_PATH = REPO / "benches" / "baseline.json" + + +def load_baseline() -> dict: + if not BASELINE_PATH.exists(): + sys.exit( + f"No baseline found at {BASELINE_PATH}.\n" + "Run: python3 benches/sweep.py run then save the output manually,\n" + "or use --save-baseline with the run subcommand." 
+ ) + return json.loads(BASELINE_PATH.read_text()) + + +def save_baseline(results: dict) -> None: + BASELINE_PATH.write_text(json.dumps(results, indent=2)) + print(f"Baseline saved to {BASELINE_PATH}") + + +# --------------------------------------------------------------------------- +# Regression check +# --------------------------------------------------------------------------- + +def check_regressions(current: dict, baseline: dict) -> bool: + """ + Compare current results to baseline. Print warnings for regressions. + Returns True if any regression found. + """ + any_regression = False + + for bench, runtimes in baseline.items(): + cur_bench = current.get(bench, {}) + for label, base_data in runtimes.items(): + cur_data = cur_bench.get(label) + if cur_data is None: + print(f" MISSING {bench}/{label} — not present in current run") + any_regression = True + continue + + base_med = base_data["median"] + cur_med = cur_data["median"] + if base_med == 0: + continue + + pct = (cur_med - base_med) / base_med * 100 + if pct > REGRESSION_THRESHOLD_PCT: + print( + f" REGRESSION {bench}/{label}: " + f"{base_med} → {cur_med} µs ({pct:+.1f}%)" + ) + any_regression = True + elif pct < -REGRESSION_THRESHOLD_PCT: + print( + f" IMPROVEMENT {bench}/{label}: " + f"{base_med} → {cur_med} µs ({pct:+.1f}%)" + ) + + return any_regression + + +# --------------------------------------------------------------------------- +# Pretty print +# --------------------------------------------------------------------------- + +def print_results(results: dict, label: str = "") -> None: + if label: + print(f"\n{'='*70}") + print(f" {label}") + print(f"{'='*70}") + for bench, runtimes in sorted(results.items()): + print(f"\n [{bench}]") + print(f" {'runtime':>28} | {'result':>10} | {'median µs':>10} | {'min':>8} | {'max':>8}") + print(f" {'-'*75}") + for rt_label, data in runtimes.items(): + print( + f" {rt_label:>28} | {data['result']:>10} | " + f"{data['median']:>10} | {data['min']:>8} | 
{data['max']:>8}" + ) + + +def print_sweep_table(sweep_results: list[tuple[int, int, dict]]) -> None: + """Print a compact comparison across sweep points for each bench/runtime.""" + # Collect all bench/label pairs + all_keys: list[tuple[str, str]] = [] + for _, _, results in sweep_results: + for bench, runtimes in results.items(): + for label in runtimes: + key = (bench, label) + if key not in all_keys: + all_keys.append(key) + + # Header + col_w = 12 + print(f"\n{'bench/runtime':<45}", end="") + for interval, cycles, _ in sweep_results: + tag = f"ai={interval}/tc={cycles//1000}k" + print(f" {tag:>{col_w}}", end="") + print() + print("-" * (45 + (col_w + 2) * len(sweep_results))) + + for bench, label in all_keys: + key_str = f"{bench}/{label}" + print(f" {key_str:<43}", end="") + for _, _, results in sweep_results: + val = results.get(bench, {}).get(label, {}).get("median") + cell = str(val) if val is not None else "—" + print(f" {cell:>{col_w}}", end="") + print() + + +# --------------------------------------------------------------------------- +# Subcommands +# --------------------------------------------------------------------------- + +def cmd_run(args) -> None: + print("Building release binaries…") + subprocess.run( + ["cargo", "build", "--release", "--benches"], + cwd=REPO, check=True, capture_output=True, + ) + print("Running benches…") + results = run_benches() + print_results(results, "Results (default knobs)") + if args.save_baseline: + save_baseline(results) + + +def cmd_regress(args) -> None: + baseline = load_baseline() + print("Building release binaries…") + subprocess.run( + ["cargo", "build", "--release", "--benches"], + cwd=REPO, check=True, capture_output=True, + ) + print("Running benches…") + current = run_benches() + print_results(current, "Current results") + print(f"\nRegression check (threshold: >{REGRESSION_THRESHOLD_PCT}% slower than baseline)") + print("-" * 60) + found = check_regressions(current, baseline) + if not found: + print(" 
No regressions detected.") + sys.exit(1 if found else 0) + + +def cmd_sweep(args) -> None: + print("Building release binaries (once)…") + subprocess.run( + ["cargo", "build", "--release", "--benches"], + cwd=REPO, check=True, capture_output=True, + ) + # Benches are pre-built; env vars change runtime behaviour, no recompile needed. + sweep_results: list[tuple[int, int, dict]] = [] + + for interval, cycles in SWEEP_GRID: + tag = f"alloc_interval={interval}, timeslice_cycles={cycles}" + print(f" Running: {tag} …", flush=True) + env_extra = { + "SMARM_ALLOC_INTERVAL": str(interval), + "SMARM_TIMESLICE_CYCLES": str(cycles), + } + results = run_benches(env_extra) + sweep_results.append((interval, cycles, results)) + + print_sweep_table(sweep_results) + + if args.save_csv: + import csv + rows = [] + for interval, cycles, results in sweep_results: + for bench, runtimes in results.items(): + for label, data in runtimes.items(): + rows.append({ + "alloc_interval": interval, + "timeslice_cycles": cycles, + "bench": bench, + "runtime": label, + **data, + }) + with open(args.save_csv, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=rows[0].keys()) + writer.writeheader() + writer.writerows(rows) + print(f"\nCSV saved to {args.save_csv}") + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + sub = parser.add_subparsers(dest="cmd", required=True) + + p_run = sub.add_parser("run", help="Run benches once with default knobs") + p_run.add_argument("--save-baseline", action="store_true", + help="Save results as the regression baseline") + p_run.set_defaults(func=cmd_run) + + p_reg = sub.add_parser("regress", help="Check current results against baseline") + p_reg.set_defaults(func=cmd_regress) + + p_sw = 
sub.add_parser("sweep", help="Sweep preemption knobs and compare") + p_sw.add_argument("--save-csv", metavar="FILE", + help="Write full sweep results to a CSV file") + p_sw.set_defaults(func=cmd_sweep) + + args = parser.parse_args() + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/benches/tokio_favored.rs b/benches/tokio_favored.rs index 8082c15..dd96a00 100644 --- a/benches/tokio_favored.rs +++ b/benches/tokio_favored.rs @@ -83,7 +83,7 @@ fn bench_storm_smarm(threads: usize) -> (u64, u128) { let s2 = stop.clone(); let start = Instant::now(); - smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || { + smarm::runtime::init(bench_cfg(threads)).run(move || { // Background actors: yield in a tight loop until told to stop. let mut bg_handles = Vec::new(); for _ in 0..STORM_BACKGROUND { @@ -189,7 +189,7 @@ const MPSC_PER_PRODUCER: u64 = 10_000; fn bench_mpsc_smarm(threads: usize) -> (u64, u128) { let start = Instant::now(); - smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(|| { + smarm::runtime::init(bench_cfg(threads)).run(|| { let (tx, rx) = smarm::channel::(); let mut prod_handles = Vec::new(); for p in 0..MPSC_PRODUCERS { @@ -289,7 +289,7 @@ fn timer_delay_ms(i: u64) -> u64 { fn bench_timers_smarm(threads: usize) -> (u64, u128) { let start = Instant::now(); - smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(|| { + smarm::runtime::init(bench_cfg(threads)).run(|| { let mut handles = Vec::new(); for i in 0..TIMER_ACTORS { let ms = timer_delay_ms(i); @@ -373,7 +373,7 @@ fn bench_scaling_smarm(threads: usize) -> (u64, u128) { let total = Arc::new(AtomicU64::new(0)); let t2 = total.clone(); let start = Instant::now(); - smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || { + smarm::runtime::init(bench_cfg(threads)).run(move || { let mut handles = Vec::new(); for w in 0..SCALING_WORKERS { let (lo, hi) = scaling_slice(w); @@ -413,6 +413,23 @@ fn bench_scaling_tokio_multi(threads: 
usize) -> (u64, u128) { // main // ---------------------------------------------------------------------------
+
+// ---------------------------------------------------------------------------
+// Knob helpers — read SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES env vars
+// so the sweep script can override the preemption knobs without recompiling.
+// ---------------------------------------------------------------------------
+
+/// Builds the smarm runtime config for a bench run on `threads` threads.
+///
+/// Starts from `Config::exact(threads)` and applies `SMARM_ALLOC_INTERVAL` /
+/// `SMARM_TIMESLICE_CYCLES` when they are set to values that parse; a knob
+/// that is unset or unparsable leaves the config as `Config::exact` built it.
+fn bench_cfg(threads: usize) -> smarm::runtime::Config {
+    let mut cfg = smarm::runtime::Config::exact(threads);
+    // The target integer type of each knob is inferred from the builder
+    // method it is passed to, so it always matches the Config API.
+    if let Some(n) = env_knob("SMARM_ALLOC_INTERVAL") {
+        cfg = cfg.alloc_interval(n);
+    }
+    if let Some(c) = env_knob("SMARM_TIMESLICE_CYCLES") {
+        cfg = cfg.timeslice_cycles(c);
+    }
+    cfg
+}
+
+/// Reads the environment variable `name` and parses it as `T`.
+///
+/// Returns `None` when the variable cannot be read. When it is set but does
+/// not parse, a warning is printed to stderr and `None` is returned, so a
+/// mistyped knob is not silently benchmarked with the default value.
+fn env_knob<T: std::str::FromStr>(name: &str) -> Option<T> {
+    let raw = std::env::var(name).ok()?;
+    match raw.parse() {
+        Ok(value) => Some(value),
+        Err(_) => {
+            eprintln!("warning: ignoring {}={:?}: not a valid value", name, raw);
+            None
+        }
+    }
+}
+
 fn main() { let n = available_threads(); println!("smarm tokio-favored benchmarks");