//! Benchmarks where smarm's design has a structural advantage.
//!
//! These exist to show what the green-thread + stackful model buys you. The
//! single-thread numbers are the most interesting ones — they isolate the
//! per-switch / per-task cost from any contention story.
//!
//! Workloads:
//!   9. deep_recursion      — actor recurses `RECURSE_DEPTH` levels deep, then
//!                            returns. In smarm this is plain stack recursion
//!                            on the growable mmap'd stack. In tokio, an
//!                            async fn can't directly recurse — each level
//!                            must `Box::pin` its future. We measure both.
//!  10. yield_in_hot_loop   — 2 actors ping yield_now back and forth 500k
//!                            times. Pure context-switch cost; no channels,
//!                            no allocation, no contention. Smarm's switch is
//!                            ~6 GPRs + xmm save and a `ret`; tokio's is
//!                            poll → state-machine → schedule.
//!  11. uncontended_channel — single producer, single consumer, 1M msgs,
//!                            single-threaded runtime. With no cross-thread
//!                            contention, smarm's `Arc`-based channel is
//!                            essentially free, and the green-thread switch
//!                            should beat tokio's future polling overhead.
//!  12. catch_unwind_panics — spawn 10k tasks; half panic, half succeed.
//!                            Supervisor handles each. Exploratory — if
//!                            there's no real gap, drop this one.
use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use std::time::Instant; // --------------------------------------------------------------------------- // Shared harness // --------------------------------------------------------------------------- const ITERS: u32 = 15; fn available_threads() -> usize { std::thread::available_parallelism().map(|n| n.get()).unwrap_or(1) } fn print_header(title: &str) { println!("\n{}", "=".repeat(80)); println!(" {title}"); println!("{}", "=".repeat(80)); println!( "{:>26} | {:>12} | {:>10} | {:>10} | {:>10}", "runtime", "result", "median µs", "min µs", "max µs" ); println!("{}", "-".repeat(80)); } fn run_n (u64, u128)>(name: &str, n: u32, mut f: F) { let mut times = Vec::new(); let mut last = 0u64; let _ = f(); // warmup for _ in 0..n { let (v, t) = f(); times.push(t); last = v; } times.sort_unstable(); let median = times[times.len() / 2]; let min = *times.iter().min().unwrap(); let max = *times.iter().max().unwrap(); println!( "{:>26} | {:>12} | {:>10} | {:>10} | {:>10}", name, last, median, min, max ); } // --------------------------------------------------------------------------- // 9. deep_recursion — 1000 levels deep // --------------------------------------------------------------------------- // Each recursive frame holds an `&AtomicU64`, a `u64`, plus prologue/spill — // conservatively ~64 B/frame on release. Smarm actor stacks are a fixed 64 KiB, // so 500 levels (~32 KiB) leaves comfortable headroom while still being deep // enough to exercise the stack-growth advantage over Box::pin recursion. const RECURSE_DEPTH: u64 = 500; fn bench_recurse_smarm(threads: usize) -> (u64, u128) { let total = Arc::new(AtomicU64::new(0)); let t2 = total.clone(); let start = Instant::now(); smarm::runtime::init(bench_cfg(threads)).run(move || { // Plain Rust recursion on the actor's own (growable) stack. 
fn recurse(c: &AtomicU64, n: u64) -> u64 { if n == 0 { c.fetch_add(1, Ordering::Relaxed); 0 } else { 1 + recurse(c, n - 1) } } let h = smarm::spawn(move || { let _ = recurse(&t2, RECURSE_DEPTH); }); h.join().unwrap(); }); (total.load(Ordering::Relaxed), start.elapsed().as_micros()) } fn bench_recurse_tokio_current() -> (u64, u128) { let counter = Arc::new(AtomicU64::new(0)); let c2 = counter.clone(); let rt = tokio::runtime::Builder::new_current_thread().build().unwrap(); let start = Instant::now(); let local = tokio::task::LocalSet::new(); local.block_on(&rt, async move { // async fn can't self-recurse; each level returns a Box::pin'd future. // This is the canonical workaround a real user would write. fn recurse( c: Arc, n: u64, ) -> std::pin::Pin>> { Box::pin(async move { if n == 0 { c.fetch_add(1, Ordering::Relaxed); 0 } else { 1 + recurse(c, n - 1).await } }) } let h = tokio::task::spawn_local(async move { let _ = recurse(c2, RECURSE_DEPTH).await; }); let _ = h.await; }); (counter.load(Ordering::Relaxed), start.elapsed().as_micros()) } fn bench_recurse_tokio_multi() -> (u64, u128) { let counter = Arc::new(AtomicU64::new(0)); let c2 = counter.clone(); let rt = tokio::runtime::Builder::new_multi_thread() .worker_threads(available_threads()) .build() .unwrap(); let start = Instant::now(); rt.block_on(async move { fn recurse( c: Arc, n: u64, ) -> std::pin::Pin + Send>> { Box::pin(async move { if n == 0 { c.fetch_add(1, Ordering::Relaxed); 0 } else { 1 + recurse(c, n - 1).await } }) } let h = tokio::spawn(async move { let _ = recurse(c2, RECURSE_DEPTH).await; }); let _ = h.await; }); (counter.load(Ordering::Relaxed), start.elapsed().as_micros()) } // --------------------------------------------------------------------------- // 10. 
yield_in_hot_loop — 2 actors, 500k yields each, single thread // --------------------------------------------------------------------------- const HOT_YIELDS: u64 = 500_000; fn bench_hot_smarm() -> (u64, u128) { let start = Instant::now(); smarm::runtime::init(bench_cfg(1)).run(|| { let ha = smarm::spawn(|| { for _ in 0..HOT_YIELDS { smarm::yield_now(); } }); let hb = smarm::spawn(|| { for _ in 0..HOT_YIELDS { smarm::yield_now(); } }); ha.join().unwrap(); hb.join().unwrap(); }); (HOT_YIELDS * 2, start.elapsed().as_micros()) } fn bench_hot_tokio_current() -> (u64, u128) { let rt = tokio::runtime::Builder::new_current_thread().build().unwrap(); let start = Instant::now(); let local = tokio::task::LocalSet::new(); local.block_on(&rt, async move { let ha = tokio::task::spawn_local(async move { for _ in 0..HOT_YIELDS { tokio::task::yield_now().await; } }); let hb = tokio::task::spawn_local(async move { for _ in 0..HOT_YIELDS { tokio::task::yield_now().await; } }); let _ = ha.await; let _ = hb.await; }); (HOT_YIELDS * 2, start.elapsed().as_micros()) } // --------------------------------------------------------------------------- // 11. uncontended_channel — 1 producer, 1 consumer, 1M msgs, single-threaded // --------------------------------------------------------------------------- const UNCONT_MSGS: u64 = 1_000_000; fn bench_unc_smarm() -> (u64, u128) { let start = Instant::now(); smarm::runtime::init(bench_cfg(1)).run(|| { let (tx, rx) = smarm::channel::(); let consumer = smarm::spawn(move || { let mut count = 0u64; while let Ok(_) = rx.recv() { count += 1; } let _ = count; // discard; run() closure must return () }); let producer = smarm::spawn(move || { for i in 0..UNCONT_MSGS { tx.send(i).unwrap(); } // tx drops here, closing the channel. 
}); producer.join().unwrap(); let _ = consumer.join().unwrap(); }); (UNCONT_MSGS, start.elapsed().as_micros()) } fn bench_unc_tokio_current() -> (u64, u128) { let rt = tokio::runtime::Builder::new_current_thread().build().unwrap(); let start = Instant::now(); let local = tokio::task::LocalSet::new(); local.block_on(&rt, async move { let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::(); let consumer = tokio::task::spawn_local(async move { let mut count = 0u64; while let Some(_) = rx.recv().await { count += 1; } count }); let producer = tokio::task::spawn_local(async move { for i in 0..UNCONT_MSGS { tx.send(i).unwrap(); } }); let _ = producer.await; let _ = consumer.await; }); (UNCONT_MSGS, start.elapsed().as_micros()) } // --------------------------------------------------------------------------- // 12. catch_unwind_panics — 10k tasks, half panic // --------------------------------------------------------------------------- const PANIC_TASKS: u64 = 10_000; fn bench_panic_smarm(threads: usize) -> (u64, u128) { let ok = Arc::new(AtomicU64::new(0)); let err = Arc::new(AtomicU64::new(0)); let ok2 = ok.clone(); let err2 = err.clone(); let start = Instant::now(); smarm::runtime::init(bench_cfg(threads)).run(move || { let mut handles = Vec::new(); for i in 0..PANIC_TASKS { handles.push(smarm::spawn(move || { if i % 2 == 0 { panic!("planned"); } })); } for h in handles { match h.join() { Ok(()) => { ok2.fetch_add(1, Ordering::Relaxed); } Err(_) => { err2.fetch_add(1, Ordering::Relaxed); } } } }); let total = ok.load(Ordering::Relaxed) + err.load(Ordering::Relaxed); (total, start.elapsed().as_micros()) } fn bench_panic_tokio_current() -> (u64, u128) { let ok = Arc::new(AtomicU64::new(0)); let err = Arc::new(AtomicU64::new(0)); let ok2 = ok.clone(); let err2 = err.clone(); let rt = tokio::runtime::Builder::new_current_thread().build().unwrap(); let start = Instant::now(); let local = tokio::task::LocalSet::new(); local.block_on(&rt, async move { let mut handles = 
Vec::new(); for i in 0..PANIC_TASKS { handles.push(tokio::task::spawn_local(async move { if i % 2 == 0 { panic!("planned"); } })); } for h in handles { match h.await { Ok(()) => { ok2.fetch_add(1, Ordering::Relaxed); } Err(_) => { err2.fetch_add(1, Ordering::Relaxed); } } } }); let total = ok.load(Ordering::Relaxed) + err.load(Ordering::Relaxed); (total, start.elapsed().as_micros()) } fn bench_panic_tokio_multi() -> (u64, u128) { let ok = Arc::new(AtomicU64::new(0)); let err = Arc::new(AtomicU64::new(0)); let ok2 = ok.clone(); let err2 = err.clone(); let rt = tokio::runtime::Builder::new_multi_thread() .worker_threads(available_threads()) .build() .unwrap(); let start = Instant::now(); rt.block_on(async move { let mut handles = Vec::new(); for i in 0..PANIC_TASKS { handles.push(tokio::spawn(async move { if i % 2 == 0 { panic!("planned"); } })); } for h in handles { match h.await { Ok(()) => { ok2.fetch_add(1, Ordering::Relaxed); } Err(_) => { err2.fetch_add(1, Ordering::Relaxed); } } } }); let total = ok.load(Ordering::Relaxed) + err.load(Ordering::Relaxed); (total, start.elapsed().as_micros()) } // --------------------------------------------------------------------------- // main // --------------------------------------------------------------------------- // --------------------------------------------------------------------------- // Knob helper — reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES env vars // so the sweep script can override the preemption knobs without recompiling. 
// --------------------------------------------------------------------------- fn bench_cfg(threads: usize) -> smarm::runtime::Config { let mut cfg = smarm::runtime::Config::exact(threads); if let Ok(v) = std::env::var("SMARM_ALLOC_INTERVAL") { if let Ok(n) = v.parse::() { cfg = cfg.alloc_interval(n); } } if let Ok(v) = std::env::var("SMARM_TIMESLICE_CYCLES") { if let Ok(n) = v.parse::() { cfg = cfg.timeslice_cycles(n); } } cfg } fn main() { let n = available_threads(); println!("smarm smarm-favored benchmarks"); println!("available parallelism: {n} threads"); println!("ITERS={ITERS} (+1 warmup, discarded)"); println!( "RECURSE_DEPTH={RECURSE_DEPTH}, HOT_YIELDS={HOT_YIELDS}×2, \ UNCONT_MSGS={UNCONT_MSGS}, PANIC_TASKS={PANIC_TASKS}" ); // ---- 9. deep_recursion ---- print_header(&format!("deep_recursion: depth {RECURSE_DEPTH}")); run_n("smarm 1-thread", ITERS, || bench_recurse_smarm(1)); run_n(&format!("smarm {n}-thread"), ITERS, || bench_recurse_smarm(n)); run_n("tokio current_thread", ITERS, bench_recurse_tokio_current); run_n("tokio multi-thread", ITERS, bench_recurse_tokio_multi); // ---- 10. yield_in_hot_loop ---- print_header(&format!("yield_in_hot_loop: 2 actors × {HOT_YIELDS} yields (single thread)")); run_n("smarm 1-thread", ITERS, bench_hot_smarm); run_n("tokio current_thread", ITERS, bench_hot_tokio_current); // ---- 11. uncontended_channel ---- print_header(&format!("uncontended_channel: 1→1, {UNCONT_MSGS} msgs (single thread)")); run_n("smarm 1-thread", ITERS, bench_unc_smarm); run_n("tokio current_thread", ITERS, bench_unc_tokio_current); // ---- 12. catch_unwind_panics ---- print_header(&format!("catch_unwind_panics: {PANIC_TASKS} tasks, 50% panic")); run_n("smarm 1-thread", ITERS, || bench_panic_smarm(1)); run_n(&format!("smarm {n}-thread"), ITERS, || bench_panic_smarm(n)); run_n("tokio current_thread", ITERS, bench_panic_tokio_current); run_n("tokio multi-thread", ITERS, bench_panic_tokio_multi); }