//! Compute-heavy fan-out/fan-in benchmark. //! //! Counts primes in [2, N) across W workers (each handling a contiguous //! slice), then sums the results. Tests pure compute throughput plus the //! cost of spawn/join/channel. Single-threaded both sides (smarm has only //! one OS thread; tokio is configured `current_thread`). //! //! Run with `cargo bench`. use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use std::time::Instant; const N: u64 = 200_000; const WORKERS: u64 = 16; const ITERATIONS: u32 = 5; fn is_prime(n: u64) -> bool { if n < 2 { return false; } if n < 4 { return true; } if n % 2 == 0 { return false; } let mut i = 3u64; while i * i <= n { if n % i == 0 { return false; } i += 2; } true } fn count_primes_in(lo: u64, hi: u64) -> u64 { let mut count = 0u64; for n in lo..hi { if is_prime(n) { count += 1; } } count } fn slice(worker: u64) -> (u64, u64) { let per = N / WORKERS; let lo = worker * per; let hi = if worker + 1 == WORKERS { N } else { (worker + 1) * per }; (lo, hi) } fn bench_smarm() -> (u64, u128) { let total = Arc::new(AtomicU64::new(0)); let total2 = total.clone(); let start = Instant::now(); smarm::run(move || { let mut handles = Vec::new(); for w in 0..WORKERS { let (lo, hi) = slice(w); let t = total2.clone(); handles.push(smarm::spawn(move || { let c = count_primes_in(lo, hi); t.fetch_add(c, Ordering::Relaxed); })); } for h in handles { h.join().unwrap(); } }); (total.load(Ordering::Relaxed), start.elapsed().as_micros()) } fn bench_tokio() -> (u64, u128) { let total = Arc::new(AtomicU64::new(0)); let total2 = total.clone(); let rt = tokio::runtime::Builder::new_current_thread() .build() .unwrap(); let start = Instant::now(); let local = tokio::task::LocalSet::new(); local.block_on(&rt, async move { let mut handles = Vec::new(); for w in 0..WORKERS { let (lo, hi) = slice(w); let t = total2.clone(); handles.push(tokio::task::spawn_local(async move { let c = count_primes_in(lo, hi); t.fetch_add(c, Ordering::Relaxed); })); } for h in handles { let _ = h.await; } }); (total.load(Ordering::Relaxed), start.elapsed().as_micros()) } fn bench_baseline() -> (u64, u128) { let mut total = 0u64; let start = Instant::now(); for w in 0..WORKERS { let (lo, hi) = slice(w); total += count_primes_in(lo, hi); } (total, start.elapsed().as_micros()) } fn run_n (u64, u128)>(name: &str, n: u32, mut f: F) { let mut times = Vec::new(); let mut last_count = 0; for _ in 0..n { let (c, t) = f(); times.push(t); last_count = c; } times.sort(); let median = times[times.len() / 2]; let min = *times.iter().min().unwrap(); let max = *times.iter().max().unwrap(); println!( "{:>12} | primes: {:>6} | median: {:>8} µs | min: {:>8} µs | max: {:>8} µs", name, last_count, median, min, max ); } fn main() { println!( "Counting primes in [2, {}) across {} workers, {} iterations each\n", N, WORKERS, ITERATIONS ); println!("{:>12} | {:>15} | {:>16} | {:>15} | {:>15}", "runtime", "primes found", "median", "min", "max"); println!("{}", "-".repeat(80)); run_n("baseline", ITERATIONS, bench_baseline); run_n("smarm", ITERATIONS, bench_smarm); run_n("tokio", ITERATIONS, bench_tokio); }