Complete rewrite with improved architecture & correctness: - src/runtime.rs: Simplified task scheduling with proper state transitions - src/scheduler.rs: Decoupled from runtime, pure task queue logic - src/io.rs, src/mutex.rs: Refactored for clarity & performance - New actor model framework (src/actor.rs, src/context.rs) - Channel primitives (src/channel.rs) & process IDs (src/pid.rs) - Preemption framework (src/preempt.rs) for fair timeslicing - Expanded benchmarks & tests (multi_scheduler, primes, runtime)
344 lines
12 KiB
Rust
344 lines
12 KiB
Rust
//! Benchmarks for the multi-scheduler runtime.
//!
//! Three workloads, four runtime configurations:
//! - smarm single-thread (exact = 1)
//! - smarm multi-thread (exact = available_parallelism)
//! - tokio current_thread (single-thread baseline)
//! - tokio multi-thread (the parallel comparison)
//!
//! Workloads:
//! 1. Fan-out / fan-in compute (primes) — CPU-bound, tests parallelism
//! 2. Ping-pong — message-passing overhead, park/unpark cost
//! 3. Spawn throughput — cost of spawn + join per actor
|
|
|
|
use std::sync::atomic::{AtomicU64, Ordering};
|
|
use std::sync::Arc;
|
|
use std::time::Instant;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Shared helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Returns the parallelism reported by `std::thread::available_parallelism`.
///
/// Falls back to `1` when the standard library cannot determine a value.
fn available_threads() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    }
}
|
|
|
|
/// Prints the banner for one benchmark section.
///
/// Output is a blank line, an 80-column `=` rule, the title, another `=`
/// rule, the column headings, and an 80-column `-` rule. The column widths
/// match the rows printed by `run_n`.
fn print_header(title: &str) {
    let heavy_rule = "=".repeat(80);
    let light_rule = "-".repeat(80);

    println!("\n{}", heavy_rule);
    println!(" {title}");
    println!("{}", heavy_rule);
    println!(
        "{:>22} | {:>12} | {:>10} | {:>10} | {:>10}",
        "runtime", "result", "median µs", "min µs", "max µs"
    );
    println!("{}", light_rule);
}
|
|
|
|
/// Runs `f` exactly `n` times and prints one row of the results table.
///
/// Each call to `f` must return `(value, elapsed)`. The row shows `name`,
/// the value from the final call, and the median, minimum and maximum of the
/// collected `elapsed` samples. For an even number of samples the "median"
/// is the upper of the two middle values.
///
/// With `n == 0` there are no samples, so a placeholder row with `-` in
/// every data column is printed instead of panicking on an empty vector.
fn run_n<F: FnMut() -> (u64, u128)>(name: &str, n: u32, mut f: F) {
    // u32 -> usize is lossless on 32- and 64-bit targets.
    let mut times = Vec::with_capacity(n as usize);
    let mut last = 0u64;
    for _ in 0..n {
        let (value, elapsed) = f();
        times.push(elapsed);
        last = value;
    }
    times.sort_unstable();

    // Once sorted, the extremes are simply the first and last samples.
    match (times.first(), times.last()) {
        (Some(&min), Some(&max)) => {
            let median = times[times.len() / 2];
            println!(
                "{:>22} | {:>12} | {:>10} | {:>10} | {:>10}",
                name, last, median, min, max
            );
        }
        _ => println!(
            "{:>22} | {:>12} | {:>10} | {:>10} | {:>10}",
            name, "-", "-", "-", "-"
        ),
    }
}
|
|
|
|
/// Number of times each benchmark closure is executed; `main` passes this as
/// the `n` argument of `run_n`.
const ITERS: u32 = 7;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Workload 1: fan-out / fan-in primes
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Exclusive upper bound of the integer range scanned in the primes workload;
/// `primes_slice` divides `0..PRIME_N` among the workers.
const PRIME_N: u64 = 400_000;
|
|
/// Number of slices the primes range is split into, and therefore the number
/// of tasks each primes benchmark spawns.
const WORKERS: u64 = 64;
|
|
|
|
/// Trial-division primality test.
///
/// Returns `true` when `n` is prime. Values below 2 are not prime, 2 and 3
/// are, and even numbers above 2 are rejected up front; the remaining
/// candidates are divided by every odd `i` with `i * i <= n`.
fn is_prime(n: u64) -> bool {
    if n < 2 {
        return false;
    }
    if n < 4 {
        return true;
    }
    if n % 2 == 0 {
        return false;
    }

    let mut i = 3u64;
    // `checked_mul` ends the scan once `i * i` no longer fits in a u64. At
    // that point `i * i` is necessarily greater than `n`, so there is nothing
    // left to test. A plain `i * i` would overflow for primes just below
    // `u64::MAX` (panic in debug builds, wraparound in release builds).
    while let Some(square) = i.checked_mul(i) {
        if square > n {
            break;
        }
        if n % i == 0 {
            return false;
        }
        i += 2;
    }
    true
}
|
|
|
|
fn count_primes(lo: u64, hi: u64) -> u64 {
|
|
(lo..hi).filter(|&n| is_prime(n)).count() as u64
|
|
}
|
|
|
|
fn primes_slice(w: u64) -> (u64, u64) {
|
|
let per = PRIME_N / WORKERS;
|
|
let lo = w * per;
|
|
let hi = if w + 1 == WORKERS { PRIME_N } else { lo + per };
|
|
(lo, hi)
|
|
}
|
|
|
|
fn bench_primes_smarm(threads: usize) -> (u64, u128) {
|
|
let total = Arc::new(AtomicU64::new(0));
|
|
let t2 = total.clone();
|
|
let start = Instant::now();
|
|
smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || {
|
|
let mut handles = Vec::new();
|
|
for w in 0..WORKERS {
|
|
let (lo, hi) = primes_slice(w);
|
|
let tc = t2.clone();
|
|
handles.push(smarm::spawn(move || {
|
|
tc.fetch_add(count_primes(lo, hi), Ordering::Relaxed);
|
|
}));
|
|
}
|
|
for h in handles { h.join().unwrap(); }
|
|
});
|
|
(total.load(Ordering::Relaxed), start.elapsed().as_micros())
|
|
}
|
|
|
|
fn bench_primes_tokio_current() -> (u64, u128) {
|
|
let total = Arc::new(AtomicU64::new(0));
|
|
let t2 = total.clone();
|
|
let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
|
|
let start = Instant::now();
|
|
let local = tokio::task::LocalSet::new();
|
|
local.block_on(&rt, async move {
|
|
let mut handles = Vec::new();
|
|
for w in 0..WORKERS {
|
|
let (lo, hi) = primes_slice(w);
|
|
let tc = t2.clone();
|
|
handles.push(tokio::task::spawn_local(async move {
|
|
tc.fetch_add(count_primes(lo, hi), Ordering::Relaxed);
|
|
}));
|
|
}
|
|
for h in handles { let _ = h.await; }
|
|
});
|
|
(total.load(Ordering::Relaxed), start.elapsed().as_micros())
|
|
}
|
|
|
|
fn bench_primes_tokio_multi() -> (u64, u128) {
|
|
let total = Arc::new(AtomicU64::new(0));
|
|
let t2 = total.clone();
|
|
let rt = tokio::runtime::Builder::new_multi_thread()
|
|
.worker_threads(available_threads())
|
|
.build()
|
|
.unwrap();
|
|
let start = Instant::now();
|
|
rt.block_on(async move {
|
|
let mut handles = Vec::new();
|
|
for w in 0..WORKERS {
|
|
let (lo, hi) = primes_slice(w);
|
|
let tc = t2.clone();
|
|
handles.push(tokio::spawn(async move {
|
|
tc.fetch_add(count_primes(lo, hi), Ordering::Relaxed);
|
|
}));
|
|
}
|
|
for h in handles { let _ = h.await; }
|
|
});
|
|
(total.load(Ordering::Relaxed), start.elapsed().as_micros())
|
|
}
|
|
|
|
fn bench_primes_baseline() -> (u64, u128) {
|
|
let start = Instant::now();
|
|
let total: u64 = (0..WORKERS).map(|w| {
|
|
let (lo, hi) = primes_slice(w);
|
|
count_primes(lo, hi)
|
|
}).sum();
|
|
(total, start.elapsed().as_micros())
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Workload 2: channel ping-pong
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Counter value at which the ping-pong actors stop. The counter is
/// incremented on every one-way message, not once per round trip.
const PING_ROUNDS: u64 = 10_000;
|
|
|
|
fn bench_pingpong_smarm(threads: usize) -> (u64, u128) {
|
|
let start = Instant::now();
|
|
smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(|| {
|
|
let (tx_a, rx_a) = smarm::channel::<u64>();
|
|
let (tx_b, rx_b) = smarm::channel::<u64>();
|
|
let ha = smarm::spawn(move || {
|
|
tx_a.send(0).unwrap();
|
|
loop {
|
|
let v = rx_b.recv().unwrap();
|
|
if v >= PING_ROUNDS { break; }
|
|
tx_a.send(v + 1).unwrap();
|
|
}
|
|
});
|
|
let hb = smarm::spawn(move || {
|
|
loop {
|
|
let v = rx_a.recv().unwrap();
|
|
tx_b.send(v + 1).unwrap();
|
|
if v + 1 >= PING_ROUNDS { break; }
|
|
}
|
|
});
|
|
ha.join().unwrap();
|
|
hb.join().unwrap();
|
|
});
|
|
(PING_ROUNDS, start.elapsed().as_micros())
|
|
}
|
|
|
|
fn bench_pingpong_tokio_current() -> (u64, u128) {
|
|
let rt = tokio::runtime::Builder::new_current_thread()
|
|
.enable_all()
|
|
.build()
|
|
.unwrap();
|
|
let start = Instant::now();
|
|
let local = tokio::task::LocalSet::new();
|
|
local.block_on(&rt, async move {
|
|
let (tx_a, mut rx_a) = tokio::sync::mpsc::unbounded_channel::<u64>();
|
|
let (tx_b, mut rx_b) = tokio::sync::mpsc::unbounded_channel::<u64>();
|
|
let ha = tokio::task::spawn_local(async move {
|
|
tx_a.send(0).unwrap();
|
|
loop {
|
|
let v = rx_b.recv().await.unwrap();
|
|
if v >= PING_ROUNDS { break; }
|
|
tx_a.send(v + 1).unwrap();
|
|
}
|
|
});
|
|
let hb = tokio::task::spawn_local(async move {
|
|
loop {
|
|
let v = rx_a.recv().await.unwrap();
|
|
tx_b.send(v + 1).unwrap();
|
|
if v + 1 >= PING_ROUNDS { break; }
|
|
}
|
|
});
|
|
let _ = ha.await;
|
|
let _ = hb.await;
|
|
});
|
|
(PING_ROUNDS, start.elapsed().as_micros())
|
|
}
|
|
|
|
fn bench_pingpong_tokio_multi() -> (u64, u128) {
|
|
let rt = tokio::runtime::Builder::new_multi_thread()
|
|
.worker_threads(2) // ping-pong only needs 2 threads
|
|
.enable_all()
|
|
.build()
|
|
.unwrap();
|
|
let start = Instant::now();
|
|
rt.block_on(async move {
|
|
let (tx_a, mut rx_a) = tokio::sync::mpsc::unbounded_channel::<u64>();
|
|
let (tx_b, mut rx_b) = tokio::sync::mpsc::unbounded_channel::<u64>();
|
|
let ha = tokio::spawn(async move {
|
|
tx_a.send(0).unwrap();
|
|
loop {
|
|
let v = rx_b.recv().await.unwrap();
|
|
if v >= PING_ROUNDS { break; }
|
|
tx_a.send(v + 1).unwrap();
|
|
}
|
|
});
|
|
let hb = tokio::spawn(async move {
|
|
loop {
|
|
let v = rx_a.recv().await.unwrap();
|
|
tx_b.send(v + 1).unwrap();
|
|
if v + 1 >= PING_ROUNDS { break; }
|
|
}
|
|
});
|
|
let _ = ha.await;
|
|
let _ = hb.await;
|
|
});
|
|
(PING_ROUNDS, start.elapsed().as_micros())
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Workload 3: spawn throughput
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Number of tasks spawned and then joined in one run of the
/// spawn-throughput workload.
const SPAWN_COUNT: u64 = 1_000;
|
|
|
|
fn bench_spawn_smarm(threads: usize) -> (u64, u128) {
|
|
let counter = Arc::new(AtomicU64::new(0));
|
|
let c = counter.clone();
|
|
let start = Instant::now();
|
|
smarm::runtime::init(smarm::runtime::Config::exact(threads)).run(move || {
|
|
let mut handles = Vec::new();
|
|
for _ in 0..SPAWN_COUNT {
|
|
let cc = c.clone();
|
|
handles.push(smarm::spawn(move || {
|
|
cc.fetch_add(1, Ordering::Relaxed);
|
|
}));
|
|
}
|
|
for h in handles { h.join().unwrap(); }
|
|
});
|
|
(counter.load(Ordering::Relaxed), start.elapsed().as_micros())
|
|
}
|
|
|
|
fn bench_spawn_tokio_current() -> (u64, u128) {
|
|
let counter = Arc::new(AtomicU64::new(0));
|
|
let c = counter.clone();
|
|
let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
|
|
let start = Instant::now();
|
|
let local = tokio::task::LocalSet::new();
|
|
local.block_on(&rt, async move {
|
|
let mut handles = Vec::new();
|
|
for _ in 0..SPAWN_COUNT {
|
|
let cc = c.clone();
|
|
handles.push(tokio::task::spawn_local(async move {
|
|
cc.fetch_add(1, Ordering::Relaxed);
|
|
}));
|
|
}
|
|
for h in handles { let _ = h.await; }
|
|
});
|
|
(counter.load(Ordering::Relaxed), start.elapsed().as_micros())
|
|
}
|
|
|
|
fn bench_spawn_tokio_multi() -> (u64, u128) {
|
|
let counter = Arc::new(AtomicU64::new(0));
|
|
let c = counter.clone();
|
|
let rt = tokio::runtime::Builder::new_multi_thread()
|
|
.worker_threads(available_threads())
|
|
.build()
|
|
.unwrap();
|
|
let start = Instant::now();
|
|
rt.block_on(async move {
|
|
let mut handles = Vec::new();
|
|
for _ in 0..SPAWN_COUNT {
|
|
let cc = c.clone();
|
|
handles.push(tokio::spawn(async move {
|
|
cc.fetch_add(1, Ordering::Relaxed);
|
|
}));
|
|
}
|
|
for h in handles { let _ = h.await; }
|
|
});
|
|
(counter.load(Ordering::Relaxed), start.elapsed().as_micros())
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// main
|
|
// ---------------------------------------------------------------------------
|
|
|
|
fn main() {
|
|
let n = available_threads();
|
|
println!("smarm multi-scheduler benchmarks");
|
|
println!("available parallelism: {n} threads");
|
|
println!("PRIME_N={PRIME_N}, WORKERS={WORKERS}, PING_ROUNDS={PING_ROUNDS}, SPAWN_COUNT={SPAWN_COUNT}");
|
|
|
|
// ---- Primes ----
|
|
print_header(&format!("Fan-out/fan-in: count primes in [2, {PRIME_N}) across {WORKERS} workers"));
|
|
run_n("baseline (serial)", ITERS, bench_primes_baseline);
|
|
run_n("smarm single-thread", ITERS, || bench_primes_smarm(1));
|
|
run_n(&format!("smarm {n}-thread"), ITERS, || bench_primes_smarm(n));
|
|
run_n("tokio current_thread", ITERS, bench_primes_tokio_current);
|
|
run_n("tokio multi-thread", ITERS, bench_primes_tokio_multi);
|
|
|
|
// ---- Ping-pong ----
|
|
print_header(&format!("Ping-pong: {PING_ROUNDS} round-trips between two actors"));
|
|
run_n("smarm single-thread", ITERS, || bench_pingpong_smarm(1));
|
|
run_n(&format!("smarm {n}-thread"), ITERS, || bench_pingpong_smarm(n));
|
|
run_n("tokio current_thread", ITERS, bench_pingpong_tokio_current);
|
|
run_n("tokio multi-thread", ITERS, bench_pingpong_tokio_multi);
|
|
|
|
// ---- Spawn throughput ----
|
|
print_header(&format!("Spawn throughput: {SPAWN_COUNT} actors spawned and joined"));
|
|
run_n("smarm single-thread", ITERS, || bench_spawn_smarm(1));
|
|
run_n(&format!("smarm {n}-thread"), ITERS, || bench_spawn_smarm(n));
|
|
run_n("tokio current_thread", ITERS, bench_spawn_tokio_current);
|
|
run_n("tokio multi-thread", ITERS, bench_spawn_tokio_multi);
|
|
}
|