Files
smarm/benches/sweep.py
Bench 3da6ffaa77 benches: expose preemption knobs + sweep runner
Config API changes (src/preempt.rs, src/runtime.rs):
- preempt: promote ALLOC_INTERVAL and TIMESLICE_CYCLES from bare consts to
  DEFAULT_ALLOC_INTERVAL / DEFAULT_TIMESLICE_CYCLES; store active values in
  thread-locals set on each actor resume so multiple runtimes can use
  different settings concurrently.
- runtime: add alloc_interval / timeslice_cycles fields to Config; add
  Config::alloc_interval(n) and Config::timeslice_cycles(c) builder methods;
  thread the values through RuntimeInner to the reset_timeslice() call in
  schedule_loop.

Bench changes:
- Add bench_cfg(threads) helper to general/tokio_favored/smarm_favored that
  wraps Config::exact and reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES
  env vars, so the sweep script can vary knobs without recompiling.

Sweep tooling (benches/sweep.py):
- 'run':     run the 3-file bench suite once; --save-baseline persists JSON
- 'regress': compare current run against baseline.json, exit 1 on any bench
             that regresses >10% vs stored medians
- 'sweep':   run the full SWEEP_GRID (10 points), print comparison table,
             optional --save-csv; binaries pre-built so no recompile per point

Sweep results (10-point grid, 1-CPU sandbox):
- The preemption knobs have very little effect on this single-CPU machine.
  Most benches move <5% across the entire grid.
- Longer timeslices (tc=600k, tc=1200k) reliably hurt spawn_storm_busy
  (+11-15%) and catch_unwind_panics (+10-12%) because actors hold the
  scheduler mutex longer per timeslice, stalling the storm of joinable tasks.
- Shorter timeslices (tc=150k) give a small improvement on many_timers
  (-3-4%) and a wash everywhere else.
- yield_in_hot_loop and uncontended_channel are essentially flat across all
  knobs — both are scheduling-dominated and call yield_now explicitly, so
  the RDTSC-driven preemption path is irrelevant.
- Conclusion: the knobs matter primarily under contention (multi-core).
  Re-run sweep on a multi-core machine before drawing tuning conclusions.
2026-05-25 13:04:58 +00:00

348 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
"""
smarm bench sweep + regression checker.
Usage:
# Run a full knob sweep and print a comparison table:
python3 benches/sweep.py sweep
# Check the current build against the committed baseline:
python3 benches/sweep.py regress
# Run all benches once (default knobs) and print results:
python3 benches/sweep.py run
The sweep grid is defined in SWEEP_GRID below.
The regression baseline is loaded from benches/baseline.json.
"""
import argparse
import json
import os
import re
import subprocess
import sys
from pathlib import Path
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Repository root; this script is invoked as benches/sweep.py, so the root is
# two levels above the resolved script path.
REPO = Path(__file__).resolve().parent.parent

# Bench targets handed to `cargo bench --bench`.  primes and multi_scheduler
# are deliberately left out: legacy harness, not part of the 12-bench suite,
# and insensitive to the preemption knobs.
BENCHES = ["general", "tokio_favored", "smarm_favored"]

# Points visited by the `sweep` subcommand, as
# (alloc_interval, timeslice_cycles) pairs, in run order.
#   alloc_interval:   lower = check RDTSC more often = finer preemption
#   timeslice_cycles: lower = shorter timeslice
SWEEP_GRID = (
    # Shorter slice; the last point keeps the default interval.
    [(interval, 150_000) for interval in (32, 64, 128)]
    # Default slice; (128, 300_000) is the baseline (defaults).
    + [(interval, 300_000) for interval in (32, 64, 128, 256, 512)]
    # Default interval with longer slices.
    + [(128, 600_000), (128, 1_200_000)]
)

# A median more than this many percent above the baseline median counts as a
# regression.
REGRESSION_THRESHOLD_PCT = 10
# ---------------------------------------------------------------------------
# Parsing
# ---------------------------------------------------------------------------
# Match lines like:
# " smarm 1-thread | 1000000 | 31473 | 28719 | 33113"
ROW_RE = re.compile(
    r"^\s*(?P<name>[^|]+?)\s*\|\s*(?P<result>\d+)\s*\|\s*(?P<median>\d+)\s*\|\s*(?P<min>\d+)\s*\|\s*(?P<max>\d+)\s*$"
)
# A section header opens a new bench: two leading whitespace characters, a
# snake_case name, then ':' or an em dash, e.g.
#   "  chained_spawn: depth 1000"
HEADER_RE = re.compile(r"^\s{2}(?P<bench>[a-z_]+)[:—]")


def parse_output(text: str) -> dict[str, dict[str, dict]]:
    """
    Parse bench output into {bench_name: {runtime_label: stats}}.

    bench_name is the snake_case name captured from a section header, and
    stats is a dict with integer "result", "median", "min" and "max" entries
    taken from a pipe-separated result row.  Rows that appear before the
    first header are ignored, as are lines matching neither pattern.  A
    header with no rows still yields an (empty) entry.
    """
    parsed: dict[str, dict[str, dict]] = {}
    section: dict[str, dict] | None = None  # rows land in the most recent header's dict
    for raw_line in text.splitlines():
        if header := HEADER_RE.match(raw_line):
            section = parsed.setdefault(header.group("bench"), {})
        elif section is not None and (row := ROW_RE.match(raw_line)):
            section[row.group("name").strip()] = {
                field: int(row.group(field))
                for field in ("result", "median", "min", "max")
            }
    return parsed
# ---------------------------------------------------------------------------
# Running
# ---------------------------------------------------------------------------
def run_benches(env_extra: dict[str, str] | None = None) -> dict[str, dict[str, dict]]:
    """
    Run every target in BENCHES via `cargo bench` and return the merged results.

    env_extra, when given, is layered over a copy of the current environment
    for the child processes (the sweep uses it to pass the SMARM_* knobs).

    A bench target that exits non-zero is reported on stderr and skipped, so
    the returned mapping may be partial.  Results are merged per bench
    section: if two targets emit a section with the same name, their runtime
    rows are combined rather than the later section replacing the earlier one.
    """
    env = os.environ.copy()
    if env_extra:
        env.update(env_extra)

    merged: dict[str, dict[str, dict]] = {}
    for bench in BENCHES:
        proc = subprocess.run(
            ["cargo", "bench", "--bench", bench],
            cwd=REPO,
            env=env,
            capture_output=True,
            text=True,
        )
        if proc.returncode != 0:
            # Best effort: report the tail of stderr and carry on with the rest.
            print(f" ERROR running {bench}:\n{proc.stderr[-800:]}", file=sys.stderr)
            continue

        parsed = parse_output(proc.stdout)
        if not any(parsed.values()):
            # A successful run with no rows usually means the output format
            # no longer matches the parser; say so instead of staying silent.
            print(f" WARNING {bench}: no result rows parsed from output", file=sys.stderr)
        for section, runtimes in parsed.items():
            merged.setdefault(section, {}).update(runtimes)
    return merged
# ---------------------------------------------------------------------------
# Baseline JSON
# ---------------------------------------------------------------------------
# Baseline file read by load_baseline() and written by save_baseline().
BASELINE_PATH = REPO.joinpath("benches", "baseline.json")
def load_baseline() -> dict:
    """
    Load the regression baseline from BASELINE_PATH.

    Returns the decoded JSON object.  Exits the process with an explanatory
    message (instead of a traceback) when the file is missing, is not valid
    JSON, or does not contain a JSON object at the top level.
    """
    try:
        # Explicit UTF-8 so a hand-edited baseline does not depend on the locale.
        raw = BASELINE_PATH.read_text(encoding="utf-8")
    except FileNotFoundError:
        sys.exit(
            f"No baseline found at {BASELINE_PATH}.\n"
            "Run: python3 benches/sweep.py run then save the output manually,\n"
            "or use --save-baseline with the run subcommand."
        )

    try:
        baseline = json.loads(raw)
    except json.JSONDecodeError as err:
        sys.exit(f"Baseline at {BASELINE_PATH} is not valid JSON: {err}")

    if not isinstance(baseline, dict):
        sys.exit(f"Baseline at {BASELINE_PATH} must be a JSON object of bench results.")
    return baseline
def save_baseline(results: dict) -> None:
    """Serialize results as indented JSON to BASELINE_PATH and report the path."""
    serialized = json.dumps(results, indent=2)
    BASELINE_PATH.write_text(serialized)
    print(f"Baseline saved to {BASELINE_PATH}")
# ---------------------------------------------------------------------------
# Regression check
# ---------------------------------------------------------------------------
def check_regressions(current: dict, baseline: dict) -> bool:
"""
Compare current results to baseline. Print warnings for regressions.
Returns True if any regression found.
"""
any_regression = False
for bench, runtimes in baseline.items():
cur_bench = current.get(bench, {})
for label, base_data in runtimes.items():
cur_data = cur_bench.get(label)
if cur_data is None:
print(f" MISSING {bench}/{label} — not present in current run")
any_regression = True
continue
base_med = base_data["median"]
cur_med = cur_data["median"]
if base_med == 0:
continue
pct = (cur_med - base_med) / base_med * 100
if pct > REGRESSION_THRESHOLD_PCT:
print(
f" REGRESSION {bench}/{label}: "
f"{base_med}{cur_med} µs ({pct:+.1f}%)"
)
any_regression = True
elif pct < -REGRESSION_THRESHOLD_PCT:
print(
f" IMPROVEMENT {bench}/{label}: "
f"{base_med}{cur_med} µs ({pct:+.1f}%)"
)
return any_regression
# ---------------------------------------------------------------------------
# Pretty print
# ---------------------------------------------------------------------------
def print_results(results: dict, label: str = "") -> None:
    """
    Print one table per bench, benches in sorted name order.

    results maps bench name to {runtime_label: stats}, where stats carries
    "result", "median", "min" and "max".  A non-empty label is printed first
    as a banner between two rules of '=' characters.
    """
    if label:
        rule = "=" * 70
        print(f"\n{rule}")
        print(f" {label}")
        print(rule)

    # The column header and divider are the same for every bench.
    column_header = (
        f" {'runtime':>28} | {'result':>10} | {'median µs':>10} | {'min':>8} | {'max':>8}"
    )
    divider = f" {'-'*75}"

    for bench in sorted(results):
        print(f"\n [{bench}]")
        print(column_header)
        print(divider)
        for rt_label, stats in results[bench].items():
            print(
                f" {rt_label:>28} | {stats['result']:>10} | "
                f"{stats['median']:>10} | {stats['min']:>8} | {stats['max']:>8}"
            )
def print_sweep_table(sweep_results: list[tuple[int, int, dict]]) -> None:
    """
    Print a compact table of medians: one row per bench/runtime, one column
    per sweep point.

    sweep_results is a list of (alloc_interval, timeslice_cycles, results)
    tuples, where results has the shape returned by run_benches().  Rows
    appear in first-seen order; a bench/runtime that is absent from a sweep
    point gets a blank cell.

    Column widths are derived from the content, so header tags such as
    "ai=128/tc=1200k" stay aligned with the values below them and the rule
    spans exactly the table width.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, without
    # re-scanning a list for every key.
    keys = list(dict.fromkeys(
        (bench, label)
        for _, _, results in sweep_results
        for bench, runtimes in results.items()
        for label in runtimes
    ))

    tags = [f"ai={interval}/tc={cycles // 1000}k" for interval, cycles, _ in sweep_results]

    rows: list[tuple[str, list[str]]] = []
    for bench, label in keys:
        cells = []
        for _, _, results in sweep_results:
            median = results.get(bench, {}).get(label, {}).get("median")
            cells.append("" if median is None else str(median))
        rows.append((f" {bench}/{label}", cells))

    # Never narrower than the historical layout, but wide enough for the
    # longest name / tag / value actually present.
    name_w = max([45, *(len(name) for name, _ in rows)])
    col_w = max([12, *map(len, tags), *(len(c) for _, cells in rows for c in cells)])

    def render(name: str, cells: list[str]) -> str:
        return f"{name:<{name_w}}" + "".join(f"  {cell:>{col_w}}" for cell in cells)

    print()
    print(render("bench/runtime", tags))
    print("-" * (name_w + (col_w + 2) * len(tags)))
    for name, cells in rows:
        print(render(name, cells))
# ---------------------------------------------------------------------------
# Subcommands
# ---------------------------------------------------------------------------
def _build_benches(note: str = "") -> None:
    """
    Build the bench binaries in release mode before any timing run.

    note is appended to the progress message.  On a failed build the tail of
    cargo's stderr is shown and the process exits; previously the captured
    compiler output was discarded and only a CalledProcessError traceback
    was visible.
    """
    print(f"Building release binaries{note}…")
    build = subprocess.run(
        ["cargo", "build", "--release", "--benches"],
        cwd=REPO, capture_output=True, text=True,
    )
    if build.returncode != 0:
        sys.exit(
            f"cargo build failed (exit code {build.returncode}):\n"
            f"{build.stderr[-2000:]}"
        )


def cmd_run(args) -> None:
    """`run`: run the benches once, print them, optionally save the baseline."""
    _build_benches()
    print("Running benches…")
    results = run_benches()
    print_results(results, "Results (default knobs)")
    if args.save_baseline:
        if not results:
            # run_benches() skips failing targets; never replace a good
            # baseline with an empty one.
            sys.exit("No bench results were parsed; baseline left untouched.")
        save_baseline(results)


def cmd_regress(args) -> None:
    """`regress`: compare a fresh run to the baseline; exit 1 on regression."""
    # Load first so a missing baseline fails before the build and bench run.
    baseline = load_baseline()
    _build_benches()
    print("Running benches…")
    current = run_benches()
    print_results(current, "Current results")
    print(f"\nRegression check (threshold: >{REGRESSION_THRESHOLD_PCT}% slower than baseline)")
    print("-" * 60)
    found = check_regressions(current, baseline)
    if not found:
        print(" No regressions detected.")
    sys.exit(1 if found else 0)


def cmd_sweep(args) -> None:
    """`sweep`: run the benches at every SWEEP_GRID point and tabulate medians.

    With --save-csv, every (sweep point, bench, runtime) row is also written
    to the given CSV file.
    """
    _build_benches(" (once)")
    # Benches are pre-built; env vars change runtime behaviour, no recompile needed.
    sweep_results: list[tuple[int, int, dict]] = []
    for interval, cycles in SWEEP_GRID:
        tag = f"alloc_interval={interval}, timeslice_cycles={cycles}"
        print(f" Running: {tag}", flush=True)
        env_extra = {
            "SMARM_ALLOC_INTERVAL": str(interval),
            "SMARM_TIMESLICE_CYCLES": str(cycles),
        }
        sweep_results.append((interval, cycles, run_benches(env_extra)))
    print_sweep_table(sweep_results)

    if args.save_csv:
        import csv

        rows = [
            {
                "alloc_interval": interval,
                "timeslice_cycles": cycles,
                "bench": bench,
                "runtime": label,
                **data,
            }
            for interval, cycles, results in sweep_results
            for bench, runtimes in results.items()
            for label, data in runtimes.items()
        ]
        if not rows:
            # rows[0] below would raise IndexError when every bench failed.
            print(f"\nNo results collected; CSV not written to {args.save_csv}", file=sys.stderr)
            return
        with open(args.save_csv, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=list(rows[0]))
            writer.writeheader()
            writer.writerows(rows)
        print(f"\nCSV saved to {args.save_csv}")
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
def main() -> None:
    """Parse the command line and dispatch to the selected subcommand handler."""
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    commands = cli.add_subparsers(dest="cmd", required=True)

    # run
    run_cmd = commands.add_parser("run", help="Run benches once with default knobs")
    run_cmd.add_argument(
        "--save-baseline",
        action="store_true",
        help="Save results as the regression baseline",
    )
    run_cmd.set_defaults(func=cmd_run)

    # regress
    regress_cmd = commands.add_parser("regress", help="Check current results against baseline")
    regress_cmd.set_defaults(func=cmd_regress)

    # sweep
    sweep_cmd = commands.add_parser("sweep", help="Sweep preemption knobs and compare")
    sweep_cmd.add_argument(
        "--save-csv",
        metavar="FILE",
        help="Write full sweep results to a CSV file",
    )
    sweep_cmd.set_defaults(func=cmd_sweep)

    # Each subparser stored its handler under `func`; call it with the namespace.
    namespace = cli.parse_args()
    namespace.func(namespace)


if __name__ == "__main__":
    main()