#!/usr/bin/env python3
"""
smarm bench sweep + regression checker.

Usage:
    # Run a full knob sweep and print a comparison table:
    python3 benches/sweep.py sweep

    # Check the current build against the committed baseline:
    python3 benches/sweep.py regress

    # Run all benches once (default knobs) and print results:
    python3 benches/sweep.py run

The sweep grid is defined in SWEEP_GRID below.
The regression baseline is loaded from benches/baseline.json.
"""

import argparse
import json
import os
import re
import subprocess
import sys
from pathlib import Path

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Two directory levels above this script; used as the working directory for
# every cargo invocation and as the root for the baseline path.
REPO = Path(__file__).resolve().parent.parent

# Bench files to run (primes + multi_scheduler omitted — legacy harness,
# not part of the 12-bench suite, and insensitive to the preemption knobs).
BENCHES = ["general", "tokio_favored", "smarm_favored"]

# Knob sweep grid: (alloc_interval, timeslice_cycles)
#   alloc_interval: lower = check RDTSC more often = finer preemption
#   timeslice_cycles: lower = shorter timeslice = more cooperative
SWEEP_GRID = [
    (32, 150_000),
    (64, 150_000),
    (128, 150_000),  # default interval, shorter slice
    (32, 300_000),
    (64, 300_000),
    (128, 300_000),  # <<< baseline (defaults)
    (256, 300_000),
    (512, 300_000),
    (128, 600_000),
    (128, 1_200_000),
]

# Regression threshold: warn if median is more than this % worse than baseline.
REGRESSION_THRESHOLD_PCT = 10 # --------------------------------------------------------------------------- # Parsing # --------------------------------------------------------------------------- # Match lines like: # " smarm 1-thread | 1000000 | 31473 | 28719 | 33113" ROW_RE = re.compile( r"^\s*(?P[^|]+?)\s*\|\s*(?P\d+)\s*\|\s*(?P\d+)\s*\|\s*(?P\d+)\s*\|\s*(?P\d+)\s*$" ) # Match section headers like: # " chained_spawn: depth 1000" HEADER_RE = re.compile(r"^\s{2}(?P[a-z_]+)[:—]") def parse_output(text: str) -> dict[str, dict[str, dict]]: """ Returns {bench_name: {runtime_label: {median, min, max, result}}}. bench_name is the snake_case name extracted from the section header. """ results: dict[str, dict[str, dict]] = {} current_bench = None for line in text.splitlines(): hm = HEADER_RE.match(line) if hm: current_bench = hm.group("bench") results.setdefault(current_bench, {}) continue if current_bench is None: continue rm = ROW_RE.match(line) if rm: label = rm.group("name").strip() results[current_bench][label] = { "result": int(rm.group("result")), "median": int(rm.group("median")), "min": int(rm.group("min")), "max": int(rm.group("max")), } return results # --------------------------------------------------------------------------- # Running # --------------------------------------------------------------------------- def run_benches(env_extra: dict[str, str] | None = None) -> dict[str, dict[str, dict]]: """Run all BENCHES and return merged parsed results.""" env = os.environ.copy() if env_extra: env.update(env_extra) all_results: dict[str, dict[str, dict]] = {} for bench in BENCHES: cmd = ["cargo", "bench", "--bench", bench] proc = subprocess.run( cmd, cwd=REPO, env=env, capture_output=True, text=True, ) if proc.returncode != 0: print(f" ERROR running {bench}:\n{proc.stderr[-800:]}", file=sys.stderr) continue parsed = parse_output(proc.stdout) all_results.update(parsed) return all_results # 
# ---------------------------------------------------------------------------
# Baseline JSON
# ---------------------------------------------------------------------------

BASELINE_PATH = REPO / "benches" / "baseline.json"


def load_baseline() -> dict:
    """
    Load and return the regression baseline from BASELINE_PATH.

    Exits the process (via sys.exit with an explanatory message) when the
    baseline file does not exist.
    """
    if not BASELINE_PATH.exists():
        sys.exit(
            f"No baseline found at {BASELINE_PATH}.\n"
            "Run: python3 benches/sweep.py run then save the output manually,\n"
            "or use --save-baseline with the run subcommand."
        )
    return json.loads(BASELINE_PATH.read_text())


def save_baseline(results: dict) -> None:
    """Write ``results`` to BASELINE_PATH as indented JSON and report the path."""
    BASELINE_PATH.write_text(json.dumps(results, indent=2))
    print(f"Baseline saved to {BASELINE_PATH}")


# ---------------------------------------------------------------------------
# Regression check
# ---------------------------------------------------------------------------

def check_regressions(current: dict, baseline: dict) -> bool:
    """
    Compare current results to baseline. Print warnings for regressions.
    Returns True if any regression found.

    Only bench/runtime pairs present in the baseline are examined.  A pair
    that is missing from ``current`` counts as a regression.  A baseline
    median of 0 is skipped because no percentage can be computed from it.
    Improvements beyond the threshold are printed but do not affect the
    return value.
    """
    any_regression = False

    for bench, runtimes in baseline.items():
        cur_bench = current.get(bench, {})
        for label, base_data in runtimes.items():
            cur_data = cur_bench.get(label)
            if cur_data is None:
                print(f" MISSING {bench}/{label} — not present in current run")
                any_regression = True
                continue

            base_med = base_data["median"]
            cur_med = cur_data["median"]
            if base_med == 0:
                continue

            # Positive pct means the current median is larger than baseline.
            pct = (cur_med - base_med) / base_med * 100
            if pct > REGRESSION_THRESHOLD_PCT:
                print(
                    f" REGRESSION {bench}/{label}: "
                    f"{base_med} → {cur_med} µs ({pct:+.1f}%)"
                )
                any_regression = True
            elif pct < -REGRESSION_THRESHOLD_PCT:
                print(
                    f" IMPROVEMENT {bench}/{label}: "
                    f"{base_med} → {cur_med} µs ({pct:+.1f}%)"
                )

    return any_regression


# ---------------------------------------------------------------------------
# Pretty print
# ---------------------------------------------------------------------------

def print_results(results: dict, label: str = "") -> None:
    """
    Print one table per bench (benches sorted by name) to stdout.

    When ``label`` is non-empty it is printed first as a banner between two
    rules of '=' characters.
    """
    if label:
        print(f"\n{'='*70}")
        print(f" {label}")
        print(f"{'='*70}")

    for bench, runtimes in sorted(results.items()):
        print(f"\n [{bench}]")
        print(f" {'runtime':>28} | {'result':>10} | {'median µs':>10} | {'min':>8} | {'max':>8}")
        print(f" {'-'*75}")
        for rt_label, data in runtimes.items():
            print(
                f" {rt_label:>28} | {data['result']:>10} | "
                f"{data['median']:>10} | {data['min']:>8} | {data['max']:>8}"
            )


def print_sweep_table(sweep_results: list[tuple[int, int, dict]]) -> None:
    """
    Print a compact comparison across sweep points for each bench/runtime.

    Each entry of ``sweep_results`` is (alloc_interval, timeslice_cycles,
    results).  One column is printed per sweep point and one row per
    bench/runtime pair, in first-seen order; the cell is the median, or "—"
    when that pair has no result at that sweep point.
    """
    # Collect all bench/label pairs.  A dict gives first-seen ordering with
    # constant-time de-duplication.
    all_keys: dict[tuple[str, str], None] = {}
    for _, _, results in sweep_results:
        for bench, runtimes in results.items():
            for label in runtimes:
                all_keys.setdefault((bench, label), None)

    # Header
    col_w = 12
    print(f"\n{'bench/runtime':<45}", end="")
    for interval, cycles, _ in sweep_results:
        tag = f"ai={interval}/tc={cycles//1000}k"
        print(f" {tag:>{col_w}}", end="")
    print()
    print("-" * (45 + (col_w + 2) * len(sweep_results)))

    for bench, label in all_keys:
        key_str = f"{bench}/{label}"
        print(f" {key_str:<43}", end="")
        for _, _, results in sweep_results:
            val = results.get(bench, {}).get(label, {}).get("median")
            cell = str(val) if val is not None else "—"
            print(f" {cell:>{col_w}}", end="")
        print()


# ---------------------------------------------------------------------------
# Subcommands
# ---------------------------------------------------------------------------

def _build_benches(message: str) -> None:
    """
    Print ``message`` and build the release bench binaries with cargo.

    Cargo's output is captured; on a failed build the tail of its stderr is
    printed before the CalledProcessError is re-raised, so the compiler
    diagnostics are not lost.
    """
    print(message)
    try:
        subprocess.run(
            ["cargo", "build", "--release", "--benches"],
            cwd=REPO,
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as exc:
        print(f" ERROR building benches:\n{(exc.stderr or '')[-800:]}", file=sys.stderr)
        raise


def cmd_run(args) -> None:
    """`run` subcommand: build, run every bench once, print, optionally save baseline."""
    _build_benches("Building release binaries…")
    print("Running benches…")
    results = run_benches()
    print_results(results, "Results (default knobs)")
    if args.save_baseline:
        save_baseline(results)


def cmd_regress(args) -> None:
    """
    `regress` subcommand: compare a fresh run against the saved baseline.

    Exits with status 1 when check_regressions reports a regression and
    status 0 otherwise.  The baseline is loaded before building so a missing
    baseline fails fast.
    """
    baseline = load_baseline()
    _build_benches("Building release binaries…")
    print("Running benches…")
    current = run_benches()
    print_results(current, "Current results")

    print(f"\nRegression check (threshold: >{REGRESSION_THRESHOLD_PCT}% slower than baseline)")
    print("-" * 60)
    found = check_regressions(current, baseline)
    if not found:
        print(" No regressions detected.")
    sys.exit(1 if found else 0)


def cmd_sweep(args) -> None:
    """
    `sweep` subcommand: run the benches at every SWEEP_GRID point.

    The knobs are passed through the SMARM_ALLOC_INTERVAL and
    SMARM_TIMESLICE_CYCLES environment variables.  Prints the comparison
    table and, when --save-csv is given, writes one CSV row per
    (sweep point, bench, runtime).
    """
    _build_benches("Building release binaries (once)…")
    # Benches are pre-built; env vars change runtime behaviour, no recompile needed.

    sweep_results: list[tuple[int, int, dict]] = []
    for interval, cycles in SWEEP_GRID:
        tag = f"alloc_interval={interval}, timeslice_cycles={cycles}"
        print(f" Running: {tag} …", flush=True)
        env_extra = {
            "SMARM_ALLOC_INTERVAL": str(interval),
            "SMARM_TIMESLICE_CYCLES": str(cycles),
        }
        results = run_benches(env_extra)
        sweep_results.append((interval, cycles, results))

    print_sweep_table(sweep_results)

    if args.save_csv:
        import csv

        rows = [
            {
                "alloc_interval": interval,
                "timeslice_cycles": cycles,
                "bench": bench,
                "runtime": label,
                **data,
            }
            for interval, cycles, results in sweep_results
            for bench, runtimes in results.items()
            for label, data in runtimes.items()
        ]
        if not rows:
            # Every bench failed or printed nothing parseable; there is no
            # first row to take the column names from.
            print(f"\nNo results collected; CSV not written to {args.save_csv}", file=sys.stderr)
            return

        with open(args.save_csv, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=list(rows[0]))
            writer.writeheader()
            writer.writerows(rows)
        print(f"\nCSV saved to {args.save_csv}")


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

def main() -> None:
    """Parse the command line and dispatch to the selected subcommand."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    sub = parser.add_subparsers(dest="cmd", required=True)

    p_run = sub.add_parser("run", help="Run benches once with default knobs")
    p_run.add_argument(
        "--save-baseline",
        action="store_true",
        help="Save results as the regression baseline",
    )
    p_run.set_defaults(func=cmd_run)

    p_reg = sub.add_parser("regress", help="Check current results against baseline")
    p_reg.set_defaults(func=cmd_regress)

    p_sw = sub.add_parser("sweep", help="Sweep preemption knobs and compare")
    p_sw.add_argument(
        "--save-csv",
        metavar="FILE",
        help="Write full sweep results to a CSV file",
    )
    p_sw.set_defaults(func=cmd_sweep)

    args = parser.parse_args()
    args.func(args)


if __name__ == "__main__":
    main()