benches: expose preemption knobs + sweep runner

Config API changes (src/preempt.rs, src/runtime.rs):
- preempt: promote ALLOC_INTERVAL and TIMESLICE_CYCLES from bare consts to DEFAULT_ALLOC_INTERVAL / DEFAULT_TIMESLICE_CYCLES; store active values in thread-locals set on each actor resume so multiple runtimes can use different settings concurrently.
- runtime: add alloc_interval / timeslice_cycles fields to Config; add Config::alloc_interval(n) and Config::timeslice_cycles(c) builder methods; thread the values through RuntimeInner to the reset_timeslice() call in schedule_loop.

Bench changes:
- Add bench_cfg(threads) helper to general/tokio_favored/smarm_favored that wraps Config::exact and reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES env vars, so the sweep script can vary knobs without recompiling.

Sweep tooling (benches/sweep.py):
- 'run': run the 3-file bench suite once; --save-baseline persists JSON
- 'regress': compare current run against baseline.json, exit 1 on any bench that regresses >10% vs stored medians
- 'sweep': run the full SWEEP_GRID (10 points), print comparison table, optional --save-csv; binaries pre-built so no recompile per point

Sweep results (10-point grid, 1-CPU sandbox):
- The preemption knobs have very little effect on this single-CPU machine. Most benches move <5% across the entire grid.
- Longer timeslices (tc=600k, tc=1200k) reliably hurt spawn_storm_busy (+11-15%) and catch_unwind_panics (+10-12%) because actors hold the scheduler mutex longer per timeslice, stalling the storm of joinable tasks.
- Shorter timeslices (tc=150k) give a small improvement on many_timers (-3-4%) and a wash everywhere else.
- yield_in_hot_loop and uncontended_channel are essentially flat across all knobs — both are scheduling-dominated and call yield_now explicitly, so the RDTSC-driven preemption path is irrelevant.
- Conclusion: the knobs matter primarily under contention (multi-core). Re-run sweep on a multi-core machine before drawing tuning conclusions.
2026-05-25 13:04:58 +00:00
parent 6d1c59fb99
commit 3da6ffaa77
15 changed files with 2315 additions and 8 deletions
@@ -0,0 +1,347 @@
+#!/usr/bin/env python3
+"""
+smarm bench sweep + regression checker.
+
+Usage:
+    # Run a full knob sweep and print a comparison table:
+    python3 benches/sweep.py sweep
+
+    # Check the current build against the committed baseline:
+    python3 benches/sweep.py regress
+
+    # Run all benches once (default knobs) and print results:
+    python3 benches/sweep.py run
+
+The sweep grid is defined in SWEEP_GRID below.
+The regression baseline is loaded from benches/baseline.json.
+"""
+
+import argparse
+import json
+import os
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+
+# Repository root: this script lives one directory below it (benches/), so
+# the root is two levels up from the resolved script path.
+REPO = Path(__file__).resolve().parent.parent
+
+# Bench targets, each passed to `cargo bench --bench <name>` in turn.
+# primes + multi_scheduler are omitted — legacy harness, not part of the
+# 12-bench suite, and insensitive to the preemption knobs.
+BENCHES = ["general", "tokio_favored", "smarm_favored"]
+
+# Knob sweep grid: (alloc_interval, timeslice_cycles)
+# alloc_interval: lower = check RDTSC more often = finer preemption
+# timeslice_cycles: lower = shorter timeslice = more cooperative
+# Each pair is exported to the bench processes as SMARM_ALLOC_INTERVAL /
+# SMARM_TIMESLICE_CYCLES, so a single build serves every grid point.
+SWEEP_GRID = [
+    (32,  150_000),
+    (64,  150_000),
+    (128, 150_000),   # default interval, shorter slice
+    (32,  300_000),
+    (64,  300_000),
+    (128, 300_000),   # <<< baseline (defaults)
+    (256, 300_000),
+    (512, 300_000),
+    (128, 600_000),
+    (128, 1_200_000),
+]
+
+# Regression threshold, in percent of the baseline median. A current median
+# more than this much above the baseline is reported as a REGRESSION (and
+# makes the `regress` subcommand exit 1); a median more than this much below
+# the baseline is reported as an IMPROVEMENT.
+REGRESSION_THRESHOLD_PCT = 10
+
+# ---------------------------------------------------------------------------
+# Parsing
+# ---------------------------------------------------------------------------
+
+# Match result rows like:
+#   "          smarm 1-thread |      1000000 |      31473 |      28719 |      33113"
+# i.e. a free-form label followed by four pipe-separated unsigned integers,
+# captured as the groups name / result / median / min / max. Rows whose
+# numeric columns are not plain digits (floats, negatives, units) do not match.
+ROW_RE = re.compile(
+    r"^\s*(?P<name>[^|]+?)\s*\|\s*(?P<result>\d+)\s*\|\s*(?P<median>\d+)\s*\|\s*(?P<min>\d+)\s*\|\s*(?P<max>\d+)\s*$"
+)
+
+# Match section headers like:
+#   "  chained_spawn: depth 1000"
+# The line must start with exactly two whitespace characters, followed by a
+# name made only of lowercase letters and underscores, followed by ':' or an
+# em dash. A bench name containing digits or uppercase letters is therefore
+# not recognised as a header.
+HEADER_RE = re.compile(r"^\s{2}(?P<bench>[a-z_]+)[:—]")
+
+
+def parse_output(text: str) -> dict[str, dict[str, dict]]:
+    """
+    Parse the textual output of one bench binary.
+
+    Returns {bench_name: {runtime_label: {result, median, min, max}}}, where
+    bench_name is the snake_case name taken from the most recent section
+    header and the four values are ints. Rows that appear before the first
+    header, and lines matching neither pattern, are ignored.
+    """
+    parsed: dict[str, dict[str, dict]] = {}
+    section: dict[str, dict] | None = None
+
+    for raw_line in text.splitlines():
+        header = HEADER_RE.match(raw_line)
+        if header is not None:
+            # A repeated header re-opens the existing section instead of
+            # resetting it.
+            section = parsed.setdefault(header.group("bench"), {})
+            continue
+
+        if section is None:
+            continue
+
+        row = ROW_RE.match(raw_line)
+        if row is None:
+            continue
+
+        fields = row.groupdict()
+        section[fields["name"].strip()] = {
+            column: int(fields[column])
+            for column in ("result", "median", "min", "max")
+        }
+
+    return parsed
+
+
+# ---------------------------------------------------------------------------
+# Running
+# ---------------------------------------------------------------------------
+
+def run_benches(env_extra: dict[str, str] | None = None) -> dict[str, dict[str, dict]]:
+    """
+    Run every target in BENCHES via `cargo bench` and return the merged results.
+
+    env_extra, when given, is layered on top of the inherited environment for
+    the child processes only; the sweep uses it to set the knob variables.
+
+    A target whose cargo invocation exits non-zero is reported on stderr
+    (last 800 characters of its stderr) and skipped, so the returned mapping
+    may be partial or empty. The shape is the one produced by parse_output():
+    {bench_name: {runtime_label: {result, median, min, max}}}.
+    """
+    env = os.environ.copy()
+    if env_extra:
+        env.update(env_extra)
+
+    all_results: dict[str, dict[str, dict]] = {}
+
+    for bench in BENCHES:
+        cmd = ["cargo", "bench", "--bench", bench]
+        proc = subprocess.run(
+            cmd,
+            cwd=REPO,
+            env=env,
+            capture_output=True,
+            text=True,
+        )
+        if proc.returncode != 0:
+            print(f"  ERROR running {bench}:\n{proc.stderr[-800:]}", file=sys.stderr)
+            continue
+        # Merge section by section. A plain all_results.update(parsed) is a
+        # shallow merge: if two targets print a section with the same name,
+        # the runtimes recorded by the earlier target would be discarded.
+        for section, runtimes in parse_output(proc.stdout).items():
+            all_results.setdefault(section, {}).update(runtimes)
+
+    return all_results
+
+
+# ---------------------------------------------------------------------------
+# Baseline JSON
+# ---------------------------------------------------------------------------
+
+# Location of the regression baseline: read by load_baseline(), written by
+# save_baseline().
+BASELINE_PATH = REPO / "benches" / "baseline.json"
+
+
+def load_baseline() -> dict:
+    """
+    Read and decode the JSON baseline stored at BASELINE_PATH.
+
+    Exits the process (via sys.exit with an explanatory message) when the
+    baseline file does not exist.
+    """
+    if BASELINE_PATH.exists():
+        raw_json = BASELINE_PATH.read_text()
+        return json.loads(raw_json)
+
+    hint = (
+        f"No baseline found at {BASELINE_PATH}.\n"
+        "Run:  python3 benches/sweep.py run  then save the output manually,\n"
+        "or use --save-baseline with the run subcommand."
+    )
+    sys.exit(hint)
+
+
+def save_baseline(results: dict) -> None:
+    """Serialise results as indented JSON to BASELINE_PATH and announce it."""
+    encoded = json.dumps(results, indent=2)
+    BASELINE_PATH.write_text(encoded)
+    print(f"Baseline saved to {BASELINE_PATH}")
+
+
+# ---------------------------------------------------------------------------
+# Regression check
+# ---------------------------------------------------------------------------
+
+def check_regressions(current: dict, baseline: dict) -> bool:
+    """
+    Compare the medians in current against those in baseline and print a line
+    for every notable difference.
+
+    Only bench/runtime pairs present in baseline are examined. A pair absent
+    from current is printed as MISSING and counts as a regression. A pair
+    whose baseline median is 0 is skipped. Changes beyond
+    REGRESSION_THRESHOLD_PCT are printed as REGRESSION (slower) or
+    IMPROVEMENT (faster).
+
+    Returns True if at least one regression or missing entry was found.
+    """
+    regressed = False
+
+    for bench, base_runtimes in baseline.items():
+        cur_runtimes = current.get(bench, {})
+        for label, base_entry in base_runtimes.items():
+            cur_entry = cur_runtimes.get(label)
+            if cur_entry is None:
+                print(f"  MISSING  {bench}/{label} — not present in current run")
+                regressed = True
+                continue
+
+            before = base_entry["median"]
+            after = cur_entry["median"]
+            if before == 0:
+                # No meaningful percentage can be derived from a zero baseline.
+                continue
+
+            change_pct = (after - before) / before * 100
+            if change_pct > REGRESSION_THRESHOLD_PCT:
+                regressed = True
+                print(
+                    f"  REGRESSION  {bench}/{label}: "
+                    f"{before} → {after} µs  ({change_pct:+.1f}%)"
+                )
+                continue
+
+            if change_pct < -REGRESSION_THRESHOLD_PCT:
+                print(
+                    f"  IMPROVEMENT {bench}/{label}: "
+                    f"{before} → {after} µs  ({change_pct:+.1f}%)"
+                )
+
+    return regressed
+
+
+# ---------------------------------------------------------------------------
+# Pretty print
+# ---------------------------------------------------------------------------
+
+def print_results(results: dict, label: str = "") -> None:
+    """
+    Print one table per bench, benches in sorted name order.
+
+    results has the shape {bench: {runtime_label: {result, median, min, max}}}.
+    A non-empty label is printed first as a banner framed by '=' rules.
+    """
+    if label:
+        frame = "=" * 70
+        print(f"\n{frame}")
+        print(f"  {label}")
+        print(frame)
+
+    column_titles = (
+        f"  {'runtime':>28} | {'result':>10} | {'median µs':>10} | {'min':>8} | {'max':>8}"
+    )
+    underline = "  " + "-" * 75
+
+    for bench in sorted(results):
+        print(f"\n  [{bench}]")
+        print(column_titles)
+        print(underline)
+        for rt_label, stats in results[bench].items():
+            print(
+                f"  {rt_label:>28} | {stats['result']:>10} | "
+                f"{stats['median']:>10} | {stats['min']:>8} | {stats['max']:>8}"
+            )
+
+
+def print_sweep_table(sweep_results: list[tuple[int, int, dict]]) -> None:
+    """
+    Print a compact comparison of medians across sweep points.
+
+    sweep_results is a list of (alloc_interval, timeslice_cycles, results)
+    tuples, results having the shape returned by run_benches(). The table has
+    one row per bench/runtime pair (in first-seen order) and one column per
+    sweep point; a pair missing from a sweep point is shown as "—".
+
+    Column widths are derived from the content (never narrower than the
+    historical 43 / 12 characters) so header, rule and data cells stay aligned
+    even when a column tag such as "ai=128/tc=1200k" is longer than 12.
+    """
+    # Collect all bench/label pairs; a dict keeps first-seen order and makes
+    # the duplicate check O(1).
+    pairs: dict[tuple[str, str], None] = {}
+    for _, _, results in sweep_results:
+        for bench, runtimes in results.items():
+            for label in runtimes:
+                pairs.setdefault((bench, label))
+
+    tags = [f"ai={interval}/tc={cycles//1000}k" for interval, cycles, _ in sweep_results]
+    row_names = [f"{bench}/{label}" for bench, label in pairs]
+
+    col_w = max([12, *map(len, tags)])
+    key_w = max([43, *map(len, row_names)])
+
+    # Header: the first column is key_w + 2 wide to match the two-space indent
+    # that precedes every row name.
+    header = f"{'bench/runtime':<{key_w + 2}}" + "".join(f"  {tag:>{col_w}}" for tag in tags)
+    print(f"\n{header}")
+    print("-" * (key_w + 2 + (col_w + 2) * len(tags)))
+
+    for (bench, label), name in zip(pairs, row_names):
+        cells = []
+        for _, _, results in sweep_results:
+            val = results.get(bench, {}).get(label, {}).get("median")
+            cells.append(str(val) if val is not None else "—")
+        print(f"  {name:<{key_w}}" + "".join(f"  {cell:>{col_w}}" for cell in cells))
+
+
+# ---------------------------------------------------------------------------
+# Subcommands
+# ---------------------------------------------------------------------------
+
+def cmd_run(args) -> None:
+    """
+    `run` subcommand: build the benches, run them once without adding any
+    knob overrides, print the results and, when args.save_baseline is set,
+    store them as the regression baseline.
+
+    Exits with a message if the build fails, or if --save-baseline was
+    requested but no results were parsed.
+    """
+    print("Building release binaries…")
+    try:
+        subprocess.run(
+            ["cargo", "build", "--release", "--benches"],
+            cwd=REPO, check=True, capture_output=True, text=True,
+        )
+    except subprocess.CalledProcessError as err:
+        # The build output is captured, so it has to be echoed here; otherwise
+        # only the exit status is visible and the compiler diagnostics are lost.
+        sys.exit(f"cargo build failed (exit {err.returncode}):\n{(err.stderr or '')[-2000:]}")
+    print("Running benches…")
+    results = run_benches()
+    print_results(results, "Results (default knobs)")
+    if args.save_baseline:
+        if not results:
+            # run_benches() skips failing targets. An empty baseline would make
+            # every later regression check pass vacuously.
+            sys.exit("No bench results were parsed; baseline not saved.")
+        save_baseline(results)
+
+
+def cmd_regress(args) -> None:
+    """
+    `regress` subcommand: build and run the benches, print the results, and
+    compare them against the stored baseline.
+
+    The baseline is loaded first so a missing baseline fails before any
+    build or bench time is spent. Always terminates the process: exit status
+    1 if check_regressions() reports a regression, 0 otherwise.
+    """
+    baseline = load_baseline()
+    print("Building release binaries…")
+    try:
+        subprocess.run(
+            ["cargo", "build", "--release", "--benches"],
+            cwd=REPO, check=True, capture_output=True, text=True,
+        )
+    except subprocess.CalledProcessError as err:
+        # The build output is captured, so it has to be echoed here; otherwise
+        # only the exit status is visible and the compiler diagnostics are lost.
+        sys.exit(f"cargo build failed (exit {err.returncode}):\n{(err.stderr or '')[-2000:]}")
+    print("Running benches…")
+    current = run_benches()
+    print_results(current, "Current results")
+    print(f"\nRegression check (threshold: >{REGRESSION_THRESHOLD_PCT}% slower than baseline)")
+    print("-" * 60)
+    found = check_regressions(current, baseline)
+    if not found:
+        print("  No regressions detected.")
+    sys.exit(1 if found else 0)
+
+
+def cmd_sweep(args) -> None:
+    """
+    `sweep` subcommand: build once, run the benches at every point of
+    SWEEP_GRID, print the comparison table and, when args.save_csv is set,
+    write one CSV row per (grid point, bench, runtime).
+
+    Exits with a message if the build fails.
+    """
+    print("Building release binaries (once)…")
+    try:
+        subprocess.run(
+            ["cargo", "build", "--release", "--benches"],
+            cwd=REPO, check=True, capture_output=True, text=True,
+        )
+    except subprocess.CalledProcessError as err:
+        # The build output is captured, so it has to be echoed here; otherwise
+        # only the exit status is visible and the compiler diagnostics are lost.
+        sys.exit(f"cargo build failed (exit {err.returncode}):\n{(err.stderr or '')[-2000:]}")
+    # Benches are pre-built; env vars change runtime behaviour, no recompile needed.
+    sweep_results: list[tuple[int, int, dict]] = []
+
+    for interval, cycles in SWEEP_GRID:
+        tag = f"alloc_interval={interval}, timeslice_cycles={cycles}"
+        print(f"  Running: {tag} …", flush=True)
+        env_extra = {
+            "SMARM_ALLOC_INTERVAL":    str(interval),
+            "SMARM_TIMESLICE_CYCLES":  str(cycles),
+        }
+        results = run_benches(env_extra)
+        sweep_results.append((interval, cycles, results))
+
+    print_sweep_table(sweep_results)
+
+    if args.save_csv:
+        import csv
+
+        # Spelled out rather than taken from rows[0]: if every bench failed
+        # there are no rows, and rows[0] would raise IndexError. With fixed
+        # names the file still gets its header line.
+        fieldnames = [
+            "alloc_interval", "timeslice_cycles", "bench", "runtime",
+            "result", "median", "min", "max",
+        ]
+        rows = [
+            {
+                "alloc_interval": interval,
+                "timeslice_cycles": cycles,
+                "bench": bench,
+                "runtime": label,
+                **data,
+            }
+            for interval, cycles, results in sweep_results
+            for bench, runtimes in results.items()
+            for label, data in runtimes.items()
+        ]
+        with open(args.save_csv, "w", newline="", encoding="utf-8") as f:
+            writer = csv.DictWriter(f, fieldnames=fieldnames)
+            writer.writeheader()
+            writer.writerows(rows)
+        print(f"\nCSV saved to {args.save_csv}")
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+    """Parse the command line and dispatch to the selected subcommand handler."""
+    cli = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    commands = cli.add_subparsers(dest="cmd", required=True)
+
+    # run: a single pass, optionally recorded as the baseline.
+    run_parser = commands.add_parser("run", help="Run benches once with default knobs")
+    run_parser.set_defaults(func=cmd_run)
+    run_parser.add_argument(
+        "--save-baseline",
+        action="store_true",
+        help="Save results as the regression baseline",
+    )
+
+    # regress: compare against the stored baseline.
+    regress_parser = commands.add_parser("regress", help="Check current results against baseline")
+    regress_parser.set_defaults(func=cmd_regress)
+
+    # sweep: iterate over the knob grid.
+    sweep_parser = commands.add_parser("sweep", help="Sweep preemption knobs and compare")
+    sweep_parser.set_defaults(func=cmd_sweep)
+    sweep_parser.add_argument(
+        "--save-csv",
+        metavar="FILE",
+        help="Write full sweep results to a CSV file",
+    )
+
+    parsed = cli.parse_args()
+    parsed.func(parsed)
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()