benches: expose preemption knobs + sweep runner
Config API changes (src/preempt.rs, src/runtime.rs):
- preempt: promote ALLOC_INTERVAL and TIMESLICE_CYCLES from bare consts to
DEFAULT_ALLOC_INTERVAL / DEFAULT_TIMESLICE_CYCLES; store active values in
thread-locals set on each actor resume so multiple runtimes can use
different settings concurrently.
- runtime: add alloc_interval / timeslice_cycles fields to Config; add
Config::alloc_interval(n) and Config::timeslice_cycles(c) builder methods;
thread the values through RuntimeInner to the reset_timeslice() call in
schedule_loop.
Bench changes:
- Add bench_cfg(threads) helper to general/tokio_favored/smarm_favored that
wraps Config::exact and reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES
env vars, so the sweep script can vary knobs without recompiling.
Sweep tooling (benches/sweep.py):
- 'run': run the 3-file bench suite once; --save-baseline persists JSON
- 'regress': compare current run against baseline.json, exit 1 on any bench
that regresses >10% vs stored medians
- 'sweep': run the full SWEEP_GRID (10 points), print comparison table,
optional --save-csv; binaries pre-built so no recompile per point
Sweep results (10-point grid, 1-CPU sandbox):
- The preemption knobs have very little effect on this single-CPU machine.
Most benches move <5% across the entire grid.
- Longer timeslices (tc=600k, tc=1200k) reliably hurt spawn_storm_busy
(+11-15%) and catch_unwind_panics (+10-12%) because actors hold the
scheduler mutex longer per timeslice, stalling the storm of joinable tasks.
- Shorter timeslices (tc=150k) give a small improvement on many_timers
(-3-4%) and a wash everywhere else.
- yield_in_hot_loop and uncontended_channel are essentially flat across all
knobs — both are scheduling-dominated and call yield_now explicitly, so
the RDTSC-driven preemption path is irrelevant.
- Conclusion: the knobs matter primarily under contention (multi-core).
Re-run sweep on a multi-core machine before drawing tuning conclusions.
This commit is contained in:
347
benches/sweep.py
Executable file
347
benches/sweep.py
Executable file
@@ -0,0 +1,347 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
smarm bench sweep + regression checker.
|
||||
|
||||
Usage:
|
||||
# Run a full knob sweep and print a comparison table:
|
||||
python3 benches/sweep.py sweep
|
||||
|
||||
# Check the current build against the committed baseline:
|
||||
python3 benches/sweep.py regress
|
||||
|
||||
# Run all benches once (default knobs) and print results:
|
||||
python3 benches/sweep.py run
|
||||
|
||||
The sweep grid is defined in SWEEP_GRID below.
|
||||
The regression baseline is loaded from benches/baseline.json.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Repository root: two directory levels above this file.
REPO = Path(__file__).resolve().parent.parent

# Bench files to run (primes + multi_scheduler omitted — legacy harness,
# not part of the 12-bench suite, and insensitive to the preemption knobs).
BENCHES = ["general", "tokio_favored", "smarm_favored"]

# Knob sweep grid: (alloc_interval, timeslice_cycles)
#   alloc_interval:   lower = check RDTSC more often = finer preemption
#   timeslice_cycles: lower = shorter timeslice, i.e. an actor's slice expires
#                     sooner (NOTE(review): the original comment called this
#                     "more cooperative" — confirm the intended wording)
# cmd_sweep exports each pair to the bench processes as the
# SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES environment variables.
SWEEP_GRID = [
    (32, 150_000),
    (64, 150_000),
    (128, 150_000),  # default interval, shorter slice
    (32, 300_000),
    (64, 300_000),
    (128, 300_000),  # <<< baseline (defaults)
    (256, 300_000),
    (512, 300_000),
    (128, 600_000),
    (128, 1_200_000),
]

# Regression threshold: a bench/runtime pair counts as regressed when its
# median is more than this % above the baseline median; the `regress`
# subcommand exits with status 1 if any pair regresses.
REGRESSION_THRESHOLD_PCT = 10
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Result rows: a free-form runtime label followed by four pipe-separated
# unsigned integers (result, median, min, max), e.g.
#   "    smarm 1-thread |  1000000 |  31473 |  28719 |  33113"
ROW_RE = re.compile(
    r"^\s*(?P<name>[^|]+?)\s*\|\s*(?P<result>\d+)\s*\|\s*(?P<median>\d+)\s*\|\s*(?P<min>\d+)\s*\|\s*(?P<max>\d+)\s*$"
)

# Section headers: exactly two leading whitespace characters, a snake_case
# bench name, then ':' or an em dash, e.g.
#   "  chained_spawn: depth 1000"
HEADER_RE = re.compile(r"^\s{2}(?P<bench>[a-z_]+)[:—]")

# Integer columns of a result row, in the order they are stored.
_ROW_FIELDS = ("result", "median", "min", "max")


def parse_output(text: str) -> dict[str, dict[str, dict]]:
    """
    Parse bench harness output into nested dictionaries.

    The text is scanned line by line.  A section header opens (or re-opens)
    the entry for that bench; every following result row is stored under the
    most recent header.  Rows seen before the first header, and lines that
    match neither pattern, are ignored.

    Returns {bench_name: {runtime_label: {result, median, min, max}}} with
    all four values converted to int.
    """
    parsed: dict[str, dict[str, dict]] = {}
    section = None

    for raw_line in text.splitlines():
        header = HEADER_RE.match(raw_line)
        if header is not None:
            section = header.group("bench")
            parsed.setdefault(section, {})
        elif section is not None:
            row = ROW_RE.match(raw_line)
            if row is not None:
                runtime = row.group("name").strip()
                parsed[section][runtime] = {
                    field: int(row.group(field)) for field in _ROW_FIELDS
                }

    return parsed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Running
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def run_benches(env_extra: dict[str, str] | None = None) -> dict[str, dict[str, dict]]:
    """Run every bench file in BENCHES via ``cargo bench`` and merge the results.

    Args:
        env_extra: Extra environment variables layered on top of a copy of the
            current process environment for each cargo invocation.

    Returns:
        ``{bench_name: {runtime_label: {...}}}`` combined across all bench
        files.  A bench file whose cargo invocation exits non-zero is reported
        on stderr and contributes nothing to the result.
    """
    env = os.environ.copy()
    if env_extra:
        env.update(env_extra)

    all_results: dict[str, dict[str, dict]] = {}

    for bench in BENCHES:
        proc = subprocess.run(
            ["cargo", "bench", "--bench", bench],
            cwd=REPO,
            env=env,
            capture_output=True,
            text=True,
        )
        if proc.returncode != 0:
            # Best effort: report the tail of stderr and keep going so one
            # broken bench file does not discard the others.
            print(f" ERROR running {bench}:\n{proc.stderr[-800:]}", file=sys.stderr)
            continue

        # Merge per section rather than replacing whole sections, so that a
        # section name emitted by two bench files keeps the rows from both.
        for section, runtimes in parse_output(proc.stdout).items():
            all_results.setdefault(section, {}).update(runtimes)

    return all_results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Baseline JSON
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Location of the stored regression baseline inside the repository.
BASELINE_PATH = REPO / "benches" / "baseline.json"


def load_baseline() -> dict:
    """Return the parsed contents of the baseline JSON file.

    If the file does not exist the process exits via ``sys.exit`` with a
    message explaining how to create a baseline.
    """
    if BASELINE_PATH.exists():
        return json.loads(BASELINE_PATH.read_text())

    sys.exit(
        f"No baseline found at {BASELINE_PATH}.\n"
        "Run: python3 benches/sweep.py run then save the output manually,\n"
        "or use --save-baseline with the run subcommand."
    )
|
||||
|
||||
|
||||
def save_baseline(results: dict) -> None:
    """Serialize *results* as indented JSON to BASELINE_PATH and report the path."""
    payload = json.dumps(results, indent=2)
    BASELINE_PATH.write_text(payload)
    print(f"Baseline saved to {BASELINE_PATH}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regression check
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def check_regressions(current: dict, baseline: dict) -> bool:
|
||||
"""
|
||||
Compare current results to baseline. Print warnings for regressions.
|
||||
Returns True if any regression found.
|
||||
"""
|
||||
any_regression = False
|
||||
|
||||
for bench, runtimes in baseline.items():
|
||||
cur_bench = current.get(bench, {})
|
||||
for label, base_data in runtimes.items():
|
||||
cur_data = cur_bench.get(label)
|
||||
if cur_data is None:
|
||||
print(f" MISSING {bench}/{label} — not present in current run")
|
||||
any_regression = True
|
||||
continue
|
||||
|
||||
base_med = base_data["median"]
|
||||
cur_med = cur_data["median"]
|
||||
if base_med == 0:
|
||||
continue
|
||||
|
||||
pct = (cur_med - base_med) / base_med * 100
|
||||
if pct > REGRESSION_THRESHOLD_PCT:
|
||||
print(
|
||||
f" REGRESSION {bench}/{label}: "
|
||||
f"{base_med} → {cur_med} µs ({pct:+.1f}%)"
|
||||
)
|
||||
any_regression = True
|
||||
elif pct < -REGRESSION_THRESHOLD_PCT:
|
||||
print(
|
||||
f" IMPROVEMENT {bench}/{label}: "
|
||||
f"{base_med} → {cur_med} µs ({pct:+.1f}%)"
|
||||
)
|
||||
|
||||
return any_regression
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pretty print
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def print_results(results: dict, label: str = "") -> None:
    """Print one table per bench, benches in sorted name order.

    When *label* is non-empty it is printed first as a banner framed by two
    70-character '=' rules.  Each bench table has a column header, a dashed
    divider, and one right-aligned row per runtime in stored order.
    """
    if label:
        banner = "=" * 70
        print("\n" + banner)
        print(f" {label}")
        print(banner)

    column_header = " " + " | ".join(
        (
            f"{'runtime':>28}",
            f"{'result':>10}",
            f"{'median µs':>10}",
            f"{'min':>8}",
            f"{'max':>8}",
        )
    )
    divider = " " + "-" * 75

    for bench in sorted(results):
        print(f"\n [{bench}]")
        print(column_header)
        print(divider)
        for rt_label, data in results[bench].items():
            cells = (
                f"{rt_label:>28}",
                f"{data['result']:>10}",
                f"{data['median']:>10}",
                f"{data['min']:>8}",
                f"{data['max']:>8}",
            )
            print(" " + " | ".join(cells))
|
||||
|
||||
|
||||
def print_sweep_table(sweep_results: list[tuple[int, int, dict]]) -> None:
    """Print a compact comparison across sweep points for each bench/runtime.

    Args:
        sweep_results: One ``(alloc_interval, timeslice_cycles, results)``
            tuple per sweep point, where ``results`` has the shape
            ``{bench: {runtime_label: {"median": int, ...}}}``.

    Output is a header row tagging each sweep point, a dashed rule, and one
    row per bench/runtime pair (in first-seen order) showing the median at
    each point, or "—" where that point has no value for the pair.  Column
    widths are derived from the content so header, rule and rows line up.
    """
    # A dict keeps insertion order, giving an ordered de-duplication without
    # the quadratic "not in list" scan.
    all_keys: dict[tuple[str, str], None] = {}
    for _, _, results in sweep_results:
        for bench, runtimes in results.items():
            for label in runtimes:
                all_keys.setdefault((bench, label))

    tags = [
        f"ai={interval}/tc={cycles//1000}k" for interval, cycles, _ in sweep_results
    ]

    rows: list[tuple[str, list[str]]] = []
    for bench, label in all_keys:
        cells = []
        for _, _, results in sweep_results:
            val = results.get(bench, {}).get(label, {}).get("median")
            cells.append(str(val) if val is not None else "—")
        rows.append((f"{bench}/{label}", cells))

    # Size every column to its widest entry; 12 and 45 are the minimum widths.
    col_w = max([12, *map(len, tags), *(len(c) for _, cells in rows for c in cells)])
    key_w = max([45, *(len(key_str) + 2 for key_str, _ in rows)])

    header = f"{'bench/runtime':<{key_w}}" + "".join(f"  {tag:>{col_w}}" for tag in tags)
    print("\n" + header)
    print("-" * (key_w + (col_w + 2) * len(sweep_results)))

    for key_str, cells in rows:
        line = f"  {key_str:<{key_w - 2}}" + "".join(f"  {cell:>{col_w}}" for cell in cells)
        print(line)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subcommands
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_run(args) -> None:
    """Handle the ``run`` subcommand: build, run all benches once, print results.

    Args:
        args: Parsed argparse namespace; ``args.save_baseline`` selects whether
            the results are also written as the regression baseline.

    Raises:
        subprocess.CalledProcessError: If the cargo build exits non-zero.
        SystemExit: If ``--save-baseline`` was given but no results were parsed.
    """
    print("Building release binaries…")
    subprocess.run(
        ["cargo", "build", "--release", "--benches"],
        cwd=REPO, check=True, capture_output=True,
    )
    print("Running benches…")
    results = run_benches()
    print_results(results, "Results (default knobs)")
    if args.save_baseline:
        if not results:
            # An empty baseline has nothing to compare against, so a later
            # `regress` run would report no regressions regardless of the build.
            sys.exit("No bench results were parsed; refusing to save an empty baseline.")
        save_baseline(results)
|
||||
|
||||
|
||||
def cmd_regress(args) -> None:
    """Handle the ``regress`` subcommand.

    Loads the stored baseline, builds and runs the benches, prints the
    current results, then compares them with the baseline.  Always ends the
    process: exit status 1 if check_regressions reports a problem, else 0.
    """
    baseline = load_baseline()

    print("Building release binaries…")
    build_cmd = ["cargo", "build", "--release", "--benches"]
    subprocess.run(build_cmd, cwd=REPO, check=True, capture_output=True)

    print("Running benches…")
    current = run_benches()
    print_results(current, "Current results")

    print(f"\nRegression check (threshold: >{REGRESSION_THRESHOLD_PCT}% slower than baseline)")
    print("-" * 60)
    if check_regressions(current, baseline):
        sys.exit(1)

    print(" No regressions detected.")
    sys.exit(0)
|
||||
|
||||
|
||||
def cmd_sweep(args) -> None:
    """Handle the ``sweep`` subcommand: run the benches at every SWEEP_GRID point.

    Each grid point is passed to the bench processes through the
    SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES environment variables, then
    a comparison table is printed.

    Args:
        args: Parsed argparse namespace; ``args.save_csv`` is an optional path
            to which one CSV row per (grid point, bench, runtime) is written.

    Raises:
        subprocess.CalledProcessError: If the cargo build exits non-zero.
    """
    print("Building release binaries (once)…")
    subprocess.run(
        ["cargo", "build", "--release", "--benches"],
        cwd=REPO, check=True, capture_output=True,
    )
    # Benches are pre-built; env vars change runtime behaviour, no recompile needed.
    sweep_results: list[tuple[int, int, dict]] = []

    for interval, cycles in SWEEP_GRID:
        tag = f"alloc_interval={interval}, timeslice_cycles={cycles}"
        print(f" Running: {tag} …", flush=True)
        env_extra = {
            "SMARM_ALLOC_INTERVAL": str(interval),
            "SMARM_TIMESLICE_CYCLES": str(cycles),
        }
        results = run_benches(env_extra)
        sweep_results.append((interval, cycles, results))

    print_sweep_table(sweep_results)

    if args.save_csv:
        import csv

        rows = [
            {
                "alloc_interval": interval,
                "timeslice_cycles": cycles,
                "bench": bench,
                "runtime": label,
                **data,
            }
            for interval, cycles, results in sweep_results
            for bench, runtimes in results.items()
            for label, data in runtimes.items()
        ]
        # Explicit column list: the header no longer depends on rows[0], so a
        # sweep that produced no results writes a header-only file instead of
        # raising IndexError.
        fieldnames = [
            "alloc_interval", "timeslice_cycles", "bench", "runtime",
            "result", "median", "min", "max",
        ]
        # Runtime labels come from bench output and may be non-ASCII; pin the
        # encoding rather than relying on the platform default.
        with open(args.save_csv, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)
        print(f"\nCSV saved to {args.save_csv}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main() -> None:
    """Parse the command line and dispatch to the selected subcommand handler.

    Three subcommands are registered (``run``, ``regress``, ``sweep``); each
    stores its handler in ``func``, which is then called with the parsed
    namespace.  A subcommand is required.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subcommands = parser.add_subparsers(dest="cmd", required=True)

    run_parser = subcommands.add_parser("run", help="Run benches once with default knobs")
    run_parser.add_argument(
        "--save-baseline",
        action="store_true",
        help="Save results as the regression baseline",
    )
    run_parser.set_defaults(func=cmd_run)

    regress_parser = subcommands.add_parser(
        "regress", help="Check current results against baseline"
    )
    regress_parser.set_defaults(func=cmd_regress)

    sweep_parser = subcommands.add_parser("sweep", help="Sweep preemption knobs and compare")
    sweep_parser.add_argument(
        "--save-csv",
        metavar="FILE",
        help="Write full sweep results to a CSV file",
    )
    sweep_parser.set_defaults(func=cmd_sweep)

    namespace = parser.parse_args()
    namespace.func(namespace)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user