Config API changes (src/preempt.rs, src/runtime.rs):
- preempt: promote ALLOC_INTERVAL and TIMESLICE_CYCLES from bare consts to
DEFAULT_ALLOC_INTERVAL / DEFAULT_TIMESLICE_CYCLES; store active values in
thread-locals set on each actor resume so multiple runtimes can use
different settings concurrently.
- runtime: add alloc_interval / timeslice_cycles fields to Config; add
Config::alloc_interval(n) and Config::timeslice_cycles(c) builder methods;
thread the values through RuntimeInner to the reset_timeslice() call in
schedule_loop.
Bench changes:
- Add bench_cfg(threads) helper to general/tokio_favored/smarm_favored that
wraps Config::exact and reads SMARM_ALLOC_INTERVAL / SMARM_TIMESLICE_CYCLES
env vars, so the sweep script can vary knobs without recompiling.
Sweep tooling (benches/sweep.py):
- 'run': run the 3-file bench suite once; --save-baseline persists JSON
- 'regress': compare current run against baseline.json, exit 1 on any bench
that regresses >10% vs stored medians
- 'sweep': run the full SWEEP_GRID (10 points), print comparison table,
optional --save-csv; binaries pre-built so no recompile per point
Sweep results (10-point grid, 1-CPU sandbox):
- The preemption knobs have very little effect on this single-CPU machine.
Most benches move <5% across the entire grid.
- Longer timeslices (tc=600k, tc=1200k) reliably hurt spawn_storm_busy
(+11-15%) and catch_unwind_panics (+10-12%) because actors hold the
scheduler mutex longer per timeslice, stalling the storm of joinable tasks.
- Shorter timeslices (tc=150k) give a small improvement on many_timers
(-3-4%) and a wash everywhere else.
- yield_in_hot_loop and uncontended_channel are essentially flat across all
knobs — both are scheduling-dominated and call yield_now explicitly, so
the RDTSC-driven preemption path is irrelevant.
- Conclusion: the knobs matter primarily under contention (multi-core).
Re-run sweep on a multi-core machine before drawing tuning conclusions.
File: benches/sweep.py (348 lines, 12 KiB, Python, executable)
#!/usr/bin/env python3
|
|
"""
|
|
smarm bench sweep + regression checker.
|
|
|
|
Usage:
|
|
# Run a full knob sweep and print a comparison table:
|
|
python3 benches/sweep.py sweep
|
|
|
|
# Check the current build against the committed baseline:
|
|
python3 benches/sweep.py regress
|
|
|
|
# Run all benches once (default knobs) and print results:
|
|
python3 benches/sweep.py run
|
|
|
|
The sweep grid is defined in SWEEP_GRID below.
|
|
The regression baseline is loaded from benches/baseline.json.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Configuration
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Repository root, taken as two directory levels above this script.
REPO = Path(__file__).resolve().parent.parent

# Bench targets to run.  primes and multi_scheduler are deliberately left
# out: they use the legacy harness, are not part of the 12-bench suite, and
# are insensitive to the preemption knobs.
BENCHES = ["general", "tokio_favored", "smarm_favored"]

# Grid of knob settings to sweep, as (alloc_interval, timeslice_cycles).
#   alloc_interval:   lower = check RDTSC more often = finer preemption
#   timeslice_cycles: lower = shorter timeslice = more cooperative
SWEEP_GRID = [
    (32, 150_000),
    (64, 150_000),
    (128, 150_000),  # default interval, shorter slice
    (32, 300_000),
    (64, 300_000),
    (128, 300_000),  # <<< baseline (defaults)
    (256, 300_000),
    (512, 300_000),
    (128, 600_000),
    (128, 1_200_000),
]

# A bench is reported as regressed when its median is more than this many
# percent worse than the baseline median.
REGRESSION_THRESHOLD_PCT = 10
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Parsing
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Result rows: a runtime label followed by four integer columns separated
# by "|", for example
#   "smarm 1-thread | 1000000 | 31473 | 28719 | 33113"
ROW_RE = re.compile(
    r"^\s*(?P<name>[^|]+?)\s*\|\s*(?P<result>\d+)\s*\|\s*(?P<median>\d+)\s*\|\s*(?P<min>\d+)\s*\|\s*(?P<max>\d+)\s*$"
)

# Section headers: exactly two leading whitespace characters, a snake_case
# bench name, then ":" or an em dash, for example
#   "  chained_spawn: depth 1000"
HEADER_RE = re.compile(r"^\s{2}(?P<bench>[a-z_]+)[:—]")


def parse_output(text: str) -> dict[str, dict[str, dict]]:
    """Extract bench results from the text printed by a bench binary.

    The text is scanned line by line.  A line matching HEADER_RE opens a
    section named after the bench; each following line matching ROW_RE is
    stored under that section, keyed by its stripped runtime label.  Rows
    seen before the first header, and lines matching neither pattern, are
    ignored.

    Returns:
        {bench_name: {runtime_label: {"result", "median", "min", "max"}}}
        with every numeric field converted to int.
    """
    numeric_fields = ("result", "median", "min", "max")
    parsed: dict[str, dict[str, dict]] = {}
    section = None

    for raw_line in text.splitlines():
        header = HEADER_RE.match(raw_line)
        if header is not None:
            section = header.group("bench")
            # A repeated header re-opens the existing section rather than
            # discarding rows already collected for it.
            if section not in parsed:
                parsed[section] = {}
        elif section is not None:
            row = ROW_RE.match(raw_line)
            if row is not None:
                runtime = row.group("name").strip()
                parsed[section][runtime] = {
                    field: int(row.group(field)) for field in numeric_fields
                }

    return parsed
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Running
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def run_benches(env_extra: dict[str, str] | None = None) -> dict[str, dict[str, dict]]:
    """Run every target in BENCHES via ``cargo bench`` and merge the results.

    Args:
        env_extra: Extra environment variables layered on top of a copy of
            the current environment for the child processes.

    Returns:
        {bench_name: {runtime_label: {...}}} combined across all targets.
        A target whose ``cargo bench`` exits non-zero is reported on stderr
        (last 800 characters of its stderr) and skipped, so the result may
        be partial.
    """
    env = os.environ.copy()
    if env_extra:
        env.update(env_extra)

    all_results: dict[str, dict[str, dict]] = {}

    for bench in BENCHES:
        proc = subprocess.run(
            ["cargo", "bench", "--bench", bench],
            cwd=REPO,
            env=env,
            capture_output=True,
            text=True,
        )
        if proc.returncode != 0:
            print(f" ERROR running {bench}:\n{proc.stderr[-800:]}", file=sys.stderr)
            continue

        # Merge per section: a plain dict.update() would replace a whole
        # section if two targets reported the same bench name, silently
        # dropping the runtime rows collected from the earlier target.
        for bench_name, runtimes in parse_output(proc.stdout).items():
            all_results.setdefault(bench_name, {}).update(runtimes)

    return all_results
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Baseline JSON
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# JSON file holding the regression baseline; read by load_baseline() and
# written by save_baseline().
BASELINE_PATH = REPO / "benches" / "baseline.json"
|
|
|
|
|
|
def load_baseline() -> dict:
    """Read and decode the JSON baseline stored at BASELINE_PATH.

    Exits the process with an explanatory message when the baseline file
    does not exist.
    """
    if BASELINE_PATH.exists():
        return json.loads(BASELINE_PATH.read_text())

    sys.exit(
        f"No baseline found at {BASELINE_PATH}.\n"
        "Run: python3 benches/sweep.py run then save the output manually,\n"
        "or use --save-baseline with the run subcommand."
    )
|
|
|
|
|
|
def save_baseline(results: dict) -> None:
    """Write *results* to BASELINE_PATH as 2-space-indented JSON and report the path."""
    payload = json.dumps(results, indent=2)
    BASELINE_PATH.write_text(payload)
    print(f"Baseline saved to {BASELINE_PATH}")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Regression check
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def check_regressions(current: dict, baseline: dict) -> bool:
|
|
"""
|
|
Compare current results to baseline. Print warnings for regressions.
|
|
Returns True if any regression found.
|
|
"""
|
|
any_regression = False
|
|
|
|
for bench, runtimes in baseline.items():
|
|
cur_bench = current.get(bench, {})
|
|
for label, base_data in runtimes.items():
|
|
cur_data = cur_bench.get(label)
|
|
if cur_data is None:
|
|
print(f" MISSING {bench}/{label} — not present in current run")
|
|
any_regression = True
|
|
continue
|
|
|
|
base_med = base_data["median"]
|
|
cur_med = cur_data["median"]
|
|
if base_med == 0:
|
|
continue
|
|
|
|
pct = (cur_med - base_med) / base_med * 100
|
|
if pct > REGRESSION_THRESHOLD_PCT:
|
|
print(
|
|
f" REGRESSION {bench}/{label}: "
|
|
f"{base_med} → {cur_med} µs ({pct:+.1f}%)"
|
|
)
|
|
any_regression = True
|
|
elif pct < -REGRESSION_THRESHOLD_PCT:
|
|
print(
|
|
f" IMPROVEMENT {bench}/{label}: "
|
|
f"{base_med} → {cur_med} µs ({pct:+.1f}%)"
|
|
)
|
|
|
|
return any_regression
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Pretty print
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def print_results(results: dict, label: str = "") -> None:
    """Print one table of runtimes per bench, with benches in sorted order.

    Args:
        results: {bench: {runtime_label: {"result", "median", "min", "max"}}}.
        label: Optional title; when non-empty it is printed first, framed
            by two rules of 70 "=" characters.
    """
    if label:
        rule = "=" * 70
        print(f"\n{rule}")
        print(f" {label}")
        print(rule)

    column_header = (
        f" {'runtime':>28} | {'result':>10} | {'median µs':>10} | {'min':>8} | {'max':>8}"
    )
    divider = f" {'-'*75}"

    for bench in sorted(results):
        print(f"\n [{bench}]")
        print(column_header)
        print(divider)
        # Runtimes keep the order in which they were recorded.
        for rt_label, data in results[bench].items():
            left = f" {rt_label:>28} | {data['result']:>10} | "
            right = f"{data['median']:>10} | {data['min']:>8} | {data['max']:>8}"
            print(left + right)
|
|
|
|
|
|
def print_sweep_table(sweep_results: list[tuple[int, int, dict]]) -> None:
    """Print a compact table of medians: one row per bench/runtime, one column per sweep point.

    Args:
        sweep_results: One (alloc_interval, timeslice_cycles, results) tuple
            per sweep point, where results is
            {bench: {runtime_label: {"median": ...}}}.

    Rows appear in first-seen order across the sweep points.  A pair with no
    median at some sweep point is shown as an em dash in that column.
    Column widths are derived from the content so that the header, the
    separator and every data row have the same length.
    """
    gutter = "  "

    # Ordered, de-duplicated (bench, label) pairs.  A dict keeps first-seen
    # order and gives constant-time membership, unlike a list scan.
    all_keys: dict[tuple[str, str], None] = {}
    for _, _, results in sweep_results:
        for bench, runtimes in results.items():
            for label in runtimes:
                all_keys.setdefault((bench, label), None)

    tags = [f"ai={interval}/tc={cycles//1000}k" for interval, cycles, _ in sweep_results]

    rows: list[tuple[str, list[str]]] = []
    for bench, label in all_keys:
        cells = []
        for _, _, results in sweep_results:
            val = results.get(bench, {}).get(label, {}).get("median")
            cells.append(str(val) if val is not None else "—")
        rows.append((f"{bench}/{label}", cells))

    # Tags such as "ai=128/tc=1200k" are wider than the 12-character
    # minimum, so size the columns from the widest tag or cell; otherwise
    # the header drifts out of line with the data.
    col_w = max([12, *(len(tag) for tag in tags),
                 *(len(cell) for _, cells in rows for cell in cells)])
    key_w = max([45, *(len(key_str) + len(gutter) for key_str, _ in rows)])

    header = f"{'bench/runtime':<{key_w}}" + "".join(
        f"{gutter}{tag:>{col_w}}" for tag in tags
    )
    print(f"\n{header}")
    print("-" * (key_w + (col_w + len(gutter)) * len(tags)))

    for key_str, cells in rows:
        line = f"{gutter}{key_str:<{key_w - len(gutter)}}" + "".join(
            f"{gutter}{cell:>{col_w}}" for cell in cells
        )
        print(line)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Subcommands
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def cmd_run(args) -> None:
    """Handle the ``run`` subcommand: build, run the suite once, print the results.

    Args:
        args: Parsed command-line namespace; ``args.save_baseline`` selects
            whether the results are also stored as the regression baseline.

    Exits with an error message if the cargo build fails, or if
    ``--save-baseline`` was requested but no bench produced any results.
    """
    print("Building release binaries…")
    try:
        subprocess.run(
            ["cargo", "build", "--release", "--benches"],
            cwd=REPO, check=True, capture_output=True, text=True,
        )
    except subprocess.CalledProcessError as err:
        # The build output is captured, so surface it here; otherwise the
        # compiler errors would be hidden behind a bare traceback.
        print(err.stderr, file=sys.stderr)
        sys.exit(f"cargo build failed with exit status {err.returncode}")

    print("Running benches…")
    results = run_benches()
    print_results(results, "Results (default knobs)")
    if args.save_baseline:
        if not results:
            # An empty baseline would make every later regression check
            # pass vacuously, so refuse to overwrite the stored one.
            sys.exit("No bench results were produced; baseline not saved.")
        save_baseline(results)
|
|
|
|
|
|
def cmd_regress(args) -> None:
    """Handle the ``regress`` subcommand: compare a fresh run against the baseline.

    Loads the stored baseline first (exiting early if there is none), builds
    and runs the benches, prints the current results, then runs the
    regression check.  The process exits with status 1 when a regression or
    a missing bench is found, and 0 otherwise.  A failed cargo build also
    terminates the process with an error message.

    Args:
        args: Parsed command-line namespace (no options are read from it).
    """
    baseline = load_baseline()

    print("Building release binaries…")
    try:
        subprocess.run(
            ["cargo", "build", "--release", "--benches"],
            cwd=REPO, check=True, capture_output=True, text=True,
        )
    except subprocess.CalledProcessError as err:
        # The build output is captured, so surface it here; otherwise the
        # compiler errors would be hidden behind a bare traceback.
        print(err.stderr, file=sys.stderr)
        sys.exit(f"cargo build failed with exit status {err.returncode}")

    print("Running benches…")
    current = run_benches()
    print_results(current, "Current results")

    print(f"\nRegression check (threshold: >{REGRESSION_THRESHOLD_PCT}% slower than baseline)")
    print("-" * 60)
    found = check_regressions(current, baseline)
    if not found:
        print(" No regressions detected.")
    sys.exit(1 if found else 0)
|
|
|
|
|
|
def cmd_sweep(args) -> None:
    """Handle the ``sweep`` subcommand: run the suite at every point of SWEEP_GRID.

    The benches are built once.  Each grid point is then selected through
    the SMARM_ALLOC_INTERVAL and SMARM_TIMESLICE_CYCLES environment
    variables, so no rebuild happens between points.  A comparison table is
    printed at the end, and the full results are optionally written as CSV.

    Args:
        args: Parsed command-line namespace; ``args.save_csv`` is the
            destination CSV path, or a falsy value to skip the export.
    """
    print("Building release binaries (once)…")
    try:
        subprocess.run(
            ["cargo", "build", "--release", "--benches"],
            cwd=REPO, check=True, capture_output=True, text=True,
        )
    except subprocess.CalledProcessError as err:
        # The build output is captured, so surface it here; otherwise the
        # compiler errors would be hidden behind a bare traceback.
        print(err.stderr, file=sys.stderr)
        sys.exit(f"cargo build failed with exit status {err.returncode}")

    # Benches are pre-built; env vars change runtime behaviour, no recompile needed.
    sweep_results: list[tuple[int, int, dict]] = []

    for interval, cycles in SWEEP_GRID:
        tag = f"alloc_interval={interval}, timeslice_cycles={cycles}"
        print(f" Running: {tag} …", flush=True)
        env_extra = {
            "SMARM_ALLOC_INTERVAL": str(interval),
            "SMARM_TIMESLICE_CYCLES": str(cycles),
        }
        results = run_benches(env_extra)
        sweep_results.append((interval, cycles, results))

    print_sweep_table(sweep_results)

    if args.save_csv:
        import csv

        rows = []
        for interval, cycles, results in sweep_results:
            for bench, runtimes in results.items():
                for label, data in runtimes.items():
                    rows.append({
                        "alloc_interval": interval,
                        "timeslice_cycles": cycles,
                        "bench": bench,
                        "runtime": label,
                        **data,
                    })

        # With no rows (every bench run failed or printed nothing parseable)
        # rows[0] would raise IndexError; fall back to the known columns so
        # a valid header-only CSV is still produced.
        fieldnames = list(rows[0]) if rows else [
            "alloc_interval", "timeslice_cycles", "bench", "runtime",
            "result", "median", "min", "max",
        ]
        # Runtime labels come from bench output and may be non-ASCII, so do
        # not depend on the platform's default encoding.
        with open(args.save_csv, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)
        print(f"\nCSV saved to {args.save_csv}")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Entry point
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def main() -> None:
    """Parse the command line and dispatch to the selected subcommand.

    Three subcommands are registered, and one of them is required: ``run``
    (with ``--save-baseline``), ``regress`` and ``sweep`` (with
    ``--save-csv FILE``).  Each stores its handler as ``func`` on the parsed
    namespace, which is then called with that namespace.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subcommands = parser.add_subparsers(dest="cmd", required=True)

    run_parser = subcommands.add_parser("run", help="Run benches once with default knobs")
    run_parser.add_argument(
        "--save-baseline",
        action="store_true",
        help="Save results as the regression baseline",
    )
    run_parser.set_defaults(func=cmd_run)

    regress_parser = subcommands.add_parser(
        "regress", help="Check current results against baseline"
    )
    regress_parser.set_defaults(func=cmd_regress)

    sweep_parser = subcommands.add_parser("sweep", help="Sweep preemption knobs and compare")
    sweep_parser.add_argument(
        "--save-csv",
        metavar="FILE",
        help="Write full sweep results to a CSV file",
    )
    sweep_parser.set_defaults(func=cmd_sweep)

    namespace = parser.parse_args()
    namespace.func(namespace)


if __name__ == "__main__":
    main()
|