Hand-rolled context switching on mmap'd stacks with guard pages, allocator-driven RDTSC preemption, unbounded MPSC channels, supervision via per-slot Signal mailboxes, root supervisor as sentinel PID. Lib + tests + benches clean check/clippy. All 29 tests pass. Bench: smarm 3.4% over serial baseline, within 160us of tokio current-thread on prime-counting fan-out.
530 lines
18 KiB
Rust
530 lines
18 KiB
Rust
//! The single-threaded scheduler.
|
|
//!
|
|
//! There is one global scheduler per OS thread, stored in a thread-local.
|
|
//! `run(initial)` initialises it, spawns the initial actor, drives the loop
|
|
//! until the run queue is empty, then tears it down.
|
|
//!
|
|
//! Slot table: a `Vec<Slot>` indexed by `Pid::index()`, with a free list of
|
|
//! reusable indices. Each slot has a `generation` counter that increments
|
|
//! every time the slot is freed; `Pid` carries the generation it was minted
|
|
//! with, so a stale PID has a mismatching generation and is detected on
|
|
//! lookup.
|
|
//!
|
|
//! Run queue: a `VecDeque<Pid>` of runnable actors. The state of an actor
|
|
//! is implicit in slot.state: `Runnable` means it's either in the queue or
|
|
//! currently executing; `Parked` means it's waiting for something to unpark
|
|
//! it (channel send, join completion, …); `Done` means it has finished and
|
|
//! is awaiting reaping.
|
|
//!
|
|
//! Joining: `JoinHandle::join()` parks the calling actor and registers it
//! on the target slot's `waiters` list. When the target actor finishes,
//! the scheduler marks the slot `Done`, stores the outcome in the slot's
//! `outcome` field and unparks every waiter; the joiner drains the outcome
//! when it resumes. The slot itself is reclaimed only once the actor is
//! done and its `JoinHandle` has been joined or dropped.
|
|
|
|
use crate::actor::{
|
|
clear_current_pid, current_pid, is_actor_done, reset_actor_done,
|
|
set_current_actor_box, set_current_pid, take_last_outcome, trampoline, Actor, Outcome,
|
|
};
|
|
use crate::channel::Sender;
|
|
use crate::context::{get_actor_sp, init_actor_stack, set_actor_sp, switch_to_actor};
|
|
use crate::pid::Pid;
|
|
use crate::preempt::PREEMPTION_ENABLED;
|
|
use crate::stack::Stack;
|
|
use crate::supervisor::Signal;
|
|
use std::cell::RefCell;
|
|
use std::collections::VecDeque;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Configuration
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Size in bytes passed to `Stack::new` for every actor stack (64 KiB).
const ACTOR_STACK_SIZE: usize = 64 * 1024;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Per-actor slot
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Scheduling state of a slot's actor.
///
/// A vacant slot is also tagged `Done` (see `Slot::vacant` and
/// `reclaim_slot`); whether a slot is actually free is tracked by the
/// scheduler's free list, not by this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Either in the run queue or currently executing.
    Runnable,
    /// Removed from the queue, waiting for `unpark()`.
    Parked,
    /// The actor has finished. The slot persists until its `JoinHandle`
    /// has been joined (or dropped); then the slot is freed.
    Done,
}
|
|
|
|
struct Slot {
|
|
/// Bumped every time this slot is freed and re-used. A `Pid` with a
|
|
/// non-matching generation is stale.
|
|
generation: u32,
|
|
/// `None` when the slot is free. `Some` otherwise.
|
|
actor: Option<Actor>,
|
|
state: State,
|
|
/// PIDs waiting in `JoinHandle::join`.
|
|
waiters: Vec<Pid>,
|
|
/// The outcome the actor produced, captured when it finished.
|
|
/// Drained by `JoinHandle::join`.
|
|
outcome: Option<Outcome>,
|
|
/// If this slot is a supervisor, the sender into its `Signal` mailbox.
|
|
/// Cloned out and used when one of its children dies.
|
|
supervisor_channel: Option<Sender<Signal>>,
|
|
/// Number of `JoinHandle`s still outstanding for this actor. The slot
|
|
/// is reclaimed only when the actor is done AND outstanding_handles == 0.
|
|
outstanding_handles: u32,
|
|
}
|
|
|
|
impl Slot {
|
|
fn vacant() -> Self {
|
|
Self {
|
|
generation: 0,
|
|
actor: None,
|
|
state: State::Done,
|
|
waiters: Vec::new(),
|
|
outcome: None,
|
|
supervisor_channel: None,
|
|
outstanding_handles: 0,
|
|
}
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Scheduler state
|
|
// ---------------------------------------------------------------------------
|
|
|
|
struct SchedulerState {
|
|
slots: Vec<Slot>,
|
|
free_list: Vec<u32>,
|
|
run_queue: VecDeque<Pid>,
|
|
/// The root supervisor's PID. Children spawned at the top level are
|
|
/// supervised by this. Set by `run()`.
|
|
root_pid: Option<Pid>,
|
|
}
|
|
|
|
impl SchedulerState {
|
|
fn new() -> Self {
|
|
Self {
|
|
slots: Vec::new(),
|
|
free_list: Vec::new(),
|
|
run_queue: VecDeque::new(),
|
|
root_pid: None,
|
|
}
|
|
}
|
|
|
|
/// Allocate a slot; return its (index, generation).
|
|
fn allocate_slot(&mut self) -> (u32, u32) {
|
|
if let Some(idx) = self.free_list.pop() {
|
|
let s = &mut self.slots[idx as usize];
|
|
(idx, s.generation)
|
|
} else {
|
|
let idx = self.slots.len() as u32;
|
|
self.slots.push(Slot::vacant());
|
|
(idx, 0)
|
|
}
|
|
}
|
|
|
|
fn slot(&self, pid: Pid) -> Option<&Slot> {
|
|
let s = self.slots.get(pid.index() as usize)?;
|
|
if s.generation == pid.generation() { Some(s) } else { None }
|
|
}
|
|
|
|
fn slot_mut(&mut self, pid: Pid) -> Option<&mut Slot> {
|
|
let s = self.slots.get_mut(pid.index() as usize)?;
|
|
if s.generation == pid.generation() { Some(s) } else { None }
|
|
}
|
|
}
|
|
|
|
thread_local! {
|
|
static SCHED: RefCell<Option<SchedulerState>> = const { RefCell::new(None) };
|
|
}
|
|
|
|
fn with_sched<R>(f: impl FnOnce(&mut SchedulerState) -> R) -> R {
|
|
SCHED.with(|c| {
|
|
let mut g = c.borrow_mut();
|
|
let s = g.as_mut().expect("scheduler not running");
|
|
f(s)
|
|
})
|
|
}
|
|
|
|
/// Same as `with_sched` but returns `None` when there's no scheduler instead
|
|
/// of panicking. Used on cleanup paths (channel sender drop during shutdown,
|
|
/// for example).
|
|
fn try_with_sched<R>(f: impl FnOnce(&mut SchedulerState) -> R) -> Option<R> {
|
|
SCHED.with(|c| {
|
|
let mut g = c.borrow_mut();
|
|
g.as_mut().map(f)
|
|
})
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// JoinHandle
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Error returned by `JoinHandle::join` when the joined actor panicked.
#[derive(Debug)]
pub struct JoinError {
    /// Whatever `panic!` was called with.
    pub payload: Box<dyn std::any::Any + Send>,
}

impl std::fmt::Display for JoinError {
    /// Shows the panic message when the payload is a `&'static str` or a
    /// `String` (the two types `panic!` produces for message panics), and a
    /// generic description otherwise.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = self
            .payload
            .downcast_ref::<&'static str>()
            .copied()
            .or_else(|| self.payload.downcast_ref::<String>().map(String::as_str));
        match message {
            Some(text) => write!(f, "actor panicked: {}", text),
            None => f.write_str("actor panicked with a non-string payload"),
        }
    }
}

impl std::error::Error for JoinError {}
|
|
|
|
pub struct JoinHandle {
|
|
pid: Pid,
|
|
/// `false` once `join()` has been called and the handle has consumed
|
|
/// its outcome. Prevents the Drop impl from double-decrementing.
|
|
consumed: bool,
|
|
}
|
|
|
|
impl JoinHandle {
|
|
pub fn pid(&self) -> Pid { self.pid }
|
|
|
|
/// Block the calling actor until the target completes. Returns
|
|
/// `Ok(())` on normal exit, `Err(JoinError)` if the target panicked.
|
|
pub fn join(mut self) -> Result<(), JoinError> {
|
|
let me = current_pid().expect("join() called outside an actor");
|
|
|
|
loop {
|
|
let outcome = with_sched(|s| {
|
|
let slot = s.slot_mut(self.pid)
|
|
.expect("join: target slot has been reused");
|
|
if matches!(slot.state, State::Done) {
|
|
Some(slot.outcome.take().expect("Done slot must have an outcome"))
|
|
} else {
|
|
slot.waiters.push(me);
|
|
None
|
|
}
|
|
});
|
|
|
|
match outcome {
|
|
Some(o) => {
|
|
self.consumed = true;
|
|
self.decrement_handle_count();
|
|
return match o {
|
|
Outcome::Exit => Ok(()),
|
|
Outcome::Panic(p) => Err(JoinError { payload: p }),
|
|
};
|
|
}
|
|
None => park_current(),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn decrement_handle_count(&mut self) {
|
|
with_sched(|s| {
|
|
let should_reclaim = match s.slot_mut(self.pid) {
|
|
Some(slot) => {
|
|
slot.outstanding_handles = slot.outstanding_handles.saturating_sub(1);
|
|
matches!(slot.state, State::Done) && slot.outstanding_handles == 0
|
|
}
|
|
None => false,
|
|
};
|
|
if should_reclaim {
|
|
reclaim_slot(s, self.pid);
|
|
}
|
|
});
|
|
}
|
|
}
|
|
|
|
impl Drop for JoinHandle {
|
|
fn drop(&mut self) {
|
|
if !self.consumed {
|
|
self.decrement_handle_count();
|
|
}
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Slot reclamation
|
|
// ---------------------------------------------------------------------------
|
|
|
|
fn reclaim_slot(s: &mut SchedulerState, pid: Pid) {
|
|
let idx = pid.index();
|
|
let slot = &mut s.slots[idx as usize];
|
|
// Bump generation so any stale PIDs from now on miss.
|
|
slot.generation = slot.generation.wrapping_add(1);
|
|
// Drop the actor (its stack with it).
|
|
slot.actor = None;
|
|
slot.outcome = None;
|
|
slot.waiters.clear();
|
|
slot.supervisor_channel = None;
|
|
slot.state = State::Done; // semantically vacant; allocator checks free_list
|
|
slot.outstanding_handles = 0;
|
|
s.free_list.push(idx);
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// spawn / spawn_under / self_pid
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Spawn `f` as a child of the currently-executing actor.
|
|
/// Outside an actor (only legal from `run()`'s initial setup), the child's
|
|
/// supervisor is the root supervisor.
|
|
pub fn spawn(f: impl FnOnce() + Send + 'static) -> JoinHandle {
|
|
let parent = current_pid()
|
|
.or_else(|| with_sched(|s| s.root_pid))
|
|
.expect("spawn() before run()");
|
|
spawn_under(parent, f)
|
|
}
|
|
|
|
/// Spawn `f` with `supervisor` as its parent. The supervisor will receive
|
|
/// a `Signal` on its registered channel when the child terminates.
|
|
pub fn spawn_under(supervisor: Pid, f: impl FnOnce() + Send + 'static) -> JoinHandle {
|
|
let pid = with_sched(|s| {
|
|
let (idx, gen) = s.allocate_slot();
|
|
let pid = Pid::new(idx, gen);
|
|
let stack = Stack::new(ACTOR_STACK_SIZE)
|
|
.expect("stack allocation failed");
|
|
let sp = init_actor_stack(stack.top(), trampoline);
|
|
let slot = &mut s.slots[idx as usize];
|
|
slot.actor = Some(Actor { pid, stack, sp, supervisor });
|
|
slot.state = State::Runnable;
|
|
slot.outstanding_handles = 1;
|
|
slot.outcome = None;
|
|
slot.waiters.clear();
|
|
slot.supervisor_channel = None;
|
|
s.run_queue.push_back(pid);
|
|
pid
|
|
});
|
|
|
|
// Stash the closure where `schedule_loop` will find it before the first
|
|
// resume.
|
|
PENDING_CLOSURES.with(|c| {
|
|
c.borrow_mut().push((pid, Box::new(f) as Closure));
|
|
});
|
|
|
|
JoinHandle { pid, consumed: false }
|
|
}
|
|
|
|
/// Boxed actor body, as stored in `PENDING_CLOSURES` until the first resume.
type Closure = Box<dyn FnOnce() + Send>;
|
|
|
|
thread_local! {
|
|
/// Closures awaiting their first resume. Keyed by the PID the scheduler
|
|
/// allocated for them in `spawn_under`. The scheduler pops from here in
|
|
/// `pop_pending_closure` right before each first resume.
|
|
static PENDING_CLOSURES: RefCell<Vec<(Pid, Closure)>> = const { RefCell::new(Vec::new()) };
|
|
}
|
|
|
|
fn pop_pending_closure(pid: Pid) -> Option<Closure> {
|
|
PENDING_CLOSURES.with(|c| {
|
|
let mut v = c.borrow_mut();
|
|
v.iter().position(|(p, _)| *p == pid).map(|i| v.swap_remove(i).1)
|
|
})
|
|
}
|
|
|
|
pub fn self_pid() -> Pid {
|
|
current_pid().expect("self_pid() called outside an actor")
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// yield_now / park / unpark
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Cooperative yield. The current actor goes to the back of the run queue.
|
|
pub fn yield_now() {
|
|
// Mark ourselves as needing to be re-queued, then yield.
|
|
YIELD_INTENT.with(|c| c.set(YieldIntent::Yield));
|
|
unsafe { crate::context::switch_to_scheduler() };
|
|
}
|
|
|
|
/// Park the current actor (remove it from the run queue until `unpark`).
|
|
pub fn park_current() {
|
|
YIELD_INTENT.with(|c| c.set(YieldIntent::Park));
|
|
unsafe { crate::context::switch_to_scheduler() };
|
|
}
|
|
|
|
/// Wake a parked actor. If the actor isn't parked (already runnable or done)
|
|
/// this is a no-op — that's important; channel and join can both fire
|
|
/// spurious unparks under some orderings and we want them to be cheap.
|
|
/// Also a no-op if the scheduler isn't running (covers channel-sender drop
|
|
/// during runtime teardown).
|
|
pub fn unpark(pid: Pid) {
|
|
try_with_sched(|s| {
|
|
if let Some(slot) = s.slot_mut(pid) {
|
|
if matches!(slot.state, State::Parked) {
|
|
slot.state = State::Runnable;
|
|
s.run_queue.push_back(pid);
|
|
}
|
|
}
|
|
});
|
|
}
|
|
|
|
/// What an actor wants the scheduler to do when control returns from it.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum YieldIntent {
    /// Re-queue (yield_now or preemption).
    Yield,
    /// Remove from the run queue (waiting for unpark).
    Park,
}
|
|
|
|
thread_local! {
|
|
static YIELD_INTENT: std::cell::Cell<YieldIntent> = const { std::cell::Cell::new(YieldIntent::Yield) };
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Supervisor channel registration
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Register `sender` as the mailbox for signals about children supervised
|
|
/// by `pid`. Idempotent; later calls overwrite.
|
|
pub fn register_supervisor_channel(pid: Pid, sender: Sender<Signal>) {
|
|
with_sched(|s| {
|
|
if let Some(slot) = s.slot_mut(pid) {
|
|
slot.supervisor_channel = Some(sender);
|
|
} else {
|
|
panic!("register_supervisor_channel: pid {:?} not found", pid);
|
|
}
|
|
});
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// run() — the runtime entry point
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Boot the runtime, spawn `initial` as a child of the root supervisor,
|
|
/// drive the scheduler until the run queue is empty, tear down.
|
|
///
|
|
/// The root supervisor is a *sentinel* PID, not a real actor. Signals
|
|
/// addressed to it are dropped on the floor — that's what "process exits"
|
|
/// means in the spec when nothing escalates further. User code that wants
|
|
/// real supervision spawns its own supervisor actor and uses `spawn_under`.
|
|
pub fn run<F: FnOnce() + Send + 'static>(initial: F) {
|
|
SCHED.with(|c| {
|
|
assert!(c.borrow().is_none(), "smarm::run() called recursively");
|
|
let mut state = SchedulerState::new();
|
|
state.root_pid = Some(ROOT_PID);
|
|
*c.borrow_mut() = Some(state);
|
|
});
|
|
|
|
let initial_handle = spawn(initial);
|
|
|
|
schedule_loop();
|
|
|
|
// Drop the handle BEFORE the scheduler is torn down — its Drop impl
|
|
// calls `with_sched` to decrement the outstanding-handle count.
|
|
drop(initial_handle);
|
|
|
|
// Take the SchedulerState out of the thread-local BEFORE dropping it.
|
|
// Dropping it while still inside SCHED.with's RefCell borrow would
|
|
// re-enter (via channel senders' Drop → unpark → try_with_sched).
|
|
let state = SCHED.with(|c| c.borrow_mut().take());
|
|
drop(state);
|
|
PENDING_CLOSURES.with(|c| c.borrow_mut().clear());
|
|
}
|
|
|
|
/// Reserved sentinel pid for the root supervisor. Never allocated to a
|
|
/// real actor; lookups return `None`; signals are dropped.
|
|
pub const ROOT_PID: Pid = Pid::new(u32::MAX, u32::MAX);
|
|
|
|
fn schedule_loop() {
|
|
loop {
|
|
let pid = match with_sched(|s| s.run_queue.pop_front()) {
|
|
Some(p) => p,
|
|
None => return,
|
|
};
|
|
|
|
// Look up sp; skip stale or already-reaped pids.
|
|
let sp = match with_sched(|s| {
|
|
s.slot(pid).and_then(|slot| slot.actor.as_ref().map(|a| a.sp))
|
|
}) {
|
|
Some(sp) => sp,
|
|
None => continue,
|
|
};
|
|
|
|
// If this is a first resume, move the pending closure to the
|
|
// thread-local the trampoline reads.
|
|
if let Some(b) = pop_pending_closure(pid) {
|
|
set_current_actor_box(b);
|
|
}
|
|
|
|
set_actor_sp(sp);
|
|
set_current_pid(pid);
|
|
reset_actor_done();
|
|
YIELD_INTENT.with(|c| c.set(YieldIntent::Yield));
|
|
|
|
crate::preempt::reset_timeslice();
|
|
PREEMPTION_ENABLED.with(|c| c.set(true));
|
|
|
|
unsafe { switch_to_actor() };
|
|
|
|
PREEMPTION_ENABLED.with(|c| c.set(false));
|
|
clear_current_pid();
|
|
|
|
let intent = YIELD_INTENT.with(|c| c.get());
|
|
let new_sp = get_actor_sp();
|
|
|
|
if is_actor_done() {
|
|
let outcome = take_last_outcome().unwrap_or(Outcome::Exit);
|
|
finalize_actor(pid, outcome);
|
|
} else {
|
|
with_sched(|s| {
|
|
if let Some(slot) = s.slot_mut(pid) {
|
|
if let Some(actor) = slot.actor.as_mut() {
|
|
actor.sp = new_sp;
|
|
}
|
|
match intent {
|
|
YieldIntent::Yield => {
|
|
slot.state = State::Runnable;
|
|
s.run_queue.push_back(pid);
|
|
}
|
|
YieldIntent::Park => {
|
|
slot.state = State::Parked;
|
|
}
|
|
}
|
|
}
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
fn finalize_actor(pid: Pid, outcome: Outcome) {
|
|
// Joiners get the typed Result with the panic payload. The supervisor
|
|
// gets an informational `Signal::Panic` with an empty payload — its job
|
|
// is policy (restart/escalate), not forensics. Users who need the
|
|
// payload in supervision can plumb their own channel.
|
|
|
|
let (joiner_outcome, sup_signal) = match outcome {
|
|
Outcome::Exit => (Outcome::Exit, Signal::Exit(pid)),
|
|
Outcome::Panic(payload) => (
|
|
Outcome::Panic(payload),
|
|
Signal::Panic(pid, Box::new(()) as Box<dyn std::any::Any + Send>),
|
|
),
|
|
};
|
|
|
|
// Stash outcome, mark Done, collect waiters, drop the actor stack.
|
|
let (waiters, supervisor_pid) = with_sched(|s| {
|
|
let slot = s.slot_mut(pid).expect("finalize_actor: slot vanished");
|
|
let sup = slot.actor.as_ref().map(|a| a.supervisor);
|
|
slot.outcome = Some(joiner_outcome);
|
|
slot.state = State::Done;
|
|
slot.actor = None;
|
|
let w = std::mem::take(&mut slot.waiters);
|
|
(w, sup)
|
|
});
|
|
|
|
// Deliver to supervisor (best-effort; ignore SendError).
|
|
if let Some(sup) = supervisor_pid {
|
|
let sender = with_sched(|s| {
|
|
s.slot(sup).and_then(|slot| slot.supervisor_channel.clone())
|
|
});
|
|
if let Some(sender) = sender {
|
|
let _ = sender.send(sup_signal);
|
|
}
|
|
}
|
|
|
|
// Unpark joiners.
|
|
for joiner in waiters {
|
|
unpark(joiner);
|
|
}
|
|
|
|
// Reclaim if no outstanding handles.
|
|
with_sched(|s| {
|
|
let should_reclaim = match s.slot(pid) {
|
|
Some(slot) => slot.outstanding_handles == 0,
|
|
None => false,
|
|
};
|
|
if should_reclaim {
|
|
reclaim_slot(s, pid);
|
|
}
|
|
});
|
|
}
|