feat: I/O and mutex support (v0.3)

Add epoll-based non-blocking I/O and kernel-like mutexes:
- src/io.rs: Complete epoll backend with timeout & error handling
- src/mutex.rs: Fair mutex with waiter queues & parking integration
- Enhanced scheduler to support synchronous I/O blocking
- Comprehensive test suites for I/O (epoll) and mutex behavior
- Documentation: LOOM.md concurrency model & README
This commit is contained in:
Claude
2026-05-23 16:09:29 +00:00
parent d3ab81b833
commit 8cbef1dfc1
11 changed files with 2032 additions and 146 deletions

318
src/mutex.rs Normal file
View File

@@ -0,0 +1,318 @@
//! Actor-aware mutex with mandatory timeout.
//!
//! `loom::Mutex<T>` looks like `std::sync::Mutex<T>` but its `lock()` parks
//! the calling *green* thread on contention rather than blocking the OS
//! thread — and every lock attempt is bounded by a timeout. If the lock is
//! not acquired within the timeout, `lock()` returns `Err(LockTimeout)`.
//! This is a hard runtime guarantee (the spec calls it out): no actor can
//! be parked on a mutex forever.
//!
//! ```ignore
//! let m = loom::Mutex::new(42);
//! let guard = m.lock()?; // default timeout
//! let guard = m.lock_timeout(Duration::from_millis(50))?;
//! ```
//!
//! Fairness
//! ========
//! Waiters are granted the lock in FIFO order. The spec prizes fairness:
//! starvation under contention is precisely the kind of failure mode
//! supervision can't recover from cleanly. LIFO would be faster on cache
//! locality and is not offered.
//!
//! Poisoning
//! =========
//! Unlike `std::sync::Mutex`, `loom::Mutex` does not poison on panic. If a
//! holder panics while holding the lock, the next waiter receives the
//! value exactly as the panicking holder left it (no poison flag is
//! set). Rationale: supervision handles the panic at the
//! actor level; a separate poisoning channel is redundant and adds an
//! error case to every `lock()`. Users who care about "the value may be in
//! an inconsistent state after a panic" should encode that in `T` itself
//! (e.g. `Mutex<Option<State>>` and `take()` the value at the start of
//! each critical section).
//!
//! Reentrance
//! ==========
//! Not reentrant. An actor that already holds the lock and calls `lock()`
//! again on the same mutex will wait on its own grant — and time out. This
//! is a bug in the caller, not a feature.
//!
//! Multi-threading note
//! ====================
//! The current implementation uses `Rc<RefCell<…>>` internals because the
//! v0.2 scheduler is single-threaded. The public API is identical to what
//! the eventual multi-threaded version will expose; the migration replaces
//! the `Rc<RefCell>` with `Arc<sync::Mutex>` around bookkeeping (waiters
//! queue, holder pid) — the *value* itself never goes through a blocking
//! OS-level lock, because contention always parks the green thread first.
//! No `unsafe impl Send` games today: `loom::Mutex<T>` is `!Send` on v0.2,
//! which is correct given there is only one OS thread.
use crate::pid::Pid;
use crate::scheduler;
use crate::timer::{self, TimerTarget};
use std::cell::{Cell, RefCell};
use std::collections::VecDeque;
use std::rc::Rc;
use std::time::Duration;
/// Timeout that `Mutex::new` installs as the default for `Mutex::lock()`:
/// 30 seconds. Override it per call with `Mutex::lock_timeout`, or per mutex
/// with `Mutex::set_default_timeout`. A supervisor-level policy hook is
/// still TODO.
pub const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
/// Error returned by `Mutex::lock` / `Mutex::lock_timeout` when the lock was
/// not acquired before the timeout expired.
///
/// Carries no data: it is a unit struct that is cheap to copy and compare.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct LockTimeout;

impl std::fmt::Display for LockTimeout {
    /// Writes the fixed, human-readable message for a timed-out lock attempt.
    /// Formatter flags such as width or fill are not applied.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str("mutex lock timed out")
    }
}

/// `LockTimeout` has no underlying cause, so the default `Error` methods
/// are sufficient.
impl std::error::Error for LockTimeout {}
// ---------------------------------------------------------------------------
// Internals
// ---------------------------------------------------------------------------
/// A pending lock attempt. `Mutex::lock_timeout` pushes one onto
/// `MutexCore::state.waiters` just before the actor parks; it stays there
/// until the actor is either granted the lock (popped by
/// `MutexGuard::drop`) or times out (removed by `on_timeout`).
struct Wait {
/// The waiting actor. `MutexGuard::drop` writes this pid into `holder`
/// and unparks it when granting the lock.
pid: Pid,
/// Per-mutex monotonic sequence. Lets `on_timeout` recognise "this
/// specific wait" vs. "a later wait by the same pid on the same
/// mutex" — important because a single actor can re-acquire and then
/// re-wait, and we don't want a stale timer firing to disturb the new
/// wait. `on_timeout` looks the entry up by this value alone.
seq: u64,
}
/// The non-generic part of the mutex. Lives inside `Rc<>` so it can also
/// be stashed (as `Rc<dyn TimerTarget>`) inside a timer entry.
struct MutexCore {
/// Holder / waiter-queue bookkeeping. Wrapped in a `RefCell` because it is
/// mutated through shared `Rc` handles.
state: RefCell<MutexState>,
/// Timeout that `Mutex::lock()` passes to `lock_timeout`; replaced by
/// `Mutex::set_default_timeout`.
default_timeout: Cell<Duration>,
}
/// Mutable bookkeeping for one mutex: who holds it and who is queued.
struct MutexState {
/// Pid of the actor that holds the lock, or that has just been granted it
/// by `MutexGuard::drop`; `None` when the mutex is free.
holder: Option<Pid>,
/// Pending lock attempts. New waiters are pushed at the back and grants
/// pop from the front, which gives FIFO ordering.
waiters: VecDeque<Wait>,
/// Sequence number assigned to the next `Wait`; advanced with wrapping
/// arithmetic each time a waiter is enqueued.
next_seq: u64,
}
impl MutexCore {
fn new(default_timeout: Duration) -> Self {
Self {
state: RefCell::new(MutexState {
holder: None,
waiters: VecDeque::new(),
next_seq: 0,
}),
default_timeout: Cell::new(default_timeout),
}
}
}
impl TimerTarget for MutexCore {
    /// Timer callback for a wait that reached its deadline.
    ///
    /// Looks up the queued `Wait` whose sequence number equals `wait_seq`.
    /// If it is still queued, it is removed and `pid` is unparked so that
    /// `lock_timeout` can resume and report the timeout. If it is no longer
    /// queued, the lock was already granted to that wait before the timer
    /// fired and nothing is done.
    fn on_timeout(&self, pid: Pid, wait_seq: u64) {
        let was_queued = {
            let mut state = self.state.borrow_mut();
            let found = state.waiters.iter().position(|w| w.seq == wait_seq);
            match found {
                Some(idx) => {
                    state.waiters.remove(idx);
                    true
                }
                None => false,
            }
        };
        // The `RefCell` borrow above has ended before the scheduler is called.
        if !was_queued {
            return;
        }
        // The actor is parked waiting on this mutex; wake it so it can
        // observe that it was not made holder and return `LockTimeout`.
        scheduler::unpark(pid);
    }
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/// Handle to a mutex for green threads (actors). Lock attempts park the
/// calling actor on contention and are bounded by a timeout. Cloning the
/// handle yields another handle to the same lock state and the same value.
pub struct Mutex<T> {
/// Shared holder / waiter bookkeeping; also handed to the timer as an
/// `Rc<dyn TimerTarget>` for timeout callbacks.
core: Rc<MutexCore>,
/// `None` while the lock is held; `Some(T)` while free or while a
/// grantee is in the gap between unpark and resumption.
value: Rc<RefCell<Option<T>>>,
}
impl<T> Mutex<T> {
    /// Create a free mutex protecting `value`.
    ///
    /// The mutex starts with `DEFAULT_TIMEOUT` as the timeout used by
    /// `lock()`; change it with `set_default_timeout`.
    pub fn new(value: T) -> Self {
        Self {
            core: Rc::new(MutexCore::new(DEFAULT_TIMEOUT)),
            value: Rc::new(RefCell::new(Some(value))),
        }
    }

    /// Set the default timeout used by `lock()`. Does not affect in-flight
    /// `lock_timeout` calls.
    pub fn set_default_timeout(&self, timeout: Duration) {
        self.core.default_timeout.set(timeout);
    }

    /// Acquire the lock, blocking the calling actor until it's granted or
    /// the default timeout expires.
    ///
    /// # Errors
    /// Returns `LockTimeout` under the same conditions as `lock_timeout`.
    pub fn lock(&self) -> Result<MutexGuard<'_, T>, LockTimeout> {
        self.lock_timeout(self.core.default_timeout.get())
    }

    /// Acquire the lock with an explicit timeout.
    ///
    /// If the mutex is free the caller becomes the holder immediately.
    /// Otherwise the caller is appended to the FIFO waiter queue, a timer
    /// is registered for `timeout`, and the calling actor is parked until
    /// it is either granted the lock or its timer fires.
    ///
    /// # Errors
    /// Returns `LockTimeout` if the wait ended without the lock being
    /// granted. This includes the (buggy) re-entrant case where the caller
    /// already holds this mutex: the wait can never be granted and ends
    /// with `LockTimeout` once the timer fires.
    ///
    /// # Panics
    /// Panics if the bookkeeping says the mutex is free but the value slot
    /// is empty (a broken internal invariant).
    pub fn lock_timeout(&self, timeout: Duration) -> Result<MutexGuard<'_, T>, LockTimeout> {
        let me = scheduler::self_pid();

        // Fast path: nobody holds it. Mark ourselves as holder, take the
        // value out, return a guard.
        {
            let mut st = self.core.state.borrow_mut();
            if st.holder.is_none() {
                st.holder = Some(me);
                drop(st);
                let value = self
                    .value
                    .borrow_mut()
                    .take()
                    .expect("Mutex: value missing on free fast path");
                return Ok(MutexGuard {
                    mutex: self,
                    value: Some(value),
                });
            }
        }

        // Slow path: register as a waiter, schedule a timeout, park.
        // No preemption during prep-to-park — see scheduler::NoPreempt.
        // NOTE(review): the fast-path check above runs before NoPreempt is
        // entered; this presumes the actor cannot be preempted between that
        // check and the enqueue below — confirm against the scheduler.
        let _np = scheduler::NoPreempt::enter();
        let seq = {
            let mut st = self.core.state.borrow_mut();
            let seq = st.next_seq;
            st.next_seq = st.next_seq.wrapping_add(1);
            st.waiters.push_back(Wait { pid: me, seq });
            seq
        };
        let target: Rc<dyn TimerTarget> = self.core.clone();
        let deadline = timer::deadline_from_now(timeout);
        scheduler::insert_wait_timer(deadline, me, target, seq);
        scheduler::park_current();

        // Resumed. Two possibilities:
        //   (a) MutexGuard::drop on the previous holder popped us off the
        //       waiters queue, set core.holder = me, and unparked us.
        //       => self.value is Some, we take it and return Ok.
        //   (b) on_timeout fired: it removed us from waiters and unparked
        //       us, but did NOT set holder. => return Err.
        // NOTE(review): assumes nothing else unparks this actor while it is
        // still queued — confirm against the scheduler.
        let is_holder = self.core.state.borrow().holder == Some(me);
        if !is_holder {
            return Err(LockTimeout);
        }

        // `holder == me` alone does not prove a grant: a re-entrant caller
        // was already the holder before it started waiting, so after its
        // timer fires it still sees its own pid here. A real grant always
        // finds the value back in the slot (MutexGuard::drop restores it
        // before writing `holder`); an empty slot means our own outer guard
        // still owns the value, i.e. this wait timed out.
        match self.value.borrow_mut().take() {
            Some(value) => Ok(MutexGuard {
                mutex: self,
                value: Some(value),
            }),
            None => Err(LockTimeout),
        }
    }

    /// Non-blocking attempt. Returns `Some` if the lock was free, `None`
    /// otherwise. Useful as a fast path before a long-running computation,
    /// or for tests.
    ///
    /// # Panics
    /// Panics if the bookkeeping says the mutex is free but the value slot
    /// is empty (a broken internal invariant).
    pub fn try_lock(&self) -> Option<MutexGuard<'_, T>> {
        let mut st = self.core.state.borrow_mut();
        if st.holder.is_some() {
            return None;
        }
        let me = scheduler::self_pid();
        st.holder = Some(me);
        // Release the bookkeeping borrow before touching the value slot.
        drop(st);
        let value = self
            .value
            .borrow_mut()
            .take()
            .expect("Mutex: value missing on try_lock free path");
        Some(MutexGuard {
            mutex: self,
            value: Some(value),
        })
    }
}
impl<T> Clone for Mutex<T> {
/// Cloning a `Mutex<T>` clones the handle, not the protected value —
/// both clones refer to the same lock state and the same `T`.
fn clone(&self) -> Self {
Self {
core: self.core.clone(),
value: self.value.clone(),
}
}
}
// ---------------------------------------------------------------------------
// Guard
// ---------------------------------------------------------------------------
/// Scoped access to the value protected by a `Mutex`. Dereferences to `T`;
/// dropping the guard puts the value back into the mutex and hands the lock
/// to the next queued waiter, if any.
pub struct MutexGuard<'a, T> {
/// The mutex this guard was obtained from; used by `Drop` to return the
/// value and update the holder / waiter bookkeeping.
mutex: &'a Mutex<T>,
/// The protected value, taken out of `mutex.value` while the guard is
/// alive. `Option` only so `Drop` can put it back; in normal use this
/// is always `Some` while the guard is observable.
value: Option<T>,
}
impl<T> std::ops::Deref for MutexGuard<'_, T> {
    type Target = T;

    /// Shared access to the protected value carried by this guard.
    ///
    /// # Panics
    /// Panics if the guard's value slot is empty.
    fn deref(&self) -> &T {
        let slot: Option<&T> = self.value.as_ref();
        slot.expect("MutexGuard: value missing")
    }
}
impl<T> std::ops::DerefMut for MutexGuard<'_, T> {
    /// Exclusive access to the protected value carried by this guard.
    ///
    /// # Panics
    /// Panics if the guard's value slot is empty.
    fn deref_mut(&mut self) -> &mut T {
        let slot: Option<&mut T> = self.value.as_mut();
        slot.expect("MutexGuard: value missing")
    }
}
impl<T> Drop for MutexGuard<'_, T> {
    /// Release the lock.
    ///
    /// The order matters: the value is returned to the mutex first, then
    /// `holder` is rewritten, and only then is the next waiter woken — so
    /// the grantee finds both the value and its own pid in place when it
    /// resumes.
    fn drop(&mut self) {
        // Step 1: hand the value back to the shared slot.
        let restored = self.value.take().expect("MutexGuard: double drop");
        *self.mutex.value.borrow_mut() = Some(restored);

        // Step 2: the front of the FIFO queue (if any) becomes the holder;
        // with no waiters the mutex becomes free (`holder = None`).
        let successor = {
            let mut state = self.mutex.core.state.borrow_mut();
            let successor = state.waiters.pop_front().map(|wait| wait.pid);
            state.holder = successor;
            successor
        };

        // Step 3: wake the grantee, after the bookkeeping borrow has ended.
        if let Some(pid) = successor {
            scheduler::unpark(pid);
        }
    }
}