feat: I/O and mutex support (v0.3)

Add epoll-based non-blocking I/O and kernel-like mutexes:

- src/io.rs: Complete epoll backend with timeout & error handling
- src/mutex.rs: Fair mutex with waiter queues & parking integration
- Enhanced scheduler to support synchronous I/O blocking
- Comprehensive test suites for I/O (epoll) and mutex behavior
- Documentation: LOOM.md concurrency model & README
2026-05-23 16:09:29 +00:00
parent d3ab81b833
commit 8cbef1dfc1
11 changed files with 2032 additions and 146 deletions
@@ -0,0 +1,318 @@
+//! Actor-aware mutex with mandatory timeout.
+//!
+//! `loom::Mutex<T>` looks like `std::sync::Mutex<T>` but its `lock()` parks
+//! the calling *green* thread on contention rather than blocking the OS
+//! thread — and every lock attempt is bounded by a timeout. If the lock is
+//! not acquired within the timeout, `lock()` returns `Err(LockTimeout)`.
+//! This is a hard runtime guarantee (the spec calls it out): no actor can
+//! be parked on a mutex forever.
+//!
+//! ```ignore
+//! let m = loom::Mutex::new(42);
+//! let guard = m.lock()?;            // default timeout
+//! drop(guard);                      // release first: the mutex is not reentrant
+//! let guard = m.lock_timeout(Duration::from_millis(50))?;
+//! ```
+//!
+//! Fairness
+//! ========
+//! Waiters are granted the lock in FIFO order. The spec prizes fairness:
+//! starvation under contention is precisely the kind of failure mode
+//! supervision can't recover from cleanly. LIFO would be faster on cache
+//! locality and is not offered.
+//!
+//! Poisoning
+//! =========
+//! Unlike `std::sync::Mutex`, `loom::Mutex` does not poison on panic. If a
+//! holder panics while holding the lock, the guard's `Drop` still puts the
+//! value back and the next waiter receives it exactly as the panicking
+//! holder left it. Rationale: supervision handles the panic at the
+//! actor level; a separate poisoning channel is redundant and adds an
+//! error case to every `lock()`. Users who care about "the value may be in
+//! an inconsistent state after a panic" should encode that in `T` itself
+//! (e.g. `Mutex<Option<State>>` and `take()` the value at the start of
+//! each critical section).
+//!
+//! Reentrance
+//! ==========
+//! Not reentrant. An actor that already holds the lock and calls `lock()`
+//! again on the same mutex will wait on its own grant — and time out. This
+//! is a bug in the caller, not a feature.
+//!
+//! Multi-threading note
+//! ====================
+//! The current implementation uses `Rc<RefCell<…>>` internals because the
+//! v0.2 scheduler is single-threaded. The public API is identical to what
+//! the eventual multi-threaded version will expose; the migration replaces
+//! the `Rc<RefCell>` with `Arc<sync::Mutex>` around bookkeeping (waiters
+//! queue, holder pid) — the *value* itself never goes through a blocking
+//! OS-level lock, because contention always parks the green thread first.
+//! No `unsafe impl Send` games today: `loom::Mutex<T>` is `!Send` on v0.2,
+//! which is correct given there is only one OS thread.
+
+use crate::pid::Pid;
+use crate::scheduler;
+use crate::timer::{self, TimerTarget};
+use std::cell::{Cell, RefCell};
+use std::collections::VecDeque;
+use std::rc::Rc;
+use std::time::Duration;
+
+/// Timeout applied by `Mutex::lock()` unless changed: 30 seconds.
+///
+/// Override it for a single call with `lock_timeout`, or for one mutex
+/// with `Mutex::set_default_timeout`. A supervisor-level policy hook is
+/// still TODO.
+pub const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
+
+/// Error returned by `lock()` / `lock_timeout()` when the lock was not
+/// acquired before the timeout elapsed.
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+pub struct LockTimeout;
+
+impl std::fmt::Display for LockTimeout {
+    /// Writes the fixed, human-readable description of the error.
+    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        out.write_str("mutex lock timed out")
+    }
+}
+
+impl std::error::Error for LockTimeout {}
+
+// ---------------------------------------------------------------------------
+// Internals
+// ---------------------------------------------------------------------------
+
+/// One queued lock attempt. An entry is pushed onto
+/// `MutexCore::state.waiters` when an actor parks on the mutex and leaves
+/// the queue in one of two ways: `MutexGuard::drop` pops it to grant the
+/// lock, or `on_timeout` removes it when the wait's timer fires.
+struct Wait {
+    /// The parked actor; this is the pid written into `holder` and
+    /// unparked when the wait is granted.
+    pid: Pid,
+    /// Sequence number drawn from the mutex's `next_seq` counter. It
+    /// identifies *this* wait rather than "some wait by this pid": the
+    /// same actor may acquire, release and wait again on the same mutex,
+    /// and a timer left over from the earlier wait must not remove the
+    /// newer entry.
+    seq: u64,
+}
+
+/// Bookkeeping that does not depend on the protected type `T`. It is kept
+/// behind an `Rc` so the same allocation can also be handed to a timer
+/// entry as an `Rc<dyn TimerTarget>`.
+struct MutexCore {
+    /// Holder / waiter-queue state, mutated through `RefCell` borrows.
+    state: RefCell<MutexState>,
+    /// Timeout used by `Mutex::lock()`.
+    default_timeout: Cell<Duration>,
+}
+
+/// Mutable lock state guarded by `MutexCore::state`.
+struct MutexState {
+    /// Pid recorded as owning the lock, or `None` when the lock is free.
+    holder: Option<Pid>,
+    /// Pending waits, oldest at the front.
+    waiters: VecDeque<Wait>,
+    /// Sequence number that the next `Wait` will receive.
+    next_seq: u64,
+}
+
+impl MutexCore {
+    /// Build the bookkeeping for a free mutex: no holder, no waiters, and
+    /// the sequence counter starting at zero.
+    fn new(default_timeout: Duration) -> Self {
+        let initial = MutexState {
+            holder: None,
+            waiters: VecDeque::new(),
+            next_seq: 0,
+        };
+        Self {
+            state: RefCell::new(initial),
+            default_timeout: Cell::new(default_timeout),
+        }
+    }
+}
+
+impl TimerTarget for MutexCore {
+    /// Timer callback for the wait identified by `wait_seq`.
+    ///
+    /// If that wait is still queued it is removed and `pid` is unparked.
+    /// If it is no longer queued, the entry was already taken off the
+    /// queue (for example because the lock was granted before the timer
+    /// popped) and the callback does nothing.
+    fn on_timeout(&self, pid: Pid, wait_seq: u64) {
+        // Keep the `RefCell` borrow inside this block so it is released
+        // before the scheduler is asked to wake the actor.
+        let timed_out = {
+            let mut state = self.state.borrow_mut();
+            let slot = state.waiters.iter().position(|entry| entry.seq == wait_seq);
+            match slot {
+                Some(idx) => {
+                    state.waiters.remove(idx);
+                    true
+                }
+                None => false,
+            }
+        };
+        if !timed_out {
+            return;
+        }
+        // The actor is still parked on this mutex: wake it so that its
+        // `lock_timeout` call can resume.
+        scheduler::unpark(pid);
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/// Actor-aware mutex protecting a value of type `T`.
+///
+/// Acquisition never waits without a bound: `lock()` uses the mutex's
+/// default timeout and `lock_timeout()` an explicit one. The lock is
+/// released when the returned `MutexGuard` is dropped.
+pub struct Mutex<T> {
+    /// Lock bookkeeping (holder, FIFO waiters, default timeout). Also
+    /// handed to the scheduler's wait timer as an `Rc<dyn TimerTarget>`.
+    core: Rc<MutexCore>,
+    /// `None` while the lock is held; `Some(T)` while free or while a
+    /// grantee is in the gap between unpark and resumption.
+    value: Rc<RefCell<Option<T>>>,
+}
+
+impl<T> Mutex<T> {
+    /// Create a free mutex holding `value`, with `DEFAULT_TIMEOUT` as the
+    /// timeout used by `lock()`.
+    pub fn new(value: T) -> Self {
+        Self {
+            core: Rc::new(MutexCore::new(DEFAULT_TIMEOUT)),
+            value: Rc::new(RefCell::new(Some(value))),
+        }
+    }
+
+    /// Set the default timeout used by `lock()`. Does not affect in-flight
+    /// `lock_timeout` calls.
+    pub fn set_default_timeout(&self, timeout: Duration) {
+        self.core.default_timeout.set(timeout);
+    }
+
+    /// Acquire the lock, blocking the calling actor until it's granted or
+    /// the default timeout expires.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err(LockTimeout)` under the same conditions as
+    /// `lock_timeout`.
+    pub fn lock(&self) -> Result<MutexGuard<'_, T>, LockTimeout> {
+        self.lock_timeout(self.core.default_timeout.get())
+    }
+
+    /// Acquire the lock with an explicit timeout.
+    ///
+    /// If the lock is free it is taken immediately. Otherwise the caller
+    /// is appended to the FIFO waiter queue, a wait timer is registered
+    /// for `timeout`, and the calling actor is parked.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err(LockTimeout)` when the actor resumes without having
+    /// been granted the lock. This includes a reentrant attempt by the
+    /// actor that already holds the lock: nobody can grant that wait, so
+    /// it ends with the timer and is reported as a timeout.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the lock is recorded as free but the value slot is empty,
+    /// which would be an internal invariant violation.
+    pub fn lock_timeout(&self, timeout: Duration) -> Result<MutexGuard<'_, T>, LockTimeout> {
+        let me = scheduler::self_pid();
+
+        // Fast path: nobody holds it. Mark ourselves as holder, take the
+        // value out, return a guard.
+        {
+            let mut st = self.core.state.borrow_mut();
+            if st.holder.is_none() {
+                st.holder = Some(me);
+                drop(st);
+                let value = self
+                    .value
+                    .borrow_mut()
+                    .take()
+                    .expect("Mutex: value missing on free fast path");
+                return Ok(MutexGuard {
+                    mutex: self,
+                    value: Some(value),
+                });
+            }
+        }
+
+        // Slow path: register as a waiter, schedule a timeout, park.
+        // No preemption during prep-to-park — see scheduler::NoPreempt.
+        // NOTE(review): the fast-path check above runs before NoPreempt is
+        // entered. If the scheduler can preempt between the two, a release
+        // in that window would leave this waiter queued with no holder to
+        // grant it until the timer fires — confirm against the scheduler.
+        let _np = scheduler::NoPreempt::enter();
+        let seq = {
+            let mut st = self.core.state.borrow_mut();
+            let seq = st.next_seq;
+            st.next_seq = st.next_seq.wrapping_add(1);
+            st.waiters.push_back(Wait { pid: me, seq });
+            seq
+        };
+
+        let target: Rc<dyn TimerTarget> = self.core.clone();
+        let deadline = timer::deadline_from_now(timeout);
+        scheduler::insert_wait_timer(deadline, me, target, seq);
+        scheduler::park_current();
+
+        // Resumed. Possibilities:
+        //   (a) MutexGuard::drop on the previous holder put the value
+        //       back, popped us off the waiters queue, set holder = me and
+        //       unparked us. => holder == me AND the value is present.
+        //   (b) on_timeout fired: it removed us from waiters and unparked
+        //       us, but did NOT set holder. holder is whatever it was:
+        //       Some(other), None, or — for a reentrant attempt —
+        //       Some(me), in which case the value is still owned by our
+        //       earlier guard and the slot is empty.
+        // `holder == me` alone therefore does not prove a grant; the value
+        // must be present as well.
+        let is_holder = {
+            let mut st = self.core.state.borrow_mut();
+            // Defensive: make sure our entry is gone before we return, so
+            // that a later MutexGuard::drop cannot grant the lock to a
+            // wait nobody is parked on any more. After (a) or (b) this is
+            // a no-op. NOTE(review): whether park_current can return for
+            // any other reason is not visible from this file.
+            if let Some(pos) = st.waiters.iter().position(|w| w.seq == seq) {
+                st.waiters.remove(pos);
+            }
+            st.holder == Some(me)
+        };
+        let granted = if is_holder {
+            self.value.borrow_mut().take()
+        } else {
+            None
+        };
+        match granted {
+            Some(value) => Ok(MutexGuard {
+                mutex: self,
+                value: Some(value),
+            }),
+            None => Err(LockTimeout),
+        }
+    }
+
+    /// Non-blocking attempt. Returns `Some` if the lock was free, `None`
+    /// otherwise. Useful as a fast path before a long-running computation,
+    /// or for tests.
+    ///
+    /// The caller's pid is only looked up when the lock is actually free.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the lock is recorded as free but the value slot is empty,
+    /// which would be an internal invariant violation.
+    pub fn try_lock(&self) -> Option<MutexGuard<'_, T>> {
+        let mut st = self.core.state.borrow_mut();
+        if st.holder.is_some() {
+            return None;
+        }
+        let me = scheduler::self_pid();
+        st.holder = Some(me);
+        // Release the bookkeeping borrow before touching the value slot.
+        drop(st);
+        let value = self
+            .value
+            .borrow_mut()
+            .take()
+            .expect("Mutex: value missing on try_lock free path");
+        Some(MutexGuard {
+            mutex: self,
+            value: Some(value),
+        })
+    }
+}
+
+impl<T> Clone for Mutex<T> {
+    /// Produce another handle to the same mutex. Only the two `Rc`
+    /// pointers are cloned, so the new handle shares the lock state and
+    /// the protected `T` with `self`; the value itself is not copied.
+    fn clone(&self) -> Self {
+        let core = Rc::clone(&self.core);
+        let value = Rc::clone(&self.value);
+        Self { core, value }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Guard
+// ---------------------------------------------------------------------------
+
+/// Handle to a held `Mutex<T>`. It owns the protected value for as long
+/// as it lives and gives access to it through `Deref` / `DerefMut`.
+pub struct MutexGuard<'a, T> {
+    /// The mutex this guard was issued by.
+    mutex: &'a Mutex<T>,
+    /// The protected value, moved out of `mutex.value` for the lifetime of
+    /// the guard. It is wrapped in `Option` only so that `Drop` can move
+    /// it back; while the guard is observable this is always `Some`.
+    value: Option<T>,
+}
+
+impl<T> std::ops::Deref for MutexGuard<'_, T> {
+    type Target = T;
+
+    /// Shared access to the protected value.
+    fn deref(&self) -> &T {
+        let held = self.value.as_ref();
+        held.expect("MutexGuard: value missing")
+    }
+}
+
+impl<T> std::ops::DerefMut for MutexGuard<'_, T> {
+    /// Exclusive access to the protected value.
+    fn deref_mut(&mut self) -> &mut T {
+        let held = self.value.as_mut();
+        held.expect("MutexGuard: value missing")
+    }
+}
+
+impl<T> Drop for MutexGuard<'_, T> {
+    /// Release the lock: return the value to the mutex, then either hand
+    /// the lock to the oldest waiter or mark the mutex free.
+    fn drop(&mut self) {
+        // Step 1: move the value back into the mutex's slot.
+        let restored = self.value.take().expect("MutexGuard: double drop");
+        {
+            let mut slot = self.mutex.value.borrow_mut();
+            *slot = Some(restored);
+        }
+
+        // Step 2: choose the successor. The front waiter (if any) becomes
+        // the holder *before* it is unparked, so that on resumption it
+        // finds `holder == self_pid` and takes the value. With no waiters
+        // the holder is cleared instead.
+        let successor = {
+            let mut state = self.mutex.core.state.borrow_mut();
+            let successor = state.waiters.pop_front().map(|wait| wait.pid);
+            state.holder = successor;
+            successor
+        };
+
+        // Step 3: wake the grantee, with the state borrow already released.
+        if let Some(pid) = successor {
+            scheduler::unpark(pid);
+        }
+    }
+}