//! Cooperative context switching, x86-64.
//!
//! Two naked-asm functions move execution between a scheduler thread and an
//! actor running on its own mmap'd stack. The compiler cannot do this; the
//! whole point of `#[unsafe(naked)]` is that we control every instruction.
//!
//! `SCHEDULER_SP` and `ACTOR_SP` are thread-locals holding each side's saved
//! stack pointer. `init_actor_stack` builds the initial stack so that the
//! first `switch_to_actor` lands inside the entry function with `rsp % 16 == 8`
//! (the x86-64 ABI requirement at function entry).

use std::cell::Cell;

thread_local! {
    /// Saved stack pointer of the scheduler side on this thread (0 until set).
    static SCHEDULER_SP: Cell<usize> = const { Cell::new(0) };
    /// Saved stack pointer of the actor side on this thread (0 until set).
    static ACTOR_SP: Cell<usize> = const { Cell::new(0) };
}

/// Returns the scheduler stack pointer last stored on the current thread.
fn get_scheduler_sp() -> usize {
    SCHEDULER_SP.with(Cell::get)
}

/// Records `v` as the scheduler stack pointer for the current thread.
fn set_scheduler_sp(v: usize) {
    SCHEDULER_SP.with(|slot| slot.set(v))
}

/// Returns the actor stack pointer last stored on the current thread.
pub fn get_actor_sp() -> usize {
    ACTOR_SP.with(Cell::get)
}

/// Records `v` as the actor stack pointer for the current thread.
pub fn set_actor_sp(v: usize) {
    ACTOR_SP.with(|slot| slot.set(v))
}

// ---------------------------------------------------------------------------
// Initial stack layout
//
// After alignment, sp = (top & ~15) - 8. Then we push (downward) a return
// address followed by six callee-saved register slots. The first
// `switch_to_actor` pops r15..rbx and `ret`s — landing in `entry` with
// rsp % 16 == 8.
//
// Layout (high → low). NOTE: `aligned_top` in this table names the
// post-alignment sp, aligned_top = (top & ~15) - 8, which is itself
// ≡ 8 (mod 16) — it is NOT the 16-byte-aligned address `top & ~15`:
//   aligned_top -  8 : entry ptr   ← `ret` target. Post-ret: rsp % 16 == 8.
//   aligned_top - 16 : rbx = 0
//   aligned_top - 24 : rbp = 0
//   aligned_top - 32 : r12 = 0
//   aligned_top - 40 : r13 = 0
//   aligned_top - 48 : r14 = 0
//   aligned_top - 56 : r15 = 0     ← initial rsp
//
// NOTE: the offsets in this table match the code only when measured from the
// starting `sp` of `init_actor_stack`, i.e. `(top & !15) - 8`. Measured from
// the 16-byte-aligned address `top & !15` itself, every slot sits 8 bytes
// lower (entry ptr at -16, r15 / initial rsp at -64).
// ---------------------------------------------------------------------------

/// Builds the initial frame of a fresh actor stack and returns the stack
/// pointer to store in `ACTOR_SP` before the first `switch_to_actor`.
///
/// With `base = top & !15` (i.e. `top` rounded down to 16 bytes), eight words
/// are written, from high to low address:
///
/// * `base -  8` — `0`: the slot where `entry` would look for its own return
///   address. It is a null end-of-stack marker; if `entry` ever returns it
///   `ret`s to address 0 and faults instead of jumping to leftover bytes.
/// * `base - 16` — `entry`: the target of the shim's final `ret`.
/// * `base - 24 ..= base - 64` — six zeros, popped by the shim into
///   rbx, rbp, r12, r13, r14, r15 (highest address first).
///
/// The returned value is `base - 64`. Popping six words and executing `ret`
/// leaves `rsp == base - 8`, so `entry` starts with `rsp % 16 == 8`.
///
/// # Caller contract
///
/// This function is not marked `unsafe`, but it writes through `top`: the
/// caller must pass the upper end of a writable region with at least 64 bytes
/// available below `top & !15`.
pub fn init_actor_stack(top: *mut u8, entry: extern "C-unwind" fn()) -> usize {
    // Size of one stack slot on x86-64.
    const WORD: usize = 8;

    // Words in push order (first element ends up at the highest address).
    let frame: [usize; 8] = [
        0,              // null return address for `entry` (end-of-stack marker)
        entry as usize, // `ret` target of the first switch
        0,              // rbx
        0,              // rbp
        0,              // r12
        0,              // r13
        0,              // r14
        0,              // r15
    ];

    let mut sp = top as usize & !15;
    for &word in &frame {
        sp -= WORD;
        // SAFETY: per the caller contract the 64 bytes below `top & !15` are
        // writable; `sp` stays inside that range and is 8-byte aligned because
        // it starts 16-byte aligned and moves in 8-byte steps.
        unsafe { (sp as *mut usize).write(word) };
    }
    sp
}

// ---------------------------------------------------------------------------
// Context switch shims
//
// Each shim:
//   1. Pushes the six callee-saved integer registers.
//   2. Snaps rsp into rdi and calls the Rust helper that stores it.
//   3. Calls the Rust helper that returns the *other* side's saved rsp.
//   4. Moves that into rsp.
//   5. Pops the six registers and rets.
//
// XMM registers are NOT saved here. We rely on every yield happening through
// a Rust call site, which means the compiler has spilled any live XMM state
// to the stack before we get here. (This is the same argument the compiler
// uses internally — callee-saved regs are what survive a `call`, and the
// SysV AMD64 ABI says XMM0–15 are all caller-saved.) If we ever yield from
// a place that isn't a Rust call boundary, this assumption breaks.
//
// NOTE(review): the SysV ABI also treats the MXCSR control bits and the x87
// control word as callee-saved; the shims do not save those either, so a
// side that changes rounding/exception modes leaks the change across a
// switch. Confirm no actor relies on modifying them.
// --------------------------------------------------------------------------- #[unsafe(naked)] unsafe extern "C" fn switch_to_actor_asm() { core::arch::naked_asm!( "push rbx", "push rbp", "push r12", "push r13", "push r14", "push r15", "mov rdi, rsp", "call {set_sched_sp}", "call {get_actor_sp}", "mov rsp, rax", "pop r15", "pop r14", "pop r13", "pop r12", "pop rbp", "pop rbx", "ret", set_sched_sp = sym set_scheduler_sp, get_actor_sp = sym get_actor_sp, ); } /// Resume the actor whose sp is in `ACTOR_SP`. Returns when the actor yields. pub unsafe fn switch_to_actor() { unsafe { switch_to_actor_asm() }; } #[unsafe(naked)] pub unsafe extern "C" fn switch_to_scheduler() { core::arch::naked_asm!( "push rbx", "push rbp", "push r12", "push r13", "push r14", "push r15", "mov rdi, rsp", "call {set_actor_sp}", "call {get_sched_sp}", "mov rsp, rax", "pop r15", "pop r14", "pop r13", "pop r12", "pop rbp", "pop rbx", "ret", set_actor_sp = sym set_actor_sp, get_sched_sp = sym get_scheduler_sp, ); }