linux-wasm/runtime/linux-worker.js
// SPDX-License-Identifier: GPL-2.0-only
(function (console) {
let port = self;
let memory = null; // Note: memory.buffer has to be re-accessed after growing the memory!
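/// Lock table shared with the main thread. Judging by lock_notify()/lock_wait() below, its _memory field is an
/// Int32Array on a SharedArrayBuffer and its other fields (e.g. "serialize") map lock names to indices into it.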
let locks = null;
const text_decoder = new TextDecoder("utf-8");
const text_encoder = new TextEncoder();
/// A string denoting the runner name (same as Worker name), useful for debugging.
let runner_name = "[Unknown]";
/// SAB-backed storage for the last process in switch_to (when it returns from another task).
let switch_to_last_task = null;
/// The vmlinux instance, to handle boot, idle, kthreads and syscalls etc.
let vmlinux_instance = null;
/// The user executable (if any) to run when we're not in vmlinux: a Promise resolving to the compiled
/// WebAssembly.Module (see wasm_load_executable()).
let user_executable = null;
let user_executable_params = null;
/// The user executable instance, or null. Prefer the instance passed along the promise chain over this one if possible.
let user_executable_instance = null;
let user_executable_imports = null;
/// Flag that a clone callback should be called instead of _start().
let should_call_clone_callback = false;
/// A messenger to synchronize with the main thread, as well as communicate how many bytes were read on the console.
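/// Protocol (see wasm_driver_hvc_get()): slot 0 is reset to -1 before each read, and the main thread stores the
/// actual byte count (possibly 0) there and notifies us.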
let console_read_messenger = new Int32Array(new SharedArrayBuffer(4));
/// An exception type used to abort part of execution (useful for collapsing the call stack of user code).
class Trap extends Error {
constructor(kind) {
super("This exception should be ignored. It is part of Linux/Wasm host glue.");
Error.captureStackTrace && Error.captureStackTrace(this, Trap);
this.name = "Trap";
this.kind = kind;
}
}
const log = (message) => {
port.postMessage({
method: "log",
message: "[Runner " + runner_name + "]: " + message,
});
};
/// Get a JS string from a nul-terminated C string in the Wasm memory.
const get_cstring = (memory, index) => {
const memory_u8 = new Uint8Array(memory.buffer);
let end;
for (end = index; memory_u8[end]; ++end); // Find terminating nul-character.
return text_decoder.decode(memory_u8.slice(index, end));
};
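/// Simple cross-thread flags on the shared lock table: lock_notify() sets a slot to 1 and wakes waiters, while
/// lock_wait() blocks while the slot is 0 and then clears it again for the next round.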
const lock_notify = (lock, count) => {
Atomics.store(locks._memory, locks[lock], 1);
Atomics.notify(locks._memory, locks[lock], count || 1);
};
const lock_wait = (lock) => {
Atomics.wait(locks._memory, locks[lock], 0);
Atomics.store(locks._memory, locks[lock], 0);
};
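/// Park this runner until another task or CPU wakes it via the "serialize" lock, then return the task we came from.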
const serialize_me = () => {
// Wait for some other task or CPU to wake us up.
lock_wait("serialize");
return switch_to_last_task[0]; // last_task was written by the caller just prior to waking.
};
/// Callbacks from within Linux/Wasm out to our host code (cpu is not necessarily ours).
const host_callbacks = {
/// Start secondary CPU.
wasm_start_cpu: (cpu, idle_task, start_stack) => {
// New web workers cannot be spawned from within a Worker in most browsers. They currently cannot be spawned from
// within a SharedWorker in any browser. Do it on the main thread instead.
port.postMessage({ method: "start_secondary", cpu: cpu, idle_task: idle_task, start_stack: start_stack });
},
/// Stop secondary CPU (rather abruptly).
wasm_stop_cpu: (cpu) => {
port.postMessage({ method: "stop_secondary", cpu: cpu });
},
/// Creation of tasks on our end. Runs them too.
wasm_create_and_run_task: (prev_task, new_task, name, bin_start, bin_end, data_start, table_start) => {
// Tell main to create the new task, and then run it for the first time!
port.postMessage({
method: "create_and_run_task",
prev_task: prev_task,
new_task: new_task,
name: get_cstring(memory, name),
// For user tasks, there is user code to load first before trying to run it.
user_executable: bin_start ? {
bin_start: bin_start,
bin_end: bin_end,
data_start: data_start,
table_start: table_start,
} : null,
});
// Serialize this (old) task.
return serialize_me();
},
/// Remove a task created by wasm_create_and_run_task().
wasm_release_task: (dead_task) => {
port.postMessage({
method: "release_task",
dead_task: dead_task,
});
},
/// Serialization of tasks (idle tasks and before SMP is started).
wasm_serialize_tasks: (prev_task, next_task) => {
// Notify the next task that it can run again.
port.postMessage({
method: "serialize_tasks",
prev_task: prev_task,
next_task: next_task,
});
// Serialize this (old) task.
return serialize_me();
},
/// Kernel panic. We can't proceed.
wasm_panic: (msg) => {
const message = "Kernel panic: " + get_cstring(memory, msg);
console.error(message);
log(message);
// This will stop execution of the current task.
throw new Trap("panic");
},
/// Dump a stack trace into a text buffer. (The exact format is implementation-defined and varies by browser.)
wasm_dump_stacktrace: (stack_trace, max_size) => {
try {
throw new Error();
} catch (error) {
const memory_u8 = new Uint8Array(memory.buffer);
const encoded = text_encoder.encode(error.stack).slice(0, max_size - 1);
memory_u8.set(encoded, stack_trace);
memory_u8[stack_trace + encoded.length] = 0;
}
},
/// Replace the currently executing image (kthread spawning init, or user process) with a new user process image.
wasm_load_executable: (bin_start, bin_end, data_start, table_start) => {
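// Kick off asynchronous compilation; the resulting Promise is awaited later in user_executable_setup().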
user_executable = WebAssembly.compile(new Uint8Array(memory.buffer).slice(bin_start, bin_end));
user_executable_params = {
data_start: data_start,
table_start: table_start,
};
// We release our reference right away, just to be sure. The promise chain will still hold a reference until the
// kernel exits back to userland, which will terminate the user executable with a Trap.
user_executable_instance = null;
user_executable_imports = null;
},
/// Handle user mode return (e.g. from syscall) that should not proceed normally. (Not called on normal returns.)
wasm_user_mode_tail: (flow) => {
if (flow == -1) {
// Exec has been called and we should not return from the syscall. Trap() to collapse the call stack of the user
// executable. When swallowed, run the new user executable that was already preloaded by wasm_load_executable().
// This takes precedence over signal handlers or signal return - no reason to run any old user code!
throw new Trap("reload_program");
} else if (flow >= 1 && flow <= 3) {
// First, handle any signal (possibly stacked). Then, handle any signal return (happens after stacked signals).
// If exec() happens, we will slip out in the catch-else clause, ensuring the sigreturn does not proceed.
if (flow & 1) {
try {
if (user_executable_instance.exports.__libc_handle_signal) {
// Setup signal frame...
user_executable_imports.env.__stack_pointer.value = vmlinux_instance.exports.get_user_stack_pointer();
user_executable_instance.exports.__set_tls_base(vmlinux_instance.exports.get_user_tls_base());
user_executable_instance.exports.__libc_handle_signal();
throw new Error("Wasm function __libc_handle_signal() returned (it should never return)!");
} else {
throw new Error("Wasm function __libc_handle_signal() not defined!");
}
} catch (error) {
if (error instanceof Trap && error.kind == "signal_return") {
// ...restore signal frame.
user_executable_imports.env.__stack_pointer.value = vmlinux_instance.exports.get_user_stack_pointer();
user_executable_instance.exports.__set_tls_base(vmlinux_instance.exports.get_user_tls_base());
} else {
// Either a genuine error, or a Trap() from exec() (signal handlers are allowed to call exec()).
throw error;
}
}
}
if (flow & 2) {
throw new Trap("signal_return");
}
} else {
throw new Error("wasm_syscall_tail called with unknown kind");
}
},
// Below this point follow host callbacks used by various drivers. In the future, we may make drivers more
// modularized and allow them to allocate certain resources, like host callbacks, IRQ numbers, even syscalls...
// Host callbacks used by the Wasm-default clocksource.
wasm_cpu_clock_get_monotonic: () => {
// Convert this double in ms to a u64: round to whole us (the best accuracy we can hope for), then scale to ns.
// Modern browsers can, on a good day, reach 5us accuracy, provided the platform supports it.
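// Example: a (hypothetical) timeOrigin + now() of 1700000000123.456 ms rounds to 1700000000123456 us and is
// returned as 1700000000123456000n.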
return BigInt(Math.round(1000 * (performance.timeOrigin + performance.now()))) * 1000n;
},
// Host callbacks used by the Wasm-default console driver.
wasm_driver_hvc_put: (buffer, count) => {
const memory_u8 = new Uint8Array(memory.buffer);
port.postMessage({
method: "console_write",
message: text_decoder.decode(memory_u8.slice(buffer, buffer + count)),
});
return count;
},
wasm_driver_hvc_get: (buffer, count) => {
// Reset lock. Using .store() for the memory barrier.
Atomics.store(console_read_messenger, 0, -1);
// Tell the main thread to write any input into memory, up to count bytes.
port.postMessage({
method: "console_read",
buffer: buffer,
count: count,
console_read_messenger: console_read_messenger,
});
// Wait for a response from the main thread about how many bytes were actually written, could be 0.
Atomics.wait(console_read_messenger, 0, -1);
let console_read_count = Atomics.load(console_read_messenger, 0);
return console_read_count;
},
};
/// Callbacks from the main thread.
const message_callbacks = {
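/// The only message runners currently handle. Expected fields (as used below): runner_name, memory, locks,
/// last_task, vmlinux (a compiled WebAssembly.Module) and runner_type ("primary_cpu" | "secondary_cpu" | "task"),
/// plus boot_cmdline and initrd for the primary CPU, start_stack for secondary CPUs, and prev_task, new_task and
/// an optional user_executable descriptor for tasks.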
init: (message) => {
runner_name = message.runner_name;
memory = message.memory;
locks = message.locks;
switch_to_last_task = message.last_task; // Only defined for tasks and CPU 0 (init task).
if (message.user_executable) {
// We are in a new runner that should duplicate the user executable. Happens when someone calls clone().
host_callbacks.wasm_load_executable(
message.user_executable.bin_start,
message.user_executable.bin_end,
message.user_executable.data_start,
message.user_executable.table_start);
}
let import_object = {
env: {
...host_callbacks,
memory: message.memory,
},
};
// We have to fix up unimplemented syscalls as they are declared but not defined by vmlinux (to avoid the
// ni_syscall soup with unimplemented syscalls, which fails on Wasm due to a variable number of arguments). Since
// these syscalls should not really be called anyway, we can have a slow JS stub deal with them, and it can handle
// variable arguments gracefully!
const ni_syscall = () => { return -38 /* aka. -ENOSYS */; };
for (const imported of WebAssembly.Module.imports(message.vmlinux)) {
if (imported.name.startsWith("sys_") && imported.module == "env"
&& imported.kind == "function") {
import_object.env[imported.name] = ni_syscall;
}
}
// This is a global error handler that is used when calling Wasm code.
const wasm_error = (error) => {
log("Wasm crash: " + error.toString());
console.error(error);
if (vmlinux_instance) {
vmlinux_instance.exports.raise_exception();
throw new Error("raise_exception() returned");
} else {
// Only log stack if vmlinux is not up already - it will dump stacks itself.
log(error.stack);
throw error;
}
};
const vmlinux_setup = () => {
// Instantiate a vmlinux Wasm Module. This will implicitly run __wasm_init_memory, which will effectively:
// * Copy all passive data segments into their (static) position.
// * Clear BSS (in its static position).
// * Drop all passive data segments.
// An in-memory atomic flag ensures this only happens the first time vmlinux is instantiated on the main memory.
return WebAssembly.instantiate(message.vmlinux, import_object).then((instance) => {
vmlinux_instance = instance;
});
};
const vmlinux_run = () => {
if (message.runner_type == "primary_cpu") {
// Notify the main thread about init task so that it knows where it resides in memory.
port.postMessage({
method: "start_primary",
init_task: vmlinux_instance.exports.init_task.value,
});
// Set up the boot command line. We have the luxury of being able to write to it directly. The maximum length is
// not enforced here; it is defined by COMMAND_LINE_SIZE (defaults to 512 bytes).
const cmdline = message.boot_cmdline + "\0";
const cmdline_buffer = vmlinux_instance.exports.boot_command_line.value;
new Uint8Array(memory.buffer).set(text_encoder.encode(cmdline), cmdline_buffer);
// Grow the memory to fit initrd and copy it.
//
// All typed arrays and views on memory.buffer are invalidated by growing and need to be re-created. grow()
// returns the old size in pages, from which we derive the base address for the initrd.
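// Example: a 100000-byte initrd needs ((100000 + 0xFFFF) / 0x10000) | 0 = 2 pages; if the memory was 256 pages
// before growing, initrd_start becomes 256 * 0x10000 = 0x1000000.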
const initrd_start = memory.grow(((message.initrd.byteLength + 0xFFFF) / 0x10000) | 0) * 0x10000;
const initrd_end = initrd_start + message.initrd.byteLength;
new Uint8Array(memory.buffer).set(new Uint8Array(message.initrd), initrd_start);
new DataView(memory.buffer).setUint32(vmlinux_instance.exports.initrd_start.value, initrd_start, true);
new DataView(memory.buffer).setUint32(vmlinux_instance.exports.initrd_end.value, initrd_end, true);
// This will boot the machine on the primary CPU. Later on, it will boot secondaries...
//
// _start sets up the Wasm global __stack_pointer to init_stack and calls start_kernel(). Note that this will
// grow the memory and thus all views on memory.buffer become invalid.
vmlinux_instance.exports._start();
// _start() will never return, unless it fails to allocate all the memory it wants.
throw new Error("_start did not even succeed in allocating 16 pages of RAM, aborting...");
} else if (message.runner_type == "secondary_cpu") {
// start_secondary() will never return. It can be killed by terminate() on this Worker.
vmlinux_instance.exports._start_secondary(message.start_stack);
throw new Error("start_secondary returned");
} else if (message.runner_type == "task") {
// A fresh task, possibly serialized on CPU 0 before secondaries are brought up.
should_call_clone_callback = vmlinux_instance.exports.ret_from_fork(message.prev_task, message.new_task);
// Two cases exist when we reach here:
// 1. The kthread that spawned init returned.
// The code will already have been loaded, just execute it.
//
// 2. Someone called clone.
// We should call the clone callback on the user executable, which has already been loaded.
//
// Notably, we don't end up here after exec() syscalls. Instead, the user instance is reloaded directly.
return;
} else {
throw new Error("Unknown runner_type: " + message.runner_type);
}
};
const user_executable_setup = () => {
const stack_pointer = vmlinux_instance.exports.get_user_stack_pointer();
const tls_base = vmlinux_instance.exports.get_user_tls_base();
user_executable_imports = {
env: {
memory: memory,
__memory_base: new WebAssembly.Global({ value: 'i32', mutable: false }, user_executable_params.data_start),
__stack_pointer: new WebAssembly.Global({ value: 'i32', mutable: true }, stack_pointer),
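// A fresh table per user instance; the user module's function pointers are placed at __table_base within it.
// The fixed size of 4096 entries is presumably just a placeholder, hence the TODO.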
__indirect_function_table: new WebAssembly.Table({ initial: 4096, element: "anyfunc" }), // TODO: fix this!
__table_base: new WebAssembly.Global({ value: 'i32', mutable: false }, user_executable_params.table_start),
// To be correct, we should save AND restore these globals between the user instance and vmlinux instance:
// __stack_pointer <-> __user_stack_pointer
// __tls_base <-> __user_tls_base
// The kernel interacts with them in the following ways:
// * Diagnostics (reading them and displaying them in informational messages).
// * ret_from_fork: writes stack and tls. We have to deal with it, but not here, as this is not a syscall!
// * syscall exec: tls should be kept even if the process image is replaced (probably has no real use case).
// * syscall clone: stack and tls should be transferred to the new instance, unless overridden.
// * signal handlers: also not a syscall - vmlinux calls the host, perhaps during syscall return!
// The kernel never modifies either of them for the task that makes a syscall.
//
// To make syscalls faster (allowing them to not go through a slow JavaScript wrapper), we skip transferring
// them back to the user instance. They always have to be transferred to vmlinux at syscall sites, as a
// signal being handled in its return path would need to save (and restore) them on its signal stack.
__wasm_syscall_0: vmlinux_instance.exports.wasm_syscall_0,
__wasm_syscall_1: vmlinux_instance.exports.wasm_syscall_1,
__wasm_syscall_2: vmlinux_instance.exports.wasm_syscall_2,
__wasm_syscall_3: vmlinux_instance.exports.wasm_syscall_3,
__wasm_syscall_4: vmlinux_instance.exports.wasm_syscall_4,
__wasm_syscall_5: vmlinux_instance.exports.wasm_syscall_5,
__wasm_syscall_6: vmlinux_instance.exports.wasm_syscall_6,
__wasm_abort: () => {
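// Break into an attached debugger (if any), then bail out with a Wasm runtime error.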
debugger;
throw WebAssembly.RuntimeError('abort');
},
},
};
// Instantiate a user Wasm Module. This will implicitly run __wasm_init_memory, which will effectively:
// * Initialize the TLS pointer (to a data_start-relocated static area, for the first thread).
// * Copy all passive data segments into their (data_start-relocated) position.
// * Clear BSS (data_start-relocated).
// * Drop all passive data segments (except the TLS region, which is saved, but unused in the musl case).
// An atomic flag ensures this only happens for the first thread to be started (using instantiate).
//
// The TLS pointer will be initialized in the following ways:
// * kthread-returns-to-init: __user_tls_base would be 0 as it's zero-initialized on the kthread's switch_stack.
// (We are ignoring it.) __wasm_init_memory() would initialize it to the static area as described above.
//
// * exec: __user_tls_base should have been the value of the process calling exec (during the syscall). However,
// we would want to restore it as part of initializing the runtime, which is exactly what __wasm_init_memory()
// does. This also means that whatever value the task calling exec() supplied for tls is ignored.
//
// * clone: clone explicitly passes its tls pointer to the kernel as part of the syscall. Unless the tls pointer
// has been overridden with CLONE_SETTLS, it will be copied from the old task to the new one. This is mostly
// useful when CLONE_VFORK is used, in which case the new task can borrow the TLS until it calls exec or exit.
let woken = user_executable.then((user_module) => WebAssembly.instantiate(user_module, user_executable_imports));
woken = woken.then((instance) => {
instance.exports.__wasm_apply_data_relocs();
if (should_call_clone_callback) {
// Note: __wasm_init_tls cannot be used as it would also re-initialize the _Thread_local variables' data. But
// on a clone(), it is none of our business to do that. It's up to the libc to do that as part of pthreads.
// Indeed, for example on a clone with CLONE_VFORK, the right thing to do may be to borrow the parent's TLS.
// Unfortunately, LLVM does not export __tls_base directly on dynamic libraries, so we go through a wrapper.
instance.exports.__set_tls_base(tls_base);
}
user_executable_instance = instance;
return instance;
});
return woken;
};
const user_executable_run = (instance) => {
if (should_call_clone_callback) {
// We have to reset this state, because if the clone callback calls exec, we have to run _start() instead!
should_call_clone_callback = false;
if (instance.exports.__libc_clone_callback) {
instance.exports.__libc_clone_callback();
throw new Error("Wasm function __libc_clone_callback() returned (it should never return)!");
} else {
throw new Error("Wasm function __libc_clone_callback() not defined!");
}
} else {
if (instance.exports._start) {
// Ideally libc would do this instead of the usual __init_array stuff (e.g. override __libc_start_init in
// musl). However, a reference to __wasm_call_ctors becomes a GOT import in -fPIC code, perhaps rightfully
// so with the current implementation and use case on LLVM. Anyway, we do it here, slightly early on...
if (instance.exports.__wasm_call_ctors) {
instance.exports.__wasm_call_ctors();
}
// TLS: somewhat incorrectly contains 0 instead of the TP before exec(). Since we do not care about its value
// anyway (the __wasm_apply_data_relocs() call would have overwritten it in this case), it does not matter.
instance.exports._start();
throw new Error("Wasm function _start() returned (it should never return)!");
} else {
throw new Error("Wasm function _start() not defined!");
}
}
};
const user_executable_error = (error) => {
if (error instanceof Trap) {
if (error.kind == "reload_program") {
// Someone called exec and the currently executing code should stop. We should run the new user code already
// loaded by wasm_load_executable().
return user_executable_chain();
} else if (error.kind == "panic") {
// This has already been handled - just swallow it. This Worker will be done - but kept for later debugging.
} else {
throw new Error("Unexpected Wasm host Trap " + error.kind);
}
} else {
wasm_error(error);
}
};
const user_executable_chain = () => {
// user_executable_error() may deal with an exec() trap and recursively call user_executable_chain() again.
return user_executable_setup().then(user_executable_run).catch(user_executable_error);
};
// All tasks start in the kernel; some return to userland, where they should never return. If they do return, we
// handle it as an error and wait. Our life ends when the kernel kills us by terminating the whole Worker. Oh,
// and exec() can trap us, in which case we have to circle back to loading new user code and executing it again.
vmlinux_setup().then(vmlinux_run).catch(wasm_error).then(user_executable_chain);
},
};
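/// Dispatch messages from the main thread to the matching callback via their "method" field.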
self.onmessage = (message_event) => {
const data = message_event.data;
message_callbacks[data.method](data);
};
self.onmessageerror = (error) => {
throw error;
};
})(console);