From dafccb57b4fe063682b6b19db57bfc9d0478c563 Mon Sep 17 00:00:00 2001 From: Joel Severin Date: Fri, 31 Oct 2025 18:37:02 +0100 Subject: [PATCH] Add initial release files --- .gitignore | 3 + LICENSE | 5 + README.md | 93 +++++ docker/linux-wasm-base/Dockerfile | 16 + docker/linux-wasm-contained/Dockerfile | 8 + linux-wasm.sh | 251 ++++++++++++ runtime/bright.css | 51 +++ runtime/index.html | 263 +++++++++++++ runtime/linux-worker.js | 512 +++++++++++++++++++++++++ runtime/linux.js | 222 +++++++++++ runtime/server.py | 25 ++ tools/make-initramfs-base.sh | 19 + 12 files changed, 1468 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 docker/linux-wasm-base/Dockerfile create mode 100644 docker/linux-wasm-contained/Dockerfile create mode 100755 linux-wasm.sh create mode 100644 runtime/bright.css create mode 100644 runtime/index.html create mode 100644 runtime/linux-worker.js create mode 100644 runtime/linux.js create mode 100755 runtime/server.py create mode 100755 tools/make-initramfs-base.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e39da51 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/*workspace*/ +/runtime/vmlinux.wasm +/runtime/initramfs.cpio.gz diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..55fe4c7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,5 @@ +Code originating from this repository is provided under: + SPDX-License-Identifier: GPL-2.0-only + +Patches are provided with the default license of each target project. +If in doubt, and if compatible with the patch target, GPL 2.0 shall be used. diff --git a/README.md b/README.md new file mode 100644 index 0000000..5a2d9dd --- /dev/null +++ b/README.md @@ -0,0 +1,93 @@ +# Scripts for Building a Linux/Wasm Operating System +This project contains scripts to download, build and run a Linux system that can be executed on the web, using native WebAssembly (Wasm). 
+ +These scripts can be run in the following way: +* Directly on a host machine. +* In a generic docker container. +* In a specific docker container (see Dockerfile). + +## Parts +The project is built and assembled from the following pieces of software: +* LLVM Project: + * Base version: 18.1.2 + * Patches: + * A hack patch that enables GNU ld-style linker scripts in wasm-ld. + * Artifacts: clang, wasm-ld (from lld), compiler-rt +* Linux kernel: + * Base version: 6.4.16 + * Patches: + * A patch for adding Wasm architecture support to the kernel. + * A wasm binfmt feature patch, enabling .wasm files to run as executables. + * A console driver for a Wasm "web console". + * Artifacts: vmlinux, exported (unmodified) kernel headers + * Dependencies: clang, wasm-ld with linker script support, (compiler-rt is *not* needed) +* musl: + * Base version: 1.2.5 + * Patches: + * A hack patch (minimal and incorrect) that: + * Adds Wasm as a target to musl (I guessed and cheated a lot on this one). + * Allows musl to be built using clang and wasm-ld (linker script support may be needed). + * Artifacts: musl libc + * Dependencies: clang, wasm-ld, compiler-rt +* Linux kernel headers for BusyBox + * Base version: from the kernel + * Patches: + * A series of patches, originally hosted by Sabotage Linux, but modified to suit a newer kernel. These patches allow BusyBox to include kernel headers (which is not really supported by Linux). This magically just "works" with glibc but needs modding for musl. + * Artifacts: modified kernel headers + * Dependencies: exported Linux kernel headers +* BusyBox: + * Base version: 1.36.1 + * Patches: + * A hack patch (minimal and incomplete) that: + * Allows BusyBox to be built using clang and wasm-ld (linker script support might be unnecessary). + * Adds a Wasm defconfig. + * Artifacts: BusyBox installation (base binary and symlinks for ls, cat, mv etc.) 
+ * Dependencies: musl libc, modified headers for BusyBox +* A minimal initramfs: + * Notes: + * Packages up the busybox installation into a compressed cpio archive. + * It sets up a pty for you (for proper signal/session/job management) and drops you into a shell. + * Artifacts: initramfs.cpio.gz + * Dependencies: BusyBox installation +* A runtime: + * Notes: + * Some example code of what a minimal JavaScript Wasm host could look like. + * Error handling is not very graceful, more geared towards debugging than user experience. + * This is the glue code that kicks everything off, spawns web workers, creates Wasm instances etc. + +Hint: Wasm lacks an MMU, meaning that Linux needs to be built in a NOMMU configuration. Wasm programs thus need to be built using -fPIC/-shared. Alternatively, existing Wasm programs can run together with a proxy that does syscalls towards the kernel. In such a case, each thread that wishes to independently execute syscalls should map to a thread in the proxy. The drawback of such an approach is that memory cannot be mapped and shared between processes. However, from a memory protection standpoint, this property could also be beneficial. + +## Running +Run ./linux-wasm.sh to see usage. Downloads happen first, building afterwards. You may partially select what to download or (re)-build. + +Due to a bug in LLVM's build system, building LLVM a second time fails when building runtimes (complaining that clang fails to build a simple test program). A workaround is to build it yet again (it works each other time, i.e. the 1st, 3rd, 5th etc. time). + +Due to limitations in the Linux kernel's build system, the absolute path of the cross compiler (install path of LLVM) cannot contain spaces. Since LLVM is built by linux-wasm.sh, it more or less means its workspace directory (or at least install directory) has to be in a space free path. + +### Docker +The following commands should be executed in this repo root. 
+ +There are two containers: +* **linux-wasm-base**: Contains an Ubuntu 20.04 environment with all tools installed for building (e.g. cmake, gcc etc.). +* **linux-wasm-contained**: Actually builds everything into the container. Meant as a disposable way to build everything isolated. + +Create the containers: +``` +docker build -t linux-wasm-base:dev ./docker/linux-wasm-base +docker build -t linux-wasm-contained:dev ./docker/linux-wasm-contained +``` +Note that the latter command will copy linux-wasm.sh, in its current state, into the container. + +To launch a simple docker container with a mapping to host (recommended for development): +``` +docker run -it --name my-linux-wasm --mount type=bind,src="$(pwd)",target=/linux-wasm linux-wasm-base:dev bash +(Inside the bash prompt, run for example:) /linux-wasm/linux-wasm.sh all +``` + +To actually build everything inside the container (mostly useful for build servers): +``` +docker run -it --name full-linux-wasm linux-wasm-contained:dev /linux-wasm/linux-wasm.sh all +``` + +To change workspace folder, docker run -e LW_WORKSPACE=/path/to/workspace ...blah... can be used. This may be useful together with docker volumes. 
+ diff --git a/docker/linux-wasm-base/Dockerfile b/docker/linux-wasm-base/Dockerfile new file mode 100644 index 0000000..96ed232 --- /dev/null +++ b/docker/linux-wasm-base/Dockerfile @@ -0,0 +1,16 @@ +FROM ubuntu:20.04 + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt update && \ + apt install -y ca-certificates gpg wget && \ + test -f /usr/share/doc/kitware-archive-keyring/copyright || \ + wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \ + gpg --dearmor - | \ + tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && \ + echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ focal main' | \ + tee /etc/apt/sources.list.d/kitware.list >/dev/null && \ + apt update && \ + test -f /usr/share/doc/kitware-archive-keyring/copyright || rm /usr/share/keyrings/kitware-archive-keyring.gpg && \ + apt install -y kitware-archive-keyring && \ + apt install -y build-essential git cmake ninja-build && \ + rm -rf /var/lib/apt/lists/* diff --git a/docker/linux-wasm-contained/Dockerfile b/docker/linux-wasm-contained/Dockerfile new file mode 100644 index 0000000..379a167 --- /dev/null +++ b/docker/linux-wasm-contained/Dockerfile @@ -0,0 +1,8 @@ +FROM linux-wasm-base:dev + +WORKDIR /linux-wasm + +COPY patches patches +COPY linux-wasm.sh linux-wasm.sh + +ENTRYPOINT ["/linux-wasm/linux-wasm.sh", "all"] diff --git a/linux-wasm.sh b/linux-wasm.sh new file mode 100755 index 0000000..04073a5 --- /dev/null +++ b/linux-wasm.sh @@ -0,0 +1,251 @@ +#!/bin/bash + +# This is a very simple file that can be divided into two phases for each inherent software: fetching and building. +# Fetching happens first, then building. You can "fetch" all, "build" all, or do "all" which does both. You may also +# specify a specific piece of software and fetch or build just that, but keep in mind dependencies between them. +# +# Fetching means: download and patch. 
+# Building means: configure, compile and install (to separate folder). +# By default everything ends up in a folder named workspace/ but you can change that by specifying LW_WORKSPACE=... +# This script can be run in any directory, it should not pollute the current working directory, but just in case you +# may want to create an empty scratch directory. It's hard to validate that all components' build systems behave... + +set -e + +LW_ROOT="$(realpath -s "$(dirname "$0")")" + +# (All paths below are resolved as absolute. This is required for the other parts of the script to work properly.) + +# Path to workspace (will set LW_SRC, LW_BUILD, LW_INSTALL ... paths). +: "${LW_WORKSPACE:=$LW_ROOT/workspace}" +LW_WORKSPACE="$(realpath -sm "$LW_WORKSPACE")" + +# Path to where sources will be downloaded and patched. +: "${LW_SRC:=$LW_WORKSPACE/src}" +LW_SRC="$(realpath -sm "$LW_SRC")" + +# Path to where each software component will be built. +: "${LW_BUILD:=$LW_WORKSPACE/build}" +LW_BUILD="$(realpath -sm "$LW_BUILD")" + +# Path to where each software component will be installed. +: "${LW_INSTALL:=$LW_WORKSPACE/install}" +LW_INSTALL="$(realpath -sm "$LW_INSTALL")" + +# Flags used with git. --depth 1 is recommended to avoid downloading a lot of history. +: "${LW_GITFLAGS:=--depth 1}" + +# Parallel build jobs. Unfortunately not as simple as one number in reality. These are rather conservative. +: "${LW_JOBS_LLVM_LINK:=1}" +: "${LW_JOBS_LLVM_COMPILE:=3}" +: "${LW_JOBS_KERNEL_COMPILE:=8}" +: "${LW_JOBS_MUSL_COMPILE:=8}" +: "${LW_JOBS_BUSYBOX_COMPILE:=8}" + +handled=0 +case "$1" in # note use of ;;& meaning that each case is re-tested (can hit multiple times)! 
+ "fetch-llvm"|"all-llvm"|"fetch"|"all") + mkdir -p "$LW_SRC/llvm" + git clone -b llvmorg-18.1.2 $LW_GITFLAGS https://github.com/llvm/llvm-project.git "$LW_SRC/llvm" + git -C "$LW_SRC/llvm" am < "$LW_ROOT/patches/llvm/0001-Hack-patch-to-allow-GNU-ld-style-linker-scripts-in-w.patch" + handled=1;;& + + "fetch-kernel"|"all-kernel"|"fetch"|"all") + mkdir -p "$LW_SRC/kernel" + git clone -b v6.4.16 $LW_GITFLAGS https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git "$LW_SRC/kernel" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0009-HACK-Workaround-broken-wq_worker_comm.patch" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0001-Always-access-the-instruction-pointer-intrinsic-via-.patch" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0002-Allow-architecture-specific-panic-handling.patch" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0003-Add-missing-processor.h-include-for-asm-generic-barr.patch" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0004-Align-dot-instead-of-section-in-vmlinux.lds.h.patch" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0005-Add-Wasm-architecture.patch" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0006-Add-Wasm-binfmt.patch" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0007-Use-.section-format-compatible-with-LLVM-as-when-tar.patch" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0008-Provide-Wasm-support-in-mk_elfconfig.patch" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0009-Add-dummy-ELF-constants-for-Wasm.patch" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0010-Add-Wasm-console-support.patch" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0011-Add-wasm_defconfig.patch" + git -C "$LW_SRC/kernel" am < "$LW_ROOT/patches/kernel/0012-HACK-Workaround-broken-wq_worker_comm.patch" + handled=1;;& + + "fetch-musl"|"all-musl"|"fetch"|"all") + mkdir -p "$LW_SRC/musl" + git clone -b v1.2.5 $LW_GITFLAGS 
https://git.musl-libc.org/git/musl "$LW_SRC/musl" + git -C "$LW_SRC/musl" am < "$LW_ROOT/patches/musl/0001-NOMERGE-Hacks-to-get-Linux-Wasm-to-compile-minimal-a.patch" + handled=1;;& + + "fetch-busybox-kernel-headers"|"all-busybox-kernel-headers"|"fetch"|"all") + # There is not really much to do here, the kernel needs to be built first. See build-busybox-kernel-headers. + handled=1;;& + + "fetch-busybox"|"all-busybox"|"fetch"|"all") + mkdir -p "$LW_SRC/busybox" + git clone -b 1_36_1 $LW_GITFLAGS https://git.busybox.net/busybox "$LW_SRC/busybox" + git -C "$LW_SRC/busybox" am < "$LW_ROOT/patches/busybox/0001-NOMERGE-Hacks-to-build-Wasm-Linux-arch-minimal-and-i.patch" + handled=1;;& + + "fetch-initramfs"|"all-initramfs"|"fetch"|"all") + # Nothing to do here. + # We already have patches/initramfs/initramfs-base.cpio pre-built by toos/make-initramfs-base.sh in the repo. + handled=1;;& + + "build-llvm"|"all-llvm"|"build"|"all"|"build-tools") + mkdir -p "$LW_BUILD/llvm" + # (LLVM_DEFAULT_TARGET_TRIPLE is needed to build compiler-rt, which is needed by musl.) + # The extra indented lines are to build compiler-rt for Wasm, you may remove all of them to skip it. 
+ cmake -G Ninja \ + "-DCMAKE_INSTALL_PREFIX=$LW_INSTALL/llvm" \ + "-B$LW_BUILD/llvm" \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_TARGETS_TO_BUILD="WebAssembly" \ + -DLLVM_ENABLE_PROJECTS="clang;lld" \ + -DLLVM_ENABLE_RUNTIMES="compiler-rt" \ + -DCOMPILER_RT_BAREMETAL_BUILD=Yes \ + -DCOMPILER_RT_BUILD_XRAY=No \ + -DCOMPILER_RT_INCLUDE_TESTS=No \ + -DCOMPILER_RT_HAS_FPIC_FLAG=No \ + -DCOMPILER_RT_ENABLE_IOS=No \ + -DCOMPILER_RT_BUILD_CRT=No \ + -DCOMPILER_RT_BUILD_BUILTINS=No \ + -DCOMPILER_RT_DEFAULT_TARGET_ONLY=Yes \ + -DLLVM_DEFAULT_TARGET_TRIPLE="wasm32-unknown-unknown" \ + -DLLVM_ENABLE_ASSERTIONS=1 \ + -DLLVM_PARALLEL_LINK_JOBS=$LW_JOBS_LLVM_LINK \ + -DLLVM_PARALLEL_COMPILE_JOBS=$LW_JOBS_LLVM_COMPILE \ + "$LW_SRC/llvm/llvm" + + cmake --build "$LW_BUILD/llvm" + cmake --install "$LW_BUILD/llvm" + handled=1;;& + + "build-kernel"|"all-kernel"|"build"|"all"|"build-os") + mkdir -p "$LW_BUILD/kernel" + # Note: LLVM=/blah/ MUST start AND END with a trailing slash, or it will be interpreted as LLVM=1 (which looks for system clang etc.)! + # Unfortunately this means the value cannot be escaped in 'single quotes', which means the path cannot contain spaces... + # Note: kernel docs often show setting CC=clang but don't do this (or you will get system clang due to the above). + LW_KERNEL_MAKE="make" + LW_KERNEL_MAKE+=" O='$LW_BUILD/kernel'" + LW_KERNEL_MAKE+=" ARCH=wasm" + LW_KERNEL_MAKE+=" LLVM=$LW_INSTALL/llvm/bin/" + LW_KERNEL_MAKE+=" CROSS_COMPILE=wasm32-unknown-unknown-" + LW_KERNEL_MAKE+=" HOSTCC=gcc" + ( + cd "$LW_SRC/kernel" + #$LW_KERNEL_MAKE menuconfig + #exit 1 + + $LW_KERNEL_MAKE defconfig + $LW_KERNEL_MAKE -j $LW_JOBS_KERNEL_COMPILE V=1 + $LW_KERNEL_MAKE headers_install + ) + mkdir -p "$LW_INSTALL/kernel/include" + cp -R "$LW_BUILD/kernel/usr/include/." 
"$LW_INSTALL/kernel/include" + cp "$LW_BUILD/kernel/vmlinux" "$LW_INSTALL/kernel/vmlinux.wasm" + handled=1;;& + + "build-musl"|"all-musl"|"build"|"all"|"build-os") + mkdir -p "$LW_BUILD/musl" + ( + cd "$LW_BUILD/musl" + + # LIBCC is set mostly to something non-empty, which is needed for the build to succeed. + # Note how we build --disable-shared (i.e. disable dynamic linking by musl) but with -fPIC and -shared. + CROSS_COMPILE="$LW_INSTALL/llvm/bin/llvm-" \ + CC="$LW_INSTALL/llvm/bin/clang" \ + CFLAGS="--target=wasm32-unknown-unknown -Xclang -target-feature -Xclang +atomics -Xclang -target-feature -Xclang +bulk-memory -fPIC -Wl,-shared" \ + LIBCC="--rtlib=compiler-rt" \ + "$LW_SRC/musl/configure" --target=wasm --prefix=/ --disable-shared "--srcdir=$LW_SRC/musl" + make -j $LW_JOBS_MUSL_COMPILE + + # NOTE: do not forget destdir or you may ruin the host system!!! + # We set --prefix to / as include/lib dirs are auto picked up by LLVM then (using --sysroot). + mkdir -p "$LW_INSTALL/musl" + DESTDIR="$LW_INSTALL/musl" make install + ) + handled=1;;& + + "build-busybox-kernel-headers"|"all-busybox-kernel-headers"|"build"|"all"|"build-os") + rm -rf "$LW_INSTALL/busybox-kernel-headers" + mkdir -p "$LW_INSTALL/busybox-kernel-headers" + cp -R "$LW_INSTALL/kernel/include/." "$LW_INSTALL/busybox-kernel-headers" + ( + cd "$LW_INSTALL/busybox-kernel-headers" + patch -p1 < "$LW_ROOT/patches/busybox-kernel-headers/busybox-kernel-headers-for-musl.patch" + ) + handled=1;;& + + "build-busybox"|"all-busybox"|"build"|"all"|"build-os") + mkdir -p "$LW_BUILD/busybox" + mkdir -p "$LW_INSTALL/busybox" + cd "$LW_SRC/busybox" + for CMD in "wasm_defconfig" "-j $LW_JOBS_BUSYBOX_COMPILE" "install" + do # make wasm_defconfig, make, make install (CONFIG_PREFIX is set below for install path). + # The path escaping is a bit tricky but this seems to work... somehow... 
+ make "O=$LW_BUILD/busybox" ARCH=wasm "CONFIG_PREFIX=$LW_INSTALL/busybox" \ + "CROSS_COMPILE=$LW_INSTALL/llvm/bin/" "CONFIG_SYSROOT=$LW_INSTALL/musl" \ + CONFIG_EXTRA_CFLAGS="$CFLAGS -isystem '$LW_INSTALL/busybox-kernel-headers' -D__linux__ -fPIC" \ + $CMD + done + handled=1;;& + + "build-initramfs"|"all-initramfs"|"build"|"all"|"build-os") + mkdir -p "$LW_INSTALL/initramfs" + + # First, create the base by copying a template with some device files. + # This base is created by tools/make-initramfs-base.sh but requires root to run. + cp "$LW_ROOT/patches/initramfs/initramfs-base.cpio" "$LW_INSTALL/initramfs/initramfs.cpio" + + # Then copy BusyBox into it. + ( + cd "$LW_INSTALL/busybox" + # The below command must run in the directory of the archive (i.e. read "find ."). + find . -print0 | cpio --null -ov --format=newc -A -O "$LW_INSTALL/initramfs/initramfs.cpio" + ) + + # And copy a simple init too. + ( + cd "$LW_ROOT/patches/initramfs/" + # The below command must run in the same directory as the root of the files it will copy. + echo "./init" | cpio -ov --format=newc -A -O "$LW_INSTALL/initramfs/initramfs.cpio" + ) + + # Finally we should zip it up so that it takes less space. This is the file to distribute. + rm -f "$LW_INSTALL/initramfs/initramfs.cpio.gz" + gzip "$LW_INSTALL/initramfs/initramfs.cpio" + handled=1;;& + + ""|"help") + echo "Usage: $0 [action]" + echo " where action is one of:" + echo " all -- Fetch and build everything." + echo " fetch -- Fetch everything." + echo " build -- Build everything (no fetching)." + echo " all-xxx -- Fetch and build component xxx." + echo " fetch-xxx -- Fetch component xxx." + echo " build-xxx -- Build component xxx (no fetching)." + echo " build-tools -- Build all build tool components (llvm)." + echo " build-os -- Build all OS software (excluding build tools)." + echo " and components include (in order): llvm, kernel, musl, busybox-kernel-headers, busybox, initramfs." 
+ echo "" + echo "Fetch will download and patch the source. Build will configure, compile and install (to a folder in the workspace)." + echo "" + echo "To clean, simply delete the files in the src, build or install folders. Incremental re-building is possible." + echo "" + echo "The following variables are currently used. They can be overridden using environment variables with the same name." + echo "Paths are commonly automatically made absolute. If a relative path is given, it is evaluated in relation to the CWD." + echo "---------------" + echo "LW_WORKSPACE=$LW_WORKSPACE" + echo "LW_SRC=$LW_SRC" + echo "LW_BUILD=$LW_BUILD" + echo "LW_INSTALL=$LW_INSTALL" + echo "LW_GITFLAGS=$LW_GITFLAGS" + echo "---------------" + exit 1 + handled=1;;& +esac + +if ! [ "$handled" = 1 ]; then + # *) would not work above as ;;& would redirect all cases to *) + echo "Unknown action parameter: $1" + exit 1 +fi diff --git a/runtime/bright.css b/runtime/bright.css new file mode 100644 index 0000000..733f452 --- /dev/null +++ b/runtime/bright.css @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +body { + font-family: sans-serif; + letter-spacing: 0.4px; + color: black; +} + +h1 { + text-decoration: underline; +} + +h1, +h2, +h3, +h4, +h5, +h6 { + letter-spacing: 0.9px; + margin: 0; +} + +article, +#terminal { + max-width: 50em; +} + +p { + margin-top: 0; + margin-bottom: 1em; +} + +kbd { + color: #222222; + background: #dddddd; + border: 1px solid #b4b4b4; + border-radius: 0.1em; + display: inline-block; + white-space: nowrap; + padding: 0.15em; + margin: 0.15em 0; +} + +li { + margin-bottom: 1em; +} + +li li { + margin-bottom: 0.5em; + border: 0 none; +} \ No newline at end of file diff --git a/runtime/index.html b/runtime/index.html new file mode 100644 index 0000000..f456142 --- /dev/null +++ b/runtime/index.html @@ -0,0 +1,263 @@ + + + + + + + + + + +

Linux/Wasm

+
+
+Examples: ls          watch uptime              head /proc/cpuinfo
+          pwd         usleep 1234567            ps | grep kthreadd
+          top         vi file.txt               find /proc -name cmdline -maxdepth 2
+          mount       exec sh                   echo Hello >> world && cat world
+          iostat      strings /bin/busybox      grep "Cpus_allowed_list" < /proc/self/status
+
+

+ The console takes over Ctrl + C etc. Depending on your platform and browser, adding + Shift to the combo may work. Using Ctrl + Insert for copy and Shift + + Insert for paste may also work. Right-clicking and using the context menu should also work. +

+

+ A small Q&A follows. As always, if you are unsure about how some piece of software works, take a look at the + source code! +

+ +

What am I watching?

+

+ The Linux kernel, booting in your browser, powered by WebAssembly (Wasm). + The included programs (shell and standard commands) are provided by BusyBox, backed by a musl libc implementation. + The terminal emulator is provided by Xterm.js. +

+

+ This is a proof-of-concept to get a discussion started, not a stable nor a secure system. Many + workarounds (hacks) are needed to pull this thing off. Maybe this tech demo can steer development of + Wasm, Linux, LLVM and the other components needed onto a path where a Wasm-powered Linux system can be supported + in a production setting, but there is a long road ahead and all platforms need to change in fundamental ways for + that to happen in a convincing way. Not to mention the human aspect - do all stakeholders even want to + support such an odd platform as Wasm, or the niche use cases it currently caters to? +

+ +

Known bugs

+

+ Sometimes the whole system will lock up. Reloading the page will reboot it. To debug further, the Web Console + might come in handy (F12 in most browsers). I recommend Chromium-based browsers over Firefox, as the + latter does not work very well when debugging Wasm projects of this size. Just be aware that things run slower + while debugging. I'm still working on the instability issues but wanted to release a first version now that it + boots and runs basic commands! Most crashes I have seen are typically originating from one of these root causes: +

+ + +

How does this work?

+

+ Wasm is similar to every other arch in Linux, but also different. One important difference is that there is no way + to suspend execution of a task. There is a way around this though: Linux supports up to 8k CPUs (or possibly + more...). We can just spin up a new CPU dedicated to each user task (process/thread) and never preempt it. Each + task is backed by a Web Worker, which is in practice backed by a thread in the host OS (through the WebAssembly + implementation). This essentially offloads the actual scheduling of each task in the Linux/Wasm guest to the host + OS scheduler, as the guest kernel has been tricked to have a lot of CPUs that ping-pong between executing a single + user task and their own idle tasks (and some kthreads now and then - as we know they play nice and won't hog the + CPU, they can execute for a brief moment on any CPU in the guest system). +

+

+ No graceful preemption also means that interrupts or signals don't work fully. There is some support for + interrupts on a dedicated CPU. It is used to deliver timing interrupts and IPIs that control advanced scheduling. + Signal handlers only work if the user process plays nice: if all threads never do any syscalls (i.e. hog the CPU), + the signal can never be delivered. Thankfully, most programs play nice, and those that don't should be easy to fix + one way or another (e.g. spawn a thread that sits idle and receives signals, and cooperates with the main thread). +

+ +

What are the limitations?

+

+ As mentioned above, no interruptions of tasks are possible. No MMU, every process and the kernel lives in the same + address space. Wasm is more or less a strict Harvard architecture, where code can be loaded but not modified at + runtime. JIT compilation could in theory still work, you would just need to compile the code before launching it, + but no runtime patching would be allowed (for example, the jump label kernel feature would not work well). +

+

+ Wasm is an evolving specification and new extensions are continuously being added. While there are some quite + limiting aspects of the standard today, things improve all the time. Some of the hacks employed to make this demo + work today may be unnecessary in the Wasm version of tomorrow. +

+ +

Is this optimized?

+

+ No. There has more or less been no optimization of the current build. In fact, de-optimizations have been applied + to enable debugging. There are many optimizations waiting to be done which could make the whole thing boot and + run even faster. Perhaps the largest performance saver could, however, be to boot once and then only download a + (compressed) and pre-booted image to end users. As Wasm is completely sandboxed and not dependent on any hardware + at boot, such a "hibernated" or "snapshot" image would be able to launch instantly. +

+

+ Booting each of the secondary CPUs is also done in serial order right now, which takes a lot of time and could + probably be done in parallel. I have not profiled the code but I suspect the reason it takes a long time is the + maintenance on the JavaScript side, because the code that runs in Wasm when booting a CPU is rather slim to begin + with. +

+

+ The current host implementation handles a lot of things with postMessage() between workers and the main thread. + This seems to add quite some overhead. Perhaps it would be possible to speed this up by using Atomics.waitAsync() + from the main thread on the SharedArrayBuffer instead, and also queue up requests to avoid the slow path of + calling between Wasm and JS all the time. Workers could also talk to each other directly via the SharedArrayBuffer + in this scheme. As Shared Workers mature (currently debugging support is a bit weak and Wasm Modules and Memories + cannot be passed to them), a few calls could be parallelized. Before that, perhaps a normal Worker could do part + of what is currently done on the main thread, with or without postMessage() semantics. +

+ +

How does this differ from previous attempts?

+

+ Linux in the browser has been done a few times before, either by slow emulation of other architectures in Wasm or + even pure JavaScript, or by running Linux as a library (LKL aka. um). Such attempts have inspired this more direct + approach. The goal is to expose the syscalls that the Linux kernel provides. This should allow porting of + many more programs than possible with WASI or the current generation of Emscripten. Note that a program does not + necessarily have to run as a process inside Linux either, you could have just one (or a few) frontend threads that + you use for syscalls, possibly via some kind of message passing. This way, your program does not have to live + inside the memory space shared by the kernel - it can be completely sandboxed. The limitation of such an approach + is that you would not be able to share memory, e.g. mmap()ing shared areas between programs would not work. +

+ +

I want to hack away at this, how do I get started?

+

+ Check out the wasm-linux repo. It contains a script to build everything (LLVM, the Linux kernel, BusyBox, Musl, + and some other glue) into a workspace folder. The script is kept simple to get anyone started, but not required in + any way. You may also, optionally, use Docker to build things into a sandbox. +

+ +

What's next?

+

+ Getting some kind of graphics working could be fun. One could try to implement EGL with WebGL as backend, exposing + an OpenGL ES interface. Emscripten seems to already have a good portion of this work implemented. +

+

+ Another area worth exploring is Dwarf support, to be able to debug line-by-line in the C code. This should be + fairly easy to add, and most browsers support it, but I didn't bother as I wanted to learn the Wasm instruction + set. What could teach you better than following along the assembly listing and cross-referencing each instruction + to a C statement (possibly optimized and inlined - you even get to learn the compiler's Wasm-specific tricks)? +

+

+ I have not tried C++ but I think that it may require some special attention. Just like setjmp/longjmp, exceptions + need to be handled in a graceful way. Wasm has native support for this but it may need some tweaking to work. And + then there is libcxx and who knows what crazy situations that beast may put you into. +

+

+ Looking further than the Web as a platform, Wasm also shows promise in other applications that need multi-platform + sandboxing. Examples include smart contracts, multi-platform apps, GPUs, agentic AI, and your next hype. +

+ +

Wasm wish list

+ + +

+ There are proposals for Stack Switching and Memory Control that could enable a better Linux experience on Wasm. + They are not quite there yet, some tweaks are needed to make them compatible with the Linux use case, but with the + right motivation we can get there. True hibernation of execution state could also be quite interesting (boot once + and re-use a booted system). This is already possible via emulation, similar to how setjmp/longjmp is implemented + today, but would be more elegant and performant if supported natively by the browser. +

+

+ I opted to not support double-return as in fork/vfork (and setjmp/longjmp), even if LLVM supports it with + some runtime help. The reason is that I feel like it's not ready yet and I don't need it enough. Emscripten has + proven that it is possible, even if today's approaches are rather clumsy and slow. The Stack Switching proposal + hopefully fixes the problems of today's approaches and it's enough for me to know that a proper solution is in the + works. While this is all great for legacy code, using these constructs always seemed a bit problematic to me. How + you can write code without setjmp/longjmp should be quite obvious - but how about fork/vfork? The answer is clone! + The clone syscall is mostly known for its use with pthreads, where the flag CLONE_VM and its friends are used. + But, you can achieve both fork-like and vfork-like functionality by supplying different flags to clone (the Wasm + port of BusyBox for example swaps vfork() for a clone() with CLONE_VFORK specified). The best part of using clone + to do vforks is that you can supply a separate stack for the child function! This makes clone-based vforks much + safer and capable than their traditional plain vfork counterparts (e.g., you're allowed to call functions with + clone-based vforks, unlike in traditional vforks where the double-return on the same stack forbids this). +

+
+ \ No newline at end of file diff --git a/runtime/linux-worker.js b/runtime/linux-worker.js new file mode 100644 index 0000000..fe3de89 --- /dev/null +++ b/runtime/linux-worker.js @@ -0,0 +1,512 @@ +// SPDX-License-Identifier: GPL-2.0-only + +(function (console) { + let port = self; + let memory = null; // Note: memory.buffer has to be re-accessed after growing the memory! + let locks = null; + const text_decoder = new TextDecoder("utf-8"); + const text_encoder = new TextEncoder(); + + /// A string denoting the runner name (same as Worker name), useful for debugging. + let runner_name = "[Unknown]"; + + /// SAB-backed storage for last process in switch_to (when it returns back from another task). + let switch_to_last_task = null; + + /// The vmlinux instance, to handle boot, idle, kthreads and syscalls etc. + let vmlinux_instance = null; + + /// The user executable (if any) to run when we're not in vmlinux. + let user_executable = null; + let user_executable_params = null; + + /// The user executabe instance, or null. Try using the instance variable in the promise over this one if possible. + let user_executable_instance = null; + let user_executable_imports = null; + + /// Flag that a clone callback should be called instead of _start(). + let should_call_clone_callback = false; + + /// A messenger to synchronize with the main thread, as well as communicate how many bytes were read on the console. + let console_read_messenger = new Int32Array(new SharedArrayBuffer(4)); + + /// An exception type used to abort part of execution (useful for collapsing the call stack of user code). + class Trap extends Error { + constructor(kind) { + super("This exception should be ignored. 
It is part of Linux/Wasm host glue."); + Error.captureStackTrace && Error.captureStackTrace(this, Trap); + this.name = "Trap"; + this.kind = kind; + } + } + + const log = (message) => { + port.postMessage({ + method: "log", + message: "[Runner " + runner_name + "]: " + message, + }); + }; + + /// Get a JS string object from a (nul-terminated) C-string in a Uint8Array. + const get_cstring = (memory, index) => { + const memory_u8 = new Uint8Array(memory.buffer); + let end; + for (end = index; memory_u8[end]; ++end); // Find terminating nul-character. + return text_decoder.decode(memory_u8.slice(index, end)); + }; + + const lock_notify = (lock, count) => { + Atomics.store(locks._memory, locks[lock], 1); + Atomics.notify(locks._memory, locks[lock], count || 1); + }; + + const lock_wait = (lock) => { + Atomics.wait(locks._memory, locks[lock], 0); + Atomics.store(locks._memory, locks[lock], 0); + }; + + const serialize_me = () => { + // Wait for some other task or CPU to wake us up. + lock_wait("serialize"); + return switch_to_last_task[0]; // last_task was written by the caller just prior to waking. + }; + + /// Callbacks from within Linux/Wasm out to our host code (cpu is not neccessarily ours). + const host_callbacks = { + /// Start secondary CPU. + wasm_start_cpu: (cpu, idle_task, start_stack) => { + // New web workers cannot be spawned from within a Worker in most browsers. It can currently not be spawned from + // within a SharedWorker in any browser. Do it on the main thread instead. + port.postMessage({ method: "start_secondary", cpu: cpu, idle_task: idle_task, start_stack: start_stack }); + }, + + /// Stop secondary CPU (rather abruptly). + wasm_stop_cpu: (cpu) => { + port.postMessage({ method: "stop_secondary", cpu: cpu }); + }, + + /// Creation of tasks on our end. Runs them too. + wasm_create_and_run_task: (prev_task, new_task, name, bin_start, bin_end, data_start, table_start) => { + // Tell main to create the new task, and then run it for the first time! 
+ port.postMessage({ + method: "create_and_run_task", + prev_task: prev_task, + new_task: new_task, + name: get_cstring(memory, name), + + // For user tasks, there is user code to load first before trying to run it. + user_executable: bin_start ? { + bin_start: bin_start, + bin_end: bin_end, + data_start: data_start, + table_start: table_start, + } : null, + }); + + // Serialize this (old) task. + return serialize_me(); + }, + + /// Remove a task created by wasm_create_and_run_task(). + wasm_release_task: (dead_task) => { + port.postMessage({ + method: "release_task", + dead_task: dead_task, + }); + }, + + /// Serialization of tasks (idle tasks and before SMP is started). + wasm_serialize_tasks: (prev_task, next_task) => { + // Notify the next task that it can run again. + port.postMessage({ + method: "serialize_tasks", + prev_task: prev_task, + next_task: next_task, + }); + + // Serialize this (old) task. + return serialize_me(); + }, + + /// Kernel panic. We can't proceed. + wasm_panic: (msg) => { + const message = "Kernel panic: " + get_cstring(memory, msg); + console.error(message); + log(message); + + // This will stop execution of the current task. + throw new Trap("panic"); + }, + + /// Dump a stack trace into a text buffer. (The exact format is implementation-defined and varies by browser.) + wasm_dump_stacktrace: (stack_trace, max_size) => { + try { + throw new Error(); + } catch (error) { + const memory_u8 = new Uint8Array(memory.buffer); + const encoded = text_encoder.encode(error.stack).slice(0, max_size - 1); + memory_u8.set(encoded, stack_trace); + memory_u8[stack_trace + encoded.length] = 0; + } + }, + + /// Replace the currently executing image (kthread spawning init, or user process) with a new user process image. 
+ wasm_load_executable: (bin_start, bin_end, data_start, table_start) => { + user_executable = WebAssembly.compile(new Uint8Array(memory.buffer).slice(bin_start, bin_end)); + user_executable_params = { + data_start: data_start, + table_start: table_start, + }; + + // We release our reference already, just to be sure. The promise chain will still have a reference until the + // kernel exits back to userland, which will termintate the user executable with a Trap. + user_executable_instance = null; + user_executable_imports = null; + }, + + /// Handle user mode return (e.g. from syscall) that should not proceed normally. (Not called on normal returns.) + wasm_user_mode_tail: (flow) => { + if (flow == -1) { + // Exec has been called and we should not return from the syscall. Trap() to collapse the call stack of the user + // executable. When swallowed, run the new user executable that was already preloaded by wasm_load_executable(). + // This takes precedence of signal handlers or signal return - no reason to run any old user code! + throw new Trap("reload_program"); + } else if (flow >= 1 && flow <= 3) { + // First, handle any signal (possibly stacked). Then, handle any signal return (happens after stacked signals). + // If exec() happens, we will slip out in the catch-else clause, ensuring the sigreturn does not proceed. + if (flow & 1) { + try { + if (user_executable_instance.exports.__libc_handle_signal) { + // Setup signal frame... 
+ user_executable_imports.env.__stack_pointer.value = vmlinux_instance.exports.get_user_stack_pointer(); + user_executable_instance.exports.__set_tls_base(vmlinux_instance.exports.get_user_tls_base()); + + user_executable_instance.exports.__libc_handle_signal(); + throw new Error("Wasm function __libc_handle_signal() returned (it should never return)!"); + } else { + throw new Error("Wasm function __libc_handle_signal() not defined!"); + } + } catch (error) { + if (error instanceof Trap && error.kind == "signal_return") { + // ...restore signal frame. + user_executable_imports.env.__stack_pointer.value = vmlinux_instance.exports.get_user_stack_pointer(); + user_executable_instance.exports.__set_tls_base(vmlinux_instance.exports.get_user_tls_base()); + } else { + // Either a genuine error, or a Trap() from exec() (signal handlers are allowed to call exec()). + throw error; + } + } + } + + if (flow & 2) { + throw new Trap("signal_return"); + } + } else { + throw new Error("wasm_syscall_tail called with unknown kind"); + } + }, + + // After this line follows host callbacks used by various drivers. In the future, we may make drivers more + // modularized and allow them to allocate certain resources, like host callbacks, IRQ numbers, even syscalls... + + // Host callbacks by the Wasm-default clocksource. + + wasm_cpu_clock_get_monotonic: () => { + // Convert this double in ms to u64 in us. + // Modern browsers can on good days reach 5us accuracy, given that the platform supports it. + return BigInt(Math.round(1000 * (performance.timeOrigin + performance.now()))) * 1000n; + }, + + // Host callbacks used by the Wasm-default console driver. + + wasm_driver_hvc_put: (buffer, count) => { + const memory_u8 = new Uint8Array(memory.buffer); + + port.postMessage({ + method: "console_write", + message: text_decoder.decode(memory_u8.slice(buffer, buffer + count)), + }); + + return count; + }, + + wasm_driver_hvc_get: (buffer, count) => { + // Reset lock. 
Using .store() for the memory barrier. + Atomics.store(console_read_messenger, 0, -1); + + // Tell the main thread to write any input into memory, up to count bytes. + port.postMessage({ + method: "console_read", + buffer: buffer, + count: count, + console_read_messenger: console_read_messenger, + }); + + // Wait for a response from the main thread about how many bytes were actually written, could be 0. + Atomics.wait(console_read_messenger, 0, -1); + let console_read_count = Atomics.load(console_read_messenger, 0); + return console_read_count; + }, + }; + + /// Callbacks from the main thread. + const message_callbacks = { + init: (message) => { + runner_name = message.runner_name; + memory = message.memory; + locks = message.locks; + switch_to_last_task = message.last_task; // Only defined for tasks and CPU 0 (init task). + + if (message.user_executable) { + // We are in a new runner that should duplicate the user executable. Happens when someone calls clone(). + host_callbacks.wasm_load_executable( + message.user_executable.bin_start, + message.user_executable.bin_end, + message.user_executable.data_start, + message.user_executable.table_start); + } + + let import_object = { + env: { + ...host_callbacks, + memory: message.memory, + }, + }; + + // We have to fixup unimplemented syscalls as they are declared but not defined by vmlinux (to avoid the + // ni_syscall soup with unimplemented syscalls, which fails on Wasm due to a variable amount of arguments). Since + // these syscalls should not really be called anyway, we can have a slow js stub deal with them, and it can handle + // variable arguments gracefully! + const ni_syscall = () => { return -38 /* aka. 
-ENOSYS */; }; + for (const imported of WebAssembly.Module.imports(message.vmlinux)) { + if (imported.name.startsWith("sys_") && imported.module == "env" + && imported.kind == "function") { + import_object.env[imported.name] = ni_syscall; + } + } + + // This is a global error handler that is used when calling Wasm code. + const wasm_error = (error) => { + log("Wasm crash: " + error.toString()); + console.error(error); + + if (vmlinux_instance) { + vmlinux_instance.exports.raise_exception(); + throw new Error("raise_exception() returned"); + } else { + // Only log stack if vmlinux is not up already - it will dump stacks itself. + log(error.stack); + throw error; + } + }; + + const vmlinux_setup = () => { + // Instantiate a vmlinux Wasm Module. This will implicitly run __wasm_init_memory, which will effectively: + // * Copy all passive data segments into their (static) position. + // * Clear BSS (in its static position). + // * Drop all passive data segments. + // An in-memory atomic flag ensures this only happens the first time vmlinux is instantiated on the main memory. + return WebAssembly.instantiate(message.vmlinux, import_object).then((instance) => { + vmlinux_instance = instance; + }); + }; + + const vmlinux_run = () => { + if (message.runner_type == "primary_cpu") { + // Notify the main thread about init task so that it knows where it resides in memory. + port.postMessage({ + method: "start_primary", + init_task: vmlinux_instance.exports.init_task.value, + }); + + // Setup the boot command line. We have the luxury to be able to write to it directly. The maximum length is + // not set here but is set by COMMAND_LINE_SIZE (defaults to 512 bytes). + const cmdline = message.boot_cmdline + "\0"; + const cmdline_buffer = vmlinux_instance.exports.boot_command_line.value; + new Uint8Array(memory.buffer).set(text_encoder.encode(cmdline), cmdline_buffer); + + // Grow the memory to fit initrd and copy it. 
+ // + // All typed arrays and views on memory.buffer become invalid by growing and need to be re-created. grow() + // will return the old size, which becomes our base address for initrd. + const initrd_start = memory.grow(((message.initrd.byteLength + 0xFFFF) / 0x10000) | 0) * 0x10000; + const initrd_end = initrd_start + message.initrd.byteLength; + new Uint8Array(memory.buffer).set(new Uint8Array(message.initrd), initrd_start); + new DataView(memory.buffer).setUint32(vmlinux_instance.exports.initrd_start.value, initrd_start, true); + new DataView(memory.buffer).setUint32(vmlinux_instance.exports.initrd_end.value, initrd_end, true); + + // This will boot the machine on the primary CPU. Later on, it will boot secondaries... + // + // _start sets up the Wasm global __stack_pointer to init_stack and calls start_kernel(). Note that this will + // grow the memory and thus all views on memory.buffer become invalid. + vmlinux_instance.exports._start(); + + // _start() will never return, unless it fails to allocate all memory it wants to. + throw new Error("_start did not even succeed in allocating 16 pages of RAM, aborting..."); + } else if (message.runner_type == "secondary_cpu") { + // start_secondary() will never return. It can be killed by terminate() on this Worker. + vmlinux_instance.exports._start_secondary(message.start_stack); + + throw new Error("start_secondary returned"); + } else if (message.runner_type == "task") { + // A fresh task, possibly serialized on CPU 0 before secondaries are brought up. + should_call_clone_callback = vmlinux_instance.exports.ret_from_fork(message.prev_task, message.new_task); + + // Two cases exist when we reach here: + // 1. The kthread that spawned init returned. + // The code will already have been loaded, just execute it. + // + // 2. Someone called clone. + // We should call the clone callback on the user executable, which has already been loaded. + // + // Notably, we don't end up here after exec() syscalls. 
Instead, the user instance is reloaded directly. + return; + } else { + throw new Error("Unknown runner_type: " + message.runner_type); + } + }; + + const user_executable_setup = () => { + const stack_pointer = vmlinux_instance.exports.get_user_stack_pointer(); + const tls_base = vmlinux_instance.exports.get_user_tls_base(); + + user_executable_imports = { + env: { + memory: memory, + __memory_base: new WebAssembly.Global({ value: 'i32', mutable: false }, user_executable_params.data_start), + __stack_pointer: new WebAssembly.Global({ value: 'i32', mutable: true }, stack_pointer), + __indirect_function_table: new WebAssembly.Table({ initial: 4096, element: "anyfunc" }), // TODO: fix this! + __table_base: new WebAssembly.Global({ value: 'i32', mutable: false }, user_executable_params.table_start), + + // To be correct, we should save AND restore these globals between the user instance and vmlinux instance: + // __stack_pointer <-> __user_stack_pointer + // __tls_base <-> __user_tls_base + // The kernel interacts with them in the following ways: + // * Diagnostics (reading them and displaying them in informational messages). + // * ret_from_fork: writes stack and tls. We have to deal with it, but not here, as this is not a syscall! + // * syscall exec: tls should be kept even if the process image is replaced (probably has no real use case). + // * syscall clone: stack and tls should be transfered to the new instance, unless overridden. + // * signal handlers: also not a syscall - vmlinux calls the host, perhaps during syscall return! + // The kernel never modifies neither of them for the task that makes a syscall. + // + // To make syscalls faster (allowing them to not go through a slow JavaScript wrapper), we skip transferring + // them back to the user instance. They always have to be transferred to vmlinux at syscall sites, as a + // signal being handled in its return path would need to save (and restore) them on its signal stack. 
+ __wasm_syscall_0: vmlinux_instance.exports.wasm_syscall_0, + __wasm_syscall_1: vmlinux_instance.exports.wasm_syscall_1, + __wasm_syscall_2: vmlinux_instance.exports.wasm_syscall_2, + __wasm_syscall_3: vmlinux_instance.exports.wasm_syscall_3, + __wasm_syscall_4: vmlinux_instance.exports.wasm_syscall_4, + __wasm_syscall_5: vmlinux_instance.exports.wasm_syscall_5, + __wasm_syscall_6: vmlinux_instance.exports.wasm_syscall_6, + + __wasm_abort: () => { + debugger + throw WebAssembly.RuntimeError('abort'); + }, + }, + }; + + // Instantiate a user Wasm Module. This will implicitly run __wasm_init_memory, which will effectively: + // * Initialize the TLS pointer (to a data_start-relocated static area, for the first thread). + // * Copy all passive data segments into their (data_start-relocated) position. + // * Clear BSS (data_start-relocated). + // * Drop all passive data segments (except the TLS region, which is saved, but unused in the musl case). + // An atomic flag ensures this only happens for the first thread to be started (using instantiate). + // + // The TLS pointer will be initialized in the following way ways: + // * kthread-returns-to-init: __user_tls_base would be 0 as it's zero-initialized on the kthreads switch_stack. + // (We are ignoring it.) __wasm_init_memory() would initialize it to the static area as described above. + // + // * exec: __user_tls_base should have been the value of the process calling exec (during the syscall). However, + // we would want to restore it as part of initializing the runtime, which is exactly what __wasm_init_memory() + // does. This also means that whatever value the task calling exec() supplied for tls is ignored. + // + // * clone: clone explicitly passes its tls pointer to the kernel as part of the syscall. Unless the tls pointer + // has been overridden with CLONE_SETTLS, it will be copied from the old task to the new one. 
This is mostly + // useful when CLONE_VFORK is used, in which case the new task can borrow the TLS until it calls exec or exit. + let woken = user_executable.then((user_module) => WebAssembly.instantiate(user_module, user_executable_imports)); + + woken = woken.then((instance) => { + instance.exports.__wasm_apply_data_relocs(); + if (should_call_clone_callback) { + // Note: __wasm_init_tls cannot be used as it would also re-initilize the _Thread_local variables' data. But + // on a clone(), it is none of our business to do that. It's up to the libc to do that as part of pthreads. + // Indeed, for example on a clone with CLONE_VFORK, the right thing to do may be to borrow the parent's TLS. + // Unfortunately, LLVM does not export __tls_base directly on dynamic libraries, so we go through a wrapper. + instance.exports.__set_tls_base(tls_base); + } + user_executable_instance = instance; + return instance; + }); + + return woken; + }; + + const user_executable_run = (instance) => { + if (should_call_clone_callback) { + // We have to reset this state, because if the clone callback calls exec, we have to run _start() instead! + should_call_clone_callback = false; + + if (instance.exports.__libc_clone_callback) { + instance.exports.__libc_clone_callback(); + throw new Error("Wasm function __libc_clone_callback() returned (it should never return)!"); + } else { + throw new Error("Wasm function __libc_clone_callback() not defined!"); + } + } else { + if (instance.exports._start) { + // Ideally libc would do this instead of the usual __init_array stuff (e.g. override __libc_start_init in + // musl). However, a reference to __wasm_call_ctors becomes a GOT import in -fPIC code, perhaps rightfully + // so with the current implementation and use case on LLVM. Anyway, we do it here, slightly early on... + if (instance.exports.__wasm_call_ctors) { + instance.exports.__wasm_call_ctors(); + } + + // TLS: somewhat incorrectly contains 0 instead of the TP before exec(). 
Since we will anyway not care about + // its value (__wasm_apply_data_relocs() called would have overwritten it in this case) it does not matter. + instance.exports._start(); + throw new Error("Wasm function _start() returned (it should never return)!"); + } else { + throw new Error("Wasm function _start() not defined!"); + } + } + }; + + const user_executable_error = (error) => { + if (error instanceof Trap) { + if (error.kind == "reload_program") { + // Someone called exec and the currently executing code should stop. We should run the new user code already + // loaded by wasm_load_executable(). + return user_executable_chain(); + } else if (error.kind == "panic") { + // This has already been handled - just swallow it. This Worker will be done - but kept for later debugging. + } else { + throw new Error("Unexpected Wasm host Trap " + error.kind); + } + } else { + wasm_error(error); + } + }; + + const user_executable_chain = () => { + // user_executable_error() may deal with an exec() trap and recursively call run_chain() again. + return user_executable_setup().then(user_executable_run).catch(user_executable_error); + }; + + // All tasks start in the kernel, some return to userland, where they should never return. If they return, we + // handle this as an error and wait. Our life ends when the kernel kills us by terminating the whole Worker. Oh, + // and exex() can trap us, in which case we have to circle back to loading new user code and executing it agian. 
+ vmlinux_setup().then(vmlinux_run).catch(wasm_error).then(user_executable_chain); + }, + }; + + self.onmessage = (message_event) => { + const data = message_event.data; + message_callbacks[data.method](data); + }; + + self.onmessageerror = (error) => { + throw error; + }; +})(console); diff --git a/runtime/linux.js b/runtime/linux.js new file mode 100644 index 0000000..0e78e3a --- /dev/null +++ b/runtime/linux.js @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/// Create a Linux machine and run it. +const linux = async (worker_url, vmlinux, boot_cmdline, initrd, log, console_write) => { + /// Dict of online CPUs. + const cpus = {}; + + /// Dict of tasks. + const tasks = {}; + + /// Input buffer (from keyboard to tty). + let input_buffer = new ArrayBuffer(0); + + const text_decoder = new TextDecoder("utf-8"); + const text_encoder = new TextEncoder(); + + const lock_notify = (locks, lock, count) => { + Atomics.store(locks._memory, locks[lock], 1); + Atomics.notify(locks._memory, locks[lock], count || 1); + }; + + const lock_wait = (locks, lock) => { + Atomics.wait(locks._memory, locks[lock], 0); + Atomics.store(locks._memory, locks[lock], 0); + }; + + /// Callbacks from Web Workers (each one representing one task). + const message_callbacks = { + start_primary: (message) => { + // CPU 0 has init_task which sits in static storage. After booting it becomes CPU 0's idle task. The runner will + // in this special case tell us where it is so that we can register it. 
+ log("Starting cpu 0 with init_task " + message.init_task) + tasks[message.init_task] = cpus[0]; + }, + + start_secondary: (message) => { + if (message.cpu <= 0) { + throw new Error("Trying to start secondary cpu with ID <= 0"); + } + + log("Starting cpu " + message.cpu + " (" + message.idle_task + ")" + + " with start stack " + message.start_stack); + make_cpu(message.cpu, message.idle_task, message.start_stack); + }, + + stop_secondary: (message) => { + if (message.cpu <= 0) { + // If you arrive here, you probably got panic():ed with a broken stack. + if (!confirm("Trying to stop secondary cpu with ID 0.\n\n" + + "You probably got panic():ed with a broken stack. Continue?\n\n" + + " (Say ok if you know what you are doing and want to catch the panic, otherwise cancel.)")) { + throw new Error("Trying to stop secondary cpu with ID 0"); + } + } + + if (cpus[message.cpu]) { + log("[Main]: Stopping CPU " + message.cpu); + cpus[message.cpu].worker.terminate(); + delete cpus[message.cpu]; + } else { + log("[Main]: Tried to stop CPU " + message.cpu + " but it was already stopped (broken system)!"); + } + }, + + create_and_run_task: (message) => { + // ret_from_fork will make sure the task switch finishes. + make_task(message.prev_task, message.new_task, message.name, message.user_executable); + }, + + release_task: (message) => { + // Stop the worker, which will stop script execution. This is safe as the task should be hanging on a lock waiting + // to be scheduled - which never happens as dead tasks don't get ever get scheduled. + tasks[message.dead_task].worker.terminate(); + + delete tasks[message.dead_task]; + }, + + serialize_tasks: (message) => { + // next_task was previously suspended, wake it up. + + // Tell the next task where we switched from, so that it can finish the task switch. + tasks[message.next_task].last_task[0] = message.prev_task; + + // Release the above write of last_task and wake up the task. 
+ lock_notify(tasks[message.next_task].locks, "serialize"); + }, + + console_read: (message, worker) => { + const memory_u8 = new Uint8Array(memory.buffer); + const buffer = new Uint8Array(input_buffer); + + const used = buffer.slice(0, message.count); + memory_u8.set(used, message.buffer); + + const unused = buffer.slice(message.count); + input_buffer = unused.buffer; + + // Tell the Worker that asked for input how many bytes (perhaps 0) were actually written. + Atomics.store(message.console_read_messenger, 0, used.length); + Atomics.notify(message.console_read_messenger, 0, 1); + }, + + console_write: (message) => { + console_write(message.message); + }, + + log: (message) => { + log(message.message); + }, + }; + + /// Memory shared between all CPUs. + const memory = new WebAssembly.Memory({ + initial: 30, // TODO: extract this automatically from vmlinux. + maximum: 0x10000, // Allow the full 32-bit address space to be allocated. + shared: true, + }); + + /** + * Create and run one CPU in a background thread (a Web Worker). + * + * This will run boot code for the CPU, and then drop to run the idle task. For CPU 0 this involves booting the entire + * system, including bringing up secondary CPUs at the end, while for secondary CPUs, this just means some + * book-keeping before dropping into their own idle tasks. + */ + const make_cpu = (cpu, idle_task, start_stack) => { + const options = { + runner_type: (cpu == 0) ? "primary_cpu" : "secondary_cpu", + start_stack: start_stack, // undefined for CPU 0 + }; + + if (cpu == 0) { + options.boot_cmdline = boot_cmdline; + options.initrd = initrd; + initrd = null; // allow gc + } + + // idle_task is undefined for cpu 0, we will know it first when start_primary notifies us. + const name = "CPU " + cpu + " [boot+idle]" + (cpu != 0 ? 
" (" + idle_task + ")" : ""); + + const runner = make_vmlinux_runner(name, options); + cpus[cpu] = runner; + if (cpu != 0) { + tasks[idle_task] = runner; // For CPU 0, start_primary does this registration for us. + } + }; + + /** + * Create and run one task. This task has been switch_to():ed by the scheduler for the first time. + * + * In the beginning, all tasks are serialized and have to cooperate to schedule eachother, but after secondary CPUs + * are brought up, they can run concurrently (and will effectively be managed by the Wasm host OS). While we are not + * able to suspend them from JS, the host OS will do that. + */ + const make_task = (prev_task, new_task, name, user_executable) => { + const options = { + runner_type: "task", + prev_task: prev_task, + new_task: new_task, + user_executable: user_executable, + }; + tasks[new_task] = make_vmlinux_runner(name + " (" + new_task + ")", options); + }; + + /// Create a runner for vmlinux. It will run in a Web Worker and execute some specified code. + const make_vmlinux_runner = (name, options) => { + // Note: SharedWorker does not seem to allow WebAssembly Module or Memory instances posted. + const worker = new Worker(worker_url, { name: name }); + + let locks = { + serialize: 0, + }; + locks._memory = new Int32Array(new SharedArrayBuffer(Object.keys(locks).length * 4)); + + // Store for last task when wasm_serialize() returns in switch_to(). Needed for each task, both normal ones and each + // CPUs idle tasks (first called init_task (PID 0), not to be confused with init (PID 1) which is a normal task). 
+ const last_task = new Uint32Array(new SharedArrayBuffer(4)); + + worker.onerror = (error) => { + throw error; + }; + + worker.onmessage = (message_event) => { + const data = message_event.data; + message_callbacks[data.method](data, worker); + }; + + worker.onmessageerror = (error) => { + throw error; + }; + + worker.postMessage({ + ...options, + method: "init", + vmlinux: vmlinux, + memory: memory, + locks: locks, + last_task: last_task, + runner_name: name, + }); + + return { + worker: worker, + locks: locks, + last_task: last_task, + }; + }; + + // Create the primary cpu, it will later on callback to us and we start secondaries. + make_cpu(0); + + return { + key_input: (data) => { + const key_buffer = text_encoder.encode(data); // Possibly UTF-8 (up to 16 bits). + + // Append key_buffer to the end of input_buffer. + const old_size = input_buffer.byteLength; + input_buffer = input_buffer.transfer(old_size + key_buffer.byteLength); + (new Uint8Array(input_buffer)).set(key_buffer, old_size); + } + }; +}; diff --git a/runtime/server.py b/runtime/server.py new file mode 100755 index 0000000..ab66e40 --- /dev/null +++ b/runtime/server.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 + +# This is just a simple web server intended for development purposes on the local machine. +# +# Usage: +# Place vmlinux.wasm and initramfs.cpio.gz into this directory. +# Run this script from this directory: python3 server.py +# Navigate to: http://127.0.0.1:8000/ +# +# As of 2025, Chromium and Edge (same thing really) have the best debugging capabilities for Wasm. Firefox is +# unfortunately lagging behind a bit. Keep in mind that these tools were not really built to debug an entire operating +# system and can be quite demanding on system resources. Things will hopefully improve as they get used by more people. 
+ +from http.server import HTTPServer, SimpleHTTPRequestHandler, test +import sys + +class Server(SimpleHTTPRequestHandler): + def end_headers(self): # Add the extra response headers below, then let the base class finish the header block. + self.send_header('Cross-Origin-Opener-Policy', 'same-origin') # COOP + COEP make the page cross-origin isolated, which browsers require for SharedArrayBuffer. + self.send_header('Cross-Origin-Embedder-Policy', 'require-corp') + self.send_header('Cache-Control', 'no-store') # Header name only: send_header() appends the ": " separator itself. + SimpleHTTPRequestHandler.end_headers(self) + +if __name__ == '__main__': + test(Server, HTTPServer, port=int(sys.argv[1]) if len(sys.argv) > 1 else 8000) diff --git a/tools/make-initramfs-base.sh b/tools/make-initramfs-base.sh new file mode 100755 index 0000000..ff0e595 --- /dev/null +++ b/tools/make-initramfs-base.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# This script creates an initial cpio file suitable to use as a base for initramfs cpio archives. +# The reason to split it up is because mknod requires the user to be root (see sudo below). + +set -e + +cd "$(dirname "$0")/../patches/initramfs" + +rm -rf initramfs/ + +mkdir -p initramfs/{bin,dev,etc,home,mnt,proc,sys,usr} +sudo mknod initramfs/dev/console c 5 1 +( + cd initramfs/ + find . -print0 | cpio --null -ov --format=newc > ../initramfs-base.cpio +) + +rm -rf initramfs/