linux-wasm/patches/kernel/0005-Add-Wasm-architecture.patch
2025-10-31 18:38:01 +01:00

3759 lines
110 KiB
Diff

From efdef05f887b3ea571b329f0b2a52d062635fe13 Mon Sep 17 00:00:00 2001
From: Joel Severin <joel.severin@icemanor.se>
Date: Sun, 14 Sep 2025 17:09:39 +0200
Subject: [PATCH] Add Wasm architecture
This is the bare minimum arch-specific code needed to get Linux to boot on
Wasm (WebAssembly).
---
Makefile | 9 +-
arch/wasm/Kbuild | 1 +
arch/wasm/Kconfig | 78 ++++++
arch/wasm/Kconfig.debug | 10 +
arch/wasm/Makefile | 24 ++
arch/wasm/include/asm/Kbuild | 58 ++++
arch/wasm/include/asm/barrier.h | 16 ++
arch/wasm/include/asm/cache.h | 12 +
arch/wasm/include/asm/cmpxchg.h | 111 ++++++++
arch/wasm/include/asm/cpuflags.h | 22 ++
arch/wasm/include/asm/current.h | 36 +++
arch/wasm/include/asm/delay.h | 25 ++
arch/wasm/include/asm/elf.h | 66 +++++
arch/wasm/include/asm/entry-common.h | 16 ++
arch/wasm/include/asm/futex.h | 68 +++++
arch/wasm/include/asm/irq.h | 11 +
arch/wasm/include/asm/irq_work.h | 13 +
arch/wasm/include/asm/irqflags.h | 14 +
arch/wasm/include/asm/linkage.h | 22 ++
arch/wasm/include/asm/mmu_context.h | 8 +
arch/wasm/include/asm/panic.h | 15 ++
arch/wasm/include/asm/pgtable.h | 45 ++++
arch/wasm/include/asm/processor.h | 50 ++++
arch/wasm/include/asm/ptrace.h | 35 +++
arch/wasm/include/asm/smp.h | 28 ++
arch/wasm/include/asm/stacktrace.h | 20 ++
arch/wasm/include/asm/syscall.h | 62 +++++
arch/wasm/include/asm/thread_info.h | 105 ++++++++
arch/wasm/include/asm/time.h | 9 +
arch/wasm/include/asm/vmalloc.h | 6 +
arch/wasm/include/asm/wasm.h | 29 ++
arch/wasm/include/uapi/asm/Kbuild | 2 +
arch/wasm/include/uapi/asm/byteorder.h | 8 +
arch/wasm/include/uapi/asm/ptrace.h | 39 +++
arch/wasm/include/uapi/asm/sigcontext.h | 13 +
arch/wasm/include/uapi/asm/unistd.h | 6 +
arch/wasm/kernel/Makefile | 21 ++
arch/wasm/kernel/asm-offsets.c | 36 +++
arch/wasm/kernel/cpu.c | 46 ++++
arch/wasm/kernel/cpuflags.c | 5 +
arch/wasm/kernel/entry.S | 299 ++++++++++++++++++++
arch/wasm/kernel/head.S | 110 ++++++++
arch/wasm/kernel/irq.c | 55 ++++
arch/wasm/kernel/irqflags.c | 21 ++
arch/wasm/kernel/process.c | 282 +++++++++++++++++++
arch/wasm/kernel/ptrace.c | 13 +
arch/wasm/kernel/reboot.c | 31 +++
arch/wasm/kernel/setup.c | 84 ++++++
arch/wasm/kernel/signal.c | 189 +++++++++++++
arch/wasm/kernel/smp.c | 344 ++++++++++++++++++++++++
arch/wasm/kernel/stack.c | 26 ++
arch/wasm/kernel/sys_wasm.c | 19 ++
arch/wasm/kernel/syscall_table.c | 37 +++
arch/wasm/kernel/time.c | 88 ++++++
arch/wasm/kernel/traps.c | 207 ++++++++++++++
arch/wasm/kernel/vmlinux.lds.S | 65 +++++
arch/wasm/lib/Makefile | 3 +
arch/wasm/lib/delay.c | 19 ++
arch/wasm/mm/Makefile | 3 +
arch/wasm/mm/init.c | 21 ++
include/asm-generic/vmlinux.lds.h | 4 +
include/uapi/linux/audit.h | 1 +
include/uapi/linux/elf-em.h | 1 +
scripts/Makefile.clang | 1 +
scripts/Makefile.vmlinux_o | 10 +-
scripts/link-vmlinux.sh | 23 +-
66 files changed, 3151 insertions(+), 5 deletions(-)
create mode 100644 arch/wasm/Kbuild
create mode 100644 arch/wasm/Kconfig
create mode 100644 arch/wasm/Kconfig.debug
create mode 100644 arch/wasm/Makefile
create mode 100644 arch/wasm/include/asm/Kbuild
create mode 100644 arch/wasm/include/asm/barrier.h
create mode 100644 arch/wasm/include/asm/cache.h
create mode 100644 arch/wasm/include/asm/cmpxchg.h
create mode 100644 arch/wasm/include/asm/cpuflags.h
create mode 100644 arch/wasm/include/asm/current.h
create mode 100644 arch/wasm/include/asm/delay.h
create mode 100644 arch/wasm/include/asm/elf.h
create mode 100644 arch/wasm/include/asm/entry-common.h
create mode 100644 arch/wasm/include/asm/futex.h
create mode 100644 arch/wasm/include/asm/irq.h
create mode 100644 arch/wasm/include/asm/irq_work.h
create mode 100644 arch/wasm/include/asm/irqflags.h
create mode 100644 arch/wasm/include/asm/linkage.h
create mode 100644 arch/wasm/include/asm/mmu_context.h
create mode 100644 arch/wasm/include/asm/panic.h
create mode 100644 arch/wasm/include/asm/pgtable.h
create mode 100644 arch/wasm/include/asm/processor.h
create mode 100644 arch/wasm/include/asm/ptrace.h
create mode 100644 arch/wasm/include/asm/smp.h
create mode 100644 arch/wasm/include/asm/stacktrace.h
create mode 100644 arch/wasm/include/asm/syscall.h
create mode 100644 arch/wasm/include/asm/thread_info.h
create mode 100644 arch/wasm/include/asm/time.h
create mode 100644 arch/wasm/include/asm/vmalloc.h
create mode 100644 arch/wasm/include/asm/wasm.h
create mode 100644 arch/wasm/include/uapi/asm/Kbuild
create mode 100644 arch/wasm/include/uapi/asm/byteorder.h
create mode 100644 arch/wasm/include/uapi/asm/ptrace.h
create mode 100644 arch/wasm/include/uapi/asm/sigcontext.h
create mode 100644 arch/wasm/include/uapi/asm/unistd.h
create mode 100644 arch/wasm/kernel/Makefile
create mode 100644 arch/wasm/kernel/asm-offsets.c
create mode 100644 arch/wasm/kernel/cpu.c
create mode 100644 arch/wasm/kernel/cpuflags.c
create mode 100644 arch/wasm/kernel/entry.S
create mode 100644 arch/wasm/kernel/head.S
create mode 100644 arch/wasm/kernel/irq.c
create mode 100644 arch/wasm/kernel/irqflags.c
create mode 100644 arch/wasm/kernel/process.c
create mode 100644 arch/wasm/kernel/ptrace.c
create mode 100644 arch/wasm/kernel/reboot.c
create mode 100644 arch/wasm/kernel/setup.c
create mode 100644 arch/wasm/kernel/signal.c
create mode 100644 arch/wasm/kernel/smp.c
create mode 100644 arch/wasm/kernel/stack.c
create mode 100644 arch/wasm/kernel/sys_wasm.c
create mode 100644 arch/wasm/kernel/syscall_table.c
create mode 100644 arch/wasm/kernel/time.c
create mode 100644 arch/wasm/kernel/traps.c
create mode 100644 arch/wasm/kernel/vmlinux.lds.S
create mode 100644 arch/wasm/lib/Makefile
create mode 100644 arch/wasm/lib/delay.c
create mode 100644 arch/wasm/mm/Makefile
create mode 100644 arch/wasm/mm/init.c
diff --git a/Makefile b/Makefile
index 34ea74d74..c69000c85 100644
--- a/Makefile
+++ b/Makefile
@@ -479,7 +479,11 @@ KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS)
CPP = $(CC) -E
ifneq ($(LLVM),)
CC = $(LLVM_PREFIX)clang$(LLVM_SUFFIX)
-LD = $(LLVM_PREFIX)ld.lld$(LLVM_SUFFIX)
+ifneq ($(ARCH),wasm)
+ LD = $(LLVM_PREFIX)ld.lld$(LLVM_SUFFIX)
+else
+ LD = $(LLVM_PREFIX)wasm-ld$(LLVM_SUFFIX)
+endif
AR = $(LLVM_PREFIX)llvm-ar$(LLVM_SUFFIX)
NM = $(LLVM_PREFIX)llvm-nm$(LLVM_SUFFIX)
OBJCOPY = $(LLVM_PREFIX)llvm-objcopy$(LLVM_SUFFIX)
@@ -1100,8 +1104,11 @@ KBUILD_AFLAGS += $(KAFLAGS)
KBUILD_CFLAGS += $(KCFLAGS)
KBUILD_RUSTFLAGS += $(KRUSTFLAGS)
+# Not supported in Wasm binaries yet, PR seems to be in the works (LLVM D107662).
+ifneq ($(ARCH),wasm)
KBUILD_LDFLAGS_MODULE += --build-id=sha1
LDFLAGS_vmlinux += --build-id=sha1
+endif
KBUILD_LDFLAGS += -z noexecstack
ifeq ($(CONFIG_LD_IS_BFD),y)
diff --git a/arch/wasm/Kbuild b/arch/wasm/Kbuild
new file mode 100644
index 000000000..a4e40e534
--- /dev/null
+++ b/arch/wasm/Kbuild
@@ -0,0 +1 @@
+# SPDX-License-Identifier: GPL-2.0-only
diff --git a/arch/wasm/Kconfig b/arch/wasm/Kconfig
new file mode 100644
index 000000000..f6e566f50
--- /dev/null
+++ b/arch/wasm/Kconfig
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+menu "Wasm-specific options"
+
+# Wasm must run on many CPUs, as a task cannot be preempted, unless terminated.
+# Each CPU becomes a thread in the host OS, and is handled by its scheduler.
+# There is no MMU support in the current version of WebAssembly.
+
+config WASM
+ bool
+ default y
+ # The execution model of one task per cpu mandates the below options.
+ # One CPU is kept clear of tasks to act as a tick broadcast device.
+ select SMP
+ # PREEMPTION and PREEMPT_COUNT is not set, disallowing kernel preemption
+ select ARCH_NO_PREEMPT
+ select GENERIC_CLOCKEVENTS_BROADCAST
+ select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+ # Needed by NO_HZ_FULL:
+ select HAVE_VIRT_CPU_ACCOUNTING_GEN
+ # TODO: Check that we comply with the user tracking requirements!
+ select HAVE_CONTEXT_TRACKING_USER
+
+ select NO_IP
+ select THREAD_INFO_IN_TASK
+ select ARCH_TASK_STRUCT_ON_STACK
+ select ARCH_TASK_STRUCT_ALLOCATOR
+ select ARCH_THREAD_STACK_ALLOCATOR
+ select GENERIC_SMP_IDLE_THREAD
+ select UACCESS_MEMCPY
+ select ARCH_USE_QUEUED_RWLOCKS
+ select GENERIC_CPU_DEVICES
+ select GENERIC_CSUM
+ select GENERIC_ENTRY
+ select GENERIC_HWEIGHT
+ select GENERIC_IRQ_SHOW
+ select HAVE_SYSCALL_TRACEPOINTS
+ select ARCH_HAVE_PANIC_NOTIFY
+ select ARCH_USE_BUILTIN_BSWAP
+ select ARCH_SUPPORTS_LTO_CLANG
+ select ARCH_SUPPORTS_LTO_CLANG_THIN
+
+ # TODO: Very inefficient, replace with native stuff. Our atomic impl.
+ # of xchg and cmpxchg already supports 64-bit integers, we could use it.
+ select GENERIC_ATOMIC64
+
+config SMP
+ bool "Symmetric Multi-Processing"
+ help
+ This enables support for systems with more than one CPU. In the
+ context of Wasm, every task needs one CPU, since there is no
+ preemption and no interrupts. If you say N here, you will only ever
+ be able to run one task. Only do this if you really know what
+ you're doing - there is a big risk you will lock up your system.
+
+ If you don't know what to do here, say Y.
+
+config HZ
+ int
+ default 100
+
+config NR_CPUS
+ int
+ range 1 8192
+ default 64
+
+config GENERIC_CSUM
+ def_bool y
+
+config GENERIC_HWEIGHT
+ def_bool y
+
+config ARCH_HAVE_PANIC_NOTIFY
+ bool
+
+endmenu
+
+source "drivers/Kconfig"
diff --git a/arch/wasm/Kconfig.debug b/arch/wasm/Kconfig.debug
new file mode 100644
index 000000000..8fc81eafa
--- /dev/null
+++ b/arch/wasm/Kconfig.debug
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config EARLY_PRINTK
+ bool "Early printk"
+ default y
+ help
+ Write kernel log output directly to console.log.
+
+ This is useful for kernel debugging when your machine crashes very
+ early before the console code is initialized.
diff --git a/arch/wasm/Makefile b/arch/wasm/Makefile
new file mode 100644
index 000000000..b86103e0b
--- /dev/null
+++ b/arch/wasm/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+KBUILD_DEFCONFIG := wasm_defconfig
+
+KCFLAGS += -EL -m32
+KCFLAGS += -nostdlib -fno-builtin
+
+# These flags are needed so that wasm-ld can be run with --shared-memory.
+KCFLAGS += -Xclang -target-feature -Xclang +atomics
+KCFLAGS += -Xclang -target-feature -Xclang +bulk-memory
+
+core-y += arch/wasm/kernel/
+core-y += arch/wasm/mm/
+libs-y += arch/wasm/lib/
+
+PHONY += bzImage
+
+all: bzImage
+
+bzImage: vmlinux
+
+define archhelp
+ echo '* bzImage - Compressed kernel image (arch/wasm/boot/bzImage)'
+endef
diff --git a/arch/wasm/include/asm/Kbuild b/arch/wasm/include/asm/Kbuild
new file mode 100644
index 000000000..876a533cd
--- /dev/null
+++ b/arch/wasm/include/asm/Kbuild
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+# TODO: Clean up headers that are not used by this arch.
+generic-y += access_ok.h
+generic-y += agp.h
+generic-y += asm-offsets.h
+generic-y += asm-prototypes.h
+generic-y += atomic64.h
+generic-y += audit_change_attr.h
+generic-y += audit_dir_write.h
+generic-y += audit_read.h
+generic-y += audit_signal.h
+generic-y += audit_write.h
+generic-y += bitsperlong.h
+generic-y += cmpxchg-local.h
+generic-y += early_ioremap.h
+generic-y += error-injection.h
+generic-y += export.h
+generic-y += extable.h
+generic-y += fixmap.h
+generic-y += flat.h
+generic-y += getorder.h
+generic-y += hugetlb.h
+generic-y += hyperv-tlfs.h
+generic-y += ide_iops.h
+generic-y += int-ll64.h
+generic-y += ioctl.h
+generic-y += iomap.h
+generic-y += kvm_para.h
+generic-y += kvm_types.h
+generic-y += logic_io.h
+generic-y += mcs_spinlock.h
+generic-y += memory_model.h
+generic-y += mm_hooks.h
+generic-y += mmiowb_types.h
+generic-y += mshyperv.h
+generic-y += numa.h
+generic-y += page.h
+generic-y += param.h
+generic-y += parport.h
+generic-y += pci_iomap.h
+generic-y += qrwlock.h
+generic-y += qrwlock_types.h
+generic-y += qspinlock.h
+generic-y += qspinlock_types.h
+generic-y += resource.h
+generic-y += seccomp.h
+generic-y += set_memory.h
+generic-y += signal.h
+generic-y += spinlock.h
+generic-y += spinlock_types.h
+generic-y += statfs.h
+generic-y += string.h
+generic-y += syscalls.h
+generic-y += tlb.h
+generic-y += user.h
+generic-y += vmlinux.lds.h
+generic-y += vtime.h
diff --git a/arch/wasm/include/asm/barrier.h b/arch/wasm/include/asm/barrier.h
new file mode 100644
index 000000000..86d3fc9b2
--- /dev/null
+++ b/arch/wasm/include/asm/barrier.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_BARRIER_H
+#define _ASM_WASM_BARRIER_H
+
+/*
+ * Inspired by:
+ * https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0124r7.html
+ */
+#define mb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define rmb() __atomic_thread_fence(__ATOMIC_ACQ_REL)
+#define wmb() __atomic_thread_fence(__ATOMIC_ACQ_REL)
+
+#include <asm-generic/barrier.h>
+
+#endif /* _ASM_WASM_BARRIER_H */
diff --git a/arch/wasm/include/asm/cache.h b/arch/wasm/include/asm/cache.h
new file mode 100644
index 000000000..1abcb0191
--- /dev/null
+++ b/arch/wasm/include/asm/cache.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_CACHE_H
+#define _ASM_WASM_CACHE_H
+
+/*
+ * Most architectures executing Wasm code have a cacheline size of 64 bytes.
+ */
+#define L1_CACHE_SHIFT 6
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
+#endif /* _ASM_WASM_CACHE_H */
diff --git a/arch/wasm/include/asm/cmpxchg.h b/arch/wasm/include/asm/cmpxchg.h
new file mode 100644
index 000000000..a870f2682
--- /dev/null
+++ b/arch/wasm/include/asm/cmpxchg.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_CMPXCHG_H
+#define _ASM_WASM_CMPXCHG_H
+
+#include <linux/types.h>
+#include <linux/irqflags.h>
+
+/*
+ * Inspired by:
+ * https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0124r7.html
+ * https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/tree/include/asm-generic/iso-cmpxchg.h?h=iso-atomic
+ *
+ * TODO: McKenney et. al. above mention that atomic operations that return a
+ * value should be marked with __ATOMIC_RELAXED and wrapped with
+ * smp_mb__before_atomic()/smp_mb__after_atomic() calls. Howells above,
+ * however, just applies __ATOMIC_SEQ_CST. What is the best approach?
+ */
+
+/*
+ * This function doesn't exist, so you'll get a linker error if
+ * something tries to do an invalidly-sized xchg().
+ */
+extern unsigned long long __generic_xchg_called_with_bad_pointer(void);
+
+static __always_inline unsigned long long __generic_xchg(
+ unsigned long long val, volatile void *ptr, int size)
+{
+ switch (size) {
+ case 1:
+ return __atomic_exchange_n(
+ (volatile u8 *)ptr, (u8)val, __ATOMIC_SEQ_CST);
+
+ case 2:
+ return __atomic_exchange_n(
+ (volatile u16 *)ptr, (u16)val, __ATOMIC_SEQ_CST);
+
+ case 4:
+ return __atomic_exchange_n(
+ (volatile u32 *)ptr, (u32)val, __ATOMIC_SEQ_CST);
+
+ case 8:
+ return __atomic_exchange_n(
+ (volatile u64 *)ptr, (u64)val, __ATOMIC_SEQ_CST);
+
+ default:
+ return __generic_xchg_called_with_bad_pointer();
+ }
+}
+
+#define arch_xchg(ptr, x) ({ \
+ ((__typeof__(*(ptr))) __generic_xchg((unsigned long long)(x), (ptr), \
+ sizeof(*(ptr)))); \
+})
+
+static __always_inline unsigned long long __generic_cmpxchg(volatile void *ptr,
+ unsigned long long oldVal, unsigned long long newVal, int size)
+{
+ /*
+ * Unlike this function's signature, __atomic_compare_exchange_n will
+ * modify oldVal with the actual value if the compare fails.
+ */
+ u8 expected8;
+ u16 expected16;
+ u32 expected32;
+ u64 expected64;
+
+ switch (size) {
+ case 1:
+ expected8 = (u8)oldVal;
+ __atomic_compare_exchange_n(
+ (volatile u8 *)ptr, &expected8, (u8)newVal,
+ false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+ return expected8;
+
+ case 2:
+ expected16 = (u16)oldVal;
+ __atomic_compare_exchange_n(
+ (volatile u16 *)ptr, &expected16, (u16)newVal,
+ false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+ return expected16;
+
+ case 4:
+ expected32 = (u32)oldVal;
+ __atomic_compare_exchange_n(
+ (volatile u32 *)ptr, &expected32, (u32)newVal,
+ false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+ return expected32;
+
+ case 8:
+ expected64 = (u64)oldVal;
+ __atomic_compare_exchange_n(
+ (volatile u64 *)ptr, &expected64, (u64)newVal,
+ false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+ return expected64;
+
+ default:
+ return __generic_xchg_called_with_bad_pointer();
+ }
+}
+
+#define arch_cmpxchg(ptr, o, n) ({ \
+ ((__typeof__(*(ptr)))__generic_cmpxchg((ptr), (unsigned long long)(o), \
+ (unsigned long long)(n), sizeof(*(ptr)))); \
+})
+
+#define arch_cmpxchg64 arch_cmpxchg
+#define arch_cmpxchg_local arch_cmpxchg
+#define arch_cmpxchg64_local arch_cmpxchg
+
+#endif /* _ASM_WASM_CMPXCHG_H */
diff --git a/arch/wasm/include/asm/cpuflags.h b/arch/wasm/include/asm/cpuflags.h
new file mode 100644
index 000000000..365502f4f
--- /dev/null
+++ b/arch/wasm/include/asm/cpuflags.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_CPUFLAGS_H
+#define _ASM_WASM_CPUFLAGS_H
+
+#include <linux/percpu.h>
+
+/*
+ * CPU flags handled by Wasm. These are used for accounting in many places.
+ * Interrupt: 1 if local interrupts are enabled.
+ * User mode: 1 if we're not in privileged mode.
+ *
+ * The reset value is 0: we boot into privileged mode with interrupts disabled.
+ */
+#define CPUFLAGS_INTERRUPT 0
+#define CPUFLAGS_USER_MODE 1
+
+#define CPUFLAGS_RESET_VALUE 0U
+
+DECLARE_PER_CPU(unsigned long, wasm_cpuflags);
+
+#endif /* _ASM_WASM_CPUFLAGS_H */
diff --git a/arch/wasm/include/asm/current.h b/arch/wasm/include/asm/current.h
new file mode 100644
index 000000000..5f104a966
--- /dev/null
+++ b/arch/wasm/include/asm/current.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_CURRENT_H
+#define _ASM_WASM_CURRENT_H
+
+/*
+ * Questionable but necessary to keep get_current() inline, due to the cyclic
+ * dependency between task_struct and thread_info.
+ */
+#ifndef ASM_OFFSETS_C
+#include <asm/asm-offsets.h>
+#endif
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+
+struct task_struct;
+
+static inline struct task_struct *get_current(void)
+{
+#ifndef ASM_OFFSETS_C
+ char dummy; /* Something stored in the current kernel stack. */
+ unsigned long thread_page = (unsigned long)&dummy & THREAD_MASK;
+ return (struct task_struct *)(thread_page + THREAD_TASK_STRUCT_OFFSET);
+#else
+ return NULL;
+#endif
+}
+
+#define current (get_current())
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_WASM_CURRENT_H */
diff --git a/arch/wasm/include/asm/delay.h b/arch/wasm/include/asm/delay.h
new file mode 100644
index 000000000..0e3bd9346
--- /dev/null
+++ b/arch/wasm/include/asm/delay.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_DELAY_H
+#define _ASM_WASM_DELAY_H
+
+extern void __delay(unsigned long loops);
+extern void __bad_udelay(void);
+extern void __bad_ndelay(void);
+
+/*
+ * Wasm uses 1 loop = 1 nanosecond. This makes the conversion easy.
+ *
+ * Just like the rest of the kernel, these macros police you if you try to
+ * delay for too long. You should use a sleep function that calls schedule()
+ * internally if you need longer sleeps than this. In Wasm in particular, usage
+ * of these macros is really discouraged (what are you busy-waiting for?).
+ */
+
+#define udelay(n) (__builtin_constant_p(n) && (n) > 20000 ? \
+ __bad_udelay() : __delay((n) * 1000))
+
+#define ndelay(n) (__builtin_constant_p(n) && (n) > 20000000 ? \
+ __bad_ndelay() : __delay(n))
+
+#endif /* _ASM_WASM_DELAY_H */
diff --git a/arch/wasm/include/asm/elf.h b/arch/wasm/include/asm/elf.h
new file mode 100644
index 000000000..3a02588f9
--- /dev/null
+++ b/arch/wasm/include/asm/elf.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_ELF_H
+#define _ASM_WASM_ELF_H
+
+#include <uapi/linux/elf.h>
+#include <linux/compat.h>
+#include <uapi/asm/elf.h>
+#include <asm/auxvec.h>
+#include <asm/byteorder.h>
+
+#define ELF_ARCH EM_WASM
+#define ELF_CLASS ELFCLASS32
+#define ELF_DATA ELFDATA2LSB
+
+#define elf_check_arch(x) (((x)->e_machine == ELF_ARCH) && \
+ ((x)->e_ident[EI_CLASS] == ELF_CLASS))
+
+extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
+#define compat_elf_check_arch compat_elf_check_arch
+
+#define CORE_DUMP_USE_REGSET
+#define ELF_EXEC_PAGESIZE (PAGE_SIZE)
+
+/*
+ * This is the location that an ET_DYN program is loaded if exec'ed. Typical
+ * use of this is to invoke "./ld.so someprog" to test out a new version of
+ * the loader. We need to make sure that it is out of the way of the program
+ * that it will "exec", and that there is sufficient room for the brk.
+ */
+#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2)
+
+#ifdef CONFIG_64BIT
+#ifdef CONFIG_COMPAT
+#define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? \
+ 0x7ff >> (PAGE_SHIFT - 12) : \
+ 0x3ffff >> (PAGE_SHIFT - 12))
+#else
+#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12))
+#endif
+#endif
+
+/*
+ * Provides information on the available set of ISA extensions to userspace,
+ * via a bitmap that corresponds to each single-letter ISA extension. This is
+ * essentially defunct, but will remain for compatibility with userspace.
+ */
+#define ELF_HWCAP (elf_hwcap & ((1UL << RISCV_ISA_EXT_BASE) - 1))
+extern unsigned long elf_hwcap;
+
+/*
+ * This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization. This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+ */
+#define ELF_PLATFORM (NULL)
+
+#define COMPAT_ELF_PLATFORM (NULL)
+
+#define ELF_CORE_COPY_REGS(dest, regs) \
+do { \
+ *(struct user_regs_struct *)&(dest) = \
+ *(struct user_regs_struct *)regs; \
+} while (0);
+
+#endif /* _ASM_WASM_ELF_H */
diff --git a/arch/wasm/include/asm/entry-common.h b/arch/wasm/include/asm/entry-common.h
new file mode 100644
index 000000000..20155e98f
--- /dev/null
+++ b/arch/wasm/include/asm/entry-common.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_ENTRY_COMMON_H
+#define _ASM_WASM_ENTRY_COMMON_H
+
+#include <asm/stacktrace.h>
+
+/*
+ * Needed by common/entry.c. Returning -1 signals failure, should it ever run...
+ *
+ * Wasm could in theory support seccomp, but the transformation from non-seccomp
+ * to seccomp mode would require quite a bit of thought to get everything right.
+ */
+#define __secure_computing(...) (-1)
+
+#endif /* _ASM_WASM_ENTRY_COMMON_H */
diff --git a/arch/wasm/include/asm/futex.h b/arch/wasm/include/asm/futex.h
new file mode 100644
index 000000000..05f901e4e
--- /dev/null
+++ b/arch/wasm/include/asm/futex.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_FUTEX_H
+#define _ASM_WASM_FUTEX_H
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
+#define FUTEX_MAX_LOOPS 128
+
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
+{
+ int loops;
+ u32 expected;
+
+ if (!access_ok(uaddr, sizeof(u32)))
+ return -EFAULT;
+
+ for (loops = 0; loops < FUTEX_MAX_LOOPS; ++loops) {
+ expected = oldval;
+ if (__atomic_compare_exchange_n((volatile u32 *)uaddr,
+ &expected, newval, false, __ATOMIC_SEQ_CST,
+ __ATOMIC_RELAXED)) {
+ *uval = oldval;
+ return 0;
+ }
+ }
+
+ return -EAGAIN;
+}
+
+static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
+ u32 __user *uaddr)
+{
+ if (!access_ok(uaddr, sizeof(u32)))
+ return -EFAULT;
+
+ switch (op) {
+ case FUTEX_OP_SET:
+ *oval = __atomic_exchange_n(
+ (volatile u32 *)uaddr, oparg, __ATOMIC_SEQ_CST);
+ break;
+ case FUTEX_OP_ADD:
+ *oval = __atomic_fetch_add(
+ (volatile u32 *)uaddr, oparg, __ATOMIC_SEQ_CST);
+ break;
+ case FUTEX_OP_OR:
+ *oval = __atomic_fetch_or(
+ (volatile u32 *)uaddr, oparg, __ATOMIC_SEQ_CST);
+ break;
+ case FUTEX_OP_ANDN:
+ *oval = __atomic_fetch_and(
+ (volatile u32 *)uaddr, ~oparg, __ATOMIC_SEQ_CST);
+ break;
+ case FUTEX_OP_XOR:
+ *oval = __atomic_fetch_xor(
+ (volatile u32 *)uaddr, oparg, __ATOMIC_SEQ_CST);
+ break;
+ default:
+ return -ENOSYS;
+ }
+
+ return 0;
+}
+
+#endif /* _ASM_WASM_FUTEX_H */
diff --git a/arch/wasm/include/asm/irq.h b/arch/wasm/include/asm/irq.h
new file mode 100644
index 000000000..5069bef1f
--- /dev/null
+++ b/arch/wasm/include/asm/irq.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_IRQ_H
+#define _ASM_WASM_IRQ_H
+
+#define NR_IRQS 32
+
+#define WASM_IRQ_IPI 0
+#define WASM_IRQ_TIMER 1
+
+#endif /* _ASM_WASM_IRQ_H */
diff --git a/arch/wasm/include/asm/irq_work.h b/arch/wasm/include/asm/irq_work.h
new file mode 100644
index 000000000..fa9c40b0d
--- /dev/null
+++ b/arch/wasm/include/asm/irq_work.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_IRQ_WORK_H
+#define _ASM_WASM_IRQ_WORK_H
+
+extern void arch_irq_work_raise(void);
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+ return true;
+}
+
+#endif /* _ASM_WASM_IRQ_WORK_H */
diff --git a/arch/wasm/include/asm/irqflags.h b/arch/wasm/include/asm/irqflags.h
new file mode 100644
index 000000000..337a882f9
--- /dev/null
+++ b/arch/wasm/include/asm/irqflags.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_IRQFLAGS_H
+#define _ASM_WASM_IRQFLAGS_H
+
+/*
+ * arch_local_save_flags and arch_local_irq_restore are defined as non-static
+ * functions as this header is included from places where percpu-variables and
+ * even definitions for raw_smp_processor_id() cannot be included...
+ */
+
+#include <asm-generic/irqflags.h>
+
+#endif /* _ASM_WASM_IRQFLAGS_H */
diff --git a/arch/wasm/include/asm/linkage.h b/arch/wasm/include/asm/linkage.h
new file mode 100644
index 000000000..49f6776c5
--- /dev/null
+++ b/arch/wasm/include/asm/linkage.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_LINKAGE_H
+#define _ASM_WASM_LINKAGE_H
+
+/*
+ * llvm-wasm crashes when generating the output file using the default
+ * definition in linux/linkage.h. In addition to this, wasm-ld does not like it
+ * if two function signatures don't match, ruling out using a function with
+ * __attribute__ ((weak, alias("sys_ni_syscall"))) here, even if it is lacking a
+ * prototype (it assumes "one" (void) param).
+ *
+ * This has to be fixed by the host (or possibly some post-process build script)
+ * because there is no way to tell which prototype to use for which symbol.
+ * Getting rid of these stray declarations to begin with (e.g. setting
+ * ARCH_HAS_SYSCALL_WRAPPER) unfortunately causes problems for the
+ * sys_call_table generation. sys_call_table could be generated in some other
+ * way (or shape) but that would require other hacks to find available syscalls.
+ */
+#define cond_syscall(x)
+
+#endif /* _ASM_WASM_LINKAGE_H */
diff --git a/arch/wasm/include/asm/mmu_context.h b/arch/wasm/include/asm/mmu_context.h
new file mode 100644
index 000000000..e9414c5c0
--- /dev/null
+++ b/arch/wasm/include/asm/mmu_context.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_MMU_CONTEXT_H
+#define _ASM_WASM_MMU_CONTEXT_H
+
+#include <asm-generic/nommu_context.h>
+
+#endif /* _ASM_WASM_MMU_CONTEXT_H */
diff --git a/arch/wasm/include/asm/panic.h b/arch/wasm/include/asm/panic.h
new file mode 100644
index 000000000..52ad0fa32
--- /dev/null
+++ b/arch/wasm/include/asm/panic.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_PANIC_H
+#define _ASM_WASM_PANIC_H
+
+#include <asm/wasm.h>
+
+static inline void arch_panic_notify(const char *msg)
+{
+ wasm_panic(msg);
+}
+
+#include <asm-generic/panic.h>
+
+#endif /* _ASM_WASM_PANIC_H */
diff --git a/arch/wasm/include/asm/pgtable.h b/arch/wasm/include/asm/pgtable.h
new file mode 100644
index 000000000..eeafad742
--- /dev/null
+++ b/arch/wasm/include/asm/pgtable.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_PGTABLE_H
+#define _ASM_WASM_PGTABLE_H
+
+#include <asm-generic/pgtable-nopud.h>
+
+#include <linux/slab.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/io.h>
+
+/*
+ * No MMU support so do nothing...
+ * Inspired by the various other NOMMU implementations in the kernel.
+ */
+
+#define pgd_present(pgd) (1)
+#define pgd_none(pgd) (0)
+#define pgd_bad(pgd) (0)
+#define pgd_clear(pgdp)
+#define pmd_offset(a, b) ((void *)0)
+
+#define PAGE_NONE __pgprot(0)
+#define PAGE_SHARED __pgprot(0)
+#define PAGE_COPY __pgprot(0)
+#define PAGE_READONLY __pgprot(0)
+#define PAGE_KERNEL __pgprot(0)
+
+#define VMALLOC_START 0
+#define VMALLOC_END 0xFFFFFFFF
+#define KMAP_START 0
+#define KMAP_END 0xFFFFFFFF
+
+extern void paging_init(void);
+#define swapper_pg_dir ((pgd_t *) 0)
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+#endif /* _ASM_WASM_PGTABLE_H */
diff --git a/arch/wasm/include/asm/processor.h b/arch/wasm/include/asm/processor.h
new file mode 100644
index 000000000..93243e16d
--- /dev/null
+++ b/arch/wasm/include/asm/processor.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_PROCESSOR_H
+#define _ASM_WASM_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+struct pt_regs;
+
+/* 3 GB RAM for userspace, 1 GB for the kernel. */
+#define TASK_SIZE (0xC0000000)
+
+/*
+ * We run interrupts on CPU 1, keep it clear. Why not CPU 0? Because init needs
+ * to run on CPU 0 for a while. We don't need interrupts until SMP has started,
+ * but we need init before.
+ */
+#define IRQ_CPU 1
+
+#define cpu_relax() barrier()
+
+struct thread_struct {
+};
+
+#define INIT_THREAD { \
+}
+
+void start_thread(struct pt_regs *regs, unsigned long stack_pointer);
+
+void do_irq_stacked(int irq_nr);
+
+int user_mode_tail(void);
+
+struct task_struct;
+static inline unsigned long __get_wchan(struct task_struct *p)
+{
+ /* Should return the function before schedule() was called. */
+ /* Will be shown under the "Waiting Channel" of the ps command. */
+ return 0;
+}
+
+/* We don't have an instruction pointer. See instruction_pointer.h */
+#define KSTK_EIP(task) (0)
+
+/* We could possibly expose the stack pointer (has some data)...? */
+#define KSTK_ESP(task) (0)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_WASM_PROCESSOR_H */
diff --git a/arch/wasm/include/asm/ptrace.h b/arch/wasm/include/asm/ptrace.h
new file mode 100644
index 000000000..40b4ff72d
--- /dev/null
+++ b/arch/wasm/include/asm/ptrace.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_PTRACE_H
+#define _ASM_WASM_PTRACE_H
+
+#include <uapi/asm/ptrace.h>
+#include <asm/cpuflags.h>
+#include <asm/irqflags.h>
+
+#ifndef __ASSEMBLY__
+
+#define user_mode(regs) ((regs)->cpuflags & BIT(CPUFLAGS_USER_MODE))
+
+/* Not available in Wasm. */
+#define instruction_pointer(regs) (0)
+
+#define current_user_stack_pointer() (0)
+
+/* Not available (maybe we could extract this from a stacktrace?) */
+#define profile_pc(regs) instruction_pointer(regs)
+
+#define task_pt_regs(task) ((struct pt_regs *)(task) - 1U)
+#define current_pt_regs() task_pt_regs(current)
+
+#define task_switch_stack(task) ((struct switch_stack *)task_pt_regs(task) - 1U)
+#define current_switch_stack() task_switch_stack(current)
+
+static inline int regs_irqs_disabled(struct pt_regs *regs)
+{
+ return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_WASM_PTRACE_H */
diff --git a/arch/wasm/include/asm/smp.h b/arch/wasm/include/asm/smp.h
new file mode 100644
index 000000000..d47beeccb
--- /dev/null
+++ b/arch/wasm/include/asm/smp.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_SMP_H
+#define _ASM_WASM_SMP_H
+
+#include <asm/wasm.h>
+
+#ifdef CONFIG_SMP
+
+#define raw_smp_processor_id() (current_thread_info()->cpu)
+
+void __init setup_smp_ipi(void);
+
+void arch_send_call_function_single_ipi(int cpu);
+
+static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+ int cpu;
+
+ for_each_cpu(cpu, mask)
+ arch_send_call_function_single_ipi(cpu);
+}
+
+__visible void raise_interrupt(int cpu, int irq_nr);
+
+#endif /* CONFIG_SMP */
+
+#endif /* _ASM_WASM_SMP_H */
diff --git a/arch/wasm/include/asm/stacktrace.h b/arch/wasm/include/asm/stacktrace.h
new file mode 100644
index 000000000..2f702245e
--- /dev/null
+++ b/arch/wasm/include/asm/stacktrace.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_STACKTRACE_H
+#define _ASM_WASM_STACKTRACE_H
+
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+
+#define WASM_STACKTRACE_MAX_SIZE 1000U
+
+static inline bool on_thread_stack(void)
+{
+ /*
+ * Since current is directly derived from the stack pointer on Wasm, we
+ * can do this sneaky trick of comparing stack ends.
+ */
+ return current->stack == (void*)((unsigned long)current & THREAD_MASK);
+}
+
+#endif /* _ASM_WASM_STACKTRACE_H */
diff --git a/arch/wasm/include/asm/syscall.h b/arch/wasm/include/asm/syscall.h
new file mode 100644
index 000000000..a31199740
--- /dev/null
+++ b/arch/wasm/include/asm/syscall.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_SYSCALL_H
+#define _ASM_WASM_SYSCALL_H
+
+#include <uapi/linux/audit.h>
+#include <asm/ptrace.h>
+
+extern void (* const sys_call_table[])(void);
+
+struct task_struct;
+
+static inline long syscall_get_nr(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return regs->syscall_nr;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ /* We don't need to rollback anything on Wasm. */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return IS_ERR_VALUE(regs->syscall_ret) ? regs->syscall_ret : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return regs->syscall_ret;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+ struct pt_regs *regs,
+ int error, long val)
+{
+ regs->syscall_ret = error ? (long)error : val;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned long *args)
+{
+	/* args holds the 6 argument values only; the nr is read via syscall_get_nr(). */
+	memcpy(args, regs->syscall_args, sizeof(regs->syscall_args));
+}
+
+static inline int syscall_get_arch(struct task_struct *task)
+{
+ return AUDIT_ARCH_WASM32;
+}
+
+static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
+{
+ return false;
+}
+
+#endif /* _ASM_WASM_SYSCALL_H */
diff --git a/arch/wasm/include/asm/thread_info.h b/arch/wasm/include/asm/thread_info.h
new file mode 100644
index 000000000..2f9c43907
--- /dev/null
+++ b/arch/wasm/include/asm/thread_info.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_THREAD_INFO_H
+#define _ASM_WASM_THREAD_INFO_H
+
+#include <asm/page.h>
+
+/*
+ * In the Wasm arch, thread_info sits at the top of task_struct and both reside
+ * at the very end of the stack area (which grows downwards).
+ *
+ * HIGHER ADDRESSES
+ *
+ * [ [ [...] ] ] ^ <- (__stack_pointer & THREAD_MASK) + THREAD_SIZE
+ * [ [ [thread_info]] ] |
+ * [ [task_struct ] ] | <- current, current_thread_info()
+ * [ [stack ] ] | <- (stack starts with pt_regs + possibly switch_stack)
+ * [ [ [...] ] ] | <- __stack_pointer (growing towards lower addresses)
+ * [ ] |
+ * [ free space ] | THREAD_SIZE
+ * [ ] v <- (__stack_pointer & THREAD_MASK)
+ *
+ * LOWER ADDRESSES
+ *
+ * As can be seen, current == current_thread_info() in this arch. In order to
+ * access any of these, __stack_pointer can be masked by THREAD_MASK, since
+ * the kernel stack for every task will be aligned on a THREAD_SIZE boundary.
+ *
+ * Example of memory-growing instructions Resides in
+ * -------------------------------------------- --------------------------------
+ * iX.const, iX.load, local.get, global.get Wasm internal stack
+ * global.set __stack_pointer __stack_pointer managed stack
+ *
+ * Stack usage in Wasm is pretty sparse. Most data resides in "locals" or on the
+ * internal Wasm stack. Both of these are not accessible from within Wasm,
+ * except outside the local usage of them of course. The stack we manage is used
+ * for things that Wasm can't put on any of those, for example when a pointer is
+ * constructed when taking the address of an auto variable (i.e. the
+ * function/block scope in C). That stack is referred to by the Wasm global
+ * __stack_pointer and is known by the compiler. It is not part of the Wasm
+ * standard, but makes certain parts of the C standard possible to compile. Two
+ * pages should for this reason be enough as kernel stack. struct task_struct
+ * (including struct thread_info at its base) is about 2K, leaving 6K for the
+ * kernel stack.
+ */
+#define THREAD_SIZE_ORDER (1)
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_MASK (~(THREAD_SIZE - 1))
+
+#ifndef __ASSEMBLY__
+
+struct thread_info {
+ unsigned int cpu;
+ unsigned int flags;
+ int preempt_count; /* Needed but not really used */
+ int instance_depth; /* 0 = idle task, 1 = running */
+ unsigned long syscall_work; /* SYSCALL_WORK_ flags */
+};
+
+#define INIT_THREAD_INFO(tsk) \
+{ \
+ .cpu = 0, \
+ .flags = 0, \
+ .preempt_count = INIT_PREEMPT_COUNT, \
+ .instance_depth = 0, \
+}
+
+struct task_struct;
+
+static inline void *arch_alloc_thread_stack_node(
+ struct task_struct *tsk, int node)
+{
+ return (void *)((unsigned long)tsk & THREAD_MASK);
+}
+
+static inline void arch_free_thread_stack(struct task_struct *tsk) { }
+
+struct task_struct *alloc_task_struct_node(int node);
+void free_task_struct(struct task_struct *tsk);
+
+#endif /* !__ASSEMBLY__ */
+
+#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
+#define TIF_SIGPENDING 1 /* signal pending */
+#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
+#define TIF_NOTIFY_SIGNAL 3 /* signal notifications exist */
+#define TIF_MEMDIE 4 /* is terminating due to OOM killer */
+#define TIF_NOTIFY_RESUME 5 /* callback before returning to user */
+#define TIF_NEVER_RUN 6 /* was never run by the scheduler */
+#define TIF_RELOAD_PROGRAM 7 /* should reload code at syscall end */
+#define TIF_DELIVER_SIGNAL 8 /* run sighandler at syscall end */
+#define TIF_RETURN_SIGNAL 9 /* return sighandler at syscall end */
+
+#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_MEMDIE (1 << TIF_MEMDIE)
+#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_NEVER_RUN (1 << TIF_NEVER_RUN)
+#define _TIF_RELOAD_PROGRAM (1 << TIF_RELOAD_PROGRAM)
+#define _TIF_DELIVER_SIGNAL (1 << TIF_DELIVER_SIGNAL)
+#define _TIF_RETURN_SIGNAL (1 << TIF_RETURN_SIGNAL)
+
+#endif /* _ASM_WASM_THREAD_INFO_H */
diff --git a/arch/wasm/include/asm/time.h b/arch/wasm/include/asm/time.h
new file mode 100644
index 000000000..2577a1151
--- /dev/null
+++ b/arch/wasm/include/asm/time.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_TIME_H
+#define _ASM_WASM_TIME_H
+
+void wasm_clockevent_enable(void);
+void wasm_program_timer(unsigned long delta);
+
+#endif /* _ASM_WASM_TIME_H */
diff --git a/arch/wasm/include/asm/vmalloc.h b/arch/wasm/include/asm/vmalloc.h
new file mode 100644
index 000000000..f1c2216f2
--- /dev/null
+++ b/arch/wasm/include/asm/vmalloc.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_VMALLOC_H
+#define _ASM_WASM_VMALLOC_H
+
+#endif /* _ASM_WASM_VMALLOC_H */
diff --git a/arch/wasm/include/asm/wasm.h b/arch/wasm/include/asm/wasm.h
new file mode 100644
index 000000000..20decb1d5
--- /dev/null
+++ b/arch/wasm/include/asm/wasm.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_WASM_WASM_H
+#define _ASM_WASM_WASM_H
+
+/* These are symbols imported from the Wasm host. */
+
+extern void wasm_panic(const char *msg);
+extern void wasm_dump_stacktrace(char* buffer, unsigned long max_size);
+
+extern void wasm_start_cpu(unsigned int cpu, struct task_struct *idle_task,
+ unsigned long start_stack);
+extern void wasm_stop_cpu(unsigned int cpu);
+
+extern struct task_struct *wasm_create_and_run_task(
+ struct task_struct *prev_task, struct task_struct *new_task,
+ const char *name, unsigned long bin_start, unsigned long bin_end,
+ unsigned long data_start, unsigned long table_start);
+extern void wasm_release_task(struct task_struct *dead_task);
+extern struct task_struct *wasm_serialize_tasks(struct task_struct *prev_task,
+ struct task_struct *next_task);
+
+extern void wasm_load_executable(unsigned long bin_start, unsigned long bin_end,
+ unsigned long data_start, unsigned long table_start);
+extern void wasm_reload_program(void);
+
+extern void wasm_clone_callback(void);
+
+#endif /* _ASM_WASM_WASM_H */
diff --git a/arch/wasm/include/uapi/asm/Kbuild b/arch/wasm/include/uapi/asm/Kbuild
new file mode 100644
index 000000000..b4bb51a5c
--- /dev/null
+++ b/arch/wasm/include/uapi/asm/Kbuild
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+generic-y += ucontext.h
diff --git a/arch/wasm/include/uapi/asm/byteorder.h b/arch/wasm/include/uapi/asm/byteorder.h
new file mode 100644
index 000000000..3f8945ac4
--- /dev/null
+++ b/arch/wasm/include/uapi/asm/byteorder.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+
+#ifndef _UAPI_ASM_WASM_BYTEORDER_H
+#define _UAPI_ASM_WASM_BYTEORDER_H
+
+#include <linux/byteorder/little_endian.h>
+
+#endif /* _UAPI_ASM_WASM_BYTEORDER_H */
diff --git a/arch/wasm/include/uapi/asm/ptrace.h b/arch/wasm/include/uapi/asm/ptrace.h
new file mode 100644
index 000000000..0761ce261
--- /dev/null
+++ b/arch/wasm/include/uapi/asm/ptrace.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+
+#ifndef _UAPI_ASM_WASM_PTRACE_H
+#define _UAPI_ASM_WASM_PTRACE_H
+
+#define PTRACE_SYSEMU 31
+#define PTRACE_SYSEMU_SINGLESTEP 32
+
+#ifndef __ASSEMBLY__
+
+/* Registers stored during kernel entry (syscalls, IRQs and exceptions). */
+struct pt_regs {
+ unsigned long stack_pointer; /* The __stack_pointer global. */
+ unsigned long cpuflags; /* CPU Flags (interrupt, user mode). */
+ int syscall_nr; /* Needed by syscall_get_nr() etc. */
+ long syscall_args[6]; /* Needed by syscall_get_args() etc. */
+ long syscall_ret; /* Needed by syscall_*_return() etc. */
+};
+
+#define PT_REGS_INIT ((struct pt_regs){.syscall_nr = -1})
+
+/* Registers stored when switching between user processes (and signals). */
+struct switch_stack {
+ /* When kthread, kernel thread callback with arg. */
+ int (*fn)(void *);
+ void *fn_arg;
+
+ /* When user task, the __tls_base global. Unused by the kernel. */
+ unsigned long tls;
+};
+
+/* Registers for user processes (gdb etc.), stable ABI compared to pt_regs. */
+struct user_regs_struct {
+ unsigned long stack_pointer;
+ unsigned long tls;
+};
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _UAPI_ASM_WASM_PTRACE_H */
diff --git a/arch/wasm/include/uapi/asm/sigcontext.h b/arch/wasm/include/uapi/asm/sigcontext.h
new file mode 100644
index 000000000..7fa987d86
--- /dev/null
+++ b/arch/wasm/include/uapi/asm/sigcontext.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+
+#ifndef _UAPI_ASM_WASM_SIGCONTEXT_H
+#define _UAPI_ASM_WASM_SIGCONTEXT_H
+
+#include <asm/ptrace.h>
+
+/* State saved before a signal is handled, given to signal handlers. */
+struct sigcontext {
+ struct user_regs_struct regs;
+};
+
+#endif /* _UAPI_ASM_WASM_SIGCONTEXT_H */
diff --git a/arch/wasm/include/uapi/asm/unistd.h b/arch/wasm/include/uapi/asm/unistd.h
new file mode 100644
index 000000000..9729b100b
--- /dev/null
+++ b/arch/wasm/include/uapi/asm/unistd.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+
+#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_CLONE3
+
+#include <asm-generic/unistd.h>
diff --git a/arch/wasm/kernel/Makefile b/arch/wasm/kernel/Makefile
new file mode 100644
index 000000000..a630af519
--- /dev/null
+++ b/arch/wasm/kernel/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+extra-y += vmlinux.lds
+
+obj-y += cpu.o
+obj-y += cpuflags.o
+obj-y += entry.o
+obj-y += head.o
+obj-y += irqflags.o
+obj-y += irq.o
+obj-y += process.o
+obj-y += ptrace.o
+obj-y += reboot.o
+obj-y += setup.o
+obj-y += signal.o
+obj-y += smp.o
+obj-y += stack.o
+obj-y += sys_wasm.o
+obj-y += syscall_table.o
+obj-y += time.o
+obj-y += traps.o
diff --git a/arch/wasm/kernel/asm-offsets.c b/arch/wasm/kernel/asm-offsets.c
new file mode 100644
index 000000000..272f0f461
--- /dev/null
+++ b/arch/wasm/kernel/asm-offsets.c
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#define ASM_OFFSETS_C
+
+#include <linux/sched.h>
+#include <linux/kbuild.h>
+#include <asm/thread_info.h>
+
+void foo(void)
+{
+/*
+ * struct task_struct is stored just above the thread stack. It is aligned by
+ * L1_CACHE_BYTES, which is enforced by init_task and the task memory allocator.
+ *
+ * sizeof(pt_regs) and sizeof(task_struct) are naturally aligned by their size.
+ * The start of the actual stack has to be 16-byte aligned when calling C code.
+ */
+#define _THREAD_TASK_STRUCT_OFFSET ALIGN_DOWN(THREAD_SIZE - sizeof(struct task_struct), L1_CACHE_BYTES)
+#define _THREAD_PT_REGS_OFFSET (_THREAD_TASK_STRUCT_OFFSET - sizeof(struct pt_regs))
+#define _THREAD_SWITCH_STACK_OFFSET (_THREAD_PT_REGS_OFFSET - sizeof(struct switch_stack))
+#define _THREAD_STACK_START ALIGN_DOWN(_THREAD_SWITCH_STACK_OFFSET, 16)
+
+ DEFINE(THREAD_TASK_STRUCT_OFFSET, _THREAD_TASK_STRUCT_OFFSET);
+ BLANK();
+
+ DEFINE(THREAD_PT_REGS_OFFSET, _THREAD_PT_REGS_OFFSET);
+ DEFINE(PT_REGS_STACK_POINTER, offsetof(struct pt_regs, stack_pointer));
+ BLANK();
+
+ DEFINE(THREAD_SWITCH_STACK_OFFSET, _THREAD_SWITCH_STACK_OFFSET);
+ DEFINE(SWITCH_STACK_TLS, offsetof(struct switch_stack, tls));
+ BLANK();
+
+ DEFINE(THREAD_STACK_START, _THREAD_STACK_START);
+ BLANK();
+}
diff --git a/arch/wasm/kernel/cpu.c b/arch/wasm/kernel/cpu.c
new file mode 100644
index 000000000..5fb9aa8ba
--- /dev/null
+++ b/arch/wasm/kernel/cpu.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/cpu.h>
+#include <linux/seq_file.h>
+
+#ifdef CONFIG_PROC_FS
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+ if (*pos == nr_cpu_ids)
+ return NULL;
+
+ *pos = cpumask_next(*pos - 1, cpu_online_mask);
+ if ((*pos) < nr_cpu_ids)
+ return (void *)(uintptr_t)(1 + *pos);
+ return NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+static int c_show(struct seq_file *m, void *v)
+{
+ unsigned long cpu_id = (unsigned long)v - 1;
+
+ seq_printf(m, "processor\t: %lu\n", cpu_id);
+ seq_printf(m, "vendor_id\t: Wasm\n");
+
+ return 0;
+}
+
+const struct seq_operations cpuinfo_op = {
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = c_show,
+};
+
+#endif /* CONFIG_PROC_FS */
diff --git a/arch/wasm/kernel/cpuflags.c b/arch/wasm/kernel/cpuflags.c
new file mode 100644
index 000000000..a97e9b58a
--- /dev/null
+++ b/arch/wasm/kernel/cpuflags.c
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <asm/cpuflags.h>
+
+DEFINE_PER_CPU(unsigned long, wasm_cpuflags) = CPUFLAGS_RESET_VALUE;
diff --git a/arch/wasm/kernel/entry.S b/arch/wasm/kernel/entry.S
new file mode 100644
index 000000000..04087b23f
--- /dev/null
+++ b/arch/wasm/kernel/entry.S
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <asm/thread_info.h>
+
+#include <asm/asm-offsets.h>
+
+
+.globaltype __stack_pointer, i32
+
+.section .globals,"",@
+
+.globaltype __user_stack_pointer, i32
+__user_stack_pointer:
+.globaltype __user_tls_base, i32
+__user_tls_base:
+
+.section .text,"",@
+
+.globl get_user_stack_pointer
+get_user_stack_pointer:
+ .functype get_user_stack_pointer() -> (i32)
+ global.get __user_stack_pointer
+ end_function
+
+.globl get_user_tls_base
+get_user_tls_base:
+ .functype get_user_tls_base() -> (i32)
+ global.get __user_tls_base
+ end_function
+
+.functype user_mode_tail() -> (i32)
+.functype wasm_user_mode_tail(i32) -> ()
+
+.globl _user_mode_tail
+_user_mode_tail:
+ .functype _user_mode_tail() -> ()
+ .local i32 /* 0: flow */
+
+ block
+ call user_mode_tail
+ local.tee 0
+ i32.eqz
+ br_if 0
+
+ local.get 0
+ call wasm_user_mode_tail
+ end_block
+
+ end_function
+
+/*
+ * HIGH ADDRESSES
+ * --------------
+ * (thread end) <- (current & THREAD_MASK) + THREAD_SIZE
+ * [task_struct] <- current
+ * [pt_regs]
+ * [switch_stack] <- initial __stack_pointer
+ * (alignment)
+ * <- Ready to call C code (16-byte aligned).
+ * (...free space...)
+ *
+ * (thread start) <- current & THREAD_MASK
+ * -------------
+ * LOW ADDRESSES
+ */
+
+.functype __ret_from_fork(i32, i32) -> (i32)
+
+/* New process. Called by Wasm host when it runs a task for the first time. */
+.globl ret_from_fork
+ret_from_fork:
+ /* struct task_struct *prev_task, struct task_struct *next_task */
+ .functype ret_from_fork(i32, i32) -> (i32)
+
+ /* We can't switch back to a task so no need to save into prev_task. */
+
+ /* Load __stack_pointer from the new task's kernel stack area. */
+ local.get 1
+ i32.const zeroptr-THREAD_TASK_STRUCT_OFFSET+THREAD_STACK_START
+ i32.add
+ global.set __stack_pointer
+
+ /*
+ * Finish up in C. Returns true if we have a clone callback to call.
+ * (Upon return, the correct cpuflags for userland have been loaded.)
+ */
+ local.get 0
+ local.get 1
+ call __ret_from_fork
+
+ /* Load __user_stack_pointer. */
+ local.get 1
+ i32.const zeroptr-THREAD_TASK_STRUCT_OFFSET+THREAD_PT_REGS_OFFSET+PT_REGS_STACK_POINTER
+ i32.add
+ i32.load 0
+ global.set __user_stack_pointer
+
+ /* Load __user_tls_base. */
+ local.get 1
+ i32.const zeroptr-THREAD_TASK_STRUCT_OFFSET+THREAD_SWITCH_STACK_OFFSET+SWITCH_STACK_TLS
+ i32.add
+ i32.load 0
+ global.set __user_tls_base
+
+ /* Clean up the stack. */
+ global.get __stack_pointer
+ i32.const zeroptr-THREAD_STACK_START+THREAD_TASK_STRUCT_OFFSET
+ i32.add
+ global.set __stack_pointer
+
+ /* In theory, the first thing we execute may be a signal handler. */
+ call _user_mode_tail
+
+ end_function
+
+.macro WASM_SYSCALL_ASM_HEAD
+ /*
+ * The kernel expects pt_regs to be populated so save what we know.
+ * The following fields are saved in the C part of this handling:
+ * * cpuflags is set to appropriate values.
+ * * syscall_nr and syscall_args are set from our call parameters.
+ * * syscall_ret is set and returned to us.
+ */
+
+ local.get 0
+ global.set __user_stack_pointer
+ local.get 1
+ global.set __user_tls_base
+
+ /* Allocate pt_regs + switch_stack + stack alignment. */
+ global.get __stack_pointer
+ i32.const zeroptr-THREAD_TASK_STRUCT_OFFSET+THREAD_STACK_START
+ i32.add
+ global.set __stack_pointer
+
+ /* Save __user_stack_pointer. */
+ global.get __stack_pointer
+ i32.const zeroptr-THREAD_STACK_START+THREAD_PT_REGS_OFFSET+PT_REGS_STACK_POINTER
+ i32.add
+ global.get __user_stack_pointer
+ i32.store 0
+
+ /* Save __user_tls_base. */
+ global.get __stack_pointer
+ i32.const zeroptr-THREAD_STACK_START+THREAD_SWITCH_STACK_OFFSET+SWITCH_STACK_TLS
+ i32.add
+ global.get __user_tls_base
+ i32.store 0
+
+ /*
+ * Note: we don't need to swap the stack pointer, it already happened
+ * automatically when calling into the vmlinux Wasm instance.
+ */
+.endm
+
+.macro WASM_SYSCALL_ASM_FOOT
+ /* (The cpuflags have already been restored in C.) */
+
+ /* Load __user_stack_pointer. */
+ global.get __stack_pointer
+ i32.const zeroptr-THREAD_STACK_START+THREAD_PT_REGS_OFFSET+PT_REGS_STACK_POINTER
+ i32.add
+ i32.load 0
+ global.set __user_stack_pointer
+
+ /* Load __user_tls_base. */
+ global.get __stack_pointer
+ i32.const zeroptr-THREAD_STACK_START+THREAD_SWITCH_STACK_OFFSET+SWITCH_STACK_TLS
+ i32.add
+ i32.load 0
+ global.set __user_tls_base
+
+ /* Deallocate stack alignment + switch_stack + pt_regs. */
+ global.get __stack_pointer
+ i32.const zeroptr-THREAD_STACK_START+THREAD_TASK_STRUCT_OFFSET
+ i32.add
+ global.set __stack_pointer
+
+ /*
+ * Note: we don't need to swap the __stack_pointer, it will
+ * happen automatically when returning back into the user code
+ * Wasm instance (as that instance has its own __stack_pointer).
+ *
+ * We can exploit this as we're basically in userland, but with
+ * the kernel stack pointer loaded. This allows us to play tricks
+ * with execution in userland without setting a program counter.
+ *
+ * This is where signal handlers are called, and returned, and exec()
+ * calls stop the execution of the user program. In the case of exec()
+ * and signal return, the call stack collapses (this call never returns).
+ */
+ call _user_mode_tail
+
+ end_function
+.endm
+
+.functype __wasm_syscall_0(i32) -> (i32)
+.functype __wasm_syscall_1(i32, i32) -> (i32)
+.functype __wasm_syscall_2(i32, i32, i32) -> (i32)
+.functype __wasm_syscall_3(i32, i32, i32, i32) -> (i32)
+.functype __wasm_syscall_4(i32, i32, i32, i32, i32) -> (i32)
+.functype __wasm_syscall_5(i32, i32, i32, i32, i32, i32) -> (i32)
+.functype __wasm_syscall_6(i32, i32, i32, i32, i32, i32, i32) -> (i32)
+
+/*
+ * These syscall functions should be called from userland code. In order to skip
+ * slow JavaScript glue code, they directly transfer all state needed into the
+ * kernel. This means that two initial parameters are added for sp and tp.
+ * Parameter 0 [sp]: the userland stack pointer.
+ * Parameter 1 [tp]: the userland TLS pointer.
+ * Parameter 2 [nr]: the syscall nr.
+ * Parameter 3..8 [argN]: syscall argument(s) 0..5, where applicable.
+ *
+ * The kernel never modifies sp or tp for the calling task during syscalls and
+ * there is thus no need to restore them after the syscall returns. Apart from
+ * diagnostics, they only play a role in the clone and exec family of syscalls.
+ * Clone can be made to copy the supplied sp and tp to the new task. Exec should
+ * maintain the tp even for new process images (this use case is quite sketchy).
+ * Considering that the kernel does not care if userland even has a stack or TLS
+ * area, it would be OK to not transfer these pointers at all if desired. In
+ * both the clone and exec cases, the initial values of sp and tp would not be
+ * loaded at a syscall site, but instead during the ret_from_fork code flow.
+ */
+.globl wasm_syscall_0
+wasm_syscall_0:
+ .functype wasm_syscall_0(i32, i32, i32) -> (i32)
+ WASM_SYSCALL_ASM_HEAD
+ local.get 2
+ call __wasm_syscall_0
+ WASM_SYSCALL_ASM_FOOT
+
+.globl wasm_syscall_1
+wasm_syscall_1:
+ .functype wasm_syscall_1(i32, i32, i32, i32) -> (i32)
+ WASM_SYSCALL_ASM_HEAD
+ local.get 2
+ local.get 3
+ call __wasm_syscall_1
+ WASM_SYSCALL_ASM_FOOT
+
+.globl wasm_syscall_2
+wasm_syscall_2:
+ .functype wasm_syscall_2(i32, i32, i32, i32, i32) -> (i32)
+ WASM_SYSCALL_ASM_HEAD
+ local.get 2
+ local.get 3
+ local.get 4
+ call __wasm_syscall_2
+ WASM_SYSCALL_ASM_FOOT
+
+.globl wasm_syscall_3
+wasm_syscall_3:
+ .functype wasm_syscall_3(i32, i32, i32, i32, i32, i32) -> (i32)
+ WASM_SYSCALL_ASM_HEAD
+ local.get 2
+ local.get 3
+ local.get 4
+ local.get 5
+ call __wasm_syscall_3
+ WASM_SYSCALL_ASM_FOOT
+
+.globl wasm_syscall_4
+wasm_syscall_4:
+ .functype wasm_syscall_4(i32, i32, i32, i32, i32, i32, i32) -> (i32)
+ WASM_SYSCALL_ASM_HEAD
+ local.get 2
+ local.get 3
+ local.get 4
+ local.get 5
+ local.get 6
+ call __wasm_syscall_4
+ WASM_SYSCALL_ASM_FOOT
+
+.globl wasm_syscall_5
+wasm_syscall_5:
+ .functype wasm_syscall_5(i32, i32, i32, i32, i32, i32, i32, i32) -> (i32)
+ WASM_SYSCALL_ASM_HEAD
+ local.get 2
+ local.get 3
+ local.get 4
+ local.get 5
+ local.get 6
+ local.get 7
+ call __wasm_syscall_5
+ WASM_SYSCALL_ASM_FOOT
+
+.globl wasm_syscall_6
+wasm_syscall_6:
+ .functype wasm_syscall_6(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32)
+ WASM_SYSCALL_ASM_HEAD
+ local.get 2
+ local.get 3
+ local.get 4
+ local.get 5
+ local.get 6
+ local.get 7
+ local.get 8
+ call __wasm_syscall_6
+ WASM_SYSCALL_ASM_FOOT
diff --git a/arch/wasm/kernel/head.S b/arch/wasm/kernel/head.S
new file mode 100644
index 000000000..e7403fcf2
--- /dev/null
+++ b/arch/wasm/kernel/head.S
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+.globaltype __stack_pointer, i32
+
+.functype start_kernel() -> ()
+.functype start_secondary() -> ()
+
+/* vmlinux entrypoint */
+.globl _start
+_start:
+ .functype _start() -> ()
+ .local i32 /* alloc_limit */
+
+ /*
+ * init_task resides just after the start of the stack. Higher addresses
+ * contain task_struct init_task data, while the stack grows downwards.
+ *
+ * The initial stack needs to be 16-byte aligned when calling C code.
+ * init_task already has a much higher alignment (by L1_CACHE_BYTES).
+ */
+ i32.const init_task
+ global.set __stack_pointer
+
+ /* Save static memory used by vmlinux. */
+ i32.const memory_start
+ i32.const 0x10000 /* The first page is reserved for trapping nullptr. */
+ i32.store 0
+ i32.const memory_kernel_break
+ memory.size 0
+	i32.const 0x10000 /* Multiply by Wasm page size (64 KiB). */
+ i32.mul
+ i32.store 0
+
+ /*
+ * By some trial-and-error in Firefox and (mostly) Chromium:
+ * * Allocating the full address space (4 GB) works most of the time.
+ * * Allocating 4 GB fails often enough to be unstable. Curiously, it
+ * does not seem to have anything to do with free memory, and just
+ * reloading the page fixes it. Waiting a bit might be beneficial.
+ * * Doing it from within Wasm seems to succeed more often(?).
+ * * Allocating it in one go works better than stepping, as below, and
+ * would allow 4 GB. But stepping is more reliable.
+ * * Stepping all the way from 4 GB makes Chromium accept the allocation
+ * soon enough, but then crash with SIGILL, probably because of OOM.
+ * * Stepping from 500 MB is a tradeoff with all things considered. It
+ * ought indeed to be enough for anybody! (Oh, old joke, sorry...)
+ *
+ * Considering the above heuristics, a fair approach seems to start high
+ * and aggressively step downwards, one page at a time. But not too high
+ * or there will be OOM troubles related to current default browser
+ * settings. Sadly, browsers don't seem to handle this in a very well
+ * defined way, and we have to be moderately aggressive. An even more
+ * aggressive approach that surprisingly works is to try again and again
+ * with the same allocation size, but stepping almost achieves that.
+ *
+ * Whatever happens, the memory is zero-initialized and hopefully
+ * overcommitted by the host OS. If it is not, that should be fixed!
+ * Even better would be MMU support in Wasm, and this problem would be
+ * solved altogether. And a whole slew of other problems too!
+ *
+ * Note that we cannot allocate the last page from within Wasm (even
+ * though it is possible from the JavaScript host to create a Memory
+ * with initial: 0x10000, memory.grow only allows us to get to 0xFFFF).
+ * This is not too bad, as this is almost like not placing anything in
+ * the first page to catch null pointers. This guards underflow instead.
+ */
+ i32.const 0x2000 /* Immediately decremented by 1 in the loop below. */
+ memory.size 0 /* Returns the current number of pages. */
+ i32.sub /* Try grow by the difference, (max - curr). */
+ local.set 0
+ loop
+ local.get 0
+ i32.const 1
+ i32.sub
+ local.tee 0
+
+ memory.grow 0
+ i32.const -1 /* Check if allocation failed (returned -1). */
+ i32.eq
+ br_if 0
+ end_loop
+
+ block
+ local.get 0
+ i32.const 16
+ i32.lt_u
+ br_if 0
+
+ i32.const memory_end
+ local.get 0
+	i32.const 0x10000 /* Multiply by Wasm page size (64 KiB). */
+ i32.mul
+ i32.store 0
+
+ call start_kernel /* Start the kernel! */
+ end_block
+
+ /* If we ever get here, the memory allocation failed. */
+ end_function
+
+.globl _start_secondary
+_start_secondary:
+ .functype _start_secondary(i32) -> ()
+ local.get 0
+ global.set __stack_pointer
+ call start_secondary
+ /* start_secondary should never return. */
+ unreachable
+
+ end_function
diff --git a/arch/wasm/kernel/irq.c b/arch/wasm/kernel/irq.c
new file mode 100644
index 000000000..9092bf194
--- /dev/null
+++ b/arch/wasm/kernel/irq.c
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <asm/smp.h>
+
+static unsigned int wasm_irq_startup(struct irq_data *data)
+{
+ return 0;
+}
+
+static void wasm_irq_noop(struct irq_data *data)
+{
+}
+
+static int
+wasm_irq_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      bool force)
+{
+#ifdef CONFIG_SMP
+	printk("wasm_irq_set_affinity: %d %d %d\n", data->irq, cpumask_weight(mask), cpumask_first(mask));
+#endif
+	return 0; /* Always return a value: non-SMP builds reached end of non-void fn. */
+}
+
+struct irq_chip wasm_irq_chip = {
+ .name = "wasm",
+ .irq_startup = wasm_irq_startup,
+ .irq_shutdown = wasm_irq_noop,
+ .irq_enable = wasm_irq_noop,
+ .irq_disable = wasm_irq_noop,
+ .irq_ack = wasm_irq_noop,
+ .irq_mask = wasm_irq_noop,
+ .irq_unmask = wasm_irq_noop,
+ .irq_set_affinity = wasm_irq_set_affinity,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+void __init init_IRQ(void)
+{
+ int irq;
+
+ for (irq = 0; irq < NR_IRQS; ++irq) {
+ if (irq == WASM_IRQ_IPI || irq == WASM_IRQ_TIMER) {
+ irq_set_percpu_devid(irq);
+ irq_set_chip_and_handler(
+ irq, &wasm_irq_chip, handle_percpu_devid_irq);
+ } else {
+ irq_set_chip_and_handler(
+ irq, &wasm_irq_chip, handle_simple_irq);
+ }
+ }
+
+ setup_smp_ipi();
+}
diff --git a/arch/wasm/kernel/irqflags.c b/arch/wasm/kernel/irqflags.c
new file mode 100644
index 000000000..cd8e86e90
--- /dev/null
+++ b/arch/wasm/kernel/irqflags.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <asm/irqflags.h>
+
+#include <linux/bitops.h>
+#include <asm/cpuflags.h>
+
+unsigned long arch_local_save_flags(void)
+{
+ if (*this_cpu_ptr(&wasm_cpuflags) & BIT(CPUFLAGS_INTERRUPT))
+ return ARCH_IRQ_ENABLED;
+ return ARCH_IRQ_DISABLED;
+}
+
+void arch_local_irq_restore(unsigned long flags)
+{
+ if (flags == ARCH_IRQ_DISABLED)
+ *this_cpu_ptr(&wasm_cpuflags) &= ~BIT(CPUFLAGS_INTERRUPT);
+ else
+ *this_cpu_ptr(&wasm_cpuflags) |= BIT(CPUFLAGS_INTERRUPT);
+}
diff --git a/arch/wasm/kernel/process.c b/arch/wasm/kernel/process.c
new file mode 100644
index 000000000..1eaa35d8f
--- /dev/null
+++ b/arch/wasm/kernel/process.c
@@ -0,0 +1,282 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/entry-common.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task_stack.h>
+#include <linux/printk.h>
+#include <asm/cpuflags.h>
+#include <asm/wasm.h>
+
+/*
+ * CPUs currently claimed by a user task (one user task per CPU); managed
+ * by user_task_set_affinity() and release_thread().
+ * NOTE(review): updated without an explicit lock — confirm callers
+ * serialize access (see the TODO in user_task_set_affinity()).
+ */
+static cpumask_t user_cpus = CPU_MASK_NONE;
+
+/*
+ * Arch context switch.  See the long comment below: Wasm cannot swap
+ * native call stacks, so tasks are serialized onto host threads instead.
+ */
+struct task_struct *__sched
+__switch_to(struct task_struct *prev_task, struct task_struct *next_task)
+{
+	/*
+	 * Here, a typical arch would normally:
+	 * * Swap registers and stack.
+	 * * Return to the instruction pointer of the new task.
+	 *
+	 * For new tasks (after fork), it would normally:
+	 * * Swap registers and stack.
+	 * * Call schedule_tail(), now in the context of the new process.
+	 * * If there is a kernel_fn set, call it with correct callback arg.
+	 * * Call syscall_exit_to_user_mode(current_pt_regs()) or equivalent.
+	 * -> Return to the new instruction pointer, ending up in userland.
+	 *
+	 * When prev_task was swapped in again (on another reschedule), we would
+	 * continue here and return back to the scheduler. However, Wasm can't
+	 * do this. Only the in-memory part of the call stack can be swapped and
+	 * there is no way to jump. Instead, we have to resort to serializing
+	 * processes (in the cooperative multitasking sense) by launching
+	 * several threads of execution on the host and use locks to make sure
+	 * that only one process at a time is running on the same cpu.
+	 *
+	 * As soon as more CPUs are enabled, we can start running concurrently,
+	 * by putting each task (except idle) on separate CPUs. Before that,
+	 * init and kthreadd will need to both run, until smp is started.
+	 * Thankfully we have control over these threads and know they will not
+	 * hog the CPU. They might call schedule() on longer waits, that's fine.
+	 *
+	 * There is still the issue with idle threads, we could optimize to get
+	 * away with them, which would cut the number of tasks used in the
+	 * system by half. However, doing so is pretty annoying, as the idle
+	 * loop is actually doing something and will eventually need to call
+	 * schedule_idle(). For now, we serialize them too.
+	 */
+
+	struct task_struct *last_task;
+	char name[TASK_COMM_LEN];
+
+	/* For user code. */
+	unsigned long bin_start = 0U;
+	unsigned long bin_end = 0U;
+	unsigned long data_start = 0U;
+
+	/* First ever activation of next_task: spawn a host-side task for it. */
+	if (task_thread_info(next_task)->flags & _TIF_NEVER_RUN) {
+		task_thread_info(next_task)->flags &= ~_TIF_NEVER_RUN;
+
+		/* Get the name to aid debugging. */
+		get_task_comm(name, next_task);
+
+		/* For user executables, we need to clone the Wasm instance. */
+		/* NOTE(review): a non-zero start_code is the marker for "has a
+		 * user executable" — see copy_thread() and start_thread(). */
+		if (next_task->mm->start_code) {
+			bin_start = next_task->mm->start_code;
+			bin_end = next_task->mm->end_code;
+			data_start = next_task->mm->start_data;
+		}
+
+		/* This is called instead of serialize the first time. */
+		last_task = wasm_create_and_run_task(prev_task, next_task, name,
+			bin_start, bin_end, data_start, 0U);
+	} else {
+		last_task = wasm_serialize_tasks(prev_task, next_task);
+	}
+
+	/* If/when we reach here, we got __switch_to():ed by another task. */
+
+	/* last_task is the previous task (never prev_task, maybe next_task). */
+	return last_task;
+}
+
+/*
+ * Reserve a dedicated CPU for a new user task (one user task per CPU),
+ * bringing the CPU online on first use.  Kthreads are left free to run
+ * on any online CPU.  Returns 0 on success or a negative errno.
+ */
+static int user_task_set_affinity(struct task_struct *p)
+{
+	/*
+	 * TODO: This function needs a review of proper approach and locking!
+	 * It's probably best to take a step back and think about how this
+	 * should be implemented properly in the first place, instead of adding
+	 * band aid on top of about every line that violates this and that. That
+	 * includes fixing release_thread() and garbage collecting unused CPUs.
+	 *
+	 * We may also have to move kthreads to IRQ_CPU (with an option of the
+	 * boot cpu before IRQ_CPU is up) in case they risk getting blocked.
+	 */
+	int retval;
+	int cpu;
+
+	/* Kthreads can be allowed to run on any online CPU. */
+	if (p->flags & PF_KTHREAD)
+		return 0;
+
+hack:
+	/* Pick the first CPU not yet claimed by a user task. */
+	cpu = cpumask_first_zero(&user_cpus);
+	if (cpu >= nr_cpu_ids)
+		return -EBUSY;
+
+	if(cpu == IRQ_CPU) {
+		/* TODO: We should mark IRQ_CPU as taken at boot instead. */
+		cpumask_set_cpu(cpu, &user_cpus);
+		goto hack;
+	}
+
+	/* Lazily bring the chosen CPU online the first time it is needed. */
+	if (!cpu_online(cpu)) {
+		BUG_ON(!cpu_possible(cpu));
+
+		/* We should add_cpu(cpu) if we properly supported hotplug... */
+		retval = cpu_device_up(get_cpu_device(cpu));
+		if (retval)
+			return retval;
+	}
+
+	cpumask_set_cpu(cpu, &user_cpus);
+
+	retval = set_cpus_allowed_ptr(p, cpumask_of(cpu));
+	if (retval) {
+		/* Give the claimed CPU back on failure. */
+		cpumask_clear_cpu(cpu, &user_cpus);
+		return retval;
+	}
+
+	/* The task must stay put: userland may not change its affinity. */
+	p->flags |= PF_NO_SETAFFINITY;
+
+	return 0;
+}
+
+/*
+ * First C code executed by a newly forked task, entered after
+ * __switch_to() activates it for the first time.  The return value tells
+ * the host what to run next (see the comment at the bottom).
+ */
+asmlinkage unsigned
+__ret_from_fork(struct task_struct *prev_task, struct task_struct *next_task)
+{
+	struct switch_stack *next_switch_stack = task_switch_stack(next_task);
+
+	/* Finish the scheduler's half of the context switch. */
+	schedule_tail(prev_task);
+
+	/* Kernel thread callback. */
+	if (next_switch_stack->fn) {
+		next_switch_stack->fn(next_switch_stack->fn_arg);
+		/*
+		 * Kernel threads can return, and in doing so, return to user
+		 * space. This happens for the first user process (init).
+		 */
+
+		BUG_ON(current->flags & PF_KTHREAD);
+
+		/*
+		 * The binfmt loader would have set _TIF_RELOAD_PROGRAM
+		 * but we clear it now so that future syscalls don't trap.
+		 */
+		current_thread_info()->flags &= ~_TIF_RELOAD_PROGRAM;
+	}
+
+	/*
+	 * syscall_exit_to_user_mode() turns off interrupts, as most
+	 * architectures would IRET right after it, enabling them again. We
+	 * emulate this behaviour by loading cpuflags, which should both enable
+	 * interrupts again but also drop the privilege level down to USER_MODE.
+	 */
+	syscall_exit_to_user_mode(current_pt_regs());
+	*this_cpu_ptr(&wasm_cpuflags) = current_pt_regs()->cpuflags;
+
+	/*
+	 * After returning, the Wasm module binary will be initialized and run.
+	 * We run any signal handlers that should be run first, then:
+	 * kthread case: the host will call _start().
+	 * clone callback case: the host will call __libc_clone_callback().
+	 */
+	return !(next_switch_stack->fn);
+}
+
+/*
+ * Called on exec to drop per-thread CPU state.  Wasm threads carry no
+ * FP/vector context of their own, so there is nothing to flush.
+ */
+void flush_thread(void)
+{
+}
+
+/*
+ * Arch-specific part of fork/clone: populate the child's pt_regs and
+ * switch_stack frames.
+ *
+ * Kernel threads (args->fn set) get zeroed frames plus the fn/fn_arg
+ * callback that __ret_from_fork() will invoke; user threads inherit the
+ * parent's frames, optionally with a new stack pointer and TLS value.
+ *
+ * Fixes: the !binfmt cleanup used to poke current->mm (the parent's mm)
+ * instead of p->mm (the child's), and unconditionally dereferenced
+ * p->mm, which is NULL for kernel threads.
+ */
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
+{
+	struct pt_regs *parent_pt_regs = current_pt_regs();
+	struct switch_stack *parent_switch_stack = current_switch_stack();
+
+	struct pt_regs *child_pt_regs = task_pt_regs(p);
+	struct switch_stack *child_switch_stack = task_switch_stack(p);
+
+	/* Make __switch_to() spawn a host-side task on first activation. */
+	task_thread_info(p)->flags |= _TIF_NEVER_RUN;
+
+	if (unlikely(args->fn)) {
+		/* Kernel thread: kernel mode, interrupts enabled. */
+		memset(child_pt_regs, 0, sizeof(*child_pt_regs));
+		child_pt_regs->stack_pointer = (unsigned long)child_switch_stack;
+		child_pt_regs->cpuflags = BIT(CPUFLAGS_INTERRUPT);
+
+		memset(child_switch_stack, 0, sizeof(*child_switch_stack));
+		child_switch_stack->fn = args->fn;
+		child_switch_stack->fn_arg = args->fn_arg;
+	} else {
+		/* User thread: inherit the parent's frames. */
+		*child_pt_regs = *parent_pt_regs;
+		if (args->stack)
+			child_pt_regs->stack_pointer = args->stack;
+
+		*child_switch_stack = *parent_switch_stack;
+		child_switch_stack->fn = NULL;
+		child_switch_stack->fn_arg = NULL;
+		if (args->flags & CLONE_SETTLS)
+			child_switch_stack->tls = args->tls;
+	}
+
+	if (p->mm && !p->mm->binfmt) {
+		/* These are normally not zeroed out in copy_process(). */
+		p->mm->start_code = 0;
+		p->mm->end_code = 0;
+		p->mm->start_stack = 0;
+		p->mm->start_data = 0;
+		p->mm->end_data = 0;
+	}
+
+	return user_task_set_affinity(p);
+}
+
+/*
+ * Set up a thread for executing a new program (exec): reset pt_regs to
+ * user mode with interrupts enabled, ask the host to load the new Wasm
+ * executable, and arm _TIF_RELOAD_PROGRAM so the current syscall's exit
+ * path switches over to it.
+ */
+void start_thread(struct pt_regs *regs, unsigned long stack_pointer)
+{
+	memset(regs, 0, sizeof(*regs));
+	regs->stack_pointer = stack_pointer;
+	regs->cpuflags = BIT(CPUFLAGS_USER_MODE) | BIT(CPUFLAGS_INTERRUPT);
+
+	wasm_load_executable(current->mm->start_code, current->mm->end_code,
+		current->mm->start_data, 0U);
+
+	/* Reload the program when the current syscall exits. */
+	current_thread_info()->flags |= _TIF_RELOAD_PROGRAM;
+}
+
+/*
+ * Called when a dead task is reaped: return its dedicated CPU to the
+ * user_cpus pool and let the host free the Wasm-side task resources.
+ */
+void release_thread(struct task_struct *dead_task)
+{
+	/* TODO: This code also needs review, like user_task_set_affinity(). */
+	if (!(dead_task->flags & PF_KTHREAD)) {
+		/* User tasks are pinned to exactly one CPU (copy_thread()). */
+		BUG_ON(dead_task->nr_cpus_allowed != 1);
+		BUG_ON(cpumask_first(&dead_task->cpus_mask)
+			!= task_thread_info(dead_task)->cpu);
+		cpumask_clear_cpu(task_thread_info(dead_task)->cpu, &user_cpus);
+	}
+
+	wasm_release_task(dead_task);
+}
+
+/*
+ * Dump the (tiny) Wasm register state plus thread-info diagnostics.
+ * Wasm has no general-purpose register file to print; pt_regs only
+ * carries cpuflags and the stack pointer.
+ */
+void show_regs(struct pt_regs *regs)
+{
+	show_regs_print_info(KERN_DEFAULT);
+
+	pr_cont("cpuflags: %08x sp: %08x flags: %08x preempt_count: %08x\n",
+		(unsigned)regs->cpuflags,
+		(unsigned)regs->stack_pointer,
+		(unsigned)current_thread_info()->flags,
+		(unsigned)current_thread_info()->preempt_count);
+}
+
+/*
+ * Dump a backtrace via the Wasm host.  The host can only trace the
+ * calling instance, so @task/@stack are used for labelling only.
+ *
+ * Fixes: the printk lines lacked trailing newlines, the error message
+ * ignored @loglvl, and kfree() was reached on the allocation-failure
+ * path (harmless for NULL, but pointless).
+ */
+void show_stack(struct task_struct *task, unsigned long *stack,
+		const char *loglvl)
+{
+	char *stack_trace;
+
+	printk("%sStack from %08lx:\n", loglvl, (unsigned long)stack);
+
+	stack_trace = kmalloc(WASM_STACKTRACE_MAX_SIZE, GFP_ATOMIC);
+	if (!stack_trace) {
+		printk("%sFailed to allocate stack trace buffer.\n", loglvl);
+		return;
+	}
+
+	wasm_dump_stacktrace(stack_trace, WASM_STACKTRACE_MAX_SIZE);
+	printk("%s%s\n", loglvl, stack_trace);
+	kfree(stack_trace);
+}
diff --git a/arch/wasm/kernel/ptrace.c b/arch/wasm/kernel/ptrace.c
new file mode 100644
index 000000000..a52667068
--- /dev/null
+++ b/arch/wasm/kernel/ptrace.c
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/ptrace.h>
+
+/* Called when tracing detaches; Wasm keeps no per-task trace state. */
+void ptrace_disable(struct task_struct *child)
+{
+}
+
+/* No arch-specific ptrace requests: defer everything to the generic code. */
+long arch_ptrace(struct task_struct *child, long request, unsigned long addr,
+		 unsigned long data)
+{
+	return ptrace_request(child, request, addr, data);
+}
diff --git a/arch/wasm/kernel/reboot.c b/arch/wasm/kernel/reboot.c
new file mode 100644
index 000000000..271e4ef64
--- /dev/null
+++ b/arch/wasm/kernel/reboot.c
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/reboot.h>
+#include <linux/irqflags.h>
+#include <linux/smp.h>
+#include <linux/printk.h>
+
+/*
+ * Reboot: quiesce all other CPUs with interrupts off, then run the
+ * registered restart handlers.  If nothing restarted the machine, there
+ * is nowhere sane to return to — spin forever.
+ */
+void machine_restart(char *cmd)
+{
+	local_irq_disable();
+	smp_send_stop();
+
+	do_kernel_restart(cmd);
+
+	printk("Reboot failed -- System halted\n");
+	for (;;)
+		;
+}
+
+/* Halt: stop the other CPUs with interrupts off, then spin forever. */
+void machine_halt(void)
+{
+	local_irq_disable();
+	smp_send_stop();
+
+	for (;;)
+		;
+}
+
+/*
+ * Power off via any registered power-off handler.  Like machine_restart()
+ * and machine_halt(), this must never return to the caller: if no handler
+ * powered the machine down, halt forever.  (Previously this function
+ * could fall through and return.)
+ */
+void machine_power_off(void)
+{
+	local_irq_disable();
+	smp_send_stop();
+	do_kernel_power_off();
+
+	printk("Power off failed -- System halted\n");
+	for (;;);
+}
diff --git a/arch/wasm/kernel/setup.c b/arch/wasm/kernel/setup.c
new file mode 100644
index 000000000..2ea9cc364
--- /dev/null
+++ b/arch/wasm/kernel/setup.c
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/init.h>
+#include <linux/screen_info.h>
+#include <linux/memblock.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+
+/*
+ * The format of "screen_info" is strange, and due to early
+ * i386-setup code. This is just enough to make the console
+ * code think we're on a VGA color display.
+ */
+struct screen_info screen_info = {
+	.orig_x = 0,
+	.orig_y = 25,		/* cursor parked on the last line */
+	.orig_video_cols = 80,	/* claim a classic 80x25 text mode */
+	.orig_video_lines = 25,
+	.orig_video_isVGA = 1,
+	.orig_video_points = 16,
+};
+
+/* Start of the usable physical memory window (set up in head.S). */
+unsigned long memory_start;
+EXPORT_SYMBOL(memory_start);
+
+/* One past the last usable byte of memory (set up in head.S). */
+unsigned long memory_end;
+EXPORT_SYMBOL(memory_end);
+
+/* End of the statically allocated kernel image; memblock reserves
+ * [memory_start, memory_kernel_break) in setup_arch(). */
+unsigned long memory_kernel_break;
+EXPORT_SYMBOL(memory_kernel_break);
+
+/* Mark every possible CPU as present; the Wasm host can back them all. */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu)
+		set_cpu_present(cpu, true);
+}
+
+/*
+ * Populate the cpu_possible mask.  The CPU count is currently pinned to
+ * NR_CPUS (TODO: make it configurable); the clamp below can therefore
+ * never fire today, but is kept so the code stays correct once ncpus can
+ * come from elsewhere.  The clamp's log message previously read
+ * "limiting core count by %d" — fixed to "to %d".
+ */
+void __init smp_init_cpus(void)
+{
+	unsigned int i;
+	unsigned int ncpus = NR_CPUS; /* TODO: make this configurable */
+
+	pr_info("%s: Core Count = %d\n", __func__, ncpus);
+
+	if (ncpus > NR_CPUS) {
+		ncpus = NR_CPUS;
+		pr_info("%s: limiting core count to %d\n", __func__, ncpus);
+	}
+
+	for (i = 0; i < ncpus; ++i)
+		set_cpu_possible(i, true);
+}
+
+/* Nothing to do: the boot CPU needs no extra preparation on Wasm. */
+void __init smp_prepare_boot_cpu(void)
+{
+}
+
+/*
+ * Early arch setup: hand the command line to the core, register the
+ * memory layout discovered in head.S with memblock, initialize the zone
+ * map, and enumerate the possible CPUs.
+ */
+void __init setup_arch(char **cmdline_p)
+{
+	unsigned long max_zone_pfn[MAX_NR_ZONES] = {0};
+
+	/* Save unparsed command line copy for /proc/cmdline */
+	*cmdline_p = boot_command_line;
+
+	parse_early_param();
+
+	/* See head.S for the logic that sets up these values. */
+	memblock_reserve(memory_start, memory_kernel_break - memory_start);
+	memblock_add(memory_start, memory_end - memory_start);
+
+	/* pcpu_find_block_fit() returns signed 32-bit memory addresses, ugh. */
+	memblock_set_current_limit(0x80000000); /* Only positive addresses. */
+
+	/* This is needed so that more than 128 allocations can be made. */
+	memblock_allow_resize();
+
+	/* Initialize zones, so that memory can be allocated beyond bootmem. */
+	max_zone_pfn[ZONE_NORMAL] = memory_end >> PAGE_SHIFT;
+	free_area_init(max_zone_pfn);
+
+	smp_init_cpus();
+}
diff --git a/arch/wasm/kernel/signal.c b/arch/wasm/kernel/signal.c
new file mode 100644
index 000000000..ec1753ae2
--- /dev/null
+++ b/arch/wasm/kernel/signal.c
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/entry-common.h>
+#include <asm/ucontext.h>
+
+/*
+ * Signal frame pushed onto the user stack by setup_rt_frame() and
+ * consumed again by the rt_sigreturn syscall.
+ */
+struct rt_sigframe {
+	/* Signal number, first argument for both handler signatures. */
+	int sig_param;
+
+	/*
+	 * info_param and uc_param are convenience pointers that allow extension
+	 * of rt_sigframe in the future and easy detection of whether SA_SIGINFO
+	 * is set (they are NULL if it is not set).
+	 */
+	siginfo_t *info_param;
+	void *uc_param;
+
+	union {
+		/* Signature the kernel uses internally ("opaque type"). */
+		__sighandler_t sa_handler;
+
+		/* Signature libc should use when SA_SIGINFO is set. */
+		void (*sigaction)(int sig, siginfo_t *info, void *uc);
+
+		/* Signature libc should use when SA_SIGINFO is not set. */
+		void (*handler)(int sig);
+	};
+
+	/*
+	 * info and uc are used for sigaction (SA_SIGINFO) but ignored for
+	 * handler (!SA_SIGINFO). However, uc is still used by the kernel when
+	 * handler is used, as it stores the regs to restore upon sigreturn.
+	 * This is what allows signal handlers to be stacked.
+	 */
+	struct siginfo info;
+	struct ucontext uc;
+};
+
+/*
+ * Unwind the rt_sigframe pushed by setup_rt_frame(): restore the blocked
+ * signal set, stack pointer, TLS and altstack, then flag the syscall
+ * exit path (via _TIF_RETURN_SIGNAL) that a handler just returned.
+ * A bad frame kills the task with SIGSEGV.
+ */
+SYSCALL_DEFINE0(rt_sigreturn)
+{
+	struct pt_regs *regs = current_pt_regs();
+	struct switch_stack *switch_stack = current_switch_stack();
+	struct rt_sigframe __user *frame =
+		(struct rt_sigframe __user *)regs->stack_pointer;
+	struct user_regs_struct __user *user_regs = &frame->uc.uc_mcontext.regs;
+	sigset_t set;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	if (!access_ok(frame, sizeof(*frame)))
+		goto badframe;
+
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	set_current_blocked(&set);
+
+	/* Restore the register state saved when the frame was built. */
+	if (__get_user(regs->stack_pointer, &user_regs->stack_pointer))
+		goto badframe;
+	if (__get_user(switch_stack->tls, &user_regs->tls))
+		goto badframe;
+
+	if (restore_altstack(&frame->uc.uc_stack))
+		goto badframe;
+
+	current_thread_info()->flags |= _TIF_RETURN_SIGNAL;
+
+	return 0;
+
+badframe:
+	force_sig(SIGSEGV);
+
+	return 0;
+}
+
+/*
+ * Build an rt_sigframe on the (possibly alternate) user stack and flag
+ * the exit path (via _TIF_DELIVER_SIGNAL) to invoke the handler.
+ * Returns 0 on success or -EFAULT if the frame cannot be written.
+ */
+static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+			  struct pt_regs *regs)
+{
+	struct switch_stack *switch_stack = (struct switch_stack *)regs - 1U;
+	unsigned long stack_pointer = sigsp(regs->stack_pointer, ksig);
+	struct rt_sigframe __user *frame;
+	long err = 0;
+
+	/*
+	 * Allocate storage for frame, aligning it for itself and for further C
+	 * function calling (which should really be the largest alignment...).
+	 */
+	stack_pointer -= sizeof(*frame);
+	stack_pointer &= -16UL;
+	stack_pointer &= -(unsigned long)__alignof__(*frame);
+	frame = (struct rt_sigframe __user *)stack_pointer;
+
+	if (!access_ok(frame, sizeof(*frame)))
+		return -EFAULT;
+
+	/* struct siginfo info */
+	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
+
+	/* struct ucontext uc */
+	err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(NULL, &frame->uc.uc_link);
+	err |= __save_altstack(&frame->uc.uc_stack, regs->stack_pointer);
+	err |= __put_user(regs->stack_pointer,
+		&frame->uc.uc_mcontext.regs.stack_pointer);
+	err |= __put_user(switch_stack->tls, &frame->uc.uc_mcontext.regs.tls);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+	/* int sig_param */
+	err |= __put_user(ksig->sig, &frame->sig_param);
+
+	/* info_param and uc_param convenience pointers */
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+		err |= __put_user(&frame->info, &frame->info_param);
+		err |= __put_user(&frame->uc, &frame->uc_param);
+	} else {
+		err |= __put_user(NULL, &frame->info_param);
+		err |= __put_user(NULL, &frame->uc_param);
+	}
+
+	/* __sighandler_t sa_handler */
+	err |= __put_user(ksig->ka.sa.sa_handler, &frame->sa_handler);
+
+	if (err)
+		return -EFAULT;
+
+	/* Point the user stack at the frame and arm delivery on exit. */
+	regs->stack_pointer = stack_pointer;
+	current_thread_info()->flags |= _TIF_DELIVER_SIGNAL;
+
+	return 0;
+}
+
+/*
+ * Set up delivery of one signal: adjust the interrupted syscall's return
+ * value according to the restart semantics, then push an rt_sigframe for
+ * the handler.
+ */
+static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
+{
+	sigset_t *oldset = sigmask_to_save();
+	int ret;
+
+	/* If we're from a syscall, cancel syscall restarting if appropriate. */
+	if (regs->syscall_nr != -1) {
+		switch (regs->syscall_ret) {
+		case -ERESTART_RESTARTBLOCK:
+		case -ERESTARTNOHAND:
+			regs->syscall_ret = -EINTR;
+			break;
+
+		case -ERESTARTSYS:
+			if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
+				regs->syscall_ret = -EINTR;
+				break;
+			}
+			fallthrough;
+		case -ERESTARTNOINTR:
+		default:
+			/* We will actually restart in these cases. */
+			break;
+		}
+	}
+
+	rseq_signal_deliver(ksig, regs);
+
+	/*
+	 * Wasm always uses rt-frames - the libc will have to figure out which
+	 * signature to call the handler with depending on if SA_SIGINFO is set.
+	 */
+	ret = setup_rt_frame(ksig, oldset, regs);
+
+	signal_setup_done(ret, ksig, 0);
+}
+
+/*
+ * Deliver one pending signal, or restore the saved sigmask when nothing
+ * is pending.  Syscall restart is handled in the syscall wrapper, since
+ * Wasm cannot rewind the instruction pointer to re-issue the syscall;
+ * handle_signal() may cancel the restart when a handler runs.
+ */
+void arch_do_signal_or_restart(struct pt_regs *regs)
+{
+	struct ksignal ksig;
+
+	if (!get_signal(&ksig)) {
+		/* No signal to deliver: put the saved sigmask back. */
+		restore_saved_sigmask();
+		return;
+	}
+
+	handle_signal(&ksig, regs);
+}
diff --git a/arch/wasm/kernel/smp.c b/arch/wasm/kernel/smp.c
new file mode 100644
index 000000000..c105e5259
--- /dev/null
+++ b/arch/wasm/kernel/smp.c
@@ -0,0 +1,344 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/clockchips.h>
+#include <linux/completion.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/irq_work.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/time.h>
+#include <asm/wasm.h>
+
+/* Host-provided monotonic clock; nanosecond units per time.c's 1 GHz
+ * clocksource registration. */
+extern unsigned long long wasm_cpu_clock_get_monotonic(void);
+
+/* Signalled by a secondary CPU once it has marked itself online. */
+static DECLARE_COMPLETION(cpu_running);
+
+#if NR_IRQS > 32
+#error "NR_IRQS too high"
+#endif
+/* Per-CPU bitmask of pending IRQ lines; bit n corresponds to IRQ n. */
+static DEFINE_PER_CPU(unsigned int, raised_irqs);
+
+#define TIMER_NEVER_EXPIRE (-1)
+/* Absolute expiry of the local one-shot timer, or TIMER_NEVER_EXPIRE. */
+static DEFINE_PER_CPU(long long, local_timer_expiries) = TIMER_NEVER_EXPIRE;
+
+/* IPI message types, stored as bits in raised_ipis. */
+enum ipi_type {
+	IPI_RESCHEDULE = 0,
+	IPI_CALL_FUNC = 1,
+	IPI_RECEIVE_BROADCAST = 2,
+	IPI_IRQ_WORK = 3,
+};
+#define IPI_MASK(ipi_type) ((unsigned int)(1U << (int)(ipi_type)))
+/* Per-CPU bitmask of pending IPI messages, consumed by handle_IPI(). */
+static DEFINE_PER_CPU(unsigned int, raised_ipis);
+
+/* Ask the host to stop every online CPU except the one we run on. */
+void smp_send_stop(void)
+{
+	unsigned int self = smp_processor_id();
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (cpu == self)
+			continue;
+		wasm_stop_cpu(cpu);
+	}
+}
+
+/* Run for each cpu except the first one, to bring the others up. */
+int __cpu_up(unsigned int cpu, struct task_struct *idle_task)
+{
+	/*
+	 * Use 16-byte aligned stack to be able to call C functions.  The
+	 * task_struct sits near the top of the THREAD_SIZE area (see
+	 * stack.c), so the initial stack pointer lands just below it.
+	 */
+	unsigned long stack_start = (unsigned long)idle_task & -16;
+
+	task_thread_info(idle_task)->cpu = cpu;
+
+	/* Needed so that __switch_to does not create a new Wasm task. */
+	task_thread_info(idle_task)->flags &= ~_TIF_NEVER_RUN;
+
+	reinit_completion(&cpu_running);
+
+	/* Will create a new Wasm instance and call start_secondary(). */
+	wasm_start_cpu(cpu, idle_task, (unsigned long)stack_start);
+
+	/* Wait for CPU to finish startup & mark itself online before return. */
+	wait_for_completion(&cpu_running);
+	return 0;
+}
+
+/*
+ * First thing to run on the secondary CPUs.
+ *
+ * Launched by __cpu_up(), which calls out to the Wasm host. The Wasm host calls
+ * _start_secondary, which sets up the __stack_pointer and then calls us.
+ */
+__visible void start_secondary(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	notify_cpu_starting(cpu);
+	set_cpu_online(cpu, true);
+
+	/* Accept cross-CPU IPIs before telling the boot CPU we're alive. */
+	enable_percpu_irq(WASM_IRQ_IPI, IRQ_TYPE_NONE);
+
+	/*
+	 * Notify boot CPU that we're up & online and it can safely return
+	 * from __cpu_up(). IPIs need to be enabled (enable_percpu_irq above).
+	 */
+	complete(&cpu_running);
+
+	/* Register this CPU's clockevent device (see time.c). */
+	wasm_clockevent_enable();
+
+	local_irq_enable();
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); /* Enter idle. */
+}
+
+/* Boot-time summary, printed once all secondary CPUs have come online. */
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
+}
+
+/*
+ * Mark IRQ @irq_nr pending on @cpu and wake that CPU's idle wait.
+ * Called by the host as well as by kernel code.
+ */
+__visible void raise_interrupt(int cpu, int irq_nr)
+{
+	/*
+	 * Note: this function may be called independent of the kernel, outside
+	 * any CPU or task. Do not call kernel functions in here!
+	 *
+	 * per_cpu_ptr() is however safe to call (unlike e.g. this_cpu_ptr()).
+	 */
+	unsigned int *raised_irqs_ptr = per_cpu_ptr(&raised_irqs, cpu);
+
+	/* Silently drop out-of-range lines rather than corrupt the mask. */
+	if (irq_nr >= NR_IRQS)
+		return;
+
+	__atomic_or_fetch(raised_irqs_ptr, 1U << irq_nr, __ATOMIC_SEQ_CST);
+	__builtin_wasm_memory_atomic_notify(raised_irqs_ptr, 1U);
+}
+
+/* Record @ipi as pending for @cpu, then kick its IPI interrupt line. */
+static void send_ipi_message(int cpu, enum ipi_type ipi)
+{
+	unsigned int *pending = per_cpu_ptr(&raised_ipis, cpu);
+
+	__atomic_or_fetch(pending, IPI_MASK(ipi), __ATOMIC_SEQ_CST);
+	raise_interrupt(cpu, WASM_IRQ_IPI);
+}
+
+/* Cross-call request: ask @cpu to run its pending smp_call_function work. */
+void arch_send_call_function_single_ipi(int cpu)
+{
+	/*
+	 * This is not ideal, as these can only be handled when the other CPU is
+	 * idle, but it allows using the kernel completion API when there is
+	 * really only one task running. This is the case when the primary CPU
+	 * is booting up secondaries, waiting for them to wake up and finish
+	 * their part of the boot process. Ideally, we'd get rid of IPI calls.
+	 */
+
+	preempt_disable();
+	send_ipi_message(cpu, IPI_CALL_FUNC);
+	preempt_enable();
+}
+
+/* Send a reschedule IPI to @cpu so its scheduler_ipi() handler runs. */
+void arch_smp_send_reschedule(int cpu)
+{
+	preempt_disable();
+	send_ipi_message(cpu, IPI_RESCHEDULE);
+	preempt_enable();
+}
+
+/*
+ * Self-IPI used by the irq_work machinery to break out of the current
+ * context.  Fixes: this is a definition, so the previous "extern"
+ * storage-class specifier was meaningless and has been dropped.
+ */
+void arch_irq_work_raise(void)
+{
+	/* This is a special IPI sent to ourselves, to break out of context. */
+
+	preempt_disable();
+	send_ipi_message(smp_processor_id(), IPI_IRQ_WORK);
+	preempt_enable();
+}
+
+/* Relay a clockevent broadcast to every CPU in @mask via IPI. */
+void tick_broadcast(const struct cpumask *mask)
+{
+	int cpu;
+
+	preempt_disable();
+	for_each_cpu(cpu, mask)
+		send_ipi_message(cpu, IPI_RECEIVE_BROADCAST);
+	preempt_enable();
+}
+
+/*
+ * Arm this CPU's one-shot software timer to fire @delta clock units from
+ * now (0 means "expire immediately").  The expiry is consumed by the
+ * wait loop in arch_cpu_idle().
+ */
+void wasm_program_timer(unsigned long delta)
+{
+	unsigned long long now;
+	unsigned long long expiry = 0ULL;
+
+	unsigned int *raised_irqs_ptr = this_cpu_ptr(&raised_irqs);
+	long long *expiry_ptr = this_cpu_ptr(&local_timer_expiries);
+
+	if (delta == 0UL) {
+		/* Optimization: set expiry to 0 to immediately expire. */
+	} else {
+		now = wasm_cpu_clock_get_monotonic();
+		expiry = now + (unsigned long long)delta;
+
+		/*
+		 * This overflow will realistically never happen. Calling panic
+		 * instead of returning a non-zero value is warranted, as the
+		 * calling code would otherwise enter an infinite loop...
+		 */
+		if (expiry < now || expiry > (unsigned long long)LLONG_MAX)
+			panic("clockevent expiry too large");
+	}
+
+	__atomic_store_n(expiry_ptr, (long long)expiry, __ATOMIC_SEQ_CST);
+
+	/*
+	 * We notify on raised_irqs since that's what we're waiting on in the
+	 * idle loop. It does not matter if it's still 0 - it will wake anyway.
+	 */
+	__builtin_wasm_memory_atomic_notify(raised_irqs_ptr, 1U);
+}
+
+/*
+ * Per-CPU IPI dispatcher: atomically consume the pending-message word,
+ * then run the handler for every message type that was set.
+ */
+static irqreturn_t handle_IPI(int irq_nr, void *dev_id)
+{
+	unsigned int *pending_ptr = dev_id;
+	unsigned int pending = __atomic_exchange_n(pending_ptr, 0U,
+						   __ATOMIC_SEQ_CST);
+
+	if (pending & IPI_MASK(IPI_RECEIVE_BROADCAST)) {
+		/* Useful in NO_HZ_FULL case where no task is running. */
+		tick_receive_broadcast();
+	}
+
+	if (pending & IPI_MASK(IPI_CALL_FUNC))
+		generic_smp_call_function_interrupt();
+
+	if (pending & IPI_MASK(IPI_RESCHEDULE))
+		scheduler_ipi();
+
+	if (pending & IPI_MASK(IPI_IRQ_WORK))
+		irq_work_run();
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Boot-CPU-only IPI setup; each secondary enables its own copy of the
+ * per-CPU IRQ in start_secondary().
+ */
+void __init setup_smp_ipi(void)
+{
+	int err;
+
+	err = request_percpu_irq(WASM_IRQ_IPI, handle_IPI, "IPI",
+				 &raised_ipis);
+	if (err)
+		panic("Failed to register IPI IRQ");
+
+	enable_percpu_irq(WASM_IRQ_IPI, IRQ_TYPE_NONE);
+}
+
+/*
+ * Idle loop body: sleep on the per-CPU raised_irqs word (with the local
+ * timer expiry as the wait timeout), then dispatch any IRQ bits that
+ * were raised.  See the long comment below for the overall model.
+ */
+void arch_cpu_idle(void)
+{
+	/* Note: The idle task will not migrate so per_cpu state is stable. */
+	unsigned int *raised_irqs_ptr = this_cpu_ptr(&raised_irqs);
+	unsigned int raised_irqs;
+	long long *expiry_ptr = this_cpu_ptr(&local_timer_expiries);
+	long long expiry;
+	long long timeout;
+	unsigned long long now;
+	int irq_nr;
+
+	/*
+	 * This function is supposed to sleep until an interrupt comes in. The
+	 * fact these events can only be detected from the idle task makes these
+	 * "interrupts" unreliable unless there are no tasks on this CPU's
+	 * runqueue at all times. Therefore, one CPU (IRQ_CPU) is dedicated to
+	 * handle interrupts only, no user tasks are allowed to run on it.
+	 *
+	 * Additionally, the clockevent subsystem can wake us, either because it
+	 * wants to program a new timer expiry (arming or re-arming the timer),
+	 * or because an already armed timer is expiring. The clockevent
+	 * subsystem can also request a broadcast - i.e. waking up other CPUs
+	 * from a dedicated broadcast device (living on IRQ_CPU). It's important
+	 * that all CPUs can handle programming of timers, since it's being used
+	 * when the system boots (before NO_HZ_IDLE kicks in). Additionally,
+	 * some kernel functions (e.g. schedule_timeout()) rely on timers to
+	 * wake them up when no task is running on the CPU. These events and
+	 * broadcasts will of course happen in a best-effort fashion on CPUs
+	 * where there are tasks running, as they cannot be stopped.
+	 *
+	 * Wasm-specific wait primitives are used so that the Wasm VM can yield
+	 * to the host OS. In a sense, it's like calling schedule(), but on the
+	 * host. Calling schedule() here would just send us back, busy-waiting.
+	 */
+	for (;;) {
+		expiry = __atomic_load_n(expiry_ptr, __ATOMIC_SEQ_CST);
+
+reprocess:
+		if (expiry > 0LL) {
+			now = wasm_cpu_clock_get_monotonic();
+
+			/* This will realistically never happen. */
+			if (now > (unsigned long long)LLONG_MAX)
+				panic("time is too far into the future");
+
+			if ((long long)now >= expiry)
+				timeout = 0LL;
+			else
+				timeout = expiry - now;
+		} else {
+			/*
+			 * Just like magic:
+			 * If expiry is 0 => timeout becomes 0.
+			 * If expiry is forever => timeout becomes forever.
+			 */
+			timeout = expiry;
+		}
+
+		/* timeout == 0 iff the timer expired this iteration */
+		if (timeout == 0LL) {
+			/*
+			 * It may be tempting to raise the timer interrupt
+			 * already here, but that would not comply with the
+			 * clockevent API, which mandates that re-programming
+			 * of the device also cancels any pending event first.
+			 */
+
+			/* Try resetting the timer to never expire. */
+			if (!__atomic_compare_exchange_n(expiry_ptr, &expiry,
+					TIMER_NEVER_EXPIRE, false,
+					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
+				/*
+				 * Expiry changed under our rug - re-process it.
+				 * This goto is slightly faster than "continue;"
+				 * as the compare-and-swap above will already
+				 * have loaded the new expiry value on failure.
+				 */
+				goto reprocess;
+			}
+
+			raise_interrupt(smp_processor_id(), WASM_IRQ_TIMER);
+
+			if (smp_processor_id() != IRQ_CPU)
+				timeout = TIMER_NEVER_EXPIRE;
+		}
+
+		if (timeout != 0LL)
+			__builtin_wasm_memory_atomic_wait32(raised_irqs_ptr, 0U,
+				timeout);
+
+		raised_irqs = __atomic_exchange_n(raised_irqs_ptr, 0U,
+			__ATOMIC_SEQ_CST);
+
+		/*
+		 * In the case of some raised_irqs, handle it, then we will come
+		 * back here in a future invocation of this function. This
+		 * function returns so that the idle framework can do its job,
+		 * for example if TIF_NEEDS_RESCHED is set by some IPI.
+		 */
+		if (raised_irqs)
+			break;
+	}
+
+	/* Dispatch each pending IRQ line, lowest number first. */
+	irq_nr = 0;
+	while (raised_irqs) {
+		if (raised_irqs & 1U)
+			do_irq_stacked(irq_nr);
+
+		raised_irqs >>= 1;
+		++irq_nr;
+	}
+}
diff --git a/arch/wasm/kernel/stack.c b/arch/wasm/kernel/stack.c
new file mode 100644
index 000000000..a0f1c314b
--- /dev/null
+++ b/arch/wasm/kernel/stack.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/kasan.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+
+/*
+ * Place the task_struct at the cache-aligned top of its THREAD_SIZE
+ * allocation, so the kernel stack can grow downwards in the same pages.
+ */
+struct task_struct *alloc_task_struct_node(int node)
+{
+	unsigned long base;
+	unsigned long offset;
+	struct page *page;
+
+	page = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
+	if (unlikely(!page))
+		return NULL;
+
+	base = (unsigned long)page_address(page);
+	offset = ALIGN_DOWN(THREAD_SIZE - sizeof(struct task_struct),
+			    L1_CACHE_BYTES);
+	return (struct task_struct *)(base + offset);
+}
+
+/* Undo alloc_task_struct_node(): mask back to the allocation base. */
+void free_task_struct(struct task_struct *tsk)
+{
+	unsigned long base = (unsigned long)tsk & THREAD_MASK;
+
+	free_pages(base, THREAD_SIZE_ORDER);
+}
diff --git a/arch/wasm/kernel/sys_wasm.c b/arch/wasm/kernel/sys_wasm.c
new file mode 100644
index 000000000..c3b2404e0
--- /dev/null
+++ b/arch/wasm/kernel/sys_wasm.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/syscalls.h>
+
+/* SYS_mmap2(): mmap with the offset given in fixed 4K units. */
+SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags, unsigned long, fd,
+		unsigned long, pgoff)
+{
+	/*
+	 * The "page size" for mmap2 should always be 4K (shift 12). Some
+	 * architectures use their native page size or whatnot, and that's why
+	 * this syscall exists in its own version for each architecture...
+	 *
+	 * Some architectures check the alignment, but that's out of spec.
+	 */
+	return ksys_mmap_pgoff(addr, len, prot, flags, fd,
+		pgoff >> (PAGE_SHIFT - 12));
+}
diff --git a/arch/wasm/kernel/syscall_table.c b/arch/wasm/kernel/syscall_table.c
new file mode 100644
index 000000000..3c4e9b5e6
--- /dev/null
+++ b/arch/wasm/kernel/syscall_table.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/syscalls.h>
+#include <asm-generic/syscalls.h>
+#include <asm/syscall.h>
+
+/*
+ * We should probably use some soft variant of CONFIG_COMPAT yet to be invented.
+ * TODO: This hack should be replaced with proper selection of compat syscalls!
+ */
+
+/*
+ * 32-bit compat wrapper: userland passes the 64-bit length split into
+ * two arguments (lo, hi); reassemble it for the generic syscall.
+ * Fixes: stray leading whitespace before "static" that misindented this
+ * definition relative to its sys_ftruncate64_fixup() sibling.
+ */
+static long sys_truncate64_fixup(const char __user *pathname,
+				 unsigned long length_lo,
+				 unsigned long length_hi)
+{
+	return sys_truncate64(pathname,
+		((unsigned long long)length_hi << 32) | length_lo);
+}
+
+/* Same lo/hi length reassembly as sys_truncate64_fixup(), for an fd. */
+static long sys_ftruncate64_fixup(unsigned int fd,
+				  unsigned long length_lo,
+				  unsigned long length_hi)
+{
+	unsigned long long length;
+
+	length = ((unsigned long long)length_hi << 32) | length_lo;
+	return sys_ftruncate64(fd, length);
+}
+
+/*
+ * System call dispatch table.  Every slot defaults to sys_ni_syscall,
+ * asm/unistd.h then fills in the generic entries, and finally the two
+ * 64-bit-length fixups above override their generic counterparts.
+ */
+void (* const sys_call_table[__NR_syscalls])(void) = {
+	[0 ... __NR_syscalls-1] = (void (*)(void))sys_ni_syscall,
+
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = (void (*)(void))(call),
+#include <asm/unistd.h>
+
+	[__NR_truncate64] = (void (*)(void))sys_truncate64_fixup,
+	[__NR_ftruncate64] = (void (*)(void))sys_ftruncate64_fixup,
+};
diff --git a/arch/wasm/kernel/time.c b/arch/wasm/kernel/time.c
new file mode 100644
index 000000000..af65bc3f0
--- /dev/null
+++ b/arch/wasm/kernel/time.c
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+
+#include <asm/irq.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+
+extern unsigned long long wasm_cpu_clock_get_monotonic(void);
+
+/* Wasm clock source: derived from Wasm host cpu clock (monotonic). */
+
+static unsigned long long wasm_clocksource_read(struct clocksource *cs)
+{
+ return wasm_cpu_clock_get_monotonic();
+}
+
+static struct clocksource wasm_clocksource = {
+ .name = "wasm_cpu_clock",
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .rating = 200,
+ .read = wasm_clocksource_read,
+ .mask = CLOCKSOURCE_MASK(64),
+};
+
+static int __init wasm_clocksource_init(void)
+{
+ return clocksource_register_khz(&wasm_clocksource, 1000000U /* 1 ns */);
+}
+
+/* Wasm clock event: derived from Wasm atomic wait timeouts (in smp.c). */
+
+static int wasm_clockevent_set_next_event(unsigned long delta,
+ struct clock_event_device *dev)
+{
+ wasm_program_timer(delta);
+ return 0;
+}
+
+static DEFINE_PER_CPU(struct clock_event_device, wasm_clockevents) = {
+ .name = "wasm_timer",
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ .rating = 300,
+ .set_next_event = wasm_clockevent_set_next_event,
+};
+
+static irqreturn_t __irq_entry wasm_timer_interrupt(int irq_nr, void *dev_id)
+{
+ struct clock_event_device *ce_dev = dev_id;
+
+ ce_dev->event_handler(ce_dev);
+
+ return IRQ_HANDLED;
+}
+
+static int __init wasm_clockevent_init(void)
+{
+ /* Requested here, enabled in wasm_clockevent_enable() for each cpu. */
+ return request_percpu_irq(WASM_IRQ_TIMER, wasm_timer_interrupt,
+ "wasm-timer", &wasm_clockevents);
+}
+
+void wasm_clockevent_enable(void)
+{
+ struct clock_event_device *ce_dev = this_cpu_ptr(&wasm_clockevents);
+
+ ce_dev->cpumask = cpumask_of(smp_processor_id());
+ ce_dev->irq = WASM_IRQ_TIMER;
+ clockevents_config_and_register(ce_dev, 1000000000, 0, ~0U);
+
+ enable_percpu_irq(WASM_IRQ_TIMER, IRQ_TYPE_NONE);
+}
+
+/* Called very early in the boot, only CPU 0 is up so far! */
+void __init time_init(void)
+{
+ /* Time is an illusion and yet here we are... */
+ if (wasm_clocksource_init())
+ panic("Failed to initialize Wasm clocksource");
+
+ if (wasm_clockevent_init())
+ panic("Failed to initialize Wasm clock_event");
+
+ /* Only for CPU 0, secondaries will be enabled as they come up. */
+ wasm_clockevent_enable();
+}
diff --git a/arch/wasm/kernel/traps.c b/arch/wasm/kernel/traps.c
new file mode 100644
index 000000000..928a2338a
--- /dev/null
+++ b/arch/wasm/kernel/traps.c
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/entry-common.h>
+#include <linux/syscalls.h>
+#include <asm/cpuflags.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/syscall.h>
+
+static inline void exception_enter(struct pt_regs *regs)
+{
+ unsigned long *cpuflags = this_cpu_ptr(&wasm_cpuflags);
+
+ regs->cpuflags = *cpuflags;
+ *cpuflags &= ~(BIT(CPUFLAGS_USER_MODE) | BIT(CPUFLAGS_INTERRUPT));
+}
+
+static inline void exception_exit(struct pt_regs *regs)
+{
+ unsigned long *cpuflags = this_cpu_ptr(&wasm_cpuflags);
+
+ *cpuflags = regs->cpuflags;
+}
+
+#define WASM_SYSCALL_N(x, args, cast_args, ...) \
+ __visible long __wasm_syscall_##x args \
+ { \
+ long syscall = n; \
+ struct pt_regs *regs = current_pt_regs(); \
+ void (*fn)(void); \
+ long syscall_args[] = {__MAP(x,__SC_ARGS,__VA_ARGS__)}; \
+ bool restart; \
+ \
+ exception_enter(regs); \
+ \
+ regs->syscall_nr = n; \
+ memcpy(regs->syscall_args, syscall_args, sizeof(syscall_args)); \
+ regs->syscall_ret = -ENOSYS; \
+ \
+ if (user_mode(regs)) { \
+ do { \
+ syscall = syscall_enter_from_user_mode( \
+ regs, syscall); \
+ \
+ if (syscall >= 0 && syscall < __NR_syscalls) { \
+ fn = sys_call_table[syscall]; \
+ if (syscall == __NR_restart_syscall) { \
+ regs->syscall_ret = sys_restart_syscall(); \
+ } else if (fn != (void (*)(void))sys_ni_syscall) { \
+ regs->syscall_ret = ((long (*)(cast_args)) \
+ fn)(__MAP(x,__SC_ARGS,__VA_ARGS__)); \
+ } \
+ } \
+ \
+ syscall_exit_to_user_mode(regs); \
+ \
+ switch (regs->syscall_ret) { \
+ case -ERESTART_RESTARTBLOCK: \
+ syscall = __NR_restart_syscall; \
+ fallthrough; \
+ case -ERESTARTNOHAND: \
+ case -ERESTARTSYS: \
+ case -ERESTARTNOINTR: \
+ restart = true; \
+ break; \
+ default: \
+ restart = false; \
+ } \
+ } while (restart); \
+ } else { \
+ irqentry_state_t state = irqentry_nmi_enter(regs); \
+ \
+ panic("Syscall called when in kernel mode"); \
+ \
+ irqentry_nmi_exit(regs, state); \
+ } \
+ \
+ exception_exit(regs); \
+ \
+ return regs->syscall_ret; \
+ }
+#define WASM_SYSCALL(x, ...) WASM_SYSCALL_N( \
+ x, \
+ (long n, __MAP(x,__SC_DECL,__VA_ARGS__)), \
+ __MAP(x,__SC_DECL,__VA_ARGS__), \
+ __VA_ARGS__)
+
+WASM_SYSCALL_N(0, (long n), void)
+WASM_SYSCALL(1, long, a)
+WASM_SYSCALL(2, long, a, long, b)
+WASM_SYSCALL(3, long, a, long, b, long, c)
+WASM_SYSCALL(4, long, a, long, b, long, c, long, d)
+WASM_SYSCALL(5, long, a, long, b, long, c, long, d, long, e)
+WASM_SYSCALL(6, long, a, long, b, long, c, long, d, long, e, long, f)
+
+/*
+ * Final check before syscall return (after pt_regs have been restored).
+ *
+ * If exec() was called, we reload user program code. If there is a signal
+ * handler to call, we call it. (Both will not happen, as exec blocks handlers.)
+ *
+ * Returns the direction of program flow:
+ * -1 if exec() was called and the Wasm host should reload the user program.
+ * 1 if a signal was delivered => the Wasm host should start signal handling.
+ * 2 if a sigreturn happened => the Wasm host should cancel signal handling.
+ * 3 if a signal was delivered AND a sigreturn (of an older signal handler)
+ * happened => the Wasm host should first handle the new signal (stacked),
+ * then cancel the old signal handler (after the stacked signal returns).
+ * 0 if nothing should be done and the syscall should return normally.
+ * In the case of exec(), the syscall should never fully return to the caller.
+ */
+int user_mode_tail(void)
+{
+ struct thread_info *thread_info = current_thread_info();
+ const bool reload = thread_info->flags & _TIF_RELOAD_PROGRAM;
+ const bool deliver = thread_info->flags & _TIF_DELIVER_SIGNAL;
+ const bool retn = thread_info->flags & _TIF_RETURN_SIGNAL;
+
+ if (reload) {
+ BUG_ON(deliver);
+ BUG_ON(retn);
+
+ thread_info->flags &= ~_TIF_RELOAD_PROGRAM;
+ return -1;
+ } else if (deliver || retn) {
+ BUG_ON(reload);
+
+ if (deliver)
+ thread_info->flags &= ~_TIF_DELIVER_SIGNAL;
+
+ if (retn)
+ thread_info->flags &= ~_TIF_RETURN_SIGNAL;
+
+ return (deliver ? 1 : 0) | (retn ? 2 : 0);
+ }
+
+ return 0;
+}
+
+static void do_irq(struct pt_regs *regs, int irq_nr)
+{
+ struct pt_regs *old_regs;
+ irqentry_state_t state = irqentry_enter(regs);
+
+ irq_enter_rcu();
+ old_regs = set_irq_regs(regs);
+ generic_handle_irq(irq_nr);
+ set_irq_regs(old_regs);
+ irq_exit_rcu();
+
+ irqentry_exit(regs, state);
+}
+
+void do_irq_stacked(int irq_nr)
+{
+ /*
+ * This is a bit odd but somewhere in this function's frame we start an
+ * exception frame. Exactly where the boundary is does not matter in
+ * practice, some data may end up on either "wrong" end of the boundary.
+ */
+ struct pt_regs regs = PT_REGS_INIT;
+ regs.stack_pointer = (unsigned long)&regs + sizeof(regs);
+ exception_enter(&regs);
+
+ do_irq(&regs, irq_nr);
+
+ exception_exit(&regs);
+}
+
+/* Do an exception. There are currently no exception types in Wasm. */
+static void do_exception(struct pt_regs *regs)
+{
+ /*
+ * The host is currently responsible for reporting the full error. We
+ * just mark this error as SIGILL but it could be anything.
+ */
+ if (user_mode(regs)) {
+ irqentry_enter_from_user_mode(regs);
+ force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)0U);
+ irqentry_exit_to_user_mode(regs);
+ } else {
+ irqentry_state_t state = irqentry_nmi_enter(regs);
+ make_task_dead(SIGILL);
+ irqentry_nmi_exit(regs, state);
+ }
+}
+
+/*
+ * This function is called from the host when things break either in kernel code
+ * or user code. That code will never continue to execute - we have to report the
+ * error and try to recover in the best way possible.
+ */
+__visible void raise_exception(void)
+{
+ /*
+ * This is a bit odd but somewhere in this function's frame we start an
+ * exception frame. Exactly where the boundary is does not matter in
+ * practice, some data may end up on either "wrong" end of the boundary.
+ */
+ struct pt_regs regs = PT_REGS_INIT;
+ regs.stack_pointer = (unsigned long)&regs + sizeof(regs);
+ exception_enter(&regs);
+
+ do_exception(&regs);
+
+ exception_exit(&regs);
+}
diff --git a/arch/wasm/kernel/vmlinux.lds.S b/arch/wasm/kernel/vmlinux.lds.S
new file mode 100644
index 000000000..1ae0641dc
--- /dev/null
+++ b/arch/wasm/kernel/vmlinux.lds.S
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <asm/asm-offsets.h>
+
+/* Put init_task after init_stack in the thread stack. */
+#define INIT_TASK_OFFSET THREAD_TASK_STRUCT_OFFSET
+
+#include <asm/vmlinux.lds.h>
+#include <asm/thread_info.h>
+#include <asm/cache.h>
+#include <asm/page.h>
+
+SECTIONS
+{
+ /* To refer to addres 0 in assembly, but as a relocation. */
+ zeroptr = 0;
+
+ /* Begin 1 Wasm page (64 KiB) in so that we can dodge null-pointer. */
+ . = 0x10000;
+
+ __init_begin = .;
+ HEAD_TEXT_SECTION
+ INIT_TEXT_SECTION(PAGE_SIZE)
+ INIT_DATA_SECTION(16)
+ PERCPU_SECTION(L1_CACHE_BYTES)
+ __init_end = .;
+
+ .text : {
+ _text = .;
+ _stext = .;
+ TEXT_TEXT
+ SCHED_TEXT
+ LOCK_TEXT
+ KPROBES_TEXT
+ ENTRY_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
+ _etext = .;
+ }
+
+ _sdata = .;
+ RO_DATA(PAGE_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ _edata = .;
+
+ EXCEPTION_TABLE(16)
+
+ BSS_SECTION(0, 0, 0)
+ _end = .;
+
+/*
+ Not supported by wasm-ld linker script hack:
+ STABS_DEBUG
+ DWARF_DEBUG
+ ELF_DETAILS
+*/
+
+ DISCARDS // must be the last
+}
+
+/*
+ * Due to the way linker scripts are implemented in wasm-ld, any symbol-alias
+ * assignments have to happen after the symbol has been placed into the output.
+ */
+jiffies = jiffies_64;
diff --git a/arch/wasm/lib/Makefile b/arch/wasm/lib/Makefile
new file mode 100644
index 000000000..8e4e35012
--- /dev/null
+++ b/arch/wasm/lib/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+lib-y += delay.o
diff --git a/arch/wasm/lib/delay.c b/arch/wasm/lib/delay.c
new file mode 100644
index 000000000..4db76b463
--- /dev/null
+++ b/arch/wasm/lib/delay.c
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/delay.h>
+
+void calibrate_delay(void)
+{
+ /* Wasm convention: lpj = nanoseconds per Hz */
+ loops_per_jiffy = 1000000000 / HZ;
+}
+
+void __delay(unsigned long cycles)
+{
+ unsigned int dummy = 0U;
+
+ mb();
+ __builtin_wasm_memory_atomic_wait32(&dummy, 0U, (long long)cycles);
+ mb();
+}
+EXPORT_SYMBOL(__delay);
diff --git a/arch/wasm/mm/Makefile b/arch/wasm/mm/Makefile
new file mode 100644
index 000000000..661744a43
--- /dev/null
+++ b/arch/wasm/mm/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-y += init.o
diff --git a/arch/wasm/mm/init.c b/arch/wasm/mm/init.c
new file mode 100644
index 000000000..5469d62e5
--- /dev/null
+++ b/arch/wasm/mm/init.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <linux/memblock.h>
+#include <asm/page.h>
+
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
+EXPORT_SYMBOL(empty_zero_page);
+
+void __init mem_init(void)
+{
+ /* These are needed by some code to know which pages are valid. */
+ high_memory = (void *)memory_end;
+ max_pfn = PFN_DOWN(memory_end);
+ min_low_pfn = PFN_DOWN(memory_start);
+ max_low_pfn = max_pfn;
+ set_max_mapnr(max_low_pfn - min_low_pfn);
+
+ memblock_free_all();
+}
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index b331a3947..86d5a3ee4 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -404,11 +404,15 @@
. = ALIGN(align); \
*(.data..cacheline_aligned)
+#ifndef INIT_TASK_OFFSET
+#define INIT_TASK_OFFSET (0)
+#endif
#define INIT_TASK_DATA(align) \
. = ALIGN(align); \
__start_init_task = .; \
init_thread_union = .; \
init_stack = .; \
+ . = . + INIT_TASK_OFFSET; \
KEEP(*(.data..init_task)) \
KEEP(*(.data..init_thread_info)) \
. = __start_init_task + THREAD_SIZE; \
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index d676ed2b2..f0aee0f22 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -437,6 +437,7 @@ enum {
#define AUDIT_ARCH_TILEGX32 (EM_TILEGX|__AUDIT_ARCH_LE)
#define AUDIT_ARCH_TILEPRO (EM_TILEPRO|__AUDIT_ARCH_LE)
#define AUDIT_ARCH_UNICORE (EM_UNICORE|__AUDIT_ARCH_LE)
+#define AUDIT_ARCH_WASM32 (EM_WASM32|__AUDIT_ARCH_LE)
#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define AUDIT_ARCH_XTENSA (EM_XTENSA)
#define AUDIT_ARCH_LOONGARCH32 (EM_LOONGARCH|__AUDIT_ARCH_LE)
diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h
index ef38c2bc5..aaca659a7 100644
--- a/include/uapi/linux/elf-em.h
+++ b/include/uapi/linux/elf-em.h
@@ -52,6 +52,7 @@
#define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */
#define EM_CSKY 252 /* C-SKY */
#define EM_LOONGARCH 258 /* LoongArch */
+#define EM_WASM32 264 /* WebAssembly wasm32 */
#define EM_FRV 0x5441 /* Fujitsu FR-V */
/*
diff --git a/scripts/Makefile.clang b/scripts/Makefile.clang
index 058a4c0f8..4253c0177 100644
--- a/scripts/Makefile.clang
+++ b/scripts/Makefile.clang
@@ -9,6 +9,7 @@ CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu
CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu
CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu
CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu
+CLANG_TARGET_FLAGS_wasm := wasm32-unknown-unknown
CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu
CLANG_TARGET_FLAGS_um := $(CLANG_TARGET_FLAGS_$(SUBARCH))
CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(SRCARCH))
diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o
index 0edfdb403..9fdae5e6d 100644
--- a/scripts/Makefile.vmlinux_o
+++ b/scripts/Makefile.vmlinux_o
@@ -44,12 +44,20 @@ objtool-args = $(vmlinux-objtool-args-y) --link
# Link of vmlinux.o used for section mismatch analysis
# ---------------------------------------------------------------------------
+ifneq ($(ARCH),wasm)
+ circular-resolved-libs = --start-group $(KBUILD_VMLINUX_LIBS) --end-group
+else
+ # LLVM wasm-ld does not support --start-group and --end-group. This is
+ # not as good as grouping them, but it might just work!
+ circular-resolved-libs = $(KBUILD_VMLINUX_LIBS) $(KBUILD_VMLINUX_LIBS)
+endif
+
quiet_cmd_ld_vmlinux.o = LD $@
cmd_ld_vmlinux.o = \
$(LD) ${KBUILD_LDFLAGS} -r -o $@ \
$(addprefix -T , $(initcalls-lds)) \
--whole-archive vmlinux.a --no-whole-archive \
- --start-group $(KBUILD_VMLINUX_LIBS) --end-group \
+ $(circular-resolved-libs) \
$(cmd_objtool)
define rule_ld_vmlinux.o
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index a432b171b..023eed789 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -66,7 +66,15 @@ vmlinux_link()
libs=
else
objs=vmlinux.a
- libs="${KBUILD_VMLINUX_LIBS}"
+
+ if [ "${ARCH}" = "wasm" ]; then
+ # LLVM wasm-ld does not support --start-group and
+ # --end-group. This is not as good as grouping them, but
+ # it might just work!
+ libs="${KBUILD_VMLINUX_LIBS} ${KBUILD_VMLINUX_LIBS}"
+ else
+ libs="${wl}--start-group ${KBUILD_VMLINUX_LIBS} ${wl}--end-group"
+ fi
fi
if is_enabled CONFIG_MODULES; then
@@ -87,6 +95,16 @@ vmlinux_link()
ldlibs=
fi
+ # wasm-ld has very simple linker scripts and needs some extra setup.
+ if [ "${ARCH}" = "wasm" ]; then
+ ldflags="${ldflags} --no-entry --error-limit=0"
+ ldflags="${ldflags} --export-all --export-table"
+ ldflags="${ldflags} --no-merge-data-segments --no-gc-sections"
+ ldflags="${ldflags} --import-memory --shared-memory"
+ ldflags="${ldflags} --max-memory=$((1<<32))"
+ ldflags="${ldflags} --import-undefined"
+ fi
+
ldflags="${ldflags} ${wl}--script=${objtree}/${KBUILD_LDS}"
# The kallsyms linking does not need debug symbols included.
@@ -100,8 +118,7 @@ vmlinux_link()
${ld} ${ldflags} -o ${output} \
${wl}--whole-archive ${objs} ${wl}--no-whole-archive \
- ${wl}--start-group ${libs} ${wl}--end-group \
- $@ ${ldlibs}
+ ${libs} $@ ${ldlibs}
}
# generate .BTF typeinfo from DWARF debuginfo
--
2.25.1