From efdef05f887b3ea571b329f0b2a52d062635fe13 Mon Sep 17 00:00:00 2001 From: Joel Severin Date: Sun, 14 Sep 2025 17:09:39 +0200 Subject: [PATCH] Add Wasm architecture This is the bare minimum arch-specific code needed to get Linux to boot on Wasm (WebAssembly). --- Makefile | 9 +- arch/wasm/Kbuild | 1 + arch/wasm/Kconfig | 78 ++++++ arch/wasm/Kconfig.debug | 10 + arch/wasm/Makefile | 24 ++ arch/wasm/include/asm/Kbuild | 58 ++++ arch/wasm/include/asm/barrier.h | 16 ++ arch/wasm/include/asm/cache.h | 12 + arch/wasm/include/asm/cmpxchg.h | 111 ++++++++ arch/wasm/include/asm/cpuflags.h | 22 ++ arch/wasm/include/asm/current.h | 36 +++ arch/wasm/include/asm/delay.h | 25 ++ arch/wasm/include/asm/elf.h | 66 +++++ arch/wasm/include/asm/entry-common.h | 16 ++ arch/wasm/include/asm/futex.h | 68 +++++ arch/wasm/include/asm/irq.h | 11 + arch/wasm/include/asm/irq_work.h | 13 + arch/wasm/include/asm/irqflags.h | 14 + arch/wasm/include/asm/linkage.h | 22 ++ arch/wasm/include/asm/mmu_context.h | 8 + arch/wasm/include/asm/panic.h | 15 ++ arch/wasm/include/asm/pgtable.h | 45 ++++ arch/wasm/include/asm/processor.h | 50 ++++ arch/wasm/include/asm/ptrace.h | 35 +++ arch/wasm/include/asm/smp.h | 28 ++ arch/wasm/include/asm/stacktrace.h | 20 ++ arch/wasm/include/asm/syscall.h | 62 +++++ arch/wasm/include/asm/thread_info.h | 105 ++++++++ arch/wasm/include/asm/time.h | 9 + arch/wasm/include/asm/vmalloc.h | 6 + arch/wasm/include/asm/wasm.h | 29 ++ arch/wasm/include/uapi/asm/Kbuild | 2 + arch/wasm/include/uapi/asm/byteorder.h | 8 + arch/wasm/include/uapi/asm/ptrace.h | 39 +++ arch/wasm/include/uapi/asm/sigcontext.h | 13 + arch/wasm/include/uapi/asm/unistd.h | 6 + arch/wasm/kernel/Makefile | 21 ++ arch/wasm/kernel/asm-offsets.c | 36 +++ arch/wasm/kernel/cpu.c | 46 ++++ arch/wasm/kernel/cpuflags.c | 5 + arch/wasm/kernel/entry.S | 299 ++++++++++++++++++++ arch/wasm/kernel/head.S | 110 ++++++++ arch/wasm/kernel/irq.c | 55 ++++ arch/wasm/kernel/irqflags.c | 21 ++ arch/wasm/kernel/process.c | 282 
+++++++++++++++++++ arch/wasm/kernel/ptrace.c | 13 + arch/wasm/kernel/reboot.c | 31 +++ arch/wasm/kernel/setup.c | 84 ++++++ arch/wasm/kernel/signal.c | 189 +++++++++++++ arch/wasm/kernel/smp.c | 344 ++++++++++++++++++++++++ arch/wasm/kernel/stack.c | 26 ++ arch/wasm/kernel/sys_wasm.c | 19 ++ arch/wasm/kernel/syscall_table.c | 37 +++ arch/wasm/kernel/time.c | 88 ++++++ arch/wasm/kernel/traps.c | 207 ++++++++++++++ arch/wasm/kernel/vmlinux.lds.S | 65 +++++ arch/wasm/lib/Makefile | 3 + arch/wasm/lib/delay.c | 19 ++ arch/wasm/mm/Makefile | 3 + arch/wasm/mm/init.c | 21 ++ include/asm-generic/vmlinux.lds.h | 4 + include/uapi/linux/audit.h | 1 + include/uapi/linux/elf-em.h | 1 + scripts/Makefile.clang | 1 + scripts/Makefile.vmlinux_o | 10 +- scripts/link-vmlinux.sh | 23 +- 66 files changed, 3151 insertions(+), 5 deletions(-) create mode 100644 arch/wasm/Kbuild create mode 100644 arch/wasm/Kconfig create mode 100644 arch/wasm/Kconfig.debug create mode 100644 arch/wasm/Makefile create mode 100644 arch/wasm/include/asm/Kbuild create mode 100644 arch/wasm/include/asm/barrier.h create mode 100644 arch/wasm/include/asm/cache.h create mode 100644 arch/wasm/include/asm/cmpxchg.h create mode 100644 arch/wasm/include/asm/cpuflags.h create mode 100644 arch/wasm/include/asm/current.h create mode 100644 arch/wasm/include/asm/delay.h create mode 100644 arch/wasm/include/asm/elf.h create mode 100644 arch/wasm/include/asm/entry-common.h create mode 100644 arch/wasm/include/asm/futex.h create mode 100644 arch/wasm/include/asm/irq.h create mode 100644 arch/wasm/include/asm/irq_work.h create mode 100644 arch/wasm/include/asm/irqflags.h create mode 100644 arch/wasm/include/asm/linkage.h create mode 100644 arch/wasm/include/asm/mmu_context.h create mode 100644 arch/wasm/include/asm/panic.h create mode 100644 arch/wasm/include/asm/pgtable.h create mode 100644 arch/wasm/include/asm/processor.h create mode 100644 arch/wasm/include/asm/ptrace.h create mode 100644 arch/wasm/include/asm/smp.h 
create mode 100644 arch/wasm/include/asm/stacktrace.h create mode 100644 arch/wasm/include/asm/syscall.h create mode 100644 arch/wasm/include/asm/thread_info.h create mode 100644 arch/wasm/include/asm/time.h create mode 100644 arch/wasm/include/asm/vmalloc.h create mode 100644 arch/wasm/include/asm/wasm.h create mode 100644 arch/wasm/include/uapi/asm/Kbuild create mode 100644 arch/wasm/include/uapi/asm/byteorder.h create mode 100644 arch/wasm/include/uapi/asm/ptrace.h create mode 100644 arch/wasm/include/uapi/asm/sigcontext.h create mode 100644 arch/wasm/include/uapi/asm/unistd.h create mode 100644 arch/wasm/kernel/Makefile create mode 100644 arch/wasm/kernel/asm-offsets.c create mode 100644 arch/wasm/kernel/cpu.c create mode 100644 arch/wasm/kernel/cpuflags.c create mode 100644 arch/wasm/kernel/entry.S create mode 100644 arch/wasm/kernel/head.S create mode 100644 arch/wasm/kernel/irq.c create mode 100644 arch/wasm/kernel/irqflags.c create mode 100644 arch/wasm/kernel/process.c create mode 100644 arch/wasm/kernel/ptrace.c create mode 100644 arch/wasm/kernel/reboot.c create mode 100644 arch/wasm/kernel/setup.c create mode 100644 arch/wasm/kernel/signal.c create mode 100644 arch/wasm/kernel/smp.c create mode 100644 arch/wasm/kernel/stack.c create mode 100644 arch/wasm/kernel/sys_wasm.c create mode 100644 arch/wasm/kernel/syscall_table.c create mode 100644 arch/wasm/kernel/time.c create mode 100644 arch/wasm/kernel/traps.c create mode 100644 arch/wasm/kernel/vmlinux.lds.S create mode 100644 arch/wasm/lib/Makefile create mode 100644 arch/wasm/lib/delay.c create mode 100644 arch/wasm/mm/Makefile create mode 100644 arch/wasm/mm/init.c diff --git a/Makefile b/Makefile index 34ea74d74..c69000c85 100644 --- a/Makefile +++ b/Makefile @@ -479,7 +479,11 @@ KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS) CPP = $(CC) -E ifneq ($(LLVM),) CC = $(LLVM_PREFIX)clang$(LLVM_SUFFIX) -LD = $(LLVM_PREFIX)ld.lld$(LLVM_SUFFIX) +ifneq ($(ARCH),wasm) + LD = 
$(LLVM_PREFIX)ld.lld$(LLVM_SUFFIX) +else + LD = $(LLVM_PREFIX)wasm-ld$(LLVM_SUFFIX) +endif AR = $(LLVM_PREFIX)llvm-ar$(LLVM_SUFFIX) NM = $(LLVM_PREFIX)llvm-nm$(LLVM_SUFFIX) OBJCOPY = $(LLVM_PREFIX)llvm-objcopy$(LLVM_SUFFIX) @@ -1100,8 +1104,11 @@ KBUILD_AFLAGS += $(KAFLAGS) KBUILD_CFLAGS += $(KCFLAGS) KBUILD_RUSTFLAGS += $(KRUSTFLAGS) +# Not supported in Wasm binaries yet, PR seems to be in the works (LLVM D107662). +ifneq ($(ARCH),wasm) KBUILD_LDFLAGS_MODULE += --build-id=sha1 LDFLAGS_vmlinux += --build-id=sha1 +endif KBUILD_LDFLAGS += -z noexecstack ifeq ($(CONFIG_LD_IS_BFD),y) diff --git a/arch/wasm/Kbuild b/arch/wasm/Kbuild new file mode 100644 index 000000000..a4e40e534 --- /dev/null +++ b/arch/wasm/Kbuild @@ -0,0 +1 @@ +# SPDX-License-Identifier: GPL-2.0-only diff --git a/arch/wasm/Kconfig b/arch/wasm/Kconfig new file mode 100644 index 000000000..f6e566f50 --- /dev/null +++ b/arch/wasm/Kconfig @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: GPL-2.0-only + +menu "Wasm-specific options" + +# Wasm must run on many CPUs, as a task cannot be preempted, unless terminated. +# Each CPU becomes a thread in the host OS, and is handled by its scheduler. +# There is no MMU support in the current version of WebAssembly. + +config WASM + bool + default y + # The execution model of one task per cpu mandates the below options. + # One CPU is kept clear of tasks to act as a tick broadcast device. + select SMP + # PREEMPTION and PREEMPT_COUNT is not set, disallowing kernel preemption + select ARCH_NO_PREEMPT + select GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + # Needed by NO_HZ_FULL: + select HAVE_VIRT_CPU_ACCOUNTING_GEN + # TODO: Check that we comply with the user tracking requirements! 
+ select HAVE_CONTEXT_TRACKING_USER + + select NO_IP + select THREAD_INFO_IN_TASK + select ARCH_TASK_STRUCT_ON_STACK + select ARCH_TASK_STRUCT_ALLOCATOR + select ARCH_THREAD_STACK_ALLOCATOR + select GENERIC_SMP_IDLE_THREAD + select UACCESS_MEMCPY + select ARCH_USE_QUEUED_RWLOCKS + select GENERIC_CPU_DEVICES + select GENERIC_CSUM + select GENERIC_ENTRY + select GENERIC_HWEIGHT + select GENERIC_IRQ_SHOW + select HAVE_SYSCALL_TRACEPOINTS + select ARCH_HAVE_PANIC_NOTIFY + select ARCH_USE_BUILTIN_BSWAP + select ARCH_SUPPORTS_LTO_CLANG + select ARCH_SUPPORTS_LTO_CLANG_THIN + + # TODO: Very inefficient, replace with native stuff. Our atomic impl. + # of xchg and cmpxchg already supports 64-bit integers, we could use it. + select GENERIC_ATOMIC64 + +config SMP + bool "Symmetric Multi-Processing" + help + This enables support for systems with more than one CPU. In the + context of Wasm, every task needs one CPU, since there is no + preemption and no interrupts. If you say N here, you will only ever + be able to run one task. Only do this if you really know what + you're doing - there is a big risk you will lock up your system. + + If you don't know what to do here, say Y. + +config HZ + int + default 100 + +config NR_CPUS + int + range 1 8192 + default 64 + +config GENERIC_CSUM + def_bool y + +config GENERIC_HWEIGHT + def_bool y + +config ARCH_HAVE_PANIC_NOTIFY + bool + +endmenu + +source "drivers/Kconfig" diff --git a/arch/wasm/Kconfig.debug b/arch/wasm/Kconfig.debug new file mode 100644 index 000000000..8fc81eafa --- /dev/null +++ b/arch/wasm/Kconfig.debug @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config EARLY_PRINTK + bool "Early printk" + default y + help + Write kernel log output directly to console.log. + + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. 
diff --git a/arch/wasm/Makefile b/arch/wasm/Makefile new file mode 100644 index 000000000..b86103e0b --- /dev/null +++ b/arch/wasm/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0-only + +KBUILD_DEFCONFIG := wasm_defconfig + +KCFLAGS += -EL -m32 +KCFLAGS += -nostdlib -fno-builtin + +# These flags are needed so that wasm-ld can be run with --shared-memory. +KCFLAGS += -Xclang -target-feature -Xclang +atomics +KCFLAGS += -Xclang -target-feature -Xclang +bulk-memory + +core-y += arch/wasm/kernel/ +core-y += arch/wasm/mm/ +libs-y += arch/wasm/lib/ + +PHONY += bzImage + +all: bzImage + +bzImage: vmlinux + +define archhelp + echo '* bzImage - Compressed kernel image (arch/wasm/boot/bzImage)' +endef diff --git a/arch/wasm/include/asm/Kbuild b/arch/wasm/include/asm/Kbuild new file mode 100644 index 000000000..876a533cd --- /dev/null +++ b/arch/wasm/include/asm/Kbuild @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: GPL-2.0-only + +# TODO: Clean up headers that are not used by this arch. 
+generic-y += access_ok.h +generic-y += agp.h +generic-y += asm-offsets.h +generic-y += asm-prototypes.h +generic-y += atomic64.h +generic-y += audit_change_attr.h +generic-y += audit_dir_write.h +generic-y += audit_read.h +generic-y += audit_signal.h +generic-y += audit_write.h +generic-y += bitsperlong.h +generic-y += cmpxchg-local.h +generic-y += early_ioremap.h +generic-y += error-injection.h +generic-y += export.h +generic-y += extable.h +generic-y += fixmap.h +generic-y += flat.h +generic-y += getorder.h +generic-y += hugetlb.h +generic-y += hyperv-tlfs.h +generic-y += ide_iops.h +generic-y += int-ll64.h +generic-y += ioctl.h +generic-y += iomap.h +generic-y += kvm_para.h +generic-y += kvm_types.h +generic-y += logic_io.h +generic-y += mcs_spinlock.h +generic-y += memory_model.h +generic-y += mm_hooks.h +generic-y += mmiowb_types.h +generic-y += mshyperv.h +generic-y += numa.h +generic-y += page.h +generic-y += param.h +generic-y += parport.h +generic-y += pci_iomap.h +generic-y += qrwlock.h +generic-y += qrwlock_types.h +generic-y += qspinlock.h +generic-y += qspinlock_types.h +generic-y += resource.h +generic-y += seccomp.h +generic-y += set_memory.h +generic-y += signal.h +generic-y += spinlock.h +generic-y += spinlock_types.h +generic-y += statfs.h +generic-y += string.h +generic-y += syscalls.h +generic-y += tlb.h +generic-y += user.h +generic-y += vmlinux.lds.h +generic-y += vtime.h diff --git a/arch/wasm/include/asm/barrier.h b/arch/wasm/include/asm/barrier.h new file mode 100644 index 000000000..86d3fc9b2 --- /dev/null +++ b/arch/wasm/include/asm/barrier.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_BARRIER_H +#define _ASM_WASM_BARRIER_H + +/* + * Inspired by: + * https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0124r7.html + */ +#define mb() __atomic_thread_fence(__ATOMIC_SEQ_CST) +#define rmb() __atomic_thread_fence(__ATOMIC_ACQ_REL) +#define wmb() __atomic_thread_fence(__ATOMIC_ACQ_REL) + +#include + 
+#endif /* _ASM_WASM_BARRIER_H */ diff --git a/arch/wasm/include/asm/cache.h b/arch/wasm/include/asm/cache.h new file mode 100644 index 000000000..1abcb0191 --- /dev/null +++ b/arch/wasm/include/asm/cache.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_CACHE_H +#define _ASM_WASM_CACHE_H + +/* + * Most architectures executing Wasm code has a cacheline size of 64 bytes. + */ +#define L1_CACHE_SHIFT 6 +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#endif /* _ASM_WASM_CACHE_H */ diff --git a/arch/wasm/include/asm/cmpxchg.h b/arch/wasm/include/asm/cmpxchg.h new file mode 100644 index 000000000..a870f2682 --- /dev/null +++ b/arch/wasm/include/asm/cmpxchg.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_CMPXCHG_H +#define _ASM_WASM_CMPXCHG_H + +#include +#include + +/* + * Inspired by: + * https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0124r7.html + * https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/tree/include/asm-generic/iso-cmpxchg.h?h=iso-atomic + * + * TODO: McKenney et. al. above mention that atomic operations that return a + * value should be marked with __ATOMIC_RELAXED and wrapped with + * smp_mb__before_atomic()/smp_mb__after_atomic() calls. Howells above, + * however, just applies __ATOMIC_SEQ_CST. What is the best approach? + */ + +/* + * This function doesn't exist, so you'll get a linker error if + * something tries to do an invalidly-sized xchg(). 
+ */ +extern unsigned long long __generic_xchg_called_with_bad_pointer(void); + +static __always_inline unsigned long long __generic_xchg( + unsigned long long val, volatile void *ptr, int size) +{ + switch (size) { + case 1: + return __atomic_exchange_n( + (volatile u8 *)ptr, (u8)val, __ATOMIC_SEQ_CST); + + case 2: + return __atomic_exchange_n( + (volatile u16 *)ptr, (u16)val, __ATOMIC_SEQ_CST); + + case 4: + return __atomic_exchange_n( + (volatile u32 *)ptr, (u32)val, __ATOMIC_SEQ_CST); + + case 8: + return __atomic_exchange_n( + (volatile u64 *)ptr, (u64)val, __ATOMIC_SEQ_CST); + + default: + return __generic_xchg_called_with_bad_pointer(); + } +} + +#define arch_xchg(ptr, x) ({ \ + ((__typeof__(*(ptr))) __generic_xchg((unsigned long long)(x), (ptr), \ + sizeof(*(ptr)))); \ +}) + +static __always_inline unsigned long long __generic_cmpxchg(volatile void *ptr, + unsigned long long oldVal, unsigned long long newVal, int size) +{ + /* + * Unlike this functions' signature, __atomic_compare_exchange_n will + * modify oldVal with the actual value if the compare fails. 
+ */ + u8 expected8; + u16 expected16; + u32 expected32; + u64 expected64; + + switch (size) { + case 1: + expected8 = (u8)oldVal; + __atomic_compare_exchange_n( + (volatile u8 *)ptr, &expected8, (u8)newVal, + false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); + return expected8; + + case 2: + expected16 = (u16)oldVal; + __atomic_compare_exchange_n( + (volatile u16 *)ptr, &expected16, (u16)newVal, + false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); + return expected16; + + case 4: + expected32 = (u32)oldVal; + __atomic_compare_exchange_n( + (volatile u32 *)ptr, &expected32, (u32)newVal, + false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); + return expected32; + + case 8: + expected64 = (u64)oldVal; + __atomic_compare_exchange_n( + (volatile u64 *)ptr, &expected64, (u64)newVal, + false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); + return expected64; + + default: + return __generic_xchg_called_with_bad_pointer(); + } +} + +#define arch_cmpxchg(ptr, o, n) ({ \ + ((__typeof__(*(ptr)))__generic_cmpxchg((ptr), (unsigned long long)(o), \ + (unsigned long long)(n), sizeof(*(ptr)))); \ +}) + +#define arch_cmpxchg64 arch_cmpxchg +#define arch_cmpxchg_local arch_cmpxchg +#define arch_cmpxchg64_local arch_cmpxchg + +#endif /* _ASM_WASM_CMPXCHG_H */ diff --git a/arch/wasm/include/asm/cpuflags.h b/arch/wasm/include/asm/cpuflags.h new file mode 100644 index 000000000..365502f4f --- /dev/null +++ b/arch/wasm/include/asm/cpuflags.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_CPUFLAGS_H +#define _ASM_WASM_CPUFLAGS_H + +#include + +/* + * CPU flags handled by Wasm. These are used for accounting in many places. + * Interrupt: 1 if local interrupts are enabled. + * User mode: 1 if we're not in privileged mode. + * + * The reset value is 0: we boot into privileged mode with interrupts disabled. 
+ */ +#define CPUFLAGS_INTERRUPT 0 +#define CPUFLAGS_USER_MODE 1 + +#define CPUFLAGS_RESET_VALUE 0U + +DECLARE_PER_CPU(unsigned long, wasm_cpuflags); + +#endif /* _ASM_WASM_CPUFLAGS_H */ diff --git a/arch/wasm/include/asm/current.h b/arch/wasm/include/asm/current.h new file mode 100644 index 000000000..5f104a966 --- /dev/null +++ b/arch/wasm/include/asm/current.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_CURRENT_H +#define _ASM_WASM_CURRENT_H + +/* + * Questionable but necessary to keep get_current() inline, due to the cyclic + * dependency between task_struct and thread_info. + */ +#ifndef ASM_OFFSETS_C +#include +#endif + +#ifndef __ASSEMBLY__ + +#include +#include + +struct task_struct; + +static inline struct task_struct *get_current(void) +{ +#ifndef ASM_OFFSETS_C + char dummy; /* Something stored in the current kernel stack. */ + unsigned long thread_page = (unsigned long)&dummy & THREAD_MASK; + return (struct task_struct *)(thread_page + THREAD_TASK_STRUCT_OFFSET); +#else + return NULL; +#endif +} + +#define current (get_current()) + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_WASM_CURRENT_H */ diff --git a/arch/wasm/include/asm/delay.h b/arch/wasm/include/asm/delay.h new file mode 100644 index 000000000..0e3bd9346 --- /dev/null +++ b/arch/wasm/include/asm/delay.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_DELAY_H +#define _ASM_WASM_DELAY_H + +extern void __delay(unsigned long loops); +extern void __bad_udelay(void); +extern void __bad_ndelay(void); + +/* + * Wasm uses 1 loop = 1 nanosecond. This makes the conversion easy. + * + * Just like the rest of the kernel, these macros polices you if you try to + * delay for too long. You should use a sleep function that calls schedule() + * internally if you need longer sleeps than this. In Wasm in particular, usage + * of these macros is really discouraged (what are you busy-waiting for?). 
+ */ + +#define udelay(n) (__builtin_constant_p(n) && (n) > 20000 ? \ + __bad_udelay() : __delay((n) * 1000)) + +#define ndelay(n) (__builtin_constant_p(n) && (n) > 20000000 ? \ + __bad_ndelay() : __delay(n)) + +#endif /* _ASM_WASM_DELAY_H */ diff --git a/arch/wasm/include/asm/elf.h b/arch/wasm/include/asm/elf.h new file mode 100644 index 000000000..3a02588f9 --- /dev/null +++ b/arch/wasm/include/asm/elf.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_ELF_H +#define _ASM_WASM_ELF_H + +#include +#include +#include +#include +#include + +#define ELF_ARCH EM_WASM +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB + +#define elf_check_arch(x) (((x)->e_machine == ELF_ARCH) && \ + ((x)->e_ident[EI_CLASS] == ELF_CLASS)) + +extern bool compat_elf_check_arch(Elf32_Ehdr *hdr); +#define compat_elf_check_arch compat_elf_check_arch + +#define CORE_DUMP_USE_REGSET +#define ELF_EXEC_PAGESIZE (PAGE_SIZE) + +/* + * This is the location that an ET_DYN program is loaded if exec'ed. Typical + * use of this is to invoke "./ld.so someprog" to test out a new version of + * the loader. We need to make sure that it is out of the way of the program + * that it will "exec", and that there is sufficient room for the brk. + */ +#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2) + +#ifdef CONFIG_64BIT +#ifdef CONFIG_COMPAT +#define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? \ + 0x7ff >> (PAGE_SHIFT - 12) : \ + 0x3ffff >> (PAGE_SHIFT - 12)) +#else +#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) +#endif +#endif + +/* + * Provides information on the availiable set of ISA extensions to userspace, + * via a bitmap that coorespends to each single-letter ISA extension. This is + * essentially defunct, but will remain for compatibility with userspace. 
+ */ +#define ELF_HWCAP (elf_hwcap & ((1UL << RISCV_ISA_EXT_BASE) - 1)) +extern unsigned long elf_hwcap; + +/* + * This yields a string that ld.so will use to load implementation + * specific libraries for optimization. This is more specific in + * intent than poking at uname or /proc/cpuinfo. + */ +#define ELF_PLATFORM (NULL) + +#define COMPAT_ELF_PLATFORM (NULL) + +#define ELF_CORE_COPY_REGS(dest, regs) \ +do { \ + *(struct user_regs_struct *)&(dest) = \ + *(struct user_regs_struct *)regs; \ +} while (0); + +#endif /* _ASM_WASM_ELF_H */ diff --git a/arch/wasm/include/asm/entry-common.h b/arch/wasm/include/asm/entry-common.h new file mode 100644 index 000000000..20155e98f --- /dev/null +++ b/arch/wasm/include/asm/entry-common.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_ENTRY_COMMON_H +#define _ASM_WASM_ENTRY_COMMON_H + +#include + +/* + * Needed by common/entry.c. Returning -1 signals failure, should it ever run... + * + * Wasm could in theory support seccomp, but the transformation from non-seccomp + * to seccomp mode would require quite a bit of thought to get everything right. + */ +#define __secure_computing(...) 
(-1) + +#endif /* _ASM_WASM_ENTRY_COMMON_H */ diff --git a/arch/wasm/include/asm/futex.h b/arch/wasm/include/asm/futex.h new file mode 100644 index 000000000..05f901e4e --- /dev/null +++ b/arch/wasm/include/asm/futex.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_FUTEX_H +#define _ASM_WASM_FUTEX_H + +#include +#include +#include + +#define FUTEX_MAX_LOOPS 128 + +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int loops; + u32 expected; + + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; + + for (loops = 0; loops < FUTEX_MAX_LOOPS; ++loops) { + expected = oldval; + if (__atomic_compare_exchange_n((volatile u32 *)uaddr, + &expected, newval, false, __ATOMIC_SEQ_CST, + __ATOMIC_RELAXED)) { + *uval = oldval; + return 0; + } + } + + return -EAGAIN; +} + +static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, + u32 __user *uaddr) +{ + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; + + switch (op) { + case FUTEX_OP_SET: + *oval = __atomic_exchange_n( + (volatile u32 *)uaddr, oparg, __ATOMIC_SEQ_CST); + break; + case FUTEX_OP_ADD: + *oval = __atomic_fetch_add( + (volatile u32 *)uaddr, oparg, __ATOMIC_SEQ_CST); + break; + case FUTEX_OP_OR: + *oval = __atomic_fetch_or( + (volatile u32 *)uaddr, oparg, __ATOMIC_SEQ_CST); + break; + case FUTEX_OP_ANDN: + *oval = __atomic_fetch_and( + (volatile u32 *)uaddr, ~oparg, __ATOMIC_SEQ_CST); + break; + case FUTEX_OP_XOR: + *oval = __atomic_fetch_xor( + (volatile u32 *)uaddr, oparg, __ATOMIC_SEQ_CST); + break; + default: + return -ENOSYS; + } + + return 0; +} + +#endif /* _ASM_WASM_FUTEX_H */ diff --git a/arch/wasm/include/asm/irq.h b/arch/wasm/include/asm/irq.h new file mode 100644 index 000000000..5069bef1f --- /dev/null +++ b/arch/wasm/include/asm/irq.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_IRQ_H +#define _ASM_WASM_IRQ_H + +#define NR_IRQS 32 + +#define 
WASM_IRQ_IPI 0 +#define WASM_IRQ_TIMER 1 + +#endif /* _ASM_WASM_IRQ_H */ diff --git a/arch/wasm/include/asm/irq_work.h b/arch/wasm/include/asm/irq_work.h new file mode 100644 index 000000000..fa9c40b0d --- /dev/null +++ b/arch/wasm/include/asm/irq_work.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_IRQ_WORK_H +#define _ASM_WASM_IRQ_WORK_H + +extern void arch_irq_work_raise(void); + +static inline bool arch_irq_work_has_interrupt(void) +{ + return true; +} + +#endif /* _ASM_WASM_IRQ_WORK_H */ diff --git a/arch/wasm/include/asm/irqflags.h b/arch/wasm/include/asm/irqflags.h new file mode 100644 index 000000000..337a882f9 --- /dev/null +++ b/arch/wasm/include/asm/irqflags.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_IRQFLAGS_H +#define _ASM_WASM_IRQFLAGS_H + +/* + * arch_local_save_flags and arch_local_irq_restore are defined as non-static + * functions as this header is included from places where percpu-variables and + * even definitions for raw_smp_processor_id() cannot be included... + */ + +#include + +#endif /* _ASM_WASM_IRQFLAGS_H */ diff --git a/arch/wasm/include/asm/linkage.h b/arch/wasm/include/asm/linkage.h new file mode 100644 index 000000000..49f6776c5 --- /dev/null +++ b/arch/wasm/include/asm/linkage.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_LINKAGE_H +#define _ASM_WASM_LINKAGE_H + +/* + * llvm-wasm crashes when generating the output file using the default + * definition in linux/linking.h. In addition to this, wasm-ld does not like it + * if two function signatures don't match, ruling out using a function with + * __attribute__ ((weak, alias("sys_ni_syscall"))) here, even if it is lacking a + * prototype (it assumes "one" (void) param). + * + * This has to be fixed by the host (or possibly some post-process build script) + * because there is no way to tell which prototype to use for which symbol. 
+ * Getting rid of these stray declarations to begin with (e.g. setting + * ARCH_HAS_SYSCALL_WRAPPER) unfortunately causes problems for the + * sys_call_table generation. sys_call_table could be generated in some other + * way (or shape) but that would require other hacks to find available syscalls. + */ +#define cond_syscall(x) + +#endif /* _ASM_WASM_LINKAGE_H */ diff --git a/arch/wasm/include/asm/mmu_context.h b/arch/wasm/include/asm/mmu_context.h new file mode 100644 index 000000000..e9414c5c0 --- /dev/null +++ b/arch/wasm/include/asm/mmu_context.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_MMU_CONTEXT_H +#define _ASM_WASM_MMU_CONTEXT_H + +#include + +#endif /* _ASM_WASM_MMU_CONTEXT_H */ diff --git a/arch/wasm/include/asm/panic.h b/arch/wasm/include/asm/panic.h new file mode 100644 index 000000000..52ad0fa32 --- /dev/null +++ b/arch/wasm/include/asm/panic.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_PANIC_H +#define _ASM_WASM_PANIC_H + +#include + +static inline void arch_panic_notify(const char *msg) +{ + wasm_panic(msg); +} + +#include + +#endif /* _ASM_WASM_PANIC_H */ diff --git a/arch/wasm/include/asm/pgtable.h b/arch/wasm/include/asm/pgtable.h new file mode 100644 index 000000000..eeafad742 --- /dev/null +++ b/arch/wasm/include/asm/pgtable.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_PGTABLE_H +#define _ASM_WASM_PGTABLE_H + +#include + +#include +#include +#include +#include + +/* + * No MMU support so do nothing... + * Inspired by the various other NOMMU implementations in the kernel. 
+ */ + +#define pgd_present(pgd) (1) +#define pgd_none(pgd) (0) +#define pgd_bad(pgd) (0) +#define pgd_clear(pgdp) +#define pmd_offset(a, b) ((void *)0) + +#define PAGE_NONE __pgprot(0) +#define PAGE_SHARED __pgprot(0) +#define PAGE_COPY __pgprot(0) +#define PAGE_READONLY __pgprot(0) +#define PAGE_KERNEL __pgprot(0) + +#define VMALLOC_START 0 +#define VMALLOC_END 0xFFFFFFFF +#define KMAP_START 0 +#define KMAP_END 0xFFFFFFFF + +extern void paging_init(void); +#define swapper_pg_dir ((pgd_t *) 0) + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +#endif /* _ASM_WASM_PGTABLE_H */ diff --git a/arch/wasm/include/asm/processor.h b/arch/wasm/include/asm/processor.h new file mode 100644 index 000000000..93243e16d --- /dev/null +++ b/arch/wasm/include/asm/processor.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_PROCESSOR_H +#define _ASM_WASM_PROCESSOR_H + +#ifndef __ASSEMBLY__ + +struct pt_regs; + +/* 3 GB RAM for userspace, 1 GB for the kernel. */ +#define TASK_SIZE (0xC0000000) + +/* + * We run interrupts on CPU 1, keep it clear. Why not CPU 0? Because init needs + * to run on CPU 0 for a while. We don't need interrupts until SMP has started, + * but we need init before. + */ +#define IRQ_CPU 1 + +#define cpu_relax() barrier() + +struct thread_struct { +}; + +#define INIT_THREAD { \ +} + +void start_thread(struct pt_regs *regs, unsigned long stack_pointer); + +void do_irq_stacked(int irq_nr); + +int user_mode_tail(void); + +struct task_struct; +static inline unsigned long __get_wchan(struct task_struct *p) +{ + /* Should return the function before schedule() was called. */ + /* Will be shown under the "Waiting Channel" of the ps command. */ + return 0; +} + +/* We don't have an instruction pointer. 
See instruction_pointer.h */ +#define KSTK_EIP(task) (0) + +/* We could possibly expose the stack pointer (has some data)...? */ +#define KSTK_ESP(task) (0) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_WASM_PROCESSOR_H */ diff --git a/arch/wasm/include/asm/ptrace.h b/arch/wasm/include/asm/ptrace.h new file mode 100644 index 000000000..40b4ff72d --- /dev/null +++ b/arch/wasm/include/asm/ptrace.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_PTRACE_H +#define _ASM_WASM_PTRACE_H + +#include +#include +#include + +#ifndef __ASSEMBLY__ + +#define user_mode(regs) ((regs)->cpuflags & BIT(CPUFLAGS_USER_MODE)) + +/* Not available in Wasm. */ +#define instruction_pointer(regs) (0) + +#define current_user_stack_pointer() (0) + +/* Not available (maybe we could extract this from a stacktrace?) */ +#define profile_pc(regs) instruction_pointer(regs) + +#define task_pt_regs(task) ((struct pt_regs *)(task) - 1U) +#define current_pt_regs() task_pt_regs(current) + +#define task_switch_stack(task) ((struct switch_stack *)task_pt_regs(task) - 1U) +#define current_switch_stack() task_switch_stack(current) + +static inline int regs_irqs_disabled(struct pt_regs *regs) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_WASM_PTRACE_H */ diff --git a/arch/wasm/include/asm/smp.h b/arch/wasm/include/asm/smp.h new file mode 100644 index 000000000..d47beeccb --- /dev/null +++ b/arch/wasm/include/asm/smp.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_SMP_H +#define _ASM_WASM_SMP_H + +#include + +#ifdef CONFIG_SMP + +#define raw_smp_processor_id() (current_thread_info()->cpu) + +void __init setup_smp_ipi(void); + +void arch_send_call_function_single_ipi(int cpu); + +static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + int cpu; + + for_each_cpu(cpu, mask) + arch_send_call_function_single_ipi(cpu); +} + +__visible void 
raise_interrupt(int cpu, int irq_nr); + +#endif /* !CONFIG_SMP */ + +#endif /* _ASM_WASM_SMP_H */ diff --git a/arch/wasm/include/asm/stacktrace.h b/arch/wasm/include/asm/stacktrace.h new file mode 100644 index 000000000..2f702245e --- /dev/null +++ b/arch/wasm/include/asm/stacktrace.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_STACKTRACE_H +#define _ASM_WASM_STACKTRACE_H + +#include +#include + +#define WASM_STACKTRACE_MAX_SIZE 1000U + +static inline bool on_thread_stack(void) +{ + /* + * Since current is directly derived from the stack pointer on Wasm, we + * can do this sneaky trick of comparing stack ends. + */ + return current->stack == (void*)((unsigned long)current & THREAD_MASK); +} + +#endif /* _ASM_WASM_STACKTRACE_H */ diff --git a/arch/wasm/include/asm/syscall.h b/arch/wasm/include/asm/syscall.h new file mode 100644 index 000000000..a31199740 --- /dev/null +++ b/arch/wasm/include/asm/syscall.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_SYSCALL_H +#define _ASM_WASM_SYSCALL_H + +#include +#include + +extern void (* const sys_call_table[])(void); + +struct task_struct; + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->syscall_nr; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + /* We don't need to rollback anything on Wasm. */ +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return IS_ERR_VALUE(regs->syscall_ret) ? regs->syscall_ret : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->syscall_ret; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->syscall_ret = error ? 
(long)error : val; +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned long *args) +{ + args[0] = regs->syscall_nr; + memcpy(&args[1], regs->syscall_args, sizeof(regs->syscall_args)); +} + +static inline int syscall_get_arch(struct task_struct *task) +{ + return AUDIT_ARCH_WASM32; +} + +static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) +{ + return false; +} + +#endif /* _ASM_WASM_SYSCALL_H */ diff --git a/arch/wasm/include/asm/thread_info.h b/arch/wasm/include/asm/thread_info.h new file mode 100644 index 000000000..2f9c43907 --- /dev/null +++ b/arch/wasm/include/asm/thread_info.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_THREAD_INFO_H +#define _ASM_WASM_THREAD_INFO_H + +#include + +/* + * In the Wasm arch, thread_info sits at the top of task_struct and both reside + * at the very end of the stack area (which grows downwards). + * + * HIGHER ADDRESSES + * + * [ [ [...] ] ] ^ <- (__stack_pointer & THREAD_MASK) + THREAD_SIZE + * [ [ [thread_info]] ] | + * [ [task_struct ] ] | <- current, current_thread_info() + * [ [stack ] ] | <- (stack starts with pt_regs + possibly switch_stack) + * [ [ [...] ] ] | <- __stack_pointer (growing towards lower addresses) + * [ ] | + * [ free space ] | THREAD_SIZE + * [ ] v <- (__stack_pointer & THREAD_MASK) + * + * LOWER ADDRESSES + * + * As can be seen, current == current_thread_info() in this arch. In order to + * access any of these, __stack_pointer can be masked by THREAD_MASK, since + * the kernel stack for every task will be aligned on a THREAD_SIZE boundary. + * + * Example of memory-growing instructions Resides in + * -------------------------------------------- -------------------------------- + * iX.const, iX.load, local.get, global.get Wasm internal stack + * global.set __stack_pointer __stack_pointer managed stack + * + * Stack usage in Wasm is pretty sparse. 
Most data resides in "locals" or on the + internal Wasm stack. Both of these are not accessible from within Wasm, + except outside the local usage of them of course. The stack we manage is used + for things that Wasm can't put on any of those, for example when a pointer is + constructed when taking the address of an auto variable (i.e. the + function/block scope in C). That stack is referred to by the Wasm global + __stack_pointer and is known by the compiler. It is not part of the Wasm + standard, but makes certain parts of the C standard possible to compile. Two + pages should for this reason be enough as kernel stack. struct task_struct + (including struct thread_info at its base) is about 2K, leaving 6K for the + kernel stack. + */ +#define THREAD_SIZE_ORDER (1) +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#define THREAD_MASK (~(THREAD_SIZE - 1)) + +#ifndef __ASSEMBLY__ + +struct thread_info { + unsigned int cpu; + unsigned int flags; + int preempt_count; /* Needed but not really used */ + int instance_depth; /* 0 = idle task, 1 = running */ + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .cpu = 0, \ + .flags = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ + .instance_depth = 0, \ +} + +struct task_struct; + +static inline void *arch_alloc_thread_stack_node( + struct task_struct *tsk, int node) +{ + return (void *)((unsigned long)tsk & THREAD_MASK); +} + +static inline void arch_free_thread_stack(struct task_struct *tsk) { } + +struct task_struct *alloc_task_struct_node(int node); +void free_task_struct(struct task_struct *tsk); + +#endif /* !__ASSEMBLY__ */ + +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_SIGPENDING 1 /* signal pending */ +#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_NOTIFY_SIGNAL 3 /* signal notifications exist */ +#define TIF_MEMDIE 4 /* is terminating due to OOM killer */ +#define TIF_NOTIFY_RESUME 5 /* callback 
before returning to user */ +#define TIF_NEVER_RUN 6 /* was never run by the scheduler */ +#define TIF_RELOAD_PROGRAM 7 /* should reload code at syscall end */ +#define TIF_DELIVER_SIGNAL 8 /* run sighandler at syscall end */ +#define TIF_RETURN_SIGNAL 9 /* return sighandler at syscall end */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_MEMDIE (1 << TIF_MEMDIE) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_NEVER_RUN (1 << TIF_NEVER_RUN) +#define _TIF_RELOAD_PROGRAM (1 << TIF_RELOAD_PROGRAM) +#define _TIF_DELIVER_SIGNAL (1 << TIF_DELIVER_SIGNAL) +#define _TIF_RETURN_SIGNAL (1 << TIF_RETURN_SIGNAL) + +#endif /* _ASM_WASM_THREAD_INFO_H */ diff --git a/arch/wasm/include/asm/time.h b/arch/wasm/include/asm/time.h new file mode 100644 index 000000000..2577a1151 --- /dev/null +++ b/arch/wasm/include/asm/time.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_TIME_H +#define _ASM_WASM_TIME_H + +void wasm_clockevent_enable(void); +void wasm_program_timer(unsigned long delta); + +#endif /* _ASM_WASM_TIME_H */ diff --git a/arch/wasm/include/asm/vmalloc.h b/arch/wasm/include/asm/vmalloc.h new file mode 100644 index 000000000..f1c2216f2 --- /dev/null +++ b/arch/wasm/include/asm/vmalloc.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_VMALLOC_H +#define _ASM_WASM_VMALLOC_H + +#endif /* _ASM_WASM_VMALLOC_H */ diff --git a/arch/wasm/include/asm/wasm.h b/arch/wasm/include/asm/wasm.h new file mode 100644 index 000000000..20decb1d5 --- /dev/null +++ b/arch/wasm/include/asm/wasm.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_WASM_WASM_H +#define _ASM_WASM_WASM_H + +/* These are symbols imported from the Wasm host. 
*/ + +extern void wasm_panic(const char *msg); +extern void wasm_dump_stacktrace(char* buffer, unsigned long max_size); + +extern void wasm_start_cpu(unsigned int cpu, struct task_struct *idle_task, + unsigned long start_stack); +extern void wasm_stop_cpu(unsigned int cpu); + +extern struct task_struct *wasm_create_and_run_task( + struct task_struct *prev_task, struct task_struct *new_task, + const char *name, unsigned long bin_start, unsigned long bin_end, + unsigned long data_start, unsigned long table_start); +extern void wasm_release_task(struct task_struct *dead_task); +extern struct task_struct *wasm_serialize_tasks(struct task_struct *prev_task, + struct task_struct *next_task); + +extern void wasm_load_executable(unsigned long bin_start, unsigned long bin_end, + unsigned long data_start, unsigned long table_start); +extern void wasm_reload_program(void); + +extern void wasm_clone_callback(void); + +#endif /* _ASM_WASM_WASM_H */ diff --git a/arch/wasm/include/uapi/asm/Kbuild b/arch/wasm/include/uapi/asm/Kbuild new file mode 100644 index 000000000..b4bb51a5c --- /dev/null +++ b/arch/wasm/include/uapi/asm/Kbuild @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +generic-y += ucontext.h diff --git a/arch/wasm/include/uapi/asm/byteorder.h b/arch/wasm/include/uapi/asm/byteorder.h new file mode 100644 index 000000000..3f8945ac4 --- /dev/null +++ b/arch/wasm/include/uapi/asm/byteorder.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ + +#ifndef _UAPI_ASM_WASM_BYTEORDER_H +#define _UAPI_ASM_WASM_BYTEORDER_H + +#include + +#endif /* _UAPI_ASM_WASM_BYTEORDER_H */ diff --git a/arch/wasm/include/uapi/asm/ptrace.h b/arch/wasm/include/uapi/asm/ptrace.h new file mode 100644 index 000000000..0761ce261 --- /dev/null +++ b/arch/wasm/include/uapi/asm/ptrace.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ + +#ifndef _UAPI_ASM_WASM_PTRACE_H +#define _UAPI_ASM_WASM_PTRACE_H + +#define 
PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 + +#ifndef __ASSEMBLY__ + +/* Registers stored during kernel entry (syscalls, IRQs and exceptions). */ +struct pt_regs { + unsigned long stack_pointer; /* The __stack_pointer global. */ + unsigned long cpuflags; /* CPU Flags (interrupt, user mode). */ + int syscall_nr; /* Needed by syscall_get_nr() etc. */ + long syscall_args[6]; /* Needed by syscall_get_args() etc. */ + long syscall_ret; /* Needed by syscall_*_return() etc. */ +}; + +#define PT_REGS_INIT ((struct pt_regs){.syscall_nr = -1}) + +/* Registers stored when switching between user processes (and signals). */ +struct switch_stack { + /* When kthread, kernel thread callback with arg. */ + int (*fn)(void *); + void *fn_arg; + + /* When user task, the __tls_base global. Unused by the kernel. */ + unsigned long tls; +}; + +/* Registers for user processes (gdb etc.), stable ABI compared to pt_regs. */ +struct user_regs_struct { + unsigned long stack_pointer; + unsigned long tls; +}; + +#endif /* __ASSEMBLY__ */ +#endif /* _UAPI_ASM_WASM_PTRACE_H */ diff --git a/arch/wasm/include/uapi/asm/sigcontext.h b/arch/wasm/include/uapi/asm/sigcontext.h new file mode 100644 index 000000000..7fa987d86 --- /dev/null +++ b/arch/wasm/include/uapi/asm/sigcontext.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ + +#ifndef _UAPI_ASM_WASM_SIGCONTEXT_H +#define _UAPI_ASM_WASM_SIGCONTEXT_H + +#include + +/* State saved before a signal is handled, given to signal handlers. 
*/ +struct sigcontext { + struct user_regs_struct regs; +}; + +#endif /* _UAPI_ASM_WASM_SIGCONTEXT_H */ diff --git a/arch/wasm/include/uapi/asm/unistd.h b/arch/wasm/include/uapi/asm/unistd.h new file mode 100644 index 000000000..9729b100b --- /dev/null +++ b/arch/wasm/include/uapi/asm/unistd.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ + +#define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 + +#include diff --git a/arch/wasm/kernel/Makefile b/arch/wasm/kernel/Makefile new file mode 100644 index 000000000..a630af519 --- /dev/null +++ b/arch/wasm/kernel/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0-only + +extra-y += vmlinux.lds + +obj-y += cpu.o +obj-y += cpuflags.o +obj-y += entry.o +obj-y += head.o +obj-y += irqflags.o +obj-y += irq.o +obj-y += process.o +obj-y += ptrace.o +obj-y += reboot.o +obj-y += setup.o +obj-y += signal.o +obj-y += smp.o +obj-y += stack.o +obj-y += sys_wasm.o +obj-y += syscall_table.o +obj-y += time.o +obj-y += traps.o diff --git a/arch/wasm/kernel/asm-offsets.c b/arch/wasm/kernel/asm-offsets.c new file mode 100644 index 000000000..272f0f461 --- /dev/null +++ b/arch/wasm/kernel/asm-offsets.c @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#define ASM_OFFSETS_C + +#include +#include +#include + +void foo(void) +{ +/* + * struct task_struct is stored just above the thread stack. It is aligned by + * L1_CACHE_BYTES, which is enforced by init_task and the task memory allocator. + * + * sizeof(pt_regs) and sizeof(task_struct) is naturally aligned by their size. + * The start of the actual stack has to be 16-byte aligned when calling C code. 
+ */ +#define _THREAD_TASK_STRUCT_OFFSET ALIGN_DOWN(THREAD_SIZE - sizeof(struct task_struct), L1_CACHE_BYTES) +#define _THREAD_PT_REGS_OFFSET (_THREAD_TASK_STRUCT_OFFSET - sizeof(struct pt_regs)) +#define _THREAD_SWITCH_STACK_OFFSET (_THREAD_PT_REGS_OFFSET - sizeof(struct switch_stack)) +#define _THREAD_STACK_START ALIGN_DOWN(_THREAD_SWITCH_STACK_OFFSET, 16) + + DEFINE(THREAD_TASK_STRUCT_OFFSET, _THREAD_TASK_STRUCT_OFFSET); + BLANK(); + + DEFINE(THREAD_PT_REGS_OFFSET, _THREAD_PT_REGS_OFFSET); + DEFINE(PT_REGS_STACK_POINTER, offsetof(struct pt_regs, stack_pointer)); + BLANK(); + + DEFINE(THREAD_SWITCH_STACK_OFFSET, _THREAD_SWITCH_STACK_OFFSET); + DEFINE(SWITCH_STACK_TLS, offsetof(struct switch_stack, tls)); + BLANK(); + + DEFINE(THREAD_STACK_START, _THREAD_STACK_START); + BLANK(); +} diff --git a/arch/wasm/kernel/cpu.c b/arch/wasm/kernel/cpu.c new file mode 100644 index 000000000..5fb9aa8ba --- /dev/null +++ b/arch/wasm/kernel/cpu.c @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include + +#ifdef CONFIG_PROC_FS + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + if (*pos == nr_cpu_ids) + return NULL; + + *pos = cpumask_next(*pos - 1, cpu_online_mask); + if ((*pos) < nr_cpu_ids) + return (void *)(uintptr_t)(1 + *pos); + return NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +static int c_show(struct seq_file *m, void *v) +{ + unsigned long cpu_id = (unsigned long)v - 1; + + seq_printf(m, "processor\t: %lu\n", cpu_id); + seq_printf(m, "vendor_id\t: Wasm\n"); + + return 0; +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = c_show, +}; + +#endif /* CONFIG_PROC_FS */ diff --git a/arch/wasm/kernel/cpuflags.c b/arch/wasm/kernel/cpuflags.c new file mode 100644 index 000000000..a97e9b58a --- /dev/null +++ b/arch/wasm/kernel/cpuflags.c @@ -0,0 +1,5 
@@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +DEFINE_PER_CPU(unsigned long, wasm_cpuflags) = CPUFLAGS_RESET_VALUE; diff --git a/arch/wasm/kernel/entry.S b/arch/wasm/kernel/entry.S new file mode 100644 index 000000000..04087b23f --- /dev/null +++ b/arch/wasm/kernel/entry.S @@ -0,0 +1,299 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include + + +.globaltype __stack_pointer, i32 + +.section .globals,"",@ + +.globaltype __user_stack_pointer, i32 +__user_stack_pointer: +.globaltype __user_tls_base, i32 +__user_tls_base: + +.section .text,"",@ + +.globl get_user_stack_pointer +get_user_stack_pointer: + .functype get_user_stack_pointer() -> (i32) + global.get __user_stack_pointer + end_function + +.globl get_user_tls_base +get_user_tls_base: + .functype get_user_tls_base() -> (i32) + global.get __user_tls_base + end_function + +.functype user_mode_tail() -> (i32) +.functype wasm_user_mode_tail(i32) -> () + +.globl _user_mode_tail +_user_mode_tail: + .functype _user_mode_tail() -> () + .local i32 /* 0: flow */ + + block + call user_mode_tail + local.tee 0 + i32.eqz + br_if 0 + + local.get 0 + call wasm_user_mode_tail + end_block + + end_function + +/* + * HIGH ADDRESSES + * -------------- + * (thread end) <- (current & THREAD_MASK) + THREAD_SIZE + * [task_struct] <- current + * [pt_regs] + * [switch_stack] <- initial __stack_pointer + * (alignment) + * <- Ready to call C code (16-byte aligned). + * (...free space...) + * + * (thread start) <- current & THREAD_MASK + * ------------- + * LOW ADDRESSES + */ + +.functype __ret_from_fork(i32, i32) -> (i32) + +/* New process. Called by Wasm host when it runs a task for the first time. */ +.globl ret_from_fork +ret_from_fork: + /* struct task_struct *prev_task, struct task_struct *next_task */ + .functype ret_from_fork(i32, i32) -> (i32) + + /* We can't switch back to a task so no need to save into prev_task. */ + + /* Load __stack_pointer from the new task's kernel stack area. 
*/ + local.get 1 + i32.const zeroptr-THREAD_TASK_STRUCT_OFFSET+THREAD_STACK_START + i32.add + global.set __stack_pointer + + /* + * Finish up in C. Returns true if we have a clone callback to call. + * (Upon return, the correct cpuflags for userland have been loaded.) + */ + local.get 0 + local.get 1 + call __ret_from_fork + + /* Load __user_stack_pointer. */ + local.get 1 + i32.const zeroptr-THREAD_TASK_STRUCT_OFFSET+THREAD_PT_REGS_OFFSET+PT_REGS_STACK_POINTER + i32.add + i32.load 0 + global.set __user_stack_pointer + + /* Load __user_tls_base. */ + local.get 1 + i32.const zeroptr-THREAD_TASK_STRUCT_OFFSET+THREAD_SWITCH_STACK_OFFSET+SWITCH_STACK_TLS + i32.add + i32.load 0 + global.set __user_tls_base + + /* Clean up the stack. */ + global.get __stack_pointer + i32.const zeroptr-THREAD_STACK_START+THREAD_TASK_STRUCT_OFFSET + i32.add + global.set __stack_pointer + + /* In theory, the first thing we execute may be a signal handler. */ + call _user_mode_tail + + end_function + +.macro WASM_SYSCALL_ASM_HEAD + /* + * The kernel expects pt_regs to be populated so save what we know. + * The following fields are saved in the C part of this handling: + * * cpuflags is set to appropriate values. + * * syscall_nr and syscall_args are set from our call parameters. + * * syscall_ret is set and returned to us. + */ + + local.get 0 + global.set __user_stack_pointer + local.get 1 + global.set __user_tls_base + + /* Allocate pt_regs + switch_stack + stack alignment. */ + global.get __stack_pointer + i32.const zeroptr-THREAD_TASK_STRUCT_OFFSET+THREAD_STACK_START + i32.add + global.set __stack_pointer + + /* Save __user_stack_pointer. */ + global.get __stack_pointer + i32.const zeroptr-THREAD_STACK_START+THREAD_PT_REGS_OFFSET+PT_REGS_STACK_POINTER + i32.add + global.get __user_stack_pointer + i32.store 0 + + /* Save __user_tls_base. 
*/ + global.get __stack_pointer + i32.const zeroptr-THREAD_STACK_START+THREAD_SWITCH_STACK_OFFSET+SWITCH_STACK_TLS + i32.add + global.get __user_tls_base + i32.store 0 + + /* + * Note: we don't need to swap the stack pointer, it already happened + * automatically when calling into the vmlinux Wasm instance. + */ +.endm + +.macro WASM_SYSCALL_ASM_FOOT + /* (The cpuflags have already been restored in C.) */ + + /* Load __user_stack_pointer. */ + global.get __stack_pointer + i32.const zeroptr-THREAD_STACK_START+THREAD_PT_REGS_OFFSET+PT_REGS_STACK_POINTER + i32.add + i32.load 0 + global.set __user_stack_pointer + + /* Load __user_tls_base. */ + global.get __stack_pointer + i32.const zeroptr-THREAD_STACK_START+THREAD_SWITCH_STACK_OFFSET+SWITCH_STACK_TLS + i32.add + i32.load 0 + global.set __user_tls_base + + /* Deallocate stack alignment + switch_stack + pt_regs. */ + global.get __stack_pointer + i32.const zeroptr-THREAD_STACK_START+THREAD_TASK_STRUCT_OFFSET + i32.add + global.set __stack_pointer + + /* + * Note: we don't need to swap the __stack_pointer, it will + * happen automatically when returning back into the user code + * Wasm instance (as that instance has its own __stack_pointer). + * + * We can exploit this as we're basically in userland, but with + * the kernel stack pointer loaded. This allows us to play tricks + * with execution in userland without setting a program counter. + * + * This is where signal handlers are called, and returned, and exec() + * calls stop the execution of the user program. In the case of exec() + * and signal return, the call stack collapses (this call never returns). 
+ */ + call _user_mode_tail + + end_function +.endm + +.functype __wasm_syscall_0(i32) -> (i32) +.functype __wasm_syscall_1(i32, i32) -> (i32) +.functype __wasm_syscall_2(i32, i32, i32) -> (i32) +.functype __wasm_syscall_3(i32, i32, i32, i32) -> (i32) +.functype __wasm_syscall_4(i32, i32, i32, i32, i32) -> (i32) +.functype __wasm_syscall_5(i32, i32, i32, i32, i32, i32) -> (i32) +.functype __wasm_syscall_6(i32, i32, i32, i32, i32, i32, i32) -> (i32) + +/* + * These syscall functions should be called from userland code. In order to skip + * slow JavaScript glue code, they directly transfer all state needed into the + * kernel. This means that two initial parameters are added for sp and tp. + * Parameter 0 [sp]: the userland stack pointer. + * Parameter 1 [tp]: the userland TLS pointer. + * Parameter 2 [nr]: the syscall nr. + * Parameter 3..8 [argN]: syscall argument(s) 0..5, where applicable. + * + * The kernel never modifies sp or tp for the calling task during syscalls and + * there is thus no need to restore them after the syscall returns. Apart from + * diagnostics, they only play a role in the clone and exec family of syscalls. + * Clone can be made to copy the supplied sp and tp to the new task. Exec should + * maintain the tp even for new process images (this use case is quite sketchy). + * Considering that the kernel does not care if userland even has a stack or TLS + * area, it would be OK to not transfer these pointers at all if desired. In + * both the clone and exec cases, the initial values of sp and tp would not be + * loaded at a syscall site, but instead during the ret_from_fork code flow. 
+ */ +.globl wasm_syscall_0 +wasm_syscall_0: + .functype wasm_syscall_0(i32, i32, i32) -> (i32) + WASM_SYSCALL_ASM_HEAD + local.get 2 + call __wasm_syscall_0 + WASM_SYSCALL_ASM_FOOT + +.globl wasm_syscall_1 +wasm_syscall_1: + .functype wasm_syscall_1(i32, i32, i32, i32) -> (i32) + WASM_SYSCALL_ASM_HEAD + local.get 2 + local.get 3 + call __wasm_syscall_1 + WASM_SYSCALL_ASM_FOOT + +.globl wasm_syscall_2 +wasm_syscall_2: + .functype wasm_syscall_2(i32, i32, i32, i32, i32) -> (i32) + WASM_SYSCALL_ASM_HEAD + local.get 2 + local.get 3 + local.get 4 + call __wasm_syscall_2 + WASM_SYSCALL_ASM_FOOT + +.globl wasm_syscall_3 +wasm_syscall_3: + .functype wasm_syscall_3(i32, i32, i32, i32, i32, i32) -> (i32) + WASM_SYSCALL_ASM_HEAD + local.get 2 + local.get 3 + local.get 4 + local.get 5 + call __wasm_syscall_3 + WASM_SYSCALL_ASM_FOOT + +.globl wasm_syscall_4 +wasm_syscall_4: + .functype wasm_syscall_4(i32, i32, i32, i32, i32, i32, i32) -> (i32) + WASM_SYSCALL_ASM_HEAD + local.get 2 + local.get 3 + local.get 4 + local.get 5 + local.get 6 + call __wasm_syscall_4 + WASM_SYSCALL_ASM_FOOT + +.globl wasm_syscall_5 +wasm_syscall_5: + .functype wasm_syscall_5(i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) + WASM_SYSCALL_ASM_HEAD + local.get 2 + local.get 3 + local.get 4 + local.get 5 + local.get 6 + local.get 7 + call __wasm_syscall_5 + WASM_SYSCALL_ASM_FOOT + +.globl wasm_syscall_6 +wasm_syscall_6: + .functype wasm_syscall_6(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) + WASM_SYSCALL_ASM_HEAD + local.get 2 + local.get 3 + local.get 4 + local.get 5 + local.get 6 + local.get 7 + local.get 8 + call __wasm_syscall_6 + WASM_SYSCALL_ASM_FOOT diff --git a/arch/wasm/kernel/head.S b/arch/wasm/kernel/head.S new file mode 100644 index 000000000..e7403fcf2 --- /dev/null +++ b/arch/wasm/kernel/head.S @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +.globaltype __stack_pointer, i32 + +.functype start_kernel() -> () +.functype start_secondary() -> () + +/* vmlinux 
entrypoint */ +.globl _start +_start: + .functype _start() -> () + .local i32 /* alloc_limit */ + + /* + * init_task resides just after the start of the stack. Higher addresses + * contain task_struct init_task data, while the stack grows downwards. + * + * The initial stack needs to be 16-byte aligned when calling C code. + * init_task already has a much higher alignment (by L1_CACHE_BYTES). + */ + i32.const init_task + global.set __stack_pointer + + /* Save static memory used by vmlinux. */ + i32.const memory_start + i32.const 0x10000 /* The first page is reserved for trapping nullptr. */ + i32.store 0 + i32.const memory_kernel_break + memory.size 0 + i32.const 0x10000 /* Multiply by Wasm page size (65k). */ + i32.mul + i32.store 0 + + /* + * By some trial-and-error in Firefox and (mostly) Chromium: + * * Allocating the full address space (4 GB) works most of the time. + * * Allocating 4 GB fails often enough to be unstable. Curiously, it + * does not seem to have anything to do with free memory, and just + * reloading the page fixes it. Waiting a bit might be beneficial. + * * Doing it from within Wasm seems to succeed more often(?). + * * Allocating it in one go works better than stepping, as below, and + * would allow 4 GB. But stepping is more reliable. + * * Stepping all the way from 4 GB makes Chromium accept the allocation + * soon enough, but then crash with SIGILL, probably because of OOM. + * * Stepping from 500 MB is a tradeoff with all things considered. It + * ought indeed to be enough for anybody! (Oh, old joke, sorry...) + * + * Considering the above heuristics, a fair approach seems to start high + * and aggressively step downwards, one page at a time. But not too high + * or there will be OOM troubles related to current default browser + * settings. Sadly, browsers don't seem to handle this in a very well + * defined way, and we have to be moderately aggressive. 
An even more + * aggressive approach that surprisingly works is to try again and again + * with the same allocation size, but stepping almost achieves that. + * + * Whatever happens, the memory is zero-initialized and hopefully + * overcommitted by the host OS. If it is not, that should be fixed! + * Even better would be MMU support in Wasm, and this problem would be + * solved altogether. And a whole slew of other problems too! + * + * Note that we cannot allocate the last page from within Wasm (even + * though it is possible from the JavaScript host to create a Memory + * with initial: 0x10000, memory.grow only allows us to get to 0xFFFF). + * This is not too bad, as this is almost like not placing anything in + * the first page to catch null pointers. This guards underflow instead. + */ + i32.const 0x2000 /* Immediately decremented by 1 in the loop below. */ + memory.size 0 /* Returns the current number of pages. */ + i32.sub /* Try grow by the difference, (max - curr). */ + local.set 0 + loop + local.get 0 + i32.const 1 + i32.sub + local.tee 0 + + memory.grow 0 + i32.const -1 /* Check if allocation failed (returned -1). */ + i32.eq + br_if 0 + end_loop + + block + local.get 0 + i32.const 16 + i32.lt_u + br_if 0 + + i32.const memory_end + local.get 0 + i32.const 0x10000 /* Multiply by Wasm page size (65k). */ + i32.mul + i32.store 0 + + call start_kernel /* Start the kernel! */ + end_block + + /* If we ever get here, the memory allocation failed. */ + end_function + +.globl _start_secondary +_start_secondary: + .functype _start_secondary(i32) -> () + local.get 0 + global.set __stack_pointer + call start_secondary + /* start_secondary should never return. 
*/ + unreachable + + end_function diff --git a/arch/wasm/kernel/irq.c b/arch/wasm/kernel/irq.c new file mode 100644 index 000000000..9092bf194 --- /dev/null +++ b/arch/wasm/kernel/irq.c @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +static unsigned int wasm_irq_startup(struct irq_data *data) +{ + return 0; +} + +static void wasm_irq_noop(struct irq_data *data) +{ +} + +static int +wasm_irq_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ +#ifdef CONFIG_SMP + printk("wasm_irq_set_affinity: %d %d %d", data->irq, cpumask_weight(mask), cpumask_first(mask)); + return 0; +#endif +} + +struct irq_chip wasm_irq_chip = { + .name = "wasm", + .irq_startup = wasm_irq_startup, + .irq_shutdown = wasm_irq_noop, + .irq_enable = wasm_irq_noop, + .irq_disable = wasm_irq_noop, + .irq_ack = wasm_irq_noop, + .irq_mask = wasm_irq_noop, + .irq_unmask = wasm_irq_noop, + .irq_set_affinity = wasm_irq_set_affinity, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +void __init init_IRQ(void) +{ + int irq; + + for (irq = 0; irq < NR_IRQS; ++irq) { + if (irq == WASM_IRQ_IPI || irq == WASM_IRQ_TIMER) { + irq_set_percpu_devid(irq); + irq_set_chip_and_handler( + irq, &wasm_irq_chip, handle_percpu_devid_irq); + } else { + irq_set_chip_and_handler( + irq, &wasm_irq_chip, handle_simple_irq); + } + } + + setup_smp_ipi(); +} diff --git a/arch/wasm/kernel/irqflags.c b/arch/wasm/kernel/irqflags.c new file mode 100644 index 000000000..cd8e86e90 --- /dev/null +++ b/arch/wasm/kernel/irqflags.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include + +unsigned long arch_local_save_flags(void) +{ + if (*this_cpu_ptr(&wasm_cpuflags) & BIT(CPUFLAGS_INTERRUPT)) + return ARCH_IRQ_ENABLED; + return ARCH_IRQ_DISABLED; +} + +void arch_local_irq_restore(unsigned long flags) +{ + if (flags == ARCH_IRQ_DISABLED) + *this_cpu_ptr(&wasm_cpuflags) &= ~BIT(CPUFLAGS_INTERRUPT); + else + 
*this_cpu_ptr(&wasm_cpuflags) |= BIT(CPUFLAGS_INTERRUPT); +} diff --git a/arch/wasm/kernel/process.c b/arch/wasm/kernel/process.c new file mode 100644 index 000000000..1eaa35d8f --- /dev/null +++ b/arch/wasm/kernel/process.c @@ -0,0 +1,282 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static cpumask_t user_cpus = CPU_MASK_NONE; + +struct task_struct *__sched +__switch_to(struct task_struct *prev_task, struct task_struct *next_task) +{ + /* + * Here, a typical arch would normally: + * * Swap registers and stack. + * * Return to the instruction pointer of the new task. + * + * For new tasks (after fork), it would normally: + * * Swap registers and stack. + * * Call schedule_tail(), now in the context of the new process. + * * If there is a kernel_fn set, call it with correct callback arg. + * * Call syscall_exit_to_user_mode(current_pt_regs()) or equivalent. + * -> Return to the new instruction pointer, ending up in userland. + * + * When prev_task was swapped in again (on another reschedule), we would + * continue here and return back to the scheduler. However, Wasm can't + * do this. Only the in-memory part of the call stack can be swapped and + * there is no way to jump. Instead, we have to resort to serializing + * processes (in the cooperative multitasking sense) by launching + * several threads of execution on the host and use locks to make sure + * that only one process at a time is running on the same cpu. + * + * As soon as more CPUs are enabled, we can start running concurrently, + * by putting each task (except idle) on separate CPUs. Before that, + * init and kthreadd will need to both run, until smp is started. + * Thankfully we have control over these threads and know they will not + * hog the CPU. They might call schedule() on longer waits, that's fine. 
+ * + * There is still the issue with idle threads, we could optimize to get + * away with them, which would cut the number of tasks used in the + * system by half. However, doing so is pretty annoying, as the idle + * loop is actually doing something and will eventually need to call + * schedule_idle(). For now, we serialize them too. + */ + + struct task_struct *last_task; + char name[TASK_COMM_LEN]; + + /* For user code. */ + unsigned long bin_start = 0U; + unsigned long bin_end = 0U; + unsigned long data_start = 0U; + + if (task_thread_info(next_task)->flags & _TIF_NEVER_RUN) { + task_thread_info(next_task)->flags &= ~_TIF_NEVER_RUN; + + /* Get the name to aid debugging. */ + get_task_comm(name, next_task); + + /* For user executables, we need to clone the Wasm instance. */ + if (next_task->mm->start_code) { + bin_start = next_task->mm->start_code; + bin_end = next_task->mm->end_code; + data_start = next_task->mm->start_data; + } + + /* This is called instead of serialize the first time. */ + last_task = wasm_create_and_run_task(prev_task, next_task, name, + bin_start, bin_end, data_start, 0U); + } else { + last_task = wasm_serialize_tasks(prev_task, next_task); + } + + /* If/when we reach here, we got __switch_to():ed by another task. */ + + /* last_task is the previous task (never prev_task, maybe next_task). */ + return last_task; +} + +static int user_task_set_affinity(struct task_struct *p) +{ + /* + * TODO: This function needs a review of proper approach and locking! + * It's probably best to take a step back and think about how this + * should be implemented properly in the first place, instead of adding + * band aid on top of about every line that violates this and that. That + * includes fixing release_thread() and garbage collecting unused CPUs. + * + * We may also have to move kthreads to IRQ_CPU (with an option of the + * boot cpu before IRQ_CPU is up) in case they risk getting blocked. 
+ */ + int retval; + int cpu; + + /* Kthreads can be allowed to run on any online CPU. */ + if (p->flags & PF_KTHREAD) + return 0; + +hack: + cpu = cpumask_first_zero(&user_cpus); + if (cpu >= nr_cpu_ids) + return -EBUSY; + + if(cpu == IRQ_CPU) { + /* TODO: We should mark IRQ_CPU as taken at boot instead. */ + cpumask_set_cpu(cpu, &user_cpus); + goto hack; + } + + if (!cpu_online(cpu)) { + BUG_ON(!cpu_possible(cpu)); + + /* We should add_cpu(cpu) if we properly supported hotplug... */ + retval = cpu_device_up(get_cpu_device(cpu)); + if (retval) + return retval; + } + + cpumask_set_cpu(cpu, &user_cpus); + + retval = set_cpus_allowed_ptr(p, cpumask_of(cpu)); + if (retval) { + cpumask_clear_cpu(cpu, &user_cpus); + return retval; + } + + p->flags |= PF_NO_SETAFFINITY; + + return 0; +} + +asmlinkage unsigned +__ret_from_fork(struct task_struct *prev_task, struct task_struct *next_task) +{ + struct switch_stack *next_switch_stack = task_switch_stack(next_task); + + schedule_tail(prev_task); + + /* Kernel thread callback. */ + if (next_switch_stack->fn) { + next_switch_stack->fn(next_switch_stack->fn_arg); + /* + * Kernel threads can return, and in doing so, return to user + * space. This happens for the first user process (init). + */ + + BUG_ON(current->flags & PF_KTHREAD); + + /* + * The binfmt loader would have set _TIF_RELOAD_PROGRAM + * but we clear it now so that future syscalls don't trap. + */ + current_thread_info()->flags &= ~_TIF_RELOAD_PROGRAM; + } + + /* + * syscall_exit_to_user_mode() turns off interrupts, as most + * architectures would IRET right after it, enabling them again. We + * emulate this behaviour by loading cpuflags, which should both enable + * interrupts again but also drop the privilege level down to USER_MODE. + */ + syscall_exit_to_user_mode(current_pt_regs()); + *this_cpu_ptr(&wasm_cpuflags) = current_pt_regs()->cpuflags; + + /* + * After returning, the Wasm module binary will be initialized and run. 
+ * We run any signal handlers that should be run first, then: + * kthread case: the host will call _start(). + * clone callback case: the host will call __libc_clone_callback(). + */ + return !(next_switch_stack->fn); +} + +void flush_thread(void) +{ + /* Wasm has no FP state to reset, so do nothing. */ +} + +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) +{ + struct pt_regs *parent_pt_regs = current_pt_regs(); + struct switch_stack *parent_switch_stack = current_switch_stack(); + + struct pt_regs *child_pt_regs = task_pt_regs(p); + struct switch_stack *child_switch_stack = task_switch_stack(p); + + task_thread_info(p)->flags |= _TIF_NEVER_RUN; + + if (unlikely(args->fn)) { + /* Kernel thread */ + memset(child_pt_regs, 0, sizeof(*child_pt_regs)); + child_pt_regs->stack_pointer = (unsigned long)child_switch_stack; + child_pt_regs->cpuflags = BIT(CPUFLAGS_INTERRUPT); + + memset(child_switch_stack, 0, sizeof(*child_switch_stack)); + child_switch_stack->fn = args->fn; + child_switch_stack->fn_arg = args->fn_arg; + } else { + /* User thread */ + *child_pt_regs = *parent_pt_regs; + if (args->stack) + child_pt_regs->stack_pointer = args->stack; + + *child_switch_stack = *parent_switch_stack; + child_switch_stack->fn = NULL; + child_switch_stack->fn_arg = NULL; + if (args->flags & CLONE_SETTLS) + child_switch_stack->tls = args->tls; + } + + if (!p->mm->binfmt) { + /* These are normally not zeroed out in copy_process(). */ + current->mm->start_code = 0; + current->mm->end_code = 0; + current->mm->start_stack = 0; + current->mm->start_data = 0; + current->mm->end_data = 0; + } + + return user_task_set_affinity(p); +} + +/* + * Set up a thread for executing a new program. 
+ */ +void start_thread(struct pt_regs *regs, unsigned long stack_pointer) +{ + memset(regs, 0, sizeof(*regs)); + regs->stack_pointer = stack_pointer; + regs->cpuflags = BIT(CPUFLAGS_USER_MODE) | BIT(CPUFLAGS_INTERRUPT); + + wasm_load_executable(current->mm->start_code, current->mm->end_code, + current->mm->start_data, 0U); + + /* Reload the program when the current syscall exits. */ + current_thread_info()->flags |= _TIF_RELOAD_PROGRAM; +} + +void release_thread(struct task_struct *dead_task) +{ + /* TODO: This code also needs review, like user_task_set_affinity(). */ + if (!(dead_task->flags & PF_KTHREAD)) { + BUG_ON(dead_task->nr_cpus_allowed != 1); + BUG_ON(cpumask_first(&dead_task->cpus_mask) + != task_thread_info(dead_task)->cpu); + cpumask_clear_cpu(task_thread_info(dead_task)->cpu, &user_cpus); + } + + wasm_release_task(dead_task); +} + +void show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + + pr_cont("cpuflags: %08x sp: %08x flags: %08x preempt_count: %08x\n", + (unsigned)regs->cpuflags, + (unsigned)regs->stack_pointer, + (unsigned)current_thread_info()->flags, + (unsigned)current_thread_info()->preempt_count); +} + +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) +{ + char *stack_trace; + + printk("%sStack from %08lx:", loglvl, (unsigned long)stack); + + stack_trace = kmalloc(WASM_STACKTRACE_MAX_SIZE, GFP_ATOMIC); + if (stack_trace) { + wasm_dump_stacktrace(stack_trace, WASM_STACKTRACE_MAX_SIZE); + printk("%s", stack_trace); + } else { + printk("Failed to allocate stack trace buffer."); + } + kfree(stack_trace); +} diff --git a/arch/wasm/kernel/ptrace.c b/arch/wasm/kernel/ptrace.c new file mode 100644 index 000000000..a52667068 --- /dev/null +++ b/arch/wasm/kernel/ptrace.c @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +void ptrace_disable(struct task_struct *child) +{ +} + +long arch_ptrace(struct task_struct *child, long request, unsigned long addr, + unsigned 
long data) +{ + return ptrace_request(child, request, addr, data); +} diff --git a/arch/wasm/kernel/reboot.c b/arch/wasm/kernel/reboot.c new file mode 100644 index 000000000..271e4ef64 --- /dev/null +++ b/arch/wasm/kernel/reboot.c @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +void machine_restart(char *cmd) +{ + local_irq_disable(); + smp_send_stop(); + + do_kernel_restart(cmd); + + printk("Reboot failed -- System halted\n"); + for (;;); +} + +void machine_halt(void) +{ + local_irq_disable(); + smp_send_stop(); + for (;;); +} + +void machine_power_off(void) +{ + local_irq_disable(); + smp_send_stop(); + do_kernel_power_off(); +} diff --git a/arch/wasm/kernel/setup.c b/arch/wasm/kernel/setup.c new file mode 100644 index 000000000..2ea9cc364 --- /dev/null +++ b/arch/wasm/kernel/setup.c @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include + +/* + * The format of "screen_info" is strange, and due to early + * i386-setup code. This is just enough to make the console + * code think we're on a VGA color display. 
+ */ +struct screen_info screen_info = { + .orig_x = 0, + .orig_y = 25, + .orig_video_cols = 80, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16, +}; + +unsigned long memory_start; +EXPORT_SYMBOL(memory_start); + +unsigned long memory_end; +EXPORT_SYMBOL(memory_end); + +unsigned long memory_kernel_break; +EXPORT_SYMBOL(memory_kernel_break); + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + unsigned i; + + for_each_possible_cpu(i) + set_cpu_present(i, true); +} + +void __init smp_init_cpus(void) +{ + unsigned i; + unsigned int ncpus = NR_CPUS; /* TODO: make this configurable */ + + pr_info("%s: Core Count = %d\n", __func__, ncpus); + + if (ncpus > NR_CPUS) { + ncpus = NR_CPUS; + pr_info("%s: limiting core count by %d\n", __func__, ncpus); + } + + for (i = 0; i < ncpus; ++i) + set_cpu_possible(i, true); +} + +void __init smp_prepare_boot_cpu(void) +{ +} + +void __init setup_arch(char **cmdline_p) +{ + unsigned long max_zone_pfn[MAX_NR_ZONES] = {0}; + + /* Save unparsed command line copy for /proc/cmdline */ + *cmdline_p = boot_command_line; + + parse_early_param(); + + /* See head.S for the logic that sets up these values. */ + memblock_reserve(memory_start, memory_kernel_break - memory_start); + memblock_add(memory_start, memory_end - memory_start); + + /* pcpu_find_block_fit() returns signed 32-bit memory addresses, ugh. */ + memblock_set_current_limit(0x80000000); /* Only positive addresses. */ + + /* This is needed so that more than 128 allocations can be made. */ + memblock_allow_resize(); + + /* Initialize zones, so that memory can be allocated beyond bootmem. 
*/ + max_zone_pfn[ZONE_NORMAL] = memory_end >> PAGE_SHIFT; + free_area_init(max_zone_pfn); + + smp_init_cpus(); +} diff --git a/arch/wasm/kernel/signal.c b/arch/wasm/kernel/signal.c new file mode 100644 index 000000000..ec1753ae2 --- /dev/null +++ b/arch/wasm/kernel/signal.c @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include + +struct rt_sigframe { + int sig_param; + + /* + * info_param and uc_param are convenience pointers that allow extension + * of rt_sigframe in the future and easy detection of whether SA_SIGINFO + * is set (they are NULL if it is not set). + */ + siginfo_t *info_param; + void *uc_param; + + union { + /* Signature the kernel uses internaly ("opaque type"). */ + __sighandler_t sa_handler; + + /* Signature libc should use when SA_SIGINFO is set. */ + void (*sigaction)(int sig, siginfo_t *info, void *uc); + + /* Signature libc should use when SA_SIGINFO is not set. */ + void (*handler)(int sig); + }; + + /* + * info and uc are used for sigaction (SA_SIGINFO) but ignored for + * handler (!SA_SIGINFO). However, uc is still used by the kernel when + * handler is used, as it stores the regs to restore upon sigreturn. + * This is what allows signal handlers to be stacked. 
+ */ + struct siginfo info; + struct ucontext uc; +}; + +SYSCALL_DEFINE0(rt_sigreturn) +{ + struct pt_regs *regs = current_pt_regs(); + struct switch_stack *switch_stack = current_switch_stack(); + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *)regs->stack_pointer; + struct user_regs_struct __user *user_regs = &frame->uc.uc_mcontext.regs; + sigset_t set; + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + if (!access_ok(frame, sizeof(*frame))) + goto badframe; + + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + set_current_blocked(&set); + + if (__get_user(regs->stack_pointer, &user_regs->stack_pointer)) + goto badframe; + if (__get_user(switch_stack->tls, &user_regs->tls)) + goto badframe; + + if (restore_altstack(&frame->uc.uc_stack)) + goto badframe; + + current_thread_info()->flags |= _TIF_RETURN_SIGNAL; + + return 0; + +badframe: + force_sig(SIGSEGV); + + return 0; +} + +static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, + struct pt_regs *regs) +{ + struct switch_stack *switch_stack = (struct switch_stack *)regs - 1U; + unsigned long stack_pointer = sigsp(regs->stack_pointer, ksig); + struct rt_sigframe __user *frame; + long err = 0; + + /* + * Allocate storage for frame, aligning it for itself and for further C + * function calling (which shoulld really be the largest alignment...). 
+ */ + stack_pointer -= sizeof(*frame); + stack_pointer &= -16UL; + stack_pointer &= -(unsigned long)__alignof__(*frame); + frame = (struct rt_sigframe __user *)stack_pointer; + + if (!access_ok(frame, sizeof(*frame))) + return -EFAULT; + + /* struct siginfo info */ + err |= copy_siginfo_to_user(&frame->info, &ksig->info); + + /* struct ucontext uc */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(NULL, &frame->uc.uc_link); + err |= __save_altstack(&frame->uc.uc_stack, regs->stack_pointer); + err |= __put_user(regs->stack_pointer, + &frame->uc.uc_mcontext.regs.stack_pointer); + err |= __put_user(switch_stack->tls, &frame->uc.uc_mcontext.regs.tls); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + /* int sig_param */ + err |= __put_user(ksig->sig, &frame->sig_param); + + /* info_param and uc_param convenience pointers */ + if (ksig->ka.sa.sa_flags & SA_SIGINFO) { + err |= __put_user(&frame->info, &frame->info_param); + err |= __put_user(&frame->uc, &frame->uc_param); + } else { + err |= __put_user(NULL, &frame->info_param); + err |= __put_user(NULL, &frame->uc_param); + } + + /* __sighandler_t sa_handler */ + err |= __put_user(ksig->ka.sa.sa_handler, &frame->sa_handler); + + if (err) + return -EFAULT; + + regs->stack_pointer = stack_pointer; + current_thread_info()->flags |= _TIF_DELIVER_SIGNAL; + + return 0; +} + +static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) +{ + sigset_t *oldset = sigmask_to_save(); + int ret; + + /* If we're from a syscall, cancel syscall restarting if appropriate. */ + if (regs->syscall_nr != -1) { + switch (regs->syscall_ret) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->syscall_ret = -EINTR; + break; + + case -ERESTARTSYS: + if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { + regs->syscall_ret = -EINTR; + break; + } + fallthrough; + case -ERESTARTNOINTR: + default: + /* We will actually restart in these cases. 
*/ + break; + } + } + + rseq_signal_deliver(ksig, regs); + + /* + * Wasm always uses rt-frames - the libc will have to figure out which + * signature to call the handler with depending on if SA_SIGINFO is set. + */ + ret = setup_rt_frame(ksig, oldset, regs); + + signal_setup_done(ret, ksig, 0); +} + +void arch_do_signal_or_restart(struct pt_regs *regs) +{ + struct ksignal ksig; + + if (get_signal(&ksig)) { + handle_signal(&ksig, regs); + return; + } + + /* + * Restart is handled in the syscall wrapper instead, as Wasm can't + * fiddle with the instruction pointer to re-run the syscall. (Restart + * may be canceled by handle_signal() above if we're handling a signal.) + */ + + /* + * If there's no signal to deliver, we just put the saved sigmask + * back. + */ + restore_saved_sigmask(); +} diff --git a/arch/wasm/kernel/smp.c b/arch/wasm/kernel/smp.c new file mode 100644 index 000000000..c105e5259 --- /dev/null +++ b/arch/wasm/kernel/smp.c @@ -0,0 +1,344 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +extern unsigned long long wasm_cpu_clock_get_monotonic(void); + +static DECLARE_COMPLETION(cpu_running); + +#if NR_IRQS > 32 +#error "NR_IRQS too high" +#endif +static DEFINE_PER_CPU(unsigned int, raised_irqs); + +#define TIMER_NEVER_EXPIRE (-1) +static DEFINE_PER_CPU(long long, local_timer_expiries) = TIMER_NEVER_EXPIRE; + +enum ipi_type { + IPI_RESCHEDULE = 0, + IPI_CALL_FUNC = 1, + IPI_RECEIVE_BROADCAST = 2, + IPI_IRQ_WORK = 3, +}; +#define IPI_MASK(ipi_type) ((unsigned int)(1U << (int)(ipi_type))) +static DEFINE_PER_CPU(unsigned int, raised_ipis); + +void smp_send_stop(void) +{ + unsigned int cpu; + unsigned int this_cpu = smp_processor_id(); + + for_each_online_cpu(cpu) { + if (likely(cpu != this_cpu)) + wasm_stop_cpu(cpu); + } +} + +/* Run for each cpu except the first one, to bring the others up. 
+ */ +int __cpu_up(unsigned int cpu, struct task_struct *idle_task) +{ + /* Use 16-byte aligned stack to be able to call C functions. */ + unsigned long stack_start = (unsigned long)idle_task & -16; + + task_thread_info(idle_task)->cpu = cpu; + + /* Needed so that __switch_to does not create a new Wasm task. */ + task_thread_info(idle_task)->flags &= ~_TIF_NEVER_RUN; + + reinit_completion(&cpu_running); + + /* Will create a new Wasm instance and call start_secondary(). */ + wasm_start_cpu(cpu, idle_task, (unsigned long)stack_start); + + /* Wait for CPU to finish startup & mark itself online before return. */ + wait_for_completion(&cpu_running); + return 0; +} + +/* + * First thing to run on the secondary CPUs. + * + * Launched by __cpu_up(), which calls out to the Wasm host. The Wasm host calls + * _start_secondary, which sets up the __stack_pointer and then calls us. + */ +__visible void start_secondary(void) +{ + unsigned int cpu = smp_processor_id(); + + notify_cpu_starting(cpu); + set_cpu_online(cpu, true); + + enable_percpu_irq(WASM_IRQ_IPI, IRQ_TYPE_NONE); + + /* + * Notify boot CPU that we're up & online and it can safely return + * from __cpu_up(). IPIs need to be enabled (enable_percpu_irq above). + */ + complete(&cpu_running); + + wasm_clockevent_enable(); + + local_irq_enable(); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); /* Enter idle. */ +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ + pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); +} + +__visible void raise_interrupt(int cpu, int irq_nr) +{ + /* + * Note: this function may be called independently of the kernel, outside + * any CPU or task. Do not call kernel functions in here! + * + * per_cpu_ptr() is however safe to call (unlike e.g. this_cpu_ptr()).
+ */ + unsigned int *raised_irqs_ptr = per_cpu_ptr(&raised_irqs, cpu); + + if (irq_nr >= NR_IRQS) + return; + + __atomic_or_fetch(raised_irqs_ptr, 1U << irq_nr, __ATOMIC_SEQ_CST); + __builtin_wasm_memory_atomic_notify(raised_irqs_ptr, 1U); +} + +static void send_ipi_message(int cpu, enum ipi_type ipi) +{ + unsigned int *raised_ipis_ptr = per_cpu_ptr(&raised_ipis, cpu); + __atomic_or_fetch(raised_ipis_ptr, IPI_MASK(ipi), __ATOMIC_SEQ_CST); + + raise_interrupt(cpu, WASM_IRQ_IPI); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + /* + * This is not ideal, as these can only be handled when the other CPU is + * idle, but it allows using the kernel completion API when there is + * really only one task running. This is the case when the primary CPU + * is booting up secondaries, waiting for them to wake up and finish + * their part of the boot process. Ideally, we'd get rid of IPI calls. + */ + + preempt_disable(); + send_ipi_message(cpu, IPI_CALL_FUNC); + preempt_enable(); +} + +void arch_smp_send_reschedule(int cpu) +{ + preempt_disable(); + send_ipi_message(cpu, IPI_RESCHEDULE); + preempt_enable(); +} + +extern void arch_irq_work_raise(void) +{ + /* This is a special IPI sent to ourselves, to break out of context. */ + + preempt_disable(); + send_ipi_message(smp_processor_id(), IPI_IRQ_WORK); + preempt_enable(); +} + +void tick_broadcast(const struct cpumask *mask) +{ + int cpu; + + preempt_disable(); + + for_each_cpu(cpu, mask) { + send_ipi_message(cpu, IPI_RECEIVE_BROADCAST); + } + + preempt_enable(); +} + +void wasm_program_timer(unsigned long delta) +{ + unsigned long long now; + unsigned long long expiry = 0ULL; + + unsigned int *raised_irqs_ptr = this_cpu_ptr(&raised_irqs); + long long *expiry_ptr = this_cpu_ptr(&local_timer_expiries); + + if (delta == 0UL) { + /* Optimization: set expiry to 0 to immediately expire. 
*/ + } else { + now = wasm_cpu_clock_get_monotonic(); + expiry = now + (unsigned long long)delta; + + /* + * This overflow will realistically never happen. Calling panic + * instead of returning a non-zero value is warranted, as the + * calling code would otherwise enter an infinite loop... + */ + if (expiry < now || expiry > (unsigned long long)LLONG_MAX) + panic("clockevent expiry too large"); + } + + __atomic_store_n(expiry_ptr, (long long)expiry, __ATOMIC_SEQ_CST); + + /* + * We notify on raised_irqs since that's what we're waiting on in the + * idle loop. It does not matter if it's still 0 - it will wake anyway. + */ + __builtin_wasm_memory_atomic_notify(raised_irqs_ptr, 1U); +} + +static irqreturn_t handle_IPI(int irq_nr, void *dev_id) +{ + unsigned int *ipi_mask_ptr = dev_id; + unsigned int ipi_mask = __atomic_exchange_n(ipi_mask_ptr, 0U, + __ATOMIC_SEQ_CST); + + if (ipi_mask & IPI_MASK(IPI_RECEIVE_BROADCAST)) { + /* Useful in NO_HZ_FULL case where no task is running. */ + tick_receive_broadcast(); + } + + if (ipi_mask & IPI_MASK(IPI_CALL_FUNC)) + generic_smp_call_function_interrupt(); + + if (ipi_mask & IPI_MASK(IPI_RESCHEDULE)) + scheduler_ipi(); + + if (ipi_mask & IPI_MASK(IPI_IRQ_WORK)) + irq_work_run(); + + return IRQ_HANDLED; +} + +void __init setup_smp_ipi(void) +{ + /* This is run on the boot cpu only. We need to enable others later. */ + + if (request_percpu_irq(WASM_IRQ_IPI, handle_IPI, "IPI", &raised_ipis)) + panic("Failed to register IPI IRQ"); + + enable_percpu_irq(WASM_IRQ_IPI, IRQ_TYPE_NONE); +} + +void arch_cpu_idle(void) +{ + /* Note: The idle task will not migrate so per_cpu state is stable. */ + unsigned int *raised_irqs_ptr = this_cpu_ptr(&raised_irqs); + unsigned int raised_irqs; + long long *expiry_ptr = this_cpu_ptr(&local_timer_expiries); + long long expiry; + long long timeout; + unsigned long long now; + int irq_nr; + + /* + * This function is supposed to sleep until an interrupt comes in. 
The + * fact these events can only be detected from the idle task makes these + * "interrupts" unreliable unless there are no tasks on this CPU's + * runqueue at all times. Therefore, one CPU (IRQ_CPU) is dedicated to + * handle interrupts only, no user tasks are allowed to run on it. + * + * Additionally, the clockevent subsystem can wake us, either because it + * wants to program a new timer expiry (arming or re-arming the timer), + * or because an already armed timer is expiring. The clockevent + * subsystem can also request a broadcast - i.e. waking up other CPUs + * from a dedicated broadcast device (living on IRQ_CPU). It's important + * that all CPUs can handle programming of timers, since it's being used + * when the system boots (before NO_HZ_IDLE kicks in). Additionally, + * some kernel functions (e.g. schedule_timeout()) rely on timers to + * wake them up when no task is running on the CPU. These events and + * broadcasts will of course happen in a best-effort fashion on CPUs + * where there are tasks running, as they cannot be stopped. + * + * Wasm-specific wait primitives are used so that the Wasm VM can yield + * to the host OS. In a sense, it's like calling schedule(), but on the + * host. Callling schedule() here would just send us back, busy-waiting. + */ + for (;;) { + expiry = __atomic_load_n(expiry_ptr, __ATOMIC_SEQ_CST); + +reprocess: + if (expiry > 0LL) { + now = wasm_cpu_clock_get_monotonic(); + + /* This will realistically never happen. */ + if (now > (unsigned long long)LLONG_MAX) + panic("time is too far into the future"); + + if ((long long)now >= expiry) + timeout = 0LL; + else + timeout = expiry - now; + } else { + /* + * Just like magic: + * If expiry is 0 => timeout becomes 0. + * If expiry is forever => timeout becomes forever. 
+ */ + timeout = expiry; + } + + /* timeout == 0 iff the timer expired this iteration */ + if (timeout == 0LL) { + /* + * It may be tempting to raise the timer interrupt + * already here, but that would not comply with the + * clockevent API, which mandates that re-programming + * of the device also cancels any pending event first. + */ + + /* Try resetting the timer to never expire. */ + if (!__atomic_compare_exchange_n(expiry_ptr, &expiry, + TIMER_NEVER_EXPIRE, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { + /* + * Expiry changed under our rug - re-process it. + * This goto is slightly faster than "continue;" + * as the compare-and-swap above will already + * have loaded the new expiry value on failure. + */ + goto reprocess; + } + + raise_interrupt(smp_processor_id(), WASM_IRQ_TIMER); + + if (smp_processor_id() != IRQ_CPU) + timeout = TIMER_NEVER_EXPIRE; + } + + if (timeout != 0LL) + __builtin_wasm_memory_atomic_wait32(raised_irqs_ptr, 0U, + timeout); + + raised_irqs = __atomic_exchange_n(raised_irqs_ptr, 0U, + __ATOMIC_SEQ_CST); + + /* + * In the case of some raised_irqs, handle it, then we will come + * back here in a future invocation of this function. This + * function returns so that the idle framework can do its job, + * for example if TIF_NEEDS_RESCHED is set by some IPI.
+ */ + if (raised_irqs) + break; + } + + irq_nr = 0; + while (raised_irqs) { + if (raised_irqs & 1U) + do_irq_stacked(irq_nr); + + raised_irqs >>= 1; + ++irq_nr; + } +} diff --git a/arch/wasm/kernel/stack.c b/arch/wasm/kernel/stack.c new file mode 100644 index 000000000..a0f1c314b --- /dev/null +++ b/arch/wasm/kernel/stack.c @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include + +struct task_struct *alloc_task_struct_node(int node) +{ + struct page *page = + alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); + + if (unlikely(!page)) + return NULL; + + return (struct task_struct *)((unsigned long)page_address(page) + + ALIGN_DOWN(THREAD_SIZE - sizeof(struct task_struct), + L1_CACHE_BYTES)); +} + +void free_task_struct(struct task_struct *tsk) +{ + free_pages((unsigned long)tsk & THREAD_MASK, THREAD_SIZE_ORDER); +} diff --git a/arch/wasm/kernel/sys_wasm.c b/arch/wasm/kernel/sys_wasm.c new file mode 100644 index 000000000..c3b2404e0 --- /dev/null +++ b/arch/wasm/kernel/sys_wasm.c @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +// SYS_mmap2() +SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, unsigned long, fd, + unsigned long, pgoff) +{ + /* + * The "page size" for mmap2 should always be 4K (shift 12). Some + * architectures use their native page size or whatnot, and that's why + * this syscall exists in its own version for each architecture... + * + * Some architectures check the alignment, but that's out of spec. 
+ */ + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT - 12)); +} diff --git a/arch/wasm/kernel/syscall_table.c b/arch/wasm/kernel/syscall_table.c new file mode 100644 index 000000000..3c4e9b5e6 --- /dev/null +++ b/arch/wasm/kernel/syscall_table.c @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +/* + * We should probably use some soft variant of CONFIG_COMPAT yet to be invented. + * TODO: This hack should be replaced with proper selection of compat syscalls! + */ + + static long sys_truncate64_fixup(const char __user *pathname, + unsigned long length_lo, + unsigned long length_hi) +{ + return sys_truncate64(pathname, + ((unsigned long long)length_hi << 32) | length_lo); +} + +static long sys_ftruncate64_fixup(unsigned int fd, + unsigned long length_lo, + unsigned long length_hi) +{ + return sys_ftruncate64(fd, + ((unsigned long long)length_hi << 32) | length_lo); +} + +void (* const sys_call_table[__NR_syscalls])(void) = { + [0 ... __NR_syscalls-1] = (void (*)(void))sys_ni_syscall, + +#undef __SYSCALL +#define __SYSCALL(nr, call) [nr] = (void (*)(void))(call), +#include + + [__NR_truncate64] = (void (*)(void))sys_truncate64_fixup, + [__NR_ftruncate64] = (void (*)(void))sys_ftruncate64_fixup, +}; diff --git a/arch/wasm/kernel/time.c b/arch/wasm/kernel/time.c new file mode 100644 index 000000000..af65bc3f0 --- /dev/null +++ b/arch/wasm/kernel/time.c @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +#include +#include +#include + +extern unsigned long long wasm_cpu_clock_get_monotonic(void); + +/* Wasm clock source: derived from Wasm host cpu clock (monotonic). 
*/ + +static unsigned long long wasm_clocksource_read(struct clocksource *cs) +{ + return wasm_cpu_clock_get_monotonic(); +} + +static struct clocksource wasm_clocksource = { + .name = "wasm_cpu_clock", + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .rating = 200, + .read = wasm_clocksource_read, + .mask = CLOCKSOURCE_MASK(64), +}; + +static int __init wasm_clocksource_init(void) +{ + return clocksource_register_khz(&wasm_clocksource, 1000000U /* 1 ns */); +} + +/* Wasm clock event: derived from Wasm atomic wait timeouts (in smp.c). */ + +static int wasm_clockevent_set_next_event(unsigned long delta, + struct clock_event_device *dev) +{ + wasm_program_timer(delta); + return 0; +} + +static DEFINE_PER_CPU(struct clock_event_device, wasm_clockevents) = { + .name = "wasm_timer", + .features = CLOCK_EVT_FEAT_ONESHOT, + .rating = 300, + .set_next_event = wasm_clockevent_set_next_event, +}; + +static irqreturn_t __irq_entry wasm_timer_interrupt(int irq_nr, void *dev_id) +{ + struct clock_event_device *ce_dev = dev_id; + + ce_dev->event_handler(ce_dev); + + return IRQ_HANDLED; +} + +static int __init wasm_clockevent_init(void) +{ + /* Requested here, enabled in wasm_clockevent_enable() for each cpu. */ + return request_percpu_irq(WASM_IRQ_TIMER, wasm_timer_interrupt, + "wasm-timer", &wasm_clockevents); +} + +void wasm_clockevent_enable(void) +{ + struct clock_event_device *ce_dev = this_cpu_ptr(&wasm_clockevents); + + ce_dev->cpumask = cpumask_of(smp_processor_id()); + ce_dev->irq = WASM_IRQ_TIMER; + clockevents_config_and_register(ce_dev, 1000000000, 0, ~0U); + + enable_percpu_irq(WASM_IRQ_TIMER, IRQ_TYPE_NONE); +} + +/* Called very early in the boot, only CPU 0 is up so far! */ +void __init time_init(void) +{ + /* Time is an illusion and yet here we are... 
*/ + if (wasm_clocksource_init()) + panic("Failed to initialize Wasm clocksource"); + + if (wasm_clockevent_init()) + panic("Failed to initialize Wasm clock_event"); + + /* Only for CPU 0, secondaries will be enabled as they come up. */ + wasm_clockevent_enable(); +} diff --git a/arch/wasm/kernel/traps.c b/arch/wasm/kernel/traps.c new file mode 100644 index 000000000..928a2338a --- /dev/null +++ b/arch/wasm/kernel/traps.c @@ -0,0 +1,207 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include + +static inline void exception_enter(struct pt_regs *regs) +{ + unsigned long *cpuflags = this_cpu_ptr(&wasm_cpuflags); + + regs->cpuflags = *cpuflags; + *cpuflags &= ~(BIT(CPUFLAGS_USER_MODE) | BIT(CPUFLAGS_INTERRUPT)); +} + +static inline void exception_exit(struct pt_regs *regs) +{ + unsigned long *cpuflags = this_cpu_ptr(&wasm_cpuflags); + + *cpuflags = regs->cpuflags; +} + +#define WASM_SYSCALL_N(x, args, cast_args, ...) \ + __visible long __wasm_syscall_##x args \ + { \ + long syscall = n; \ + struct pt_regs *regs = current_pt_regs(); \ + void (*fn)(void); \ + long syscall_args[] = {__MAP(x,__SC_ARGS,__VA_ARGS__)}; \ + bool restart; \ + \ + exception_enter(regs); \ + \ + regs->syscall_nr = n; \ + memcpy(regs->syscall_args, syscall_args, sizeof(syscall_args)); \ + regs->syscall_ret = -ENOSYS; \ + \ + if (user_mode(regs)) { \ + do { \ + syscall = syscall_enter_from_user_mode( \ + regs, syscall); \ + \ + if (syscall >= 0 && syscall < __NR_syscalls) { \ + fn = sys_call_table[syscall]; \ + if (syscall == __NR_restart_syscall) { \ + regs->syscall_ret = sys_restart_syscall(); \ + } else if (fn != (void (*)(void))sys_ni_syscall) { \ + regs->syscall_ret = ((long (*)(cast_args)) \ + fn)(__MAP(x,__SC_ARGS,__VA_ARGS__)); \ + } \ + } \ + \ + syscall_exit_to_user_mode(regs); \ + \ + switch (regs->syscall_ret) { \ + case -ERESTART_RESTARTBLOCK: \ + syscall = __NR_restart_syscall; \ + fallthrough; \ + case -ERESTARTNOHAND: \ + case 
-ERESTARTSYS: \ + case -ERESTARTNOINTR: \ + restart = true; \ + break; \ + default: \ + restart = false; \ + } \ + } while (restart); \ + } else { \ + irqentry_state_t state = irqentry_nmi_enter(regs); \ + \ + panic("Syscall called when in kernel mode"); \ + \ + irqentry_nmi_exit(regs, state); \ + } \ + \ + exception_exit(regs); \ + \ + return regs->syscall_ret; \ + } +#define WASM_SYSCALL(x, ...) WASM_SYSCALL_N( \ + x, \ + (long n, __MAP(x,__SC_DECL,__VA_ARGS__)), \ + __MAP(x,__SC_DECL,__VA_ARGS__), \ + __VA_ARGS__) + +WASM_SYSCALL_N(0, (long n), void) +WASM_SYSCALL(1, long, a) +WASM_SYSCALL(2, long, a, long, b) +WASM_SYSCALL(3, long, a, long, b, long, c) +WASM_SYSCALL(4, long, a, long, b, long, c, long, d) +WASM_SYSCALL(5, long, a, long, b, long, c, long, d, long, e) +WASM_SYSCALL(6, long, a, long, b, long, c, long, d, long, e, long, f) + +/* + * Final check before syscall return (after pt_regs have been restored). + * + * If exec() was called, we reload user program code. If there is a signal + * handler to call, we call it. (Both will not happen, as exec blocks handlers.) + * + * Returns the direction of program flow: + * -1 if exec() was called and the Wasm host should reload the user program. + * 1 if a signal was delivered => the Wasm host should start signal handling. + * 2 if a sigreturn happened => the Wasm host should cancel signal handling. + * 3 if a signal was delivered AND a sigreturn (of an older signal handler) + * happened => the Wasm host should first handle the new signal (stacked), + * then cancel the old signal handler (after the stacked signal returns). + * 0 if nothing should be done and the syscall should return normally. + * In the case of exec(), the syscall should never fully return to the caller. 
+ */ +int user_mode_tail(void) +{ + struct thread_info *thread_info = current_thread_info(); + const bool reload = thread_info->flags & _TIF_RELOAD_PROGRAM; + const bool deliver = thread_info->flags & _TIF_DELIVER_SIGNAL; + const bool retn = thread_info->flags & _TIF_RETURN_SIGNAL; + + if (reload) { + BUG_ON(deliver); + BUG_ON(retn); + + thread_info->flags &= ~_TIF_RELOAD_PROGRAM; + return -1; + } else if (deliver || retn) { + BUG_ON(reload); + + if (deliver) + thread_info->flags &= ~_TIF_DELIVER_SIGNAL; + + if (retn) + thread_info->flags &= ~_TIF_RETURN_SIGNAL; + + return (deliver ? 1 : 0) | (retn ? 2 : 0); + } + + return 0; +} + +static void do_irq(struct pt_regs *regs, int irq_nr) +{ + struct pt_regs *old_regs; + irqentry_state_t state = irqentry_enter(regs); + + irq_enter_rcu(); + old_regs = set_irq_regs(regs); + generic_handle_irq(irq_nr); + set_irq_regs(old_regs); + irq_exit_rcu(); + + irqentry_exit(regs, state); +} + +void do_irq_stacked(int irq_nr) +{ + /* + * This is a bit odd but somewhere in this function's frame we start an + * exception frame. Exactly where the boundary is does not matter in + * practice, some data may end up on either "wrong" end of the boundary. + */ + struct pt_regs regs = PT_REGS_INIT; + regs.stack_pointer = (unsigned long)&regs + sizeof(regs); + exception_enter(&regs); + + do_irq(&regs, irq_nr); + + exception_exit(&regs); +} + +/* Do an exception. There are currently no exception types in Wasm. */ +static void do_exception(struct pt_regs *regs) +{ + /* + * The host is currently responsible for reporting the full error. We + * just mark this error as SIGILL but it could be anything.
+ */ + if (user_mode(regs)) { + irqentry_enter_from_user_mode(regs); + force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)0U); + irqentry_exit_to_user_mode(regs); + } else { + irqentry_state_t state = irqentry_nmi_enter(regs); + make_task_dead(SIGILL); + irqentry_nmi_exit(regs, state); + } +} + +/* +* This function is called from the host when things break either in kernel code +* or user code. That code will never continue to execute - we have to report the +* error and try to recover in the best way possible. +*/ +__visible void raise_exception(void) +{ + /* + * This is a bit odd but somewhere in this function's frame we start an + * exception frame. Exactly where the boundary is does not matter in + * practice, some data may end up on either "wrong" end of the boundary. + */ + struct pt_regs regs = PT_REGS_INIT; + regs.stack_pointer = (unsigned long)&regs + sizeof(regs); + exception_enter(&regs); + + do_exception(&regs); + + exception_exit(&regs); +} diff --git a/arch/wasm/kernel/vmlinux.lds.S b/arch/wasm/kernel/vmlinux.lds.S new file mode 100644 index 000000000..1ae0641dc --- /dev/null +++ b/arch/wasm/kernel/vmlinux.lds.S @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +/* Put init_task after init_stack in the thread stack. */ +#define INIT_TASK_OFFSET THREAD_TASK_STRUCT_OFFSET + +#include +#include +#include +#include + +SECTIONS +{ + /* To refer to address 0 in assembly, but as a relocation. */ + zeroptr = 0; + + /* Begin 1 Wasm page (64 KiB) in so that we can dodge null-pointer. */ + .
= 0x10000; + + __init_begin = .; + HEAD_TEXT_SECTION + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) + PERCPU_SECTION(L1_CACHE_BYTES) + __init_end = .; + + .text : { + _text = .; + _stext = .; + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + ENTRY_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + _etext = .; + } + + _sdata = .; + RO_DATA(PAGE_SIZE) + RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + _edata = .; + + EXCEPTION_TABLE(16) + + BSS_SECTION(0, 0, 0) + _end = .; + +/* + Not supported by wasm-ld linker script hack: + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS +*/ + + DISCARDS // must be the last +} + +/* + * Due to the way linker scripts are implemented in wasm-ld, any symbol-alias + * assignments have to happen after the symbol has been placed into the output. + */ +jiffies = jiffies_64; diff --git a/arch/wasm/lib/Makefile b/arch/wasm/lib/Makefile new file mode 100644 index 000000000..8e4e35012 --- /dev/null +++ b/arch/wasm/lib/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only + +lib-y += delay.o diff --git a/arch/wasm/lib/delay.c b/arch/wasm/lib/delay.c new file mode 100644 index 000000000..4db76b463 --- /dev/null +++ b/arch/wasm/lib/delay.c @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +void calibrate_delay(void) +{ + /* Wasm convention: lpj = nanoseconds per Hz */ + loops_per_jiffy = 1000000000 / HZ; +} + +void __delay(unsigned long cycles) +{ + unsigned int dummy = 0U; + + mb(); + __builtin_wasm_memory_atomic_wait32(&dummy, 0U, (long long)cycles); + mb(); +} +EXPORT_SYMBOL(__delay); diff --git a/arch/wasm/mm/Makefile b/arch/wasm/mm/Makefile new file mode 100644 index 000000000..661744a43 --- /dev/null +++ b/arch/wasm/mm/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-y += init.o diff --git a/arch/wasm/mm/init.c b/arch/wasm/mm/init.c new file mode 100644 index 000000000..5469d62e5 --- /dev/null +++ b/arch/wasm/mm/init.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ 
+ +#include +#include +#include +#include + +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; +EXPORT_SYMBOL(empty_zero_page); + +void __init mem_init(void) +{ + /* These are needed by some code to know which pages are valid. */ + high_memory = (void *)memory_end; + max_pfn = PFN_DOWN(memory_end); + min_low_pfn = PFN_DOWN(memory_start); + max_low_pfn = max_pfn; + set_max_mapnr(max_low_pfn - min_low_pfn); + + memblock_free_all(); +} diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b331a3947..86d5a3ee4 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -404,11 +404,15 @@ . = ALIGN(align); \ *(.data..cacheline_aligned) +#ifndef INIT_TASK_OFFSET +#define INIT_TASK_OFFSET (0) +#endif #define INIT_TASK_DATA(align) \ . = ALIGN(align); \ __start_init_task = .; \ init_thread_union = .; \ init_stack = .; \ + . = . + INIT_TASK_OFFSET; \ KEEP(*(.data..init_task)) \ KEEP(*(.data..init_thread_info)) \ . 
= __start_init_task + THREAD_SIZE; \ diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index d676ed2b2..f0aee0f22 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -437,6 +437,7 @@ enum { #define AUDIT_ARCH_TILEGX32 (EM_TILEGX|__AUDIT_ARCH_LE) #define AUDIT_ARCH_TILEPRO (EM_TILEPRO|__AUDIT_ARCH_LE) #define AUDIT_ARCH_UNICORE (EM_UNICORE|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_WASM32 (EM_WASM32|__AUDIT_ARCH_LE) #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_XTENSA (EM_XTENSA) #define AUDIT_ARCH_LOONGARCH32 (EM_LOONGARCH|__AUDIT_ARCH_LE) diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index ef38c2bc5..aaca659a7 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -52,6 +52,7 @@ #define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */ #define EM_CSKY 252 /* C-SKY */ #define EM_LOONGARCH 258 /* LoongArch */ +#define EM_WASM32 264 /* WebAssembly wasm32 */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ /* diff --git a/scripts/Makefile.clang b/scripts/Makefile.clang index 058a4c0f8..4253c0177 100644 --- a/scripts/Makefile.clang +++ b/scripts/Makefile.clang @@ -9,6 +9,7 @@ CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu +CLANG_TARGET_FLAGS_wasm := wasm32-unknown-unknown CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu CLANG_TARGET_FLAGS_um := $(CLANG_TARGET_FLAGS_$(SUBARCH)) CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(SRCARCH)) diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 0edfdb403..9fdae5e6d 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -44,12 +44,20 @@ objtool-args = $(vmlinux-objtool-args-y) --link # Link of vmlinux.o used for section mismatch analysis # --------------------------------------------------------------------------- +ifneq 
($(ARCH),wasm) + circular-resolved-libs = --start-group $(KBUILD_VMLINUX_LIBS) --end-group +else + # LLVM wasm-ld does not support --start-group and --end-group. This is + # not as good as grouping them, but it might just work! + circular-resolved-libs = $(KBUILD_VMLINUX_LIBS) $(KBUILD_VMLINUX_LIBS) +endif + quiet_cmd_ld_vmlinux.o = LD $@ cmd_ld_vmlinux.o = \ $(LD) ${KBUILD_LDFLAGS} -r -o $@ \ $(addprefix -T , $(initcalls-lds)) \ --whole-archive vmlinux.a --no-whole-archive \ - --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ + $(circular-resolved-libs) \ $(cmd_objtool) define rule_ld_vmlinux.o diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index a432b171b..023eed789 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -66,7 +66,15 @@ vmlinux_link() libs= else objs=vmlinux.a - libs="${KBUILD_VMLINUX_LIBS}" + + if [ "${ARCH}" = "wasm" ]; then + # LLVM wasm-ld does not support --start-group and + # --end-group. This is not as good as grouping them, but + # it might just work! + libs="${KBUILD_VMLINUX_LIBS} ${KBUILD_VMLINUX_LIBS}" + else + libs="${wl}--start-group ${KBUILD_VMLINUX_LIBS} ${wl}--end-group" + fi fi if is_enabled CONFIG_MODULES; then @@ -87,6 +95,16 @@ vmlinux_link() ldlibs= fi + # wasm-ld has very simple linker scripts and needs some extra setup. + if [ "${ARCH}" = "wasm" ]; then + ldflags="${ldflags} --no-entry --error-limit=0" + ldflags="${ldflags} --export-all --export-table" + ldflags="${ldflags} --no-merge-data-segments --no-gc-sections" + ldflags="${ldflags} --import-memory --shared-memory" + ldflags="${ldflags} --max-memory=$((1<<32))" + ldflags="${ldflags} --import-undefined" + fi + ldflags="${ldflags} ${wl}--script=${objtree}/${KBUILD_LDS}" # The kallsyms linking does not need debug symbols included.
@@ -100,8 +118,7 @@ vmlinux_link() ${ld} ${ldflags} -o ${output} \ ${wl}--whole-archive ${objs} ${wl}--no-whole-archive \ - ${wl}--start-group ${libs} ${wl}--end-group \ - $@ ${ldlibs} + ${libs} $@ ${ldlibs} } # generate .BTF typeinfo from DWARF debuginfo -- 2.25.1