linux-wasm/patches/llvm/0001-Hack-patch-to-allow-GNU-ld-style-linker-scripts-in-w.patch
2025-10-31 18:38:01 +01:00

2221 lines
72 KiB
Diff

From 939b82d11279ee2700c19c41a770051cc28e045f Mon Sep 17 00:00:00 2001
From: Joel Severin <joel.severin@icemanor.se>
Date: Tue, 26 Mar 2024 12:37:11 +0100
Subject: [PATCH] Hack patch to allow GNU ld style linker scripts in wasm-ld
---
lld/wasm/CMakeLists.txt | 2 +
lld/wasm/Config.h | 1 +
lld/wasm/Driver.cpp | 30 +-
lld/wasm/InputFiles.cpp | 4 +
lld/wasm/Options.td | 2 +
lld/wasm/ScriptLexer.cpp | 328 ++++++++++++
lld/wasm/ScriptLexer.h | 56 ++
lld/wasm/ScriptParser.cpp | 1056 +++++++++++++++++++++++++++++++++++++
lld/wasm/ScriptParser.h | 341 ++++++++++++
lld/wasm/Writer.cpp | 241 ++++++++-
10 files changed, 2051 insertions(+), 10 deletions(-)
create mode 100644 lld/wasm/ScriptLexer.cpp
create mode 100644 lld/wasm/ScriptLexer.h
create mode 100644 lld/wasm/ScriptParser.cpp
create mode 100644 lld/wasm/ScriptParser.h
diff --git a/lld/wasm/CMakeLists.txt b/lld/wasm/CMakeLists.txt
index 6033bfbf9..53048d818 100644
--- a/lld/wasm/CMakeLists.txt
+++ b/lld/wasm/CMakeLists.txt
@@ -12,6 +12,8 @@ add_lld_library(lldWasm
OutputSections.cpp
OutputSegment.cpp
Relocations.cpp
+ ScriptLexer.cpp
+ ScriptParser.cpp
SymbolTable.cpp
Symbols.cpp
SyntheticSections.cpp
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 97c508bda..e42ffdb94 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -57,6 +57,7 @@ struct Configuration {
bool growableTable;
bool gcSections;
llvm::StringSet<> keepSections;
+ std::optional<llvm::MemoryBufferRef> linkerScript;
std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport;
std::optional<llvm::StringRef> memoryExport;
bool sharedMemory;
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 635f19f78..af849f9b7 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -327,6 +327,15 @@ static std::optional<std::string> findFromSearchPaths(StringRef path) {
return std::nullopt;
}
+// Resolves the path of a linker/version script. A script that exists at the
+// given path wins; otherwise the '-L' search paths are consulted, mirroring
+// how ld.bfd treats '-T', --version-script= and the INPUT() script command.
+static std::optional<std::string> searchScript(StringRef name) {
+  const bool foundDirectly = fs::exists(name);
+  return foundDirectly ? std::optional<std::string>(name.str())
+                       : findFromSearchPaths(name);
+}
+
// This is for -l<basename>. We'll look for lib<basename>.a from
// search paths.
static std::optional<std::string> searchLibraryBaseName(StringRef name) {
@@ -388,6 +397,13 @@ void LinkerDriver::createFiles(opt::InputArgList &args) {
error("stray --end-lib");
inLib = false;
break;
+ case OPT_script:
+ if (std::optional<std::string> path = searchScript(arg->getValue())) {
+ config->linkerScript = readFile(*path);
+ } else {
+ error(Twine("cannot find linker script ") + arg->getValue());
+ }
+ break;
}
}
if (files.empty() && errorCount() == 0)
@@ -617,12 +633,6 @@ static void setConfigs() {
// pointer.
if (!config->tableBase)
config->tableBase = 1;
- // The default offset for static/global data, for when --global-base is
- // not specified on the command line. The precise value of 1024 is
- // somewhat arbitrary, and pre-dates wasm-ld (Its the value that
- // emscripten used prior to wasm-ld).
- if (!config->globalBase && !config->relocatable && !config->stackFirst)
- config->globalBase = 1024;
}
if (config->relocatable) {
@@ -1195,6 +1205,14 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (errorCount())
return;
+ // The default offset for static/global data, for when --global-base is
+ // not specified on the command line. The precise value of 1024 is
+ // somewhat arbitrary, and pre-dates wasm-ld (It's the value that
+ // emscripten used prior to wasm-ld).
+ if (!config->globalBase && !config->linkerScript && !ctx.isPic &&
+ !config->relocatable && !config->stackFirst)
+ config->globalBase = 1024;
+
checkOptions(args);
if (errorCount())
return;
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index f5e946aca..db768fe63 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -362,6 +362,10 @@ static bool shouldMerge(const WasmSection &sec) {
}
static bool shouldMerge(const WasmSegment &seg) {
+ // No merge chunks when using linker scripts.
+ if (config->linkerScript)
+ return false;
+
// As of now we only support merging strings, and only with single byte
// alignment (2^0).
if (!(seg.Data.LinkingFlags & WASM_SEG_FLAG_STRINGS) ||
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index 8190717ce..bc1cd6b74 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -294,6 +294,8 @@ def thinlto_jobs: JJ<"thinlto-jobs=">,
def lto_debug_pass_manager: FF<"lto-debug-pass-manager">,
HelpText<"Debug new pass manager">;
+defm script: Eq<"script", "Use linker script ((very) limited support for GNU ld/ELF linker scripts)">;
+
// Experimental PIC mode.
def experimental_pic: FF<"experimental-pic">,
HelpText<"Enable Experimental PIC">;
diff --git a/lld/wasm/ScriptLexer.cpp b/lld/wasm/ScriptLexer.cpp
new file mode 100644
index 000000000..49fb05a2e
--- /dev/null
+++ b/lld/wasm/ScriptLexer.cpp
@@ -0,0 +1,328 @@
+//===- ScriptLexer.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a lexer for the linker script.
+//
+// The linker script's grammar is not complex but ambiguous due to the
+// lack of the formal specification of the language. What we are trying to
+// do in this and other files in LLD is to make a "reasonable" linker
+// script processor.
+//
+// Among simplicity, compatibility and efficiency, we put the most
+// emphasis on simplicity when we wrote this lexer. Compatibility with the
+// GNU linkers is important, but we did not try to clone every tiny corner
+// case of their lexers, as even ld.bfd and ld.gold are subtly different
+// in various corner cases. We do not care much about efficiency because
+// the time spent in parsing linker scripts is usually negligible.
+//
+// Our grammar of the linker script is LL(2), meaning that it needs at
+// most two-token lookahead to parse. The only place we need two-token
+// lookahead is labels in version scripts, where we need to parse "local :"
+// as if "local:".
+//
+// Overall, this lexer works fine for most linker scripts. There might
+// be room for improving compatibility, but that's probably not at the
+// top of our todo list.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScriptLexer.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::wasm;
+
+// Returns the source line holding the current token, minus its terminator.
+StringRef ScriptLexer::getLine() {
+  StringRef buf = getCurrentMB().getBuffer();
+  const char *tokStart = tokens[pos - 1].data();
+
+  size_t newline = buf.rfind('\n', tokStart - buf.data());
+  if (newline != StringRef::npos)
+    buf = buf.substr(newline + 1);
+  return buf.substr(0, buf.find_first_of("\r\n"));
+}
+
+// Computes the 1-based line number of the current token.
+size_t ScriptLexer::getLineNumber() {
+  if (pos == 0)
+    return 1;
+  StringRef buf = getCurrentMB().getBuffer();
+  const size_t offset = tokens[pos - 1].data() - buf.data();
+
+  // Counting newlines from the top of the buffer is the fallback. When an
+  // earlier result is cached and this token does not lie before it, resume
+  // counting from the cached offset instead.
+  const bool resume =
+      lastLineNumberOffset > 0 && offset >= lastLineNumberOffset;
+  const size_t from = resume ? lastLineNumberOffset : 0;
+  const size_t base = resume ? lastLineNumber : 1;
+  const size_t result = base + buf.substr(from, offset - from).count('\n');
+
+  // Remember this answer for the next query.
+  lastLineNumberOffset = offset;
+  lastLineNumber = result;
+
+  return result;
+}
+
+// Returns the 0-based column at which the current token begins, measured from
+// the start of the line returned by getLine().
+size_t ScriptLexer::getColumnNumber() {
+  const char *tokStart = tokens[pos - 1].data();
+  const char *lineStart = getLine().data();
+  return tokStart - lineStart;
+}
+
+// Formats the position of the current token as "<buffer identifier>:<line>".
+std::string ScriptLexer::getCurrentLocation() {
+  const std::string file(getCurrentMB().getBufferIdentifier());
+  const size_t line = getLineNumber();
+  return (file + ":" + Twine(line)).str();
+}
+
+// Tokenizes the whole buffer at construction time.
+ScriptLexer::ScriptLexer(MemoryBufferRef mb) {
+  tokenize(mb);
+}
+
+// Reports MSG prefixed with the current location. If errorCount() is already
+// non-zero the message is dropped, so cascading errors are not recorded.
+void ScriptLexer::setError(const Twine &msg) {
+  if (errorCount() != 0)
+    return;
+
+  std::string text = (getCurrentLocation() + ": " + msg).str();
+  if (pos != 0) {
+    // Append the offending line and a caret under the token's column.
+    text += "\n>>> " + getLine().str() + "\n>>> " +
+            std::string(getColumnNumber(), ' ') + "^";
+  }
+  error(text);
+}
+
+// Splits the contents of MB into linker script tokens and splices them into
+// `tokens` at the current position. MB is also recorded in `mbs`.
+void ScriptLexer::tokenize(MemoryBufferRef mb) {
+  std::vector<StringRef> fresh;
+  mbs.push_back(mb);
+  const StringRef whole = mb.getBuffer();
+  StringRef rest = whole;
+
+  while (true) {
+    rest = skipSpace(rest);
+    if (rest.empty())
+      break;
+
+    // Quoted token. The double quotes stay part of the token because, in a
+    // glob match context, only unquoted tokens are interpreted as glob
+    // patterns; double-quoted tokens are literal patterns there.
+    if (rest.starts_with("\"")) {
+      const size_t close = rest.find("\"", 1);
+      if (close == StringRef::npos) {
+        StringRef filename = mb.getBufferIdentifier();
+        size_t lineno =
+            whole.substr(0, rest.data() - whole.data()).count('\n');
+        error(filename + ":" + Twine(lineno + 1) + ": unclosed quote");
+        return;
+      }
+
+      fresh.push_back(rest.take_front(close + 1));
+      rest = rest.substr(close + 1);
+      continue;
+    }
+
+    // Some operators always form tokens of their own: the three-character
+    // shift-assignments, then the two-character assignment/doubled forms.
+    const bool isThreeCharOp =
+        rest.starts_with("<<=") || rest.starts_with(">>=");
+    const bool isTwoCharOp =
+        rest.size() > 1 && ((rest[1] == '=' && strchr("*/+-<>&|", rest[0])) ||
+                            (rest[0] == rest[1] && strchr("<>&|", rest[0])));
+    if (isThreeCharOp || isTwoCharOp) {
+      const size_t opLen = isThreeCharOp ? 3 : 2;
+      fresh.push_back(rest.substr(0, opLen));
+      rest = rest.substr(opLen);
+      continue;
+    }
+
+    // Unquoted token. This is more relaxed than tokens in C-like languages,
+    // so that "file-name.cpp" can be written as one bare token, for example.
+    size_t len = rest.find_first_not_of(
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+        "0123456789_.$/\\~=+[]*?-!^:");
+
+    // A character that cannot start a word (usually punctuation) forms a
+    // single-character token.
+    if (len == 0)
+      len = 1;
+    fresh.push_back(rest.substr(0, len));
+    rest = rest.substr(len);
+  }
+
+  tokens.insert(tokens.begin() + pos, fresh.begin(), fresh.end());
+}
+
+// Strips leading whitespace, /* ... */ comments and '#' line comments from S
+// and returns what is left. An unterminated block comment is reported through
+// setError() and yields an empty string.
+StringRef ScriptLexer::skipSpace(StringRef s) {
+  while (true) {
+    if (s.starts_with("/*")) {
+      const size_t close = s.find("*/", 2);
+      if (close == StringRef::npos) {
+        setError("unclosed comment in a linker script");
+        return "";
+      }
+      s = s.substr(close + 2);
+    } else if (s.starts_with("#")) {
+      // A '#' comment runs through the next newline, or to the end of the
+      // input when no newline follows.
+      const size_t eol = s.find('\n', 1);
+      s = s.substr(eol == StringRef::npos ? s.size() : eol + 1);
+    } else {
+      // If trimming removes nothing, S starts at a token (or is empty).
+      const StringRef trimmed = s.ltrim();
+      if (trimmed.size() == s.size())
+        return s;
+      s = trimmed;
+    }
+  }
+}
+
+// Reports whether the token stream is exhausted. A non-zero errorCount() is
+// treated the same way, as if the erroneous token were the last one.
+bool ScriptLexer::atEOF() {
+  if (errorCount())
+    return true;
+  return pos == tokens.size();
+}
+
+// Splits a token into expression tokens, using operator characters as
+// separators. For example, "3*5" becomes "3", "*" and "5".
+static std::vector<StringRef> tokenizeExpr(StringRef s) {
+  // Quoted strings are literal strings and are never split.
+  if (s.starts_with("\""))
+    return {s};
+
+  const StringRef ops = "!~*/+-<>?:="; // Characters treated as operators.
+
+  std::vector<StringRef> parts;
+  while (!s.empty()) {
+    const size_t at = s.find_first_of(ops);
+
+    // No operator left: the remainder is a single token.
+    if (at == StringRef::npos) {
+      parts.push_back(s);
+      break;
+    }
+
+    // The operand in front of the operator, if there is one.
+    if (at != 0)
+      parts.push_back(s.substr(0, at));
+
+    // The operator itself. !=, ==, >=, <=, << and >> are kept as single
+    // tokens; every other operator is one character long.
+    const StringRef two = s.substr(at, 2);
+    const bool isTwoCharOp = two == "!=" || two == "==" || two == ">=" ||
+                             two == "<=" || two == "<<" || two == ">>";
+    const size_t opLen = isTwoCharOp ? 2 : 1;
+    parts.push_back(s.substr(at, opLen));
+    s = s.substr(at + opLen);
+  }
+  return parts;
+}
+
+// Where an expression is expected, the lexer must tokenize differently from
+// the default: arithmetic operator characters are ordinary token characters
+// by default, but in expression context they are tokens of their own.
+//
+// For example, "foo*3" is split into "foo", "*" and "3" only in expression
+// context.
+//
+// When `inExpr` is set, this re-splits the token at `pos` in place.
+void ScriptLexer::maybeSplitExpr() {
+  if (!inExpr || errorCount() || atEOF())
+    return;
+
+  std::vector<StringRef> pieces = tokenizeExpr(tokens[pos]);
+  if (pieces.size() == 1)
+    return;
+  auto where = tokens.erase(tokens.begin() + pos);
+  tokens.insert(where, pieces.begin(), pieces.end());
+}
+
+// Returns the next token and advances past it. Returns "" when errorCount()
+// is non-zero; running off the end of the token stream is reported as an
+// error and also returns "".
+StringRef ScriptLexer::next() {
+  maybeSplitExpr();
+
+  if (errorCount())
+    return "";
+  if (!atEOF())
+    return tokens[pos++];
+
+  setError("unexpected EOF");
+  return "";
+}
+
+// Returns the next token without consuming it, or "" on error.
+StringRef ScriptLexer::peek() {
+  StringRef upcoming = next();
+  if (errorCount())
+    return "";
+  --pos; // Undo the advance done by next().
+  return upcoming;
+}
+
+// Returns the token after the next one without consuming anything, or "" on
+// error.
+StringRef ScriptLexer::peek2() {
+  skip();
+  StringRef second = next();
+  if (errorCount())
+    return "";
+  pos -= 2; // Rewind over both tokens read above.
+  return second;
+}
+
+// Consumes the next token if it equals TOK, and reports whether it did.
+bool ScriptLexer::consume(StringRef tok) {
+  if (peek() != tok)
+    return false;
+  skip();
+  return true;
+}
+
+// Consumes TOK followed by ":". The colon may be glued to TOK ("local:") or
+// be a separate token ("local :").
+bool ScriptLexer::consumeLabel(StringRef tok) {
+  if (consume((tok + ":").str()))
+    return true;
+
+  const bool splitForm = tokens.size() >= pos + 2 && tokens[pos] == tok &&
+                         tokens[pos + 1] == ":";
+  if (splitForm)
+    pos += 2;
+  return splitForm;
+}
+
+// Discards the next token.
+void ScriptLexer::skip() {
+  (void)next();
+}
+
+// Consumes the next token and records an error unless it equals EXPECT. Does
+// nothing when errorCount() is already non-zero.
+void ScriptLexer::expect(StringRef expect) {
+  if (errorCount())
+    return;
+  StringRef got = next();
+  if (got == expect)
+    return;
+  setError(expect + " expected, but got " + got);
+}
+
+// Returns true if the byte range of T lies entirely within that of S.
+static bool encloses(StringRef s, StringRef t) {
+  const bool startsInside = s.bytes_begin() <= t.bytes_begin();
+  const bool endsInside = t.bytes_end() <= s.bytes_end();
+  return startsInside && endsInside;
+}
+
+// Returns the input buffer that contains the current token. Before any token
+// has been consumed, this is the most recently added buffer.
+MemoryBufferRef ScriptLexer::getCurrentMB() {
+  assert(!mbs.empty());
+  if (pos == 0)
+    return mbs.back();
+
+  const StringRef current = tokens[pos - 1];
+  for (MemoryBufferRef mb : mbs) {
+    if (encloses(mb.getBuffer(), current))
+      return mb;
+  }
+  llvm_unreachable("getCurrentMB: failed to find a token");
+}
diff --git a/lld/wasm/ScriptLexer.h b/lld/wasm/ScriptLexer.h
new file mode 100644
index 000000000..33e2bbd05
--- /dev/null
+++ b/lld/wasm/ScriptLexer.h
@@ -0,0 +1,56 @@
+//===- ScriptLexer.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_WASM_SCRIPT_LEXER_H
+#define LLD_WASM_SCRIPT_LEXER_H
+
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include <vector>
+
+namespace lld::wasm {
+
+// Tokenizer for linker scripts. A buffer is split into tokens as soon as it
+// is handed to tokenize(); callers then walk `tokens` through next(), peek(),
+// consume() and friends, with `pos` marking the next token to be read.
+class ScriptLexer {
+public:
+ // Tokenizes MB immediately.
+ explicit ScriptLexer(MemoryBufferRef mb);
+
+ // Reports MSG prefixed with the current location; dropped when an error has
+ // already been counted.
+ void setError(const Twine &msg);
+ // Splits MB into tokens and inserts them into `tokens` at `pos`.
+ void tokenize(MemoryBufferRef mb);
+ // Returns S without leading whitespace and comments.
+ StringRef skipSpace(StringRef s);
+ // True when all tokens are consumed or an error has been counted.
+ bool atEOF();
+ // Returns the next token and advances; "" on error or EOF.
+ StringRef next();
+ // Returns the next token without advancing.
+ StringRef peek();
+ // Returns the token after the next one without advancing.
+ StringRef peek2();
+ // Discards the next token.
+ void skip();
+ // Advances past the next token only if it equals TOK.
+ bool consume(StringRef tok);
+ // Advances past the next token and reports an error if it is not EXPECT.
+ void expect(StringRef expect);
+ // Like consume(), for TOK followed by ":" (with or without a space).
+ bool consumeLabel(StringRef tok);
+ // Returns "<buffer identifier>:<line>" for the current token.
+ std::string getCurrentLocation();
+
+ // Every buffer that has been tokenized so far.
+ std::vector<MemoryBufferRef> mbs;
+ // The token stream; each token is a view into one of `mbs`.
+ std::vector<StringRef> tokens;
+ // When set, next() splits the upcoming token at operator characters.
+ bool inExpr = false;
+ // Index into `tokens` of the next token to be returned.
+ size_t pos = 0;
+
+ // Cache used by getLineNumber(): the last computed line number and the
+ // buffer offset of the token it was computed for.
+ size_t lastLineNumber = 0;
+ size_t lastLineNumberOffset = 0;
+
+protected:
+ // Returns the buffer containing the current token.
+ MemoryBufferRef getCurrentMB();
+
+private:
+ // Re-splits the token at `pos` when in expression context.
+ void maybeSplitExpr();
+ // Text of the line containing the current token.
+ StringRef getLine();
+ // 1-based line number of the current token.
+ size_t getLineNumber();
+ // 0-based column of the current token.
+ size_t getColumnNumber();
+};
+
+} // namespace lld::wasm
+
+#endif
diff --git a/lld/wasm/ScriptParser.cpp b/lld/wasm/ScriptParser.cpp
new file mode 100644
index 000000000..4f246f85a
--- /dev/null
+++ b/lld/wasm/ScriptParser.cpp
@@ -0,0 +1,1056 @@
+//===- ScriptParser.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a recursive-descent parser for linker scripts.
+// Parsed results are stored to Config and Script global objects.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScriptParser.h"
+#include "OutputSections.h"
+#include "OutputSegment.h"
+#include "ScriptLexer.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "lld/Common/CommonLinkerContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/TimeProfiler.h"
+#include <cassert>
+#include <limits>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::support::endian;
+using namespace lld;
+using namespace lld::wasm;
+
+// Strips the first and last characters from a token that starts with a double
+// quote; any other token is returned unchanged.
+static StringRef unquote(StringRef s) {
+  const bool quoted = s.starts_with("\"");
+  return quoted ? s.substr(1, s.size() - 2) : s;
+}
+
+// Some operations support only one non-absolute operand. Arrange the pair so
+// that the absolute one ends up on the right-hand side (B), and report an
+// error if B is still not absolute afterwards.
+static void moveAbsRight(ExprValue &a, ExprValue &b) {
+  const bool aBelongsRight =
+      a.sec == nullptr || (a.forceAbsolute && !b.isAbsolute());
+  if (aBelongsRight)
+    std::swap(a, b);
+  if (b.isAbsolute())
+    return;
+  error(a.loc + ": at least one side of the expression must be absolute");
+}
+
+// Adds B to A. The result carries A's section and forceAbsolute flag after
+// the operands have been arranged by moveAbsRight().
+static ExprValue add(ExprValue a, ExprValue b) {
+  moveAbsRight(a, b);
+  const uint64_t sum = a.getSectionOffset() + b.getValue();
+  return {a.sec, a.forceAbsolute, sum, a.loc};
+}
+
+// Subtracts B from A.
+static ExprValue sub(ExprValue a, ExprValue b) {
+  // The distance between two symbols in sections is absolute.
+  const bool bothRelative = !a.isAbsolute() && !b.isAbsolute();
+  if (bothRelative)
+    return a.getValue() - b.getValue();
+  return {a.sec, false, a.getSectionOffset() - b.getValue(), a.loc};
+}
+
+// Bitwise AND of the two full values, re-expressed as an offset from A's
+// section address (after moveAbsRight() has arranged the operands).
+static ExprValue bitAnd(ExprValue a, ExprValue b) {
+  moveAbsRight(a, b);
+  const uint64_t masked = a.getValue() & b.getValue();
+  return {a.sec, a.forceAbsolute, masked - a.getSecAddr(), a.loc};
+}
+
+// Bitwise OR of the two full values, re-expressed as an offset from A's
+// section address (after moveAbsRight() has arranged the operands).
+static ExprValue bitOr(ExprValue a, ExprValue b) {
+  moveAbsRight(a, b);
+  const uint64_t merged = a.getValue() | b.getValue();
+  return {a.sec, a.forceAbsolute, merged - a.getSecAddr(), a.loc};
+}
+
+// Returns the value rounded up to `alignment`. With a section attached, the
+// value is the section's address plus the section's offset for `val`;
+// otherwise it is `val` itself.
+uint64_t ExprValue::getValue() const {
+  uint64_t raw = val;
+  if (sec)
+    raw = sec->address + sec->getOffset(val);
+  return alignToPowerOf2(raw, alignment);
+}
+
+// Returns the address corresponding to offset 0 of the attached section, or 0
+// when no section is attached.
+uint64_t ExprValue::getSecAddr() const {
+  if (!sec)
+    return 0;
+  return sec->address + sec->getOffset(0);
+}
+
+// Returns the value as an offset from the attached section's address.
+uint64_t ExprValue::getSectionOffset() const {
+  if (alignment != 1 || sec)
+    return getValue() - getSecAddr();
+
+  // With trivial alignment and no section, the offset is known without
+  // computing the full value. This lets the function succeed in cases where
+  // the output section is not yet known.
+  return val;
+}
+
+// Parses the whole script. At the top level only SECTIONS and whatever
+// readAssignment() accepts are understood; stray semicolons are ignored and
+// anything else is reported as an unknown directive.
+void ScriptParser::readLinkerScript() {
+  while (!atEOF()) {
+    StringRef tok = next();
+    if (tok == ";")
+      continue;
+
+    if (tok == "SECTIONS") {
+      readSections();
+      continue;
+    }
+    if (SymbolAssignment *cmd = readAssignment(tok)) {
+      sectionCommands.push_back(cmd);
+      continue;
+    }
+    setError("unknown directive: " + tok);
+  }
+}
+
+// Parses the body of a SECTIONS command and appends the resulting commands to
+// `sectionCommands`. OVERLAY and a trailing INSERT are rejected with an error.
+void ScriptParser::readSections() {
+  expect("{");
+  SmallVector<SectionCommand *, 0> cmds;
+  while (!errorCount() && !consume("}")) {
+    StringRef tok = next();
+    if (tok == "OVERLAY") {
+      setError("OVERLAY not supported");
+      continue;
+    }
+
+    // Anything that is not an assignment is an output section description.
+    SectionCommand *cmd = readAssignment(tok);
+    if (!cmd)
+      cmd = readOutputSectionDescription(tok);
+    cmds.push_back(cmd);
+  }
+
+  // If DATA_SEGMENT_RELRO_END is absent, for sections after DATA_SEGMENT_ALIGN,
+  // the relro fields should be cleared. Kept disabled for reference:
+/*
+  if (!seenRelroEnd)
+    for (SectionCommand *cmd : cmds)
+      if (auto *osd = dyn_cast<OutputDesc>(cmd))
+        osd->osec.relro = false;
+*/
+  sectionCommands.insert(sectionCommands.end(), cmds.begin(), cmds.end());
+
+  if (!atEOF() && consume("INSERT")) {
+    setError("INSERT BEFORE/AFTER not supported");
+    return;
+  }
+  hasSectionsCommand = true;
+}
+
+// Returns the precedence of a binary operator token; a larger number binds
+// more tightly. Tokens that are not listed yield -1.
+static int precedence(StringRef op) {
+  if (op == "*" || op == "/" || op == "%")
+    return 10;
+  if (op == "+" || op == "-")
+    return 9;
+  if (op == "<<" || op == ">>")
+    return 8;
+  if (op == "<" || op == "<=" || op == ">" || op == ">=")
+    return 7;
+  if (op == "==" || op == "!=")
+    return 6;
+  if (op == "&")
+    return 5;
+  if (op == "|")
+    return 4;
+  if (op == "&&")
+    return 3;
+  if (op == "||")
+    return 2;
+  if (op == "?")
+    return 1;
+  return -1;
+}
+
+// Reads patterns up to and including the closing ")" and returns them as one
+// matcher.
+StringMatcher ScriptParser::readFilePatterns() {
+  StringMatcher patterns;
+  for (;;) {
+    if (errorCount() || consume(")"))
+      break;
+    patterns.addPattern(SingleStringMatcher(next()));
+  }
+  return patterns;
+}
+
+// Maps the upcoming token to a sort policy without consuming it. Tokens that
+// are not SORT* keywords map to SortSectionPolicy::Default.
+SortSectionPolicy ScriptParser::peekSortKind() {
+  const StringRef tok = peek();
+  if (tok == "SORT" || tok == "SORT_BY_NAME")
+    return SortSectionPolicy::Name;
+  if (tok == "SORT_BY_ALIGNMENT")
+    return SortSectionPolicy::Alignment;
+  if (tok == "SORT_BY_INIT_PRIORITY")
+    return SortSectionPolicy::Priority;
+  if (tok == "SORT_NONE")
+    return SortSectionPolicy::None;
+  return SortSectionPolicy::Default;
+}
+
+// Like peekSortKind(), but also consumes the token when it is a SORT* keyword.
+SortSectionPolicy ScriptParser::readSortKind() {
+  const SortSectionPolicy kind = peekSortKind();
+  if (kind == SortSectionPolicy::Default)
+    return kind;
+  skip();
+  return kind;
+}
+
+// Reads SECTIONS command contents in the following form:
+//
+// <contents> ::= <elem>*
+// <elem>     ::= <exclude>? <glob-pattern>
+// <exclude>  ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")"
+//
+// For example,
+//
+//   *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz)
+//
+// is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o": section .foo
+// in any file, section .bar in any file but a.o, and section .baz in any file
+// but b.o.
+SmallVector<SectionPattern, 0> ScriptParser::readInputSectionsList() {
+  SmallVector<SectionPattern, 0> patterns;
+  while (!errorCount() && peek() != ")") {
+    StringMatcher excluded;
+    if (consume("EXCLUDE_FILE")) {
+      expect("(");
+      excluded = readFilePatterns();
+    }
+
+    // Collect section patterns until ), EXCLUDE_FILE, or a SORT* keyword.
+    StringMatcher sections;
+    for (;;) {
+      if (errorCount() || peek() == ")" || peek() == "EXCLUDE_FILE" ||
+          peekSortKind() != SortSectionPolicy::Default)
+        break;
+      sections.addPattern(unquote(next()));
+    }
+
+    if (sections.empty()) {
+      // Nothing read at all ends the list; an EXCLUDE_FILE with no section
+      // pattern after it is an error.
+      if (excluded.empty())
+        break;
+      setError("section pattern is expected");
+      continue;
+    }
+    patterns.push_back({std::move(excluded), std::move(sections)});
+  }
+  return patterns;
+}
+
+// Reads the parenthesized list of glob patterns for input sections that
+// follows a file pattern. The grammar is as follows.
+//
+// <patterns> ::= <section-list>
+//              | <sort> "(" <section-list> ")"
+//              | <sort> "(" <sort> "(" <section-list> ")" ")"
+//
+// <sort>     ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
+//              | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
+//
+// <section-list> is parsed by readInputSectionsList().
+InputSectionDescription *
+ScriptParser::readInputSectionRules(StringRef filePattern, uint64_t withFlags,
+                                    uint64_t withoutFlags) {
+  auto *cmd =
+      make<InputSectionDescription>(filePattern, withFlags, withoutFlags);
+  expect("(");
+
+  while (!errorCount() && !consume(")")) {
+    const SortSectionPolicy outer = readSortKind();
+    SortSectionPolicy inner = SortSectionPolicy::Default;
+    SmallVector<SectionPattern, 0> group;
+    if (outer == SortSectionPolicy::Default) {
+      group = readInputSectionsList();
+    } else {
+      expect("(");
+      inner = readSortKind();
+      // A second sort keyword adds one more level of parentheses.
+      const bool nested = inner != SortSectionPolicy::Default;
+      if (nested)
+        expect("(");
+      group = readInputSectionsList();
+      if (nested)
+        expect(")");
+      expect(")");
+    }
+
+    // Tag every pattern of this group with its sort policies and hand it over
+    // to the description.
+    for (SectionPattern &pat : group) {
+      pat.sortInner = inner;
+      pat.sortOuter = outer;
+      cmd->sectionPatterns.push_back(std::move(pat));
+    }
+  }
+  return cmd;
+}
+
+// Parses an input section description whose first token is TOK. The wildcard
+// may be wrapped in KEEP(...), in which case the description is also recorded
+// in `keptSections`.
+// https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
+InputSectionDescription *
+ScriptParser::readInputSectionDescription(StringRef tok) {
+  // INPUT_SECTION_FLAGS is rejected below, so both masks are always zero.
+  const uint64_t withFlags = 0;
+  const uint64_t withoutFlags = 0;
+
+  if (tok != "KEEP") {
+    if (tok == "INPUT_SECTION_FLAGS") {
+      setError("INPUT_SECTION_FLAGS not supported");
+      tok = next();
+    }
+    return readInputSectionRules(tok, withFlags, withoutFlags);
+  }
+
+  expect("(");
+  if (consume("INPUT_SECTION_FLAGS"))
+    setError("INPUT_SECTION_FLAGS not supported");
+  InputSectionDescription *cmd =
+      readInputSectionRules(next(), withFlags, withoutFlags);
+  expect(")");
+  keptSections.push_back(cmd);
+  return cmd;
+}
+
+// Parses the "(CONSTRUCTORS)" that follows a SORT keyword. Nothing is
+// recorded; the tokens are only checked and consumed.
+void ScriptParser::readSort() {
+  for (StringRef tok : {"(", "CONSTRUCTORS", ")"})
+    expect(tok);
+}
+
+// Parses "(<expr>, <message>)" after ASSERT. The returned expression reports
+// <message> as an error when <expr> evaluates to zero, and yields `dot`.
+Expr ScriptParser::readAssert() {
+  expect("(");
+  Expr cond = readExpr();
+  expect(",");
+  StringRef msg = unquote(next());
+  expect(")");
+
+  return [=] {
+    if (cond().getValue() == 0)
+      error(msg);
+    return dot;
+  };
+}
+
+/*
+#define ECase(X) \
+ { #X, X }
+constexpr std::pair<const char *, unsigned> typeMap[] = {
+ ECase(SHT_PROGBITS), ECase(SHT_NOTE), ECase(SHT_NOBITS),
+ ECase(SHT_INIT_ARRAY), ECase(SHT_FINI_ARRAY), ECase(SHT_PREINIT_ARRAY),
+};
+#undef ECase
+*/
+// Tries to read the special directive for an output section definition which
+// can be one of following: "(NOLOAD)", "(COPY)", "(INFO)", "(OVERLAY)", and
+// "(TYPE=<value>)".
+// Tok1 and Tok2 are next 2 tokens peeked. See comment for
+// readSectionAddressType below.
+//
+// Returns false, consuming nothing, when Tok1/Tok2 do not start one of these
+// directives. Returns true otherwise, but every recognized directive is
+// currently reported as an error rather than applied; OSEC is not used by the
+// live code.
+bool ScriptParser::readSectionDirective(SectionBase *osec, StringRef tok1, StringRef tok2) {
+ if (tok1 != "(")
+ return false;
+ if (tok2 != "NOLOAD" && tok2 != "COPY" && tok2 != "INFO" &&
+ tok2 != "OVERLAY" && tok2 != "TYPE")
+ return false;
+
+ expect("(");
+ // All directives are rejected for now.
+ // NOTE(review): presumably setError() makes errorCount() non-zero, in which
+ // case the consume()/skip()/expect() calls below no longer advance the
+ // lexer -- confirm against lld's error handler.
+ setError("section directive " + tok2 + " currently not supported");
+ if (consume("TYPE"))
+ { expect("="); readExpr(); }
+ else
+ skip();
+
+ // Reference implementation from the ELF linker, kept disabled.
+ // cmd = osec->outputSection applies below
+/* if (consume("NOLOAD")) {
+ cmd->type = SHT_NOBITS;
+ cmd->typeIsSet = true;
+ } else if (consume("TYPE")) {
+ expect("=");
+ StringRef value = peek();
+ auto it = llvm::find_if(typeMap, [=](auto e) { return e.first == value; });
+ if (it != std::end(typeMap)) {
+ // The value is a recognized literal SHT_*.
+ cmd->type = it->second;
+ skip();
+ } else if (value.starts_with("SHT_")) {
+ setError("unknown section type " + value);
+ } else {
+ // Otherwise, read an expression.
+ cmd->type = readExpr()().getValue();
+ }
+ cmd->typeIsSet = true;
+ } else {
+ skip(); // This is "COPY", "INFO" or "OVERLAY".
+ cmd->nonAlloc = true;
+ }
+*/
+ expect(")");
+ return true;
+}
+
+// Reads an address expression and/or the special directive of an output
+// section definition. The directive is one of "(NOLOAD)", "(COPY)", "(INFO)"
+// or "(OVERLAY)".
+//
+// An output section name can be followed by an address expression and/or a
+// directive. This grammar is not LL(1) because "(" can start either an
+// expression or a directive, hence the two-token peek.
+//
+// https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
+// https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
+void ScriptParser::readSectionAddressType(SectionBase *osec) {
+  // Temporarily lex in expression mode to support TYPE=<value> without
+  // spaces.
+  const bool savedInExpr = inExpr;
+  inExpr = true;
+  const bool isDirective = readSectionDirective(osec, peek(), peek2());
+  inExpr = savedInExpr;
+  if (isDirective)
+    return;
+
+  // The address expression is evaluated right away, at parse time.
+  osec->address = readExpr()().getValue();
+  // NOTE(review): this reports an error for every section that is given an
+  // explicit address -- confirm that rejecting them is intended.
+  setError("setting address for " + osec->name + " to " + Twine(osec->address));
+  if (peek() != "(")
+    return;
+  if (!readSectionDirective(osec, "(", peek2()))
+    setError("unknown section directive: " + peek2());
+}
+
+// Wraps E in an expression that yields E's value as an alignment: values
+// below 1 are raised to 1, and a value that is not a power of two is reported
+// as an error at LOC and replaced by 1.
+static Expr checkAlignment(Expr e, std::string &loc) {
+  return [=] {
+    const uint64_t requested = e().getValue();
+    const uint64_t alignment = std::max<uint64_t>(1, requested);
+    if (isPowerOf2_64(alignment))
+      return alignment;
+    error(loc + ": alignment must be power of 2");
+    return (uint64_t)1; // Return a dummy value.
+  };
+}
+
+// Parses one output section description inside SECTIONS, starting right after
+// the section name OUTSEC:
+//
+//   <name> [<address> | "(" <directive> ")"] ":" ["AT" "(" <expr> ")"]
+//       "{" <commands> "}" [","]
+//
+// Commands inside the braces may be symbol assignments, byte commands, input
+// section descriptions or bare file names; they are appended to the section's
+// command list. The GNU ld features ALIGN, SUBALIGN, ONLY_IF_RO/ONLY_IF_RW,
+// FILL, INCLUDE, "> region", "AT > region" and "=fill" are recognized only so
+// that they can be rejected through setError(), which attaches the script
+// location to the message.
+//
+// Returns the newly created output section description.
+OutputDesc *ScriptParser::readOutputSectionDescription(StringRef outSec) {
+  OutputDesc *cmd = createOutputSection(outSec, getCurrentLocation());
+  SectionBase *osec = &cmd->osec;
+
+  if (peek() != ":")
+    readSectionAddressType(osec);
+  expect(":");
+
+  if (consume("AT")) {
+    // No separate load address is kept (the ELF linker stores this in
+    // lmaExpr); AT(<expr>) is evaluated now and becomes the section address.
+    cmd->osec.address = readParenExpr()().getValue();
+  }
+  if (consume("ALIGN"))
+    setError("setting ALIGN on a section unsupported, align the dot instead");
+  if (consume("SUBALIGN"))
+    setError("SUBALIGN unsupported");
+
+  // Constraints are parsed only to be rejected.
+  if (consume("ONLY_IF_RO"))
+    setError("constraints like ONLY_IF_RO unsupported");
+  if (consume("ONLY_IF_RW"))
+    setError("constraints like ONLY_IF_RW unsupported");
+  expect("{");
+
+  while (!errorCount() && !consume("}")) {
+    StringRef tok = next();
+    if (tok == ";") {
+      // Empty commands are allowed. Do nothing here.
+    } else if (SymbolAssignment *assign = readAssignment(tok)) {
+      osec->commands.push_back(assign);
+    } else if (ByteCommand *data = readByteCommand(tok)) {
+      osec->commands.push_back(data);
+    } else if (tok == "CONSTRUCTORS") {
+      // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
+      // by name. This is for very old file formats such as ECOFF/XCOFF, so it
+      // is ignored here.
+    } else if (tok == "FILL") {
+      // FILL is not implemented. Prefer the more specific diagnostic when the
+      // opening parenthesis is missing; setError() keeps only the first one.
+      // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
+      if (peek() != "(")
+        setError("( expected, but got " + peek());
+      setError("FILL unsupported");
+    } else if (tok == "SORT") {
+      readSort();
+    } else if (tok == "INCLUDE") {
+      setError("INCLUDE not supported");
+    } else if (tok == "(" || tok == ")") {
+      setError("expected filename pattern");
+    } else if (peek() == "(") {
+      osec->commands.push_back(readInputSectionDescription(tok));
+    } else {
+      // We have a file name and no input sections description. It is not a
+      // commonly used syntax, but still acceptable. In that case, all sections
+      // from the file will be included.
+      // FIXME: GNU ld permits INPUT_SECTION_FLAGS to be used here. We do not
+      // handle this case here as it will already have been matched by the
+      // case above.
+      auto *isd = make<InputSectionDescription>(tok);
+      isd->sectionPatterns.push_back({{}, StringMatcher("*")});
+      osec->commands.push_back(isd);
+    }
+  }
+
+  // Memory regions are not implemented.
+  if (consume(">"))
+    setError("using > not supported");
+
+  if (consume("AT")) {
+    setError("using AT > not supported");
+    expect(">");
+  }
+
+  // A trailing "=<fillexp>" is not implemented either. The "=" may be glued to
+  // the fill value, so lex it in expression mode to split it off.
+  if (peek().starts_with("=")) {
+    inExpr = true;
+    consume("=");
+    setError("filler unsupported");
+    inExpr = false;
+  }
+
+  // Consume optional comma following output section command.
+  consume(",");
+
+  return cmd;
+}
+
+// Parses the `=<fillexp>` operand of an output section and encodes the result
+// as four big-endian bytes.
+// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
+//
+// Only a primary expression is read, and it is evaluated on the spot, so
+// symbols are not supported and anything using operators must be wrapped in
+// round brackets. That resolves the ambiguity in scripts such as
+//   SECTIONS { .foo : { ... } =120+3 /DISCARD/ : { ... } }
+// A hex string is handled as a 32-bit big-endian number, as ld.gold does;
+// ld.bfd would treat it as a blob of arbitrary size.
+std::array<uint8_t, 4> ScriptParser::readFill() {
+  const uint64_t fill = readPrimary()().val;
+  if (fill > UINT32_MAX)
+    setError("filler expression result does not fit 32-bit: 0x" +
+             Twine::utohexstr(fill));
+
+  std::array<uint8_t, 4> bytes;
+  write32be(bytes.data(), static_cast<uint32_t>(fill));
+  return bytes;
+}
+
+SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) {
+  // Handles the "(sym = expr)" tail of PROVIDE, HIDDEN and PROVIDE_HIDDEN.
+  expect("(");
+  StringRef symName = next();
+  if (peek() == "=") {
+    SymbolAssignment *assign = readSymbolAssignment(symName);
+    assign->provide = provide;
+    assign->hidden = hidden;
+    expect(")");
+    return assign;
+  }
+  setError("= expected, but got " + next());
+  while (!atEOF() && next() != ")") {} // Skip the rest of the bad command.
+  return nullptr;
+}
+
+SymbolAssignment *ScriptParser::readAssignment(StringRef tok) {
+ // An ASSERT expression returns dot, so it is stored as ". = ASSERT(...)".
+ if (tok == "ASSERT")
+ return make<SymbolAssignment>(".", readAssert(), getCurrentLocation());
+
+ size_t oldPos = pos; // Lexer position before parsing; used for commandString.
+ SymbolAssignment *cmd = nullptr;
+ const StringRef op = peek();
+ if (op.starts_with("=")) {
+ // Support = followed by an expression without whitespace.
+ SaveAndRestore saved(inExpr, true);
+ cmd = readSymbolAssignment(tok);
+ } else if ((op.size() == 2 && op[1] == '=' && strchr("*/+-&|", op[0])) ||
+ op == "<<=" || op == ">>=") { // Compound assignment operator.
+ cmd = readSymbolAssignment(tok);
+ } else if (tok == "PROVIDE") {
+ SaveAndRestore saved(inExpr, true);
+ cmd = readProvideHidden(true, false);
+ } else if (tok == "HIDDEN") {
+ SaveAndRestore saved(inExpr, true);
+ cmd = readProvideHidden(false, true);
+ } else if (tok == "PROVIDE_HIDDEN") {
+ SaveAndRestore saved(inExpr, true);
+ cmd = readProvideHidden(true, true);
+ }
+
+ if (cmd) { // Null when tok starts no assignment or PROVIDE parsing failed.
+ cmd->commandString =
+ tok.str() + " " +
+ llvm::join(tokens.begin() + oldPos, tokens.begin() + pos, " ");
+ expect(";");
+ }
+ return cmd;
+}
+
+SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) {
+  // Parses "<op> <expr>" after `name`. Note: GNU ld does not support %= or ^=.
+  name = unquote(name);
+  StringRef op = next();
+  assert(op == "=" || op == "*=" || op == "/=" || op == "+=" || op == "-=" ||
+         op == "&=" || op == "|=" || op == "<<=" || op == ">>=");
+  Expr e = readExpr();
+  if (op != "=") { // Compound form: evaluate `name <op> expr` lazily.
+    std::string loc = getCurrentLocation();
+    e = [=, c = op[0]]() -> ExprValue {
+      ExprValue lhs = getSymbolValue(name, loc);
+      switch (c) {
+      case '*':
+        return lhs.getValue() * e().getValue();
+      case '/':
+        if (uint64_t rv = e().getValue())
+          return lhs.getValue() / rv;
+        error(loc + ": division by zero");
+        return 0;
+      case '+':
+        return add(lhs, e());
+      case '-':
+        return sub(lhs, e());
+      case '<': // Shift counts are masked: shifting 64 bits by >= 64 is UB.
+        return lhs.getValue() << (e().getValue() % 64);
+      case '>':
+        return lhs.getValue() >> (e().getValue() % 64);
+      case '&':
+        return lhs.getValue() & e().getValue();
+      case '|':
+        return lhs.getValue() | e().getValue();
+      default:
+        llvm_unreachable("");
+      }
+    };
+  }
+  return make<SymbolAssignment>(name, e, getCurrentLocation());
+}
+
+// Entry point of the operator-precedence parser for linker script
+// expressions.
+Expr ScriptParser::readExpr() {
+  // The lexer is context-aware and applies different tokenization rules
+  // inside an expression, so the in-expression bit is raised while this
+  // expression is parsed. SaveAndRestore puts the previous value back when
+  // the function returns.
+  SaveAndRestore exprMode(inExpr, true);
+  Expr parsed = readExpr1(readPrimary(), 0);
+  return parsed;
+}
+
+Expr ScriptParser::combine(StringRef op, Expr l, Expr r) {
+  // Builds a lazily evaluated expression applying binary operator `op` to
+  // `l` and `r`. Nothing is evaluated until the returned Expr is called.
+  if (op == "+")
+    return [=] { return add(l(), r()); };
+  if (op == "-")
+    return [=] { return sub(l(), r()); };
+  if (op == "*")
+    return [=] { return l().getValue() * r().getValue(); };
+  if (op == "/")
+    return [=, loc = getCurrentLocation()]() -> uint64_t {
+      if (uint64_t rv = r().getValue())
+        return l().getValue() / rv;
+      error(loc + ": division by zero");
+      return 0;
+    };
+  if (op == "%")
+    return [=, loc = getCurrentLocation()]() -> uint64_t {
+      if (uint64_t rv = r().getValue())
+        return l().getValue() % rv;
+      error(loc + ": modulo by zero");
+      return 0;
+    };
+  // Shift counts are reduced modulo 64: shifting a 64-bit value by 64 or
+  // more is undefined behavior in C++.
+  if (op == "<<")
+    return [=] { return l().getValue() << (r().getValue() % 64); };
+  if (op == ">>")
+    return [=] { return l().getValue() >> (r().getValue() % 64); };
+  if (op == "<")
+    return [=] { return l().getValue() < r().getValue(); };
+  if (op == ">")
+    return [=] { return l().getValue() > r().getValue(); };
+  if (op == ">=")
+    return [=] { return l().getValue() >= r().getValue(); };
+  if (op == "<=")
+    return [=] { return l().getValue() <= r().getValue(); };
+  if (op == "==")
+    return [=] { return l().getValue() == r().getValue(); };
+  if (op == "!=")
+    return [=] { return l().getValue() != r().getValue(); };
+  if (op == "||")
+    return [=] { return l().getValue() || r().getValue(); };
+  if (op == "&&")
+    return [=] { return l().getValue() && r().getValue(); };
+  if (op == "&")
+    return [=] { return bitAnd(l(), r()); };
+  if (op == "|")
+    return [=] { return bitOr(l(), r()); };
+  llvm_unreachable("invalid operator");
+}
+
+// This is a part of the operator-precedence parser. `lhs` is the operand read
+// so far; the remaining token stream is assumed to start with an operator.
+Expr ScriptParser::readExpr1(Expr lhs, int minPrec) {
+ while (!atEOF() && !errorCount()) {
+ // Read an operator and an expression.
+ StringRef op1 = peek();
+ if (precedence(op1) < minPrec)
+ break; // Binds weaker than minPrec: leave the operator to the caller.
+ if (consume("?"))
+ return readTernary(lhs);
+ skip(); // Step past op1, which was only peeked above.
+ Expr rhs = readPrimary();
+
+ // Evaluate the remaining part of the expression first if the
+ // next operator has greater precedence than the previous one.
+ // For example, if we have read "+" and "3", and if the next
+ // operator is "*", then we'll evaluate 3 * ... part first.
+ while (!atEOF()) {
+ StringRef op2 = peek();
+ if (precedence(op2) <= precedence(op1))
+ break;
+ rhs = readExpr1(rhs, precedence(op2));
+ }
+
+ lhs = combine(op1, lhs, rhs);
+ }
+ return lhs;
+}
+
+Expr ScriptParser::getPageSize() {
+  return [] { return 0x10000; }; // Wasm page size is 64 KiB (65536 bytes).
+}
+
+Expr ScriptParser::readConstant() {
+  // CONSTANT(name): both page-size constants resolve to getPageSize().
+  const StringRef constName = readParenLiteral();
+  if (constName == "COMMONPAGESIZE" || constName == "MAXPAGESIZE")
+    return getPageSize();
+  setError("unknown constant: " + constName);
+  Expr zero = [] { return 0; };
+  return zero;
+}
+
+// Converts a numeric token to its value, or returns std::nullopt if the
+// token is not a number. Hexadecimal is written with a "0x" prefix or an "H"
+// suffix; decimal may carry a "K" (Ki) or "M" (Mi) multiplier suffix. All
+// prefixes and suffixes are matched case-insensitively.
+static std::optional<uint64_t> parseInt(StringRef tok) {
+  // Parses `digits` in base `radix` and scales the result by `scale`.
+  // Yields std::nullopt when `digits` is not a valid number in that base.
+  auto convert = [](StringRef digits, unsigned radix,
+                    uint64_t scale) -> std::optional<uint64_t> {
+    uint64_t parsed;
+    if (to_integer(digits, parsed, radix))
+      return parsed * scale;
+    return std::nullopt;
+  };
+
+  // Hexadecimal. The prefix form is tested first, so a "0x..." token is
+  // decided here and never reaches the suffix checks below, whatever its
+  // last character is.
+  if (tok.starts_with_insensitive("0x"))
+    return convert(tok.substr(2), 16, 1);
+  if (tok.ends_with_insensitive("H"))
+    return convert(tok.drop_back(), 16, 1);
+
+  // Decimal with a binary-multiple suffix.
+  if (tok.ends_with_insensitive("K"))
+    return convert(tok.drop_back(), 10, 1024);
+  if (tok.ends_with_insensitive("M"))
+    return convert(tok.drop_back(), 10, 1024 * 1024);
+
+  // Plain decimal.
+  return convert(tok, 10, 1);
+}
+
+ByteCommand *ScriptParser::readByteCommand(StringRef tok) {
+  // Width in bytes of the datum each data command emits; -1 marks a token
+  // that is not a data command.
+  const int width = tok == "BYTE"    ? 1
+                    : tok == "SHORT" ? 2
+                    : tok == "LONG"  ? 4
+                    : tok == "QUAD"  ? 8
+                                     : -1;
+  if (width == -1)
+    return nullptr;
+
+  const size_t firstTok = pos;
+  Expr value = readParenExpr();
+  std::string text = tok.str() + " ";
+  text += llvm::join(tokens.begin() + firstTok, tokens.begin() + pos, " ");
+  return make<ByteCommand>(value, width, text);
+}
+
+StringRef ScriptParser::readParenLiteral() {
+  expect("(");
+  StringRef literal = [this] {
+    SaveAndRestore plainMode(inExpr, false); // Lex as a non-expression token.
+    return next();
+  }();
+  expect(")");
+  return literal;
+}
+
+static void checkIfExists(const SectionBase &osec, StringRef location) {
+ if (osec.location.empty()) // No script location was recorded for this section.
+ error(location + ": undefined section " + osec.name);
+}
+
+static bool isValidSymbolName(StringRef s) {
+  if (s.empty() || isDigit(s.front())) // Reject empty names and leading digits.
+    return false;
+  return llvm::all_of(
+      s, [](char c) { return isAlnum(c) || c == '$' || c == '.' || c == '_'; });
+}
+
+Expr ScriptParser::readPrimary() {
+  // Parses one primary expression: a parenthesized expression, a unary
+  // operator applied to a primary, a built-in function, ".", a number or a
+  // symbol. Every path returns a callable Expr, including the error paths.
+  if (peek() == "(")
+    return readParenExpr();
+
+  if (consume("~")) {
+    Expr e = readPrimary();
+    return [=] { return ~e().getValue(); };
+  }
+  if (consume("!")) {
+    Expr e = readPrimary();
+    return [=] { return !e().getValue(); };
+  }
+  if (consume("-")) {
+    Expr e = readPrimary();
+    return [=] { return -e().getValue(); };
+  }
+
+  StringRef tok = next();
+  std::string location = getCurrentLocation();
+
+  // Built-in functions are parsed here.
+  // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
+  if (tok == "ABSOLUTE") {
+    Expr inner = readParenExpr();
+    return [=] {
+      ExprValue i = inner();
+      i.forceAbsolute = true;
+      return i;
+    };
+  }
+  if (tok == "ADDR") {
+    StringRef name = readParenLiteral();
+    SectionBase *osec = &getOrCreateOutputSection(name)->osec;
+    //osec->usedInExpression = true;
+    return [=]() -> ExprValue {
+      checkIfExists(*osec, location);
+      return {osec, false, 0, location};
+    };
+  }
+  if (tok == "ALIGN") {
+    expect("(");
+    Expr e = readExpr();
+    if (consume(")")) {
+      e = checkAlignment(e, location);
+      return [=] { return alignToPowerOf2(dot, e().getValue()); };
+    }
+    expect(",");
+    Expr e2 = checkAlignment(readExpr(), location);
+    expect(")");
+    return [=] {
+      ExprValue v = e();
+      v.alignment = e2().getValue();
+      return v;
+    };
+  }
+  if (tok == "ALIGNOF") {
+    setError("ALIGNOF unsupported");
+    StringRef name = readParenLiteral();
+    SectionBase *osec = &getOrCreateOutputSection(name)->osec;
+    return [=] {
+      checkIfExists(*osec, location);
+      return 0;//osec->addralign;
+    };
+  }
+  if (tok == "ASSERT")
+    return readAssert();
+  if (tok == "CONSTANT")
+    return readConstant();
+  if (tok == "DATA_SEGMENT_ALIGN") {
+    expect("(");
+    Expr e = readExpr();
+    expect(",");
+    readExpr();
+    expect(")");
+    seenDataAlign = true;
+    return [=] {
+      uint64_t align = std::max(uint64_t(1), e().getValue());
+      return (dot + align - 1) & -align;
+    };
+  }
+  if (tok == "DATA_SEGMENT_END") {
+    expect("(");
+    expect(".");
+    expect(")");
+    return [=] { return dot; }; // = added
+  }
+  if (tok == "DATA_SEGMENT_RELRO_END") {
+    setError("unsupported DATA_SEGMENT_RELRO_END");
+    // GNU linkers implements more complicated logic to handle
+    // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
+    // just align to the next page boundary for simplicity.
+    expect("(");
+    readExpr();
+    expect(",");
+    readExpr();
+    expect(")");
+    seenRelroEnd = true;
+    Expr e = getPageSize();
+    return [=] { return alignToPowerOf2(dot, e().getValue()); };
+  }
+  if (tok == "DEFINED") {
+    StringRef name = unquote(readParenLiteral());
+    return [=] {
+      Symbol *b = symtab->find(name);
+      return (b && b->isDefined()) ? 1 : 0;
+    };
+  }
+  if (tok == "LENGTH") {
+    setError("LENGTH command not supported (no memory region support)");
+    return [] { return 0; }; // A bare `return 0` would yield an empty Expr.
+  }
+  if (tok == "LOADADDR") {
+    setError("LOADADDR unsuppported");
+    // ELF implementation, disabled here:
+    //   StringRef name = readParenLiteral();
+    //   OutputSection *osec = &getOrCreateOutputSection(name)->osec;
+    //   osec->usedInExpression = true;
+    //   return [=] {
+    //     checkIfExists(*osec, location);
+    //     return osec->getLMA();
+    //   };
+    return [] { return 0; }; // Do not fall through to the symbol lookup.
+  }
+  if (tok == "LOG2CEIL") {
+    expect("(");
+    Expr a = readExpr();
+    expect(")");
+    return [=] {
+      // LOG2CEIL(0) is defined to be 0.
+      return llvm::Log2_64_Ceil(std::max(a().getValue(), UINT64_C(1)));
+    };
+  }
+  if (tok == "MAX" || tok == "MIN") {
+    expect("(");
+    Expr a = readExpr();
+    expect(",");
+    Expr b = readExpr();
+    expect(")");
+    if (tok == "MIN")
+      return [=] { return std::min(a().getValue(), b().getValue()); };
+    return [=] { return std::max(a().getValue(), b().getValue()); };
+  }
+  if (tok == "ORIGIN") {
+    setError("ORIGIN command not supported (no memory region support)");
+    return [] { return 0; }; // A bare `return 0` would yield an empty Expr.
+  }
+  if (tok == "SEGMENT_START") {
+    expect("(");
+    skip();
+    expect(",");
+    Expr e = readExpr();
+    expect(")");
+    return [=] { return e(); };
+  }
+  if (tok == "SIZEOF") {
+    setError("SIZEOF unsupported");
+    //StringRef name = readParenLiteral();
+    //SectionBase *cmd = &getOrCreateOutputSection(name)->osec;
+    // An empty section gets no output section, yet SIZEOF(.foo) must work.
+    return [=] { return 0;/*cmd->size;*/ };
+  }
+  if (tok == "SIZEOF_HEADERS")
+    return [=] { return /*elf::getHeaderSize();*/ 0; };
+
+  // Tok is the dot.
+  if (tok == ".")
+    return [=] { return getSymbolValue(tok, location); };
+
+  // Tok is a literal number.
+  if (std::optional<uint64_t> val = parseInt(tok))
+    return [=] { return *val; };
+
+  // Tok is a symbol name.
+  if (tok.starts_with("\""))
+    tok = unquote(tok);
+  else if (!isValidSymbolName(tok))
+    setError("malformed number: " + tok);
+  //referencedSymbols.push_back(tok);
+  return [=] { return getSymbolValue(tok, location); };
+}
+
+Expr ScriptParser::readTernary(Expr cond) {
+  Expr ifTrue = readExpr(); // The caller has already consumed the "?".
+  expect(":");
+  Expr ifFalse = readExpr();
+  return [=] { return cond().getValue() ? ifTrue() : ifFalse(); };
+}
+
+Expr ScriptParser::readParenExpr() {
+  expect("("); // Parses "( <expr> )" and yields the inner expression.
+  Expr inner = readExpr();
+  expect(")");
+  return inner;
+}
+
+OutputDesc *ScriptParser::createOutputSection(StringRef name,
+                                              StringRef location) {
+  // A description that was only forward-referenced so far (it has no
+  // location yet) is reused; otherwise a new one is made. The map keeps
+  // pointing at whichever description was registered first for `name`.
+  OutputDesc *&slot = nameToOutputSection[CachedHashStringRef(name)];
+  const bool forwardRef = slot && slot->osec.location.empty();
+  OutputDesc *desc = forwardRef ? slot : make<OutputDesc>(name);
+  if (!slot)
+    slot = desc;
+
+  // Record where the script describes the section.
+  desc->osec.location = std::string(location);
+  return desc;
+}
+
+OutputDesc *ScriptParser::getOrCreateOutputSection(StringRef name) {
+  OutputDesc *&slot = nameToOutputSection[CachedHashStringRef(name)];
+  if (slot == nullptr) // First use of this name: insert a fresh description.
+    slot = make<OutputDesc>(name);
+  return slot;
+}
+
+ExprValue ScriptParser::getSymbolValue(StringRef name, const Twine &loc) {
+ if (name == ".") {
+ //if (state)
+ // return {state->outSec, false, dot - state->outSec->addr, loc};
+ return {nullptr, false, dot, loc}; // Dot is reported without a section.
+ //error(loc + ": unable to get location counter value");
+ //return 0;
+ }
+
+ if (Symbol *sym = symtab->find(name)) {
+ if (auto *ds = dyn_cast<DefinedData>(sym)) {
+ // A bit of a hack to support aliases outside of SECTIONS.
+ // This only works if the evaluation happens after placement into the output.
+ uint64_t offset = ds->segment && ds->segment->outputSeg ? ds->segment->outputSeg->startVA + ds->segment->outputSegmentOffset : ds->value;
+ ExprValue v{nullptr, false, offset, loc};
+ // Disabled: retain the original st_type, so that the alias will get the
+ // same behavior in relocation processing. Any operation will reset
+ // st_type to STT_NOTYPE.
+ // v.type = ds->type;
+ return v;
+ }
+ //if (isa<SharedSymbol>(sym))
+ // if (!errorOnMissingSection)
+ // return {nullptr, false, 0, loc};
+ }
+
+ error(loc + ": symbol not found: " + name); // Also hit by non-DefinedData symbols.
+ return 0;
+}
diff --git a/lld/wasm/ScriptParser.h b/lld/wasm/ScriptParser.h
new file mode 100644
index 000000000..c0a845e65
--- /dev/null
+++ b/lld/wasm/ScriptParser.h
@@ -0,0 +1,341 @@
+//===- ScriptParser.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_WASM_SCRIPT_PARSER_H
+#define LLD_WASM_SCRIPT_PARSER_H
+
+#include "ScriptParser.h"
+#include "OutputSections.h"
+#include "ScriptLexer.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "lld/Common/CommonLinkerContext.h"
+#include "lld/Common/Strings.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include <cstdint>
+#include <limits>
+#include <vector>
+
+namespace lld::wasm {
+
+// This enum is used to implement the linker script SECTIONS command.
+// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
+enum SectionsCommandKind {
+ AssignmentKind, // . = expr or <sym> = expr
+ OutputSectionKind, // Tag used by OutputDesc
+ InputSectionKind, // Tag used by InputSectionDescription
+ ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
+};
+
+struct SectionCommand { // Common base of the command types in this header.
+ SectionCommand(int k) : kind(k) {}
+ int kind; // A SectionsCommandKind value; tested by the classof() functions.
+};
+
+// Script-side record of one output section: the wasm OutputSection backing it,
+// its name, the commands listed in its description, and the script location
+// of that description (empty while it has only been referenced).
+class SectionBase {
+public:
+/*
+  enum Kind { Regular, Synthetic, EHFrame, Merge, Output };
+  Kind kind() const { return (Kind)sectionKind; }
+  uint8_t sectionKind : 3;
+  // The next two bit fields are only used by InputSectionBase, but we
+  // put them here so the struct packs better.
+
+  uint8_t bss : 1;
+
+  // Set for sections that should not be folded by ICF.
+  uint8_t keepUnique : 1;
+
+  uint8_t partition = 1;
+*/
+//  uint32_t type;
+  //union {
+  OutputSection *outputSection;
+  //InputChunk *inputChunk;
+  //};
+
+  StringRef name;
+
+  uint64_t address = 0; // Initialized here: the constructor does not set it.
+  //uint32_t addralign;
+  bool live = true; // Live by default, like the disabled `partition = 1`.
+
+  SmallVector<SectionCommand*, 0> commands;
+  std::string location;
+
+/*
+  // The 1-indexed partition that this section is assigned to by the garbage
+  // collector, or 0 if this section is dead. Normally there is only one
+  // partition, so this will either be 0 or 1.
+  elf::Partition &getPartition() const;
+
+  // These corresponds to the fields in Elf_Shdr.
+  uint64_t flags;
+  uint32_t addralign;
+  uint32_t entsize;
+  uint32_t link;
+  uint32_t info;
+*/
+
+  OutputSection *getOutputSection() { return outputSection; }
+  const OutputSection *getOutputSection() const {
+    return const_cast<SectionBase *>(this)->getOutputSection();
+  }
+
+  // Translate an offset in the input section to an offset in the output
+  // section. In this port the offset is returned unchanged.
+  uint64_t getOffset(uint64_t offset) const { return offset; }
+
+  uint64_t getVA(uint64_t offset = 0) const { return offset; }
+
+  bool isLive() const { return live; } //return partition != 0; }
+  void markLive() { live = 1; }
+  void markDead() { live = 0; }
+
+  SectionBase(OutputSection *osec) : outputSection(osec), name(osec->name) {}
+
+/*
+protected:
+  constexpr SectionBase(/*Kind sectionKind,/ StringRef name, uint64_t flags,
+                        uint32_t entsize, uint32_t addralign, uint32_t type,
+                        uint32_t info, uint32_t link)
+      : name (name) {}
+//      : sectionKind(sectionKind), bss(false), keepUnique(false), type(type),
+//        name(name), flags(flags), addralign(addralign), entsize(entsize),
+//        link(link), info(info) {}
+*/
+};
+
+// This represents an r-value in the linker script.
+struct ExprValue {
+ ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
+ const Twine &loc)
+ : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
+
+ ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {} // Implicit: plain integers convert to absolute values.
+
+ bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
+ uint64_t getValue() const;
+ uint64_t getSecAddr() const;
+ uint64_t getSectionOffset() const;
+
+ // If a value is relative to a section, it has a non-null Sec.
+ SectionBase *sec;
+
+ uint64_t val;
+ uint64_t alignment = 1; // Overwritten by the two-argument ALIGN(expr, align).
+
+ // True if this expression is enclosed in ABSOLUTE().
+ // This flag affects the return value of getValue().
+ bool forceAbsolute;
+
+ // Original source location. Used for error messages.
+ std::string loc;
+};
+
+// This represents an expression in the linker script.
+// ScriptParser::readExpr reads an expression and returns an Expr.
+// Later, we evaluate the expression by calling the function.
+using Expr = std::function<ExprValue()>;
+
+// This represents ". = <expr>" or "<symbol> = <expr>".
+struct SymbolAssignment : SectionCommand {
+  SymbolAssignment(StringRef name, Expr e, std::string loc)
+      : SectionCommand(AssignmentKind), name(name), expression(e),
+        location(loc) {}
+
+  static bool classof(const SectionCommand *c) {
+    return c->kind == AssignmentKind;
+  }
+
+  // The LHS of an expression. Name is either a symbol name or ".".
+  StringRef name;
+  DefinedData *sym = nullptr;
+
+  // The RHS of an expression.
+  Expr expression;
+
+  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
+  bool provide = false;
+  bool hidden = false;
+
+  // Holds file name and line number for error reporting.
+  std::string location;
+
+  // A string representation of this command. We use this for -Map.
+  std::string commandString;
+
+  // Address of this assignment command. Zero until one is assigned.
+  uint64_t addr = 0;
+
+  // Size of this assignment command. This is usually 0, but if
+  // you move '.' this may be greater than 0.
+  uint64_t size = 0;
+};
+
+struct OutputDesc final : SectionCommand { // One output section description.
+ SectionBase osec;
+ explicit OutputDesc(StringRef name)
+ : SectionCommand(OutputSectionKind), osec(make<DataSection>(ArrayRef<OutputSegment *>())) {
+ osec.name = name; // Replaces the name SectionBase copied from the DataSection.
+ }
+
+ static bool classof(const SectionCommand *c) {
+ return c->kind == OutputSectionKind;
+ }
+};
+
+// For --sort-section and linkerscript sorting rules.
+enum class SortSectionPolicy { Default, None, Alignment, Name, Priority };
+
+// This class represents one section match pattern in a SECTIONS command.
+// It can optionally have a negative match pattern for the EXCLUDE_FILE command.
+// Also it may be surrounded with a SORT() command, so it contains sorting rules.
+class SectionPattern {
+
+ // Cache of the most recent input argument and result of excludesFile().
+ mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache;
+
+public:
+ SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
+ : excludedFilePat(pat1), sectionPat(pat2), // Named rvalue refs: these copy.
+ sortOuter(SortSectionPolicy::Default),
+ sortInner(SortSectionPolicy::Default) {}
+
+ bool excludesFile(const InputFile *file) const;
+
+ StringMatcher excludedFilePat; // pat1: pattern of files to exclude.
+ StringMatcher sectionPat; // pat2: pattern matched against section names.
+ SortSectionPolicy sortOuter;
+ SortSectionPolicy sortInner;
+};
+
+class InputSectionDescription : public SectionCommand {
+ // Cache of the most recent input argument and result of matchesFile().
+ mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache;
+
+public:
+ InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
+ uint64_t withoutFlags = 0)
+ : SectionCommand(InputSectionKind), filePat(filePattern),
+ withFlags(withFlags), withoutFlags(withoutFlags) {}
+
+ static bool classof(const SectionCommand *c) {
+ return c->kind == InputSectionKind;
+ }
+
+ bool matchesFile(const InputFile *file) const;
+
+ SingleStringMatcher filePat; // Built from the constructor's filePattern.
+
+ // Input sections that match at least one of the SectionPatterns
+ // will be associated with this InputSectionDescription.
+ SmallVector<SectionPattern, 0> sectionPatterns;
+
+ // Disabled: includes InputSections and MergeInputSections. Used temporarily
+ // during assignment of input sections to output sections.
+ //SmallVector<InputSectionBase *, 0> sectionBases;
+
+ // Used after the finalizeInputSections() pass. MergeInputSections have been
+ // merged into MergeSyntheticSections.
+ SmallVector<InputSection *, 0> sections;
+
+ // Disabled: temporary record of synthetic ThunkSection instances and the pass
+ // that they were created in. This is used to insert newly created
+ // ThunkSections into Sections at the end of a createThunks() pass.
+ //SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;
+
+ // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
+ uint64_t withFlags;
+ uint64_t withoutFlags;
+};
+
+// Represents BYTE(), SHORT(), LONG(), or QUAD().
+struct ByteCommand : SectionCommand {
+  ByteCommand(Expr e, unsigned size, std::string commandString)
+      : SectionCommand(ByteKind), commandString(commandString), expression(e),
+        size(size) {}
+
+  static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
+
+  // Keeps the string representing the command. Used for -Map.
+  std::string commandString;
+
+  Expr expression;
+
+  // Offset of this data command in the output section. Zero until assigned.
+  unsigned offset = 0;
+
+  // Size of this data command.
+  unsigned size;
+};
+
+class ScriptParser final : ScriptLexer {
+public:
+ ScriptParser(MemoryBufferRef mb) : ScriptLexer(mb) { }
+
+ void readLinkerScript();
+
+private:
+ void readOutput();
+ void readSections();
+
+ SymbolAssignment *readSymbolAssignment(StringRef name);
+ ByteCommand *readByteCommand(StringRef tok);
+ std::array<uint8_t, 4> readFill();
+ bool readSectionDirective(SectionBase *osec, StringRef tok1, StringRef tok2);
+ void readSectionAddressType(SectionBase *osec);
+ OutputDesc *readOutputSectionDescription(StringRef outSec);
+ InputSectionDescription *readInputSectionDescription(StringRef tok);
+ StringMatcher readFilePatterns();
+ SmallVector<SectionPattern, 0> readInputSectionsList();
+ InputSectionDescription *readInputSectionRules(StringRef filePattern,
+ uint64_t withFlags,
+ uint64_t withoutFlags);
+ SortSectionPolicy peekSortKind();
+ SortSectionPolicy readSortKind();
+ SymbolAssignment *readProvideHidden(bool provide, bool hidden);
+ SymbolAssignment *readAssignment(StringRef tok);
+ void readSort();
+ Expr readAssert();
+ Expr readConstant();
+ Expr getPageSize();
+
+ Expr combine(StringRef op, Expr l, Expr r);
+ Expr readExpr();
+ Expr readExpr1(Expr lhs, int minPrec);
+ StringRef readParenLiteral();
+ Expr readPrimary();
+ Expr readTernary(Expr cond);
+ Expr readParenExpr();
+
+ bool seenDataAlign = false; // Set when DATA_SEGMENT_ALIGN is parsed.
+ bool seenRelroEnd = false; // Set when DATA_SEGMENT_RELRO_END is parsed.
+
+ // Moved from LinkerScript to here:
+
+ OutputDesc *createOutputSection(StringRef name, StringRef location);
+ OutputDesc *getOrCreateOutputSection(StringRef name);
+ ExprValue getSymbolValue(StringRef name, const Twine &loc);
+
+public:
+ uint64_t dot = 0; // Location counter; read by expressions, advanced by Writer::runScript().
+ //SmallVector<llvm::StringRef, 0> referencedSymbols;
+ SmallVector<SectionCommand *, 0> sectionCommands;
+ bool hasSectionsCommand = false;
+ SmallVector<InputSectionDescription *, 0> keptSections;
+ llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection; // Keyed by output section name.
+};
+
+}
+
+#endif
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index d1a06c9ac..3f718a823 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -14,6 +14,7 @@
#include "OutputSections.h"
#include "OutputSegment.h"
#include "Relocations.h"
+#include "ScriptParser.h"
#include "SymbolTable.h"
#include "SyntheticSections.h"
#include "WriterUtils.h"
@@ -92,6 +93,7 @@ private:
OutputSegment *createOutputSegment(StringRef name);
void combineOutputSegments();
void layoutMemory();
+ void runScript();
void createHeader();
void addSection(OutputSection *sec);
@@ -499,6 +501,231 @@ void Writer::layoutMemory() {
}
}
+void Writer::runScript() {
+ if (ctx.isPic || config->relocatable || config->globalBase) {
+ error("any kind of position independent/dynamic code can't be used with manual memory layout");
+ } else if (config->stackFirst) {
+ error("--stack-first can't be used with manual memory config (place it manually instead)");
+ }
+
+ llvm::SmallVector<InputChunk *, 0> inputSegments;
+ for (ObjFile *file : ctx.objectFiles) {
+ for (InputChunk *segment : file->segments) {
+ if (!segment->live)
+ continue;
+
+ inputSegments.push_back(segment);
+ }
+ }
+
+ // Place segments using linker script. Also assign symbols.
+ uint64_t memoryPtr = 0;
+ {
+ llvm::TimeTraceScope timeScope("Run linker script",
+ config->linkerScript->getBufferIdentifier());
+ ScriptParser parser{*config->linkerScript};
+ parser.readLinkerScript();
+
+ auto handleScriptSymbol = [&] (SymbolAssignment* assign, bool inSec) {
+ StringRef name = assign->name;
+ if (name != ".") {
+ if (!isValidCIdentifier(name))
+ return;
+
+ assign->addr = parser.dot;
+ ExprValue v = assign->expression();
+ uint64_t value = v.isAbsolute() ? v.getValue() : v.getSectionOffset();
+ log("SCRIPT SET " + name + " to " + Twine(value) + ", dot was " + Twine(parser.dot));
+ symtab->addOptionalDataSymbol(saver().save(name), value);
+ LLVM_DEBUG(dbgs() << "setSymbolAssignment: " << name << "\n");
+ } else { //if (assign->sym) {
+ //if (inSec) {
+ // error("Assigning to . inside section is currently not supported");
+ //}
+
+ uint64_t val = assign->expression().getValue();
+ if (val < parser.dot)
+ error(assign->location + ": unable to move location counter backward for: " + name);
+
+ log("SCRIPT DOT " + name + " from " + Twine(parser.dot) + " to " + Twine(val));
+ parser.dot = val;
+ LLVM_DEBUG(dbgs() << "dotSymbolAssignment: " << parser.dot << "\n");
+ }
+ };
+
+ auto nameComparator = [](InputChunk *a, InputChunk *b) {
+ return a->name < b->name;
+ };
+
+ // Output sections need to have unique names.
+ // Example:
+ // osec->name: .rodata
+ // segment->name: .rodata.123
+ // segment->inputSegments: vector of InputChunk:s with names:
+ // .rodata.foo
+ // .rodata.foo (yes, again)
+ // .rodata.bar
+ // .my.custom.name (i.e. does not have to start with e.g. .rodata)
+ size_t osecUid = 0;
+ for (SectionCommand *base : parser.sectionCommands) {
+ if (auto *osd = dyn_cast<OutputDesc>(base)) {
+ SectionBase *osec = &osd->osec;
+
+ for (SectionCommand *cmd : osec->commands) {
+ if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) {
+ handleScriptSymbol(assign, true);
+ } else if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) {
+ // If dot is assigned or read while matching, we need to have new OutputSegments,
+ // so that the startVA can move (and the assignments will work). This means that
+ // there can be several output segments with the same name (a bit unfortunate).
+ OutputSegment *segment = make<OutputSegment>(
+ saver().save(osec->name + "." + Twine(osecUid++)));
+ segment->isBss = osec->name.starts_with(".bss");
+ if (config->sharedMemory)
+ segment->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE;
+
+ for (const SectionPattern &pat : isd->sectionPatterns) {
+ if (!isd->filePat.isTrivialMatchAll() || !pat.excludedFilePat.empty())
+ error("Only trivial wildcard patterns are supported for file (i.e. *), no excludes");
+
+ if (pat.sortInner != SortSectionPolicy::Default &&
+ pat.sortInner != SortSectionPolicy::None)
+ error("Only one level of sorting currently supported in linker scripts");
+
+ if (pat.sortOuter != SortSectionPolicy::Default &&
+ pat.sortOuter != SortSectionPolicy::None &&
+ pat.sortOuter != SortSectionPolicy::Name)
+ error("Only sorting on name is currently supported in linker scripts");
+
+ auto sortStart = segment->inputSegments.end();
+ for (InputChunk *chunk : inputSegments) {
+ // If an input is matched once, never match it again! (This is by spec.)
+ if (chunk->outputSeg) // Set by addInputSegment() below.
+ continue;
+
+ if (!pat.sectionPat.match(chunk->name))
+ //|| !isd->matchesFile(sec->file) || pat.excludesFile(sec->file))
+ continue;
+
+ log("MAPPING " + segment->name + " <--- " + chunk->name);
+ if (osec->name == "/DISCARD/") {
+ // The output section name `/DISCARD/' is special.
+ // Any input section assigned to it is discarded.
+ chunk->discarded = true;
+ } else {
+ segment->addInputSegment(chunk); // Sets chunk->outputSeg.
+ assert(chunk->outputSeg);
+ }
+ }
+ auto sortEnd = segment->inputSegments.end();
+
+ // Sorting happens on each pattern, for example *(.foo SORT(.bar.*) .baz)
+ if (pat.sortOuter == SortSectionPolicy::Name)
+ std::stable_sort(sortStart, sortEnd, nameComparator);
+ }
+
+ if (osec->name != "/DISCARD/" && !segment->inputSegments.empty()) {
+ // The linker script will align dot directly itself. However, we might have to
+ // increase the alignment to what came from the input files, moving the dot too.
+ segment->finalizeInputSegments(); // Bake everything, so that we know the size.
+ log("SCRIPT PLACE " + segment->name + " with size " + Twine(segment->size) +
+ " dot: script " + Twine(parser.dot) +
+ " seg " + Twine(alignTo(parser.dot, 1ULL << segment->alignment)));
+
+ parser.dot = alignTo(parser.dot, 1ULL << segment->alignment);
+ segment->startVA = parser.dot;
+ parser.dot += segment->size;
+
+ log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}", segment->name,
+ segment->startVA, segment->size, segment->alignment));
+
+ segments.push_back(segment);
+ }
+ }
+ }
+ } else if (auto *assign = dyn_cast<SymbolAssignment>(base)) {
+ handleScriptSymbol(assign, false);
+ }
+ }
+
+ // Place any remaining segments that were not discarded.
+ OutputSegment *bonusdata = createOutputSegment(".data.bonus"); // Will call segments.push_back()
+ OutputSegment *bonusbss = createOutputSegment(".bss.bonus"); // Will call segments.push_back()
+ for (InputChunk *chunk : inputSegments) {
+ if (!chunk->outputSeg && !chunk->discarded) {
+ log("BONUS <--- " + chunk->name);
+ (chunk->name.starts_with(".bss") ? bonusbss : bonusdata)->addInputSegment(chunk);
+ }
+ }
+
+ bonusdata->finalizeInputSegments();
+ parser.dot = alignTo(parser.dot, 1ULL << bonusdata->alignment);
+ bonusdata->startVA = parser.dot;
+ parser.dot += bonusdata->size;
+
+ bonusbss->finalizeInputSegments();
+ parser.dot = alignTo(parser.dot, 1ULL << bonusbss->alignment);
+ bonusbss->startVA = parser.dot;
+ parser.dot += bonusbss->size;
+
+ memoryPtr = parser.dot;
+ }
+
+ // This works fine if there is only one bss segment and it comes last.
+ // But we can/will have at least two, so fake the index: only non-bss segments that need passive initialization are numbered; the rest get -1.
+ size_t nonIndex = 0;
+ for (size_t i = 0; i < segments.size(); ++i)
+ if (needsPassiveInitialization(segments[i]) && !segments[i]->isBss)
+ segments[i]->index = nonIndex++;
+ else
+ segments[i]->index = static_cast<uint32_t>(-1);
+
+ // Make space for the memory initialization flag
+ if (config->sharedMemory && hasPassiveInitializedSegments()) {
+ memoryPtr = alignTo(memoryPtr, 4);
+ WasmSym::initMemoryFlag = symtab->addSyntheticDataSymbol(
+ "__wasm_init_memory_flag", WASM_SYMBOL_VISIBILITY_HIDDEN);
+ WasmSym::initMemoryFlag->markLive();
+ WasmSym::initMemoryFlag->setVA(memoryPtr);
+ log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}",
+ "__wasm_init_memory_flag", memoryPtr, 4, 4));
+ memoryPtr += 4;
+ }
+
+ memoryPtr = alignTo(memoryPtr, WasmPageSize);
+ out.memorySec->numMemoryPages = memoryPtr / WasmPageSize;
+ log("mem: total pages = " + Twine(out.memorySec->numMemoryPages));
+
+ uint64_t maxMemorySetting = 1ULL << (config->is64.value_or(false) ? 48 : 32);
+ if (config->initialMemory != 0) {
+ if (config->initialMemory != alignTo(config->initialMemory, WasmPageSize))
+ error("initial memory must be " + Twine(WasmPageSize) + "-byte aligned");
+ if (memoryPtr > config->initialMemory)
+ error("initial memory too small, " + Twine(memoryPtr) + " bytes needed");
+ if (config->initialMemory > maxMemorySetting)
+ error("initial memory too large, cannot be greater than " +
+ Twine(maxMemorySetting));
+ memoryPtr = config->initialMemory;
+ }
+
+ if (config->maxMemory != 0) {
+ if (config->maxMemory != alignTo(config->maxMemory, WasmPageSize))
+ error("maximum memory must be " + Twine(WasmPageSize) + "-byte aligned");
+ if (memoryPtr > config->maxMemory)
+ error("maximum memory too small, " + Twine(memoryPtr) + " bytes needed");
+ if (config->maxMemory > maxMemorySetting)
+ error("maximum memory too large, cannot be greater than " +
+ Twine(maxMemorySetting));
+ }
+
+ // Check max if explicitly supplied or required by shared memory
+ if (config->maxMemory != 0 || config->sharedMemory) {
+ uint64_t max = config->maxMemory ? config->maxMemory : memoryPtr;
+ out.memorySec->maxMemoryPages = max / WasmPageSize;
+ log("mem: max pages = " + Twine(out.memorySec->maxMemoryPages));
+ }
+}
+
void Writer::addSection(OutputSection *sec) {
if (!sec->isNeeded())
return;
@@ -1694,12 +1921,18 @@ void Writer::run() {
WasmSym::definedTableBase32->setVA(config->tableBase);
}
- log("-- createOutputSegments");
- createOutputSegments();
log("-- createSyntheticSections");
createSyntheticSections();
- log("-- layoutMemory");
- layoutMemory();
+
+ if (!config->linkerScript) {
+ log("-- createOutputSegments");
+ createOutputSegments();
+ log("-- layoutMemory");
+ layoutMemory();
+ } else {
+ log("-- runScript");
+ runScript();
+ }
if (!config->relocatable) {
// Create linker synthesized __start_SECNAME/__stop_SECNAME symbols
--
2.25.1