Compare commits: 9f42d161e9...main (36 commits)
README.md (@@ -1,5 +1,84 @@):

The title changes from "Load-time patcher" to "Flicker", "Apache 2.0" becomes "Apache License 2.0", and the rest of the file is new.

# Flicker
Flicker is a universal load-time binary rewriter for native AMD64 Linux applications. It maps the target executable into memory, performs a linear-scan disassembly, and applies patches using a hierarchy of tactics, enabling instrumentation, debugging, and hook injection.

This approach allows Flicker to maintain control over the process lifecycle, enabling it to handle statically linked executables and dynamically linked executables (via interpreter loading), and to intercept system calls (e.g., `readlink`, `clone`).

It offers a middle ground: near-native execution speed with the flexibility of dynamic instrumentation.
## Work In Progress

This project is in active development.

Already supported: statically linked executables, basic dynamically linked executables (via `PT_INTERP` loading), and basic syscall interception.

Full `dlopen` support, JIT handling, signal handling, and a plugin system are pending.
## Build

Flicker uses the Zig build system. Ensure you have Zig 0.15.1 installed.

To build the release binary:

```bash
zig build -Doptimize=ReleaseSafe
```

To run the test suite (which includes various static and dynamic test executables):

```bash
zig build test
```

The compiled binary will be located at `zig-out/bin/flicker`.
## Usage

Flicker acts as a loader wrapper. Pass the target executable and its arguments directly to Flicker.

```bash
./flicker <executable> [args...]

# Example: running 'ls' through Flicker
./zig-out/bin/flicker ls -la
```
## How it Works

For more information, see the [Project Overview](docs/project_overview.md) and the [Use Cases](docs/use_cases.md).
### The Loader

Flicker does not use `LD_PRELOAD`. Instead, it maps the target ELF binary into memory. If the binary is dynamically linked, Flicker parses the `PT_INTERP` header, locates the dynamic linker (usually `ld-linux.so`), and maps that as well. It then rewrites the auxiliary vector (`AT_PHDR`, `AT_ENTRY`, `AT_BASE`) on the stack to trick the C runtime into accepting the manually loaded environment.
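The auxv rewrite itself is conceptually simple: walk the key/value pairs on the stack and overwrite three entries. A minimal sketch, with the `AT_*` values from the System V ABI and every other name here a hypothetical stand-in, not Flicker's actual API:

```zig
// AT_* values from the System V ABI; the entry layout matches the 64-bit
// auxv format used on AMD64.
const AuxvEntry = extern struct { key: u64, value: u64 };

const AT_NULL = 0; // end of the vector
const AT_PHDR = 3; // address of the target's program headers
const AT_BASE = 7; // load base of the interpreter (ld.so)
const AT_ENTRY = 9; // entry point of the target executable

fn rewriteAuxv(auxv: [*]AuxvEntry, phdr: u64, entry: u64, interp_base: u64) void {
    var i: usize = 0;
    while (auxv[i].key != AT_NULL) : (i += 1) {
        switch (auxv[i].key) {
            AT_PHDR => auxv[i].value = phdr,
            AT_ENTRY => auxv[i].value = entry,
            AT_BASE => auxv[i].value = interp_base,
            else => {},
        }
    }
}
```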
### Patching Engine

Before transferring control to the entry point, Flicker scans executable segments for instructions that require instrumentation. It allocates "trampolines": executable memory pages located within ±2 GB of the target instruction.

To overwrite an instruction with a 5-byte jump (`jmp rel32`) without corrupting adjacent code or breaking jump targets, Flicker uses a back-to-front scanning approach and a constraint solver to find valid bytes for "instruction punning."
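In the simple case the patch is just five bytes. A minimal sketch of emitting the `jmp rel32` (illustrative only; the real engine also has to respect prefixes and punning constraints):

```zig
const std = @import("std");

/// Overwrite `code[0..5]` with a jump from `source` to `trampoline`.
/// Both addresses must be within ±2 GB of each other.
fn writeJumpRel32(code: []u8, source: u64, trampoline: u64) void {
    // rel32 is relative to the address *after* the 5-byte jump.
    const disp: i32 = @intCast(@as(i64, @bitCast(trampoline -% (source + 5))));
    code[0] = 0xe9; // jmp rel32 opcode
    std.mem.writeInt(i32, code[1..5], disp, .little);
}
```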
### Syscall Interception

Flicker can replace `syscall` opcodes with jumps to a custom handler. This handler emulates the syscall logic or modifies its arguments.

Special handling detects `clone` syscalls to ensure the child thread (which wakes up with a fresh stack) does not crash when attempting to restore the parent's register state.

Path spoofing: Flicker intercepts `readlink` on `/proc/self/exe` to return the path of the target binary rather than the path of the Flicker loader.
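A minimal sketch of what that spoof can look like, assuming a handler that receives the raw syscall arguments (Flicker's real handler lives in `src/syscalls.zig` and differs in shape):

```zig
const std = @import("std");
const linux = std.os.linux;

// Set up by the loader; mirrors the `target_exec_path` global that the
// src/Patcher.zig diff below introduces. The value here is illustrative.
var target_exec_path: []const u8 = "/usr/bin/ls";

fn handleReadlink(path: [*:0]const u8, buf: [*]u8, buf_len: usize) usize {
    if (std.mem.eql(u8, std.mem.span(path), "/proc/self/exe")) {
        // Report the target binary, not the flicker loader itself.
        const n = @min(target_exec_path.len, buf_len);
        @memcpy(buf[0..n], target_exec_path[0..n]); // readlink does not NUL-terminate
        return n;
    }
    return linux.readlink(path, buf, buf_len); // anything else passes through
}
```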
## License

Apache License 2.0
build.zig (50 changed lines):

```diff
@@ -33,8 +33,10 @@ pub fn build(b: *std.Build) !void {
     const exe = b.addExecutable(.{
         .name = "flicker",
         .root_module = mod,
+        .use_llvm = true,
     });
     exe.pie = true;
+    exe.lto = if (optimize == .Debug) .none else .full;
     b.installArtifact(exe);
 
     const run_step = b.step("run", "Run the app");
@@ -45,8 +47,56 @@ pub fn build(b: *std.Build) !void {
         run_cmd.addArgs(args);
     }
 
+    try compileTestApplications(b, target, optimize, false, false);
+    try compileTestApplications(b, target, optimize, false, true);
+    try compileTestApplications(b, target, optimize, true, true);
+
     const exe_tests = b.addTest(.{ .root_module = mod });
     const run_exe_tests = b.addRunArtifact(exe_tests);
     const test_step = b.step("test", "Run tests");
+    test_step.dependOn(b.getInstallStep());
     test_step.dependOn(&run_exe_tests.step);
 }
+
+pub fn compileTestApplications(
+    b: *std.Build,
+    target: std.Build.ResolvedTarget,
+    optimize: std.builtin.OptimizeMode,
+    comptime link_libc: bool,
+    comptime pie: bool,
+) !void {
+    // Compile test applications
+    const test_path = "src/test/";
+    const test_prefix = prefix: {
+        const p1 = "test_" ++ if (link_libc) "libc_" else "nolibc_";
+        const p2 = p1 ++ if (pie) "pie_" else "nopie_";
+        break :prefix p2;
+    };
+    var test_dir = try std.fs.cwd().openDir(test_path, .{ .iterate = true });
+    defer test_dir.close();
+    var iterator = test_dir.iterate();
+    while (try iterator.next()) |entry| {
+        if (entry.kind != .file) continue;
+        if (!std.mem.endsWith(u8, entry.name, ".zig")) continue;
+
+        const name = try std.mem.concat(b.allocator, u8, &.{
+            test_prefix, entry.name[0 .. entry.name.len - 4], // strip .zig suffix
+        });
+        const test_executable = b.addExecutable(.{
+            .name = name,
+            .root_module = b.createModule(.{
+                .root_source_file = b.path(b.pathJoin(&.{ test_path, entry.name })),
+                .optimize = optimize,
+                .target = target,
+                .link_libc = link_libc,
+                .link_libcpp = false,
+                .pic = pie,
+            }),
+            .linkage = if (link_libc) .dynamic else .static,
+            .use_llvm = true,
+            .use_lld = true,
+        });
+        test_executable.pie = pie;
+        b.installArtifact(test_executable);
+    }
+}
```
docs/TODO.md (new file, 52 lines):

## General things

### Thread-locals

Right now we don't use any thread-local storage in Zig, which means the application can freely decide what to do with the `fs` segment. If we need thread-locals in the future, we have to think carefully about how to do it.

If `FSGSBASE` is available we can swap out the segment base very quickly. If not, we would need to fall back to `arch_prctl`, which is of course a lot slower. Fortunately, `FSGSBASE` has been available since Intel Ivy Bridge (2012), AMD Zen 2 / Family 17h (2019), and Linux 5.9 (2020).
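For reference, a minimal sketch of the `arch_prctl` slow path that would be the fallback (the constant comes from `asm/prctl.h`; the wrapper itself is hypothetical):

```zig
const std = @import("std");

const ARCH_SET_FS = 0x1002; // from asm/prctl.h

/// Slow path: ask the kernel to change the fs base for us.
fn setFsBase(base: usize) void {
    _ = std.os.linux.syscall2(.arch_prctl, ARCH_SET_FS, base);
}
```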
## Major things

- [x] `clone`: with and without stack switching
- [x] `clone3`: with and without stack switching
- [x] `fork`: likely there is nothing to be done here, but check again just to be sure
- [x] `rt_sigreturn`: we can't use the normal `syscall` interception because we push something onto the stack, so `ucontext` isn't on top anymore
- [x] `/proc/self/exe`: intercept calls to `readlink`/`readlinkat` with that as the argument
- [x] `auxv`: check that it is set up correctly and completely
- [x] JIT support: intercept `mmap`, `mprotect`, and `mremap` calls that change pages to be executable
- [ ] `SIGILL` patching fallback
- [x] `vdso` handling
- [x] check why the libc tests are flaky

## Minor things

- [ ] Cleanup: when a JIT engine frees code, our trampolines become "zombies", so over time we leak memory and also reduce the patching percentage
- [ ] Ghost-page edge case: in all patch strategies, if a range spans multiple pages and we `mmap` the first one but can't `mmap` the second, we leave the first one mapped. It would be better to unmap both
- [ ] Right now when patching we `mmap` a page and may not use it, but we still leave it mapped. This leaks memory. Fixing this correctly also fixes the ghost-page issue
- [ ] Re-entrancy for `patchRegion`
  - if a signal arrives while we are in that function and we need to patch something because of the signal, we will deadlock
- [ ] Strict disassembly mode: currently we warn on disassembly errors; provide a flag to stop instead
- [ ] Separate stack for Flicker
  - when the application runs with a small stack (`sigaltstack`, goroutines) we might overflow, especially in the `patchRegion` call
  - either one global stack for everyone to use (with a mutex) or a thread-local stack (though using `fs` has other problems)
- [ ] `exec`: option to persist across `exec` calls, useful for things like `make`
- [ ] `prctl`/`arch_prctl`: check if/what we need to intercept and change
- [ ] `seccomp`: check what we need to intercept and change
- [ ] `modify_ldt`: check what we need to intercept and change
- [ ] `set_tid_address`: check what we need to intercept and change
- [ ] Performance optimizations for patched code? Peephole optimization might be possible
- [ ] Maybe add a way to run something after the client has finished
  - could be useful for statistics, cleanup (if necessary), or notifying of suppressed warnings
docs/project_overview.md (new file, 115 lines):
# Project Flicker: Universal Load-Time Binary Rewriting

Flicker is a binary rewriting infrastructure designed for native amd64 Linux applications. Its primary objective is to enable universal instrumentation, i.e., the ability to patch any instruction, with minimal performance overhead.

Current approaches to binary rewriting force a difficult trade-off between coverage, performance, and complexity. Flicker addresses this by operating at load time, combining the transparency of load-time injection with control-flow-agnostic patching techniques. This architecture supports statically linked executables, dynamically linked libraries, and Just-In-Time (JIT) compiled code within a single unified framework.
## The Landscape of Binary Rewriting

To understand Flicker's position, it is helpful to look at the two dominant approaches: dynamic and static rewriting.

Dynamic Binary Translation (DBT) tools, such as DynamoRIO or Pin, execute programs inside a virtual-machine-like environment. They act as interpreters that disassemble and translate code blocks on the fly. This lets them handle JIT code and shared libraries natively, because they see the instruction stream as it executes. However, this flexibility incurs significant overhead, often slowing execution by 20% to 50%, because the engine must constantly disassemble and translate code.

Static Binary Rewriting modifies the binary on disk before execution. While potentially fast, this approach faces the undecidability of complete disassembly: identifying all jump targets in a stripped binary is, in general, as hard as the halting problem. If an instruction is moved to insert a patch, existing jump targets break. Static tools often lift code to an Intermediate Representation (IR) to manage this, but that adds complexity and brittleness.
## The Flicker Architecture: Load-Time Rewriting

Flicker pursues a third path: load-time binary rewriting. This occurs after the executable is mapped into memory but before the entry point is executed. By implementing a custom user-space loader, the system gains total control over the process lifecycle without incurring the runtime overhead of a DBT engine.

The key advantage of this approach is the ability to use `mmap` to allocate trampoline pages directly near the target code. This removes the need to hijack binary sections to embed loader and trampoline information, a common limitation of static rewriting tools.
### The Patching Mechanism

To solve the static-rewriting issue of shifting addresses, Flicker adopts the methodology used by E9Patch. The core invariant is that the size of the code section never changes, and instructions are never moved unless evicted to a trampoline. This makes the patching process control-flow agnostic; valid jump targets remain valid because addresses do not shift.

Flicker applies patches using a hierarchy of tactics ordered by invasiveness. Ideally, if an instruction is five bytes or larger, it is replaced with a standard 32-bit relative jump to a trampoline. If the instruction is smaller than five bytes, the system attempts "Instruction Punning," where it finds a jump offset that overlaps with the bytes of the following instructions to form a valid target. If punning fails, the system tries using instruction prefixes to shift the jump bytes (Padded Jumps).

When these non-destructive methods fail, Flicker employs eviction strategies. "Successor Eviction" moves the following instruction to a trampoline to create space for the patch. If that is insufficient, "Neighbor Eviction" searches for a neighboring instruction up to 128 bytes away, evicting it to create a hole that can stage a short jump to the trampoline. As a final fallback to guarantee 100% coverage, the system can insert an invalid instruction to trap execution, though this comes at a performance cost.
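To make punning concrete: a 3-byte instruction needs a 5-byte jump, so the last two displacement bytes land on top of the following instruction. If other code jumps there, those bytes must keep their exact values, which constrains where the trampoline may live. A small sketch of the arithmetic, with all values hypothetical:

```zig
const std = @import("std");

test "instruction punning displacement constraint" {
    // Hypothetical layout: a 3-byte instruction at patch_site, followed by an
    // instruction whose first two bytes (0x74 0x05, a `je`) are a known jump
    // target and therefore must not change.
    const patch_site: i64 = 0x401000;
    const fixed = [2]u8{ 0x74, 0x05 };

    // jmp rel32 needs 5 bytes, so displacement bytes 2..3 fall on the
    // protected bytes; only the low two bytes are free for the solver.
    const disp_bytes = [4]u8{ 0x34, 0x12, fixed[0], fixed[1] };
    const disp = std.mem.readInt(i32, &disp_bytes, .little);

    // Every choice of the free bytes selects one candidate trampoline
    // address; the allocator tries to map a page covering one of them.
    const trampoline = patch_site + 5 + disp;
    try std.testing.expect(trampoline > patch_site);
}
```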
### Universal Coverage via Induction

Flicker treats code discovery as an inductive problem, ensuring support for static executables, dynamic libraries, and JIT code.

The base case is a statically linked executable. Flicker acts as the OS loader: it reads the ELF headers, maps the segments, performs a linear scan of the executable sections, and applies patches before jumping to the entry point. This relies on the assumption that modern compilers produce tessellated code with no gaps.

The inductive step covers JIT code and dynamic libraries. On Linux, generating executable code mostly follows a pattern: memory is mapped, code is written, and then `mprotect` is called to make it executable. Flicker intercepts all `mprotect` and `mmap` calls. When a page transitions to executable status, the system scans the buffer and applies patches before the kernel finalizes the permissions.
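A minimal sketch of that interception point, assuming a hook that sees the raw `mprotect` arguments before they reach the kernel (the names here are illustrative, not Flicker's actual internals):

```zig
const std = @import("std");
const linux = std.os.linux;

/// Stand-in for src/Patcher.zig's patchRegion.
fn patchRegion(region: []u8) !void {
    _ = region;
}

/// Hypothetical hook the syscall trampoline would call for mprotect.
fn onMprotect(addr: usize, len: usize, prot: usize) usize {
    if (prot & linux.PROT.EXEC != 0) {
        // The page is about to become executable but is not yet: scan and
        // patch it before forwarding the real syscall.
        const region = @as([*]u8, @ptrFromInt(addr))[0..len];
        patchRegion(region) catch |err| {
            std.log.warn("patching JIT region failed: {}", .{err});
        };
    }
    return linux.syscall3(.mprotect, addr, len, prot);
}
```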
This logic extends recursively to dynamic libraries. Because the dynamic loader (`ld.so`) uses `mmap` and `mprotect` to load libraries (such as libc or libGL), intercepting the loader's system calls allows Flicker to automatically patch every library it loads, including those loaded manually via `dlopen`.
## System Integration and Edge Cases

Binary rewriting at this level encounters specific OS behaviors that require precise handling to avoid crashes.

### Thread Creation and Stack Switching

The `clone` syscall creates a thread with a fresh stack. If a patch intercepts `clone`, the trampoline runs on the parent's stack. When `clone` returns, the child thread wakes up inside the trampoline at the instruction following the syscall. The child then attempts to run the trampoline epilogue to restore registers, but it does so using its new, empty stack, reading garbage data and crashing.

To resolve this, the trampoline checks the return value. If it is the parent, execution proceeds normally. If it is the child, the trampoline immediately jumps back to the original code, skipping stack restoration.
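In pseudo-Zig, the decision the epilogue makes is a single branch on the syscall's return value (in reality this is a few instructions of assembly operating on `%rax`; the names below are illustrative):

```zig
const Action = enum { jump_to_original_code, restore_registers_then_return };

/// What the trampoline does after the forwarded clone returns.
fn afterClone(rax: u64) Action {
    // clone returns 0 in the child and the child's TID in the parent.
    if (rax == 0) return .jump_to_original_code; // child: its stack holds no saved state
    return .restore_registers_then_return; // parent: run the normal epilogue
}
```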
### Signal Handling

When a signal handler returns, it calls `rt_sigreturn`, telling the kernel to restore the CPU state from a `ucontext` struct saved on the stack. If a trampoline modifies the stack pointer to save context, `rt_sigreturn` would be issued while the stack pointer is displaced; the kernel would then look for the `ucontext` at the wrong address, corrupting the process state. Flicker handles this by detecting `rt_sigreturn` and restoring the stack pointer to its exact pre-trampoline value before executing the syscall.
### The vDSO and Concurrency

The virtual Dynamic Shared Object (vDSO) allows fast syscalls in user space. Flicker locates the vDSO via the `AT_SYSINFO_EHDR` auxiliary vector entry and patches it like any other shared library. Regarding concurrency, a race condition exists where one thread executes JIT code while another modifies it. Flicker mitigates this by intercepting the `mprotect` call while the page is still writable but not yet executable, patching the code safely before the kernel atomically updates the permissions.
docs/use_cases.md (new file, 120 lines):
# Use Cases for Flicker

Flicker's architecture (load-time binary rewriting without control-flow recovery) uniquely positions it to handle scenarios where source code is unavailable (legacy or commercial software) and performance is critical. Unlike Dynamic Binary Translation (DBT) tools like Valgrind or QEMU, which incur high overhead due to JIT compilation or emulation, Flicker patches code to run natively.

Below are possible use cases, categorized by domain.
## High Performance Computing (HPC) & Optimization

### Approximate Computing and Mixed-Precision Analysis

Scientific simulations often default to double precision (64-bit) for safety, even when single (32-bit) or half (16-bit) precision would yield accurate results with significantly higher performance. But rewriting massive legacy Fortran/C++ codebases to test precision sensitivity is impractical.

Flicker could instrument floating-point instructions to perform "shadow execution," running operations in both double and single precision to log divergence. Alternatively, it could mask the lower bits of registers to simulate low-precision hardware.

Unlike compiler-based approaches that change the whole binary, Flicker can apply these patches selectively to specific "hot" functions at load time, preserving accuracy in sensitive setup/solver phases while optimizing the bulk computation.
### Profiling Memory Access Patterns (False Sharing)

In multi-threaded HPC applications, performance often degrades due to "false sharing," where multiple threads modify independent variables that happen to reside on the same CPU cache line, causing cache thrashing.

Sampling profilers (like `perf`) provide statistical approximations but often miss precise interaction timings, while source-level instrumentation disrupts compiler optimizations.

Flicker could instrument memory store instructions (`MOV`, etc.) to record effective addresses. By aggregating this data, it can generate heatmaps of cache-line access density, precisely identifying false sharing or inefficient strided access patterns in optimized binaries.
### Low-Overhead I/O Tracing

Parallel MPI jobs often inadvertently stress parallel filesystems (Lustre, GPFS) by performing excessive small writes or metadata operations.

Tools like `strace` force a context switch for every syscall, slowing the application down so much that the race conditions or I/O storms disappear (Heisenbugs).

By intercepting I/O syscalls (`write`, `read`, `open`, ...) inside the process's memory, Flicker could aggregate I/O statistics (e.g., "Rank 7 performed 50,000 writes of 4 bytes") with negligible overhead, providing a lightweight alternative to `strace` for high-throughput jobs.
### MPI Communication Profiling

HPC performance is often bound by network latency between nodes, and profiling tools like Vampir are heavyweight and costly. Flicker can patch shared-library exports (like `MPI_Send` or `MPI_Recv`) at load time. This allows lightweight logging of message sizes and latencies without recompiling the application or linking against special profiling libraries.
## Security and Hardening

### Coverage-Guided Fuzzing (Closed Source)

Fuzzing requires feedback on which code paths are executed to be effective, but for closed-source software researchers typically rely on AFL's QEMU mode. QEMU translates instructions dynamically, resulting in slow execution (often 2-10x slower than native).

Flicker could inject coverage instrumentation (updating a shared-memory bitmap on branch targets) directly into the binary at load time. This would allow closed-source binaries to be fuzzed at near-native speeds, significantly increasing the number of test cases run per second.
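A sketch of the classic AFL-style edge counter such a patch could inject at every branch target; the map size and hashing follow AFL's convention, and `onBranch` is a hypothetical hook name:

```zig
var coverage_map: [65536]u8 = @splat(0); // shared with the fuzzer in practice
var prev_loc: u16 = 0;

/// Called from the trampoline at each instrumented branch target.
/// `cur_loc` is a random ID assigned to that code location at patch time.
fn onBranch(cur_loc: u16) void {
    coverage_map[cur_loc ^ prev_loc] +%= 1; // count the (prev -> cur) edge
    prev_loc = cur_loc >> 1; // shift so A->B and B->A hash differently
}
```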
### Software Shadow Stacks

Return-Oriented Programming (ROP) attacks exploit buffer overflows to overwrite return addresses on the stack.

Hardware enforcement (Intel CET / AMD Shadow Stack) requires modern CPUs (Intel 11th Gen+, Zen 3+) and recent kernels (Linux 6.6+); older systems remain vulnerable.

Flicker could instrument `CALL` and `RET` instructions to implement a software shadow stack. On `CALL`, the return address is pushed to a secure, isolated stack region. On `RET`, the address on the regular stack is compared against the shadow stack. If they mismatch, the program terminates, preventing ROP chains.
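A per-thread sketch of the bookkeeping the `CALL`/`RET` trampolines would perform (simplified: fixed-size stack, panic on mismatch):

```zig
// NOTE: `threadlocal` is exactly what docs/TODO.md says Flicker currently
// avoids; a real in-process version needs storage that does not touch `fs`.
threadlocal var shadow: [4096]usize = undefined;
threadlocal var top: usize = 0;

fn onCall(return_addr: usize) void {
    shadow[top] = return_addr;
    top += 1;
}

fn onRet(return_addr: usize) void {
    top -= 1;
    if (shadow[top] != return_addr)
        @panic("shadow stack mismatch: possible ROP");
}
```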
### Binary-Only Address Sanitizer (ASan)

Memory-safety errors (buffer overflows, use-after-free) in C/C++ are usually found with ASan or Valgrind. ASan requires recompilation; Valgrind works on binaries but slows execution by 20-50x, making it unusable for large datasets.

Flicker could intercept allocator calls (`malloc`/`free`) to poison "red zones" around memory and instrument memory-access instructions to check those zones. This provides ASan-like capabilities for legacy binaries with significantly lower overhead than Valgrind.
## Systems and Maintenance

### Hardware Feature Emulation (Forward Compatibility)

HPC clusters are often heterogeneous, with older nodes lacking newer instruction sets (e.g., AVX-512, AMX). A binary compiled for a newer architecture will crash with `SIGILL` on an older node.

Flicker could detect these instructions and patch them to jump to a software-emulation routine or a scalar fallback implementation. This allows binaries optimized for the latest hardware to run (albeit more slowly) on legacy nodes for testing or resource-filling purposes.
### Fault Injection

To certify software for mission-critical environments, developers must verify how it handles hardware errors.

Flicker could instrument instructions to probabilistically flip bits in registers or memory ("bit-flip injection"), or intercept syscalls to return error codes (e.g., returning `ENOSPC` on `write`). It could also simulate malfunctioning or intermittent devices by corrupting buffers returned by `read`. This allows testing error-recovery paths without physical hardware damage.
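A sketch of probabilistic syscall fault injection, here failing roughly 1% of intercepted `write` calls with `ENOSPC` (the rate, seeding, and hook name are illustrative):

```zig
const std = @import("std");
const linux = std.os.linux;

var prng = std.Random.DefaultPrng.init(0x5eed);

/// Fail roughly 1% of intercepted writes with ENOSPC; forward the rest.
fn onWrite(fd: usize, buf: usize, len: usize) usize {
    if (prng.random().float(f32) < 0.01) {
        // Linux encodes failures as -errno in the returned register.
        return @bitCast(-@as(isize, @intFromEnum(linux.E.NOSPC)));
    }
    return linux.syscall3(.write, fd, buf, len);
}
```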
### Record/Replay Engine

Debugging non-deterministic bugs (race conditions) is difficult because they are hard to reproduce. By intercepting all sources of non-determinism (syscalls, `rdtsc`, atomic instructions, signals), Flicker could record a trace of an execution. The trace can be replayed later to force the exact same execution path, allowing developers to debug the error state interactively.
412
src/Patcher.zig
412
src/Patcher.zig
@@ -6,6 +6,7 @@ const mem = std.mem;
|
|||||||
const posix = std.posix;
|
const posix = std.posix;
|
||||||
const zydis = @import("zydis").zydis;
|
const zydis = @import("zydis").zydis;
|
||||||
const dis = @import("disassembler.zig");
|
const dis = @import("disassembler.zig");
|
||||||
|
const syscalls = @import("syscalls.zig");
|
||||||
|
|
||||||
const log = std.log.scoped(.patcher);
|
const log = std.log.scoped(.patcher);
|
||||||
const AddressAllocator = @import("AddressAllocator.zig");
|
const AddressAllocator = @import("AddressAllocator.zig");
|
||||||
@@ -17,15 +18,11 @@ const Range = @import("Range.zig");
|
|||||||
|
|
||||||
const assert = std.debug.assert;
|
const assert = std.debug.assert;
|
||||||
|
|
||||||
const page_size = 4096;
|
const page_size = std.heap.pageSize();
|
||||||
const jump_rel32: u8 = 0xe9;
|
const jump_rel32: u8 = 0xe9;
|
||||||
const jump_rel32_size = 5;
|
const jump_rel32_size = 5;
|
||||||
const jump_rel8: u8 = 0xeb;
|
const jump_rel8: u8 = 0xeb;
|
||||||
const jump_rel8_size = 2;
|
const jump_rel8_size = 2;
|
||||||
const max_ins_bytes = 15;
|
|
||||||
// Based on the paper 'x86-64 Instruction Usage among C/C++ Applications' by 'Akshintala et al.'
|
|
||||||
// it's '4.25' bytes, so 4 is good enough. (https://oscarlab.github.io/papers/instrpop-systor19.pdf)
|
|
||||||
const avg_ins_bytes = 4;
|
|
||||||
|
|
||||||
// TODO: Find an invalid instruction to use.
|
// TODO: Find an invalid instruction to use.
|
||||||
// const invalid: u8 = 0xaa;
|
// const invalid: u8 = 0xaa;
|
||||||
@@ -33,42 +30,80 @@ const int3: u8 = 0xcc;
|
|||||||
const nop: u8 = 0x90;
|
const nop: u8 = 0x90;
|
||||||
|
|
||||||
// Prefixes for Padded Jumps (Tactic T1)
|
// Prefixes for Padded Jumps (Tactic T1)
|
||||||
const prefix_fs: u8 = 0x64;
|
const prefixes = [_]u8{
|
||||||
const prefix_gs: u8 = 0x65;
|
// prefix_fs,
|
||||||
const prefix_ss: u8 = 0x36;
|
0x64,
|
||||||
const prefixes = [_]u8{ prefix_fs, prefix_gs, prefix_ss };
|
// prefix_gs,
|
||||||
|
0x65,
|
||||||
|
// prefix_ss,
|
||||||
|
0x36,
|
||||||
|
};
|
||||||
|
|
||||||
const Patcher = @This();
|
/// As of the SysV ABI: 'The kernel destroys registers %rcx and %r11."
|
||||||
|
/// So we put the address of the function to call into %r11.
|
||||||
|
// TODO: Don't we need to save the red zone here, because we push the return address onto the stack
|
||||||
|
// with the `call r11` instruction?
|
||||||
|
var syscall_flicken_bytes = [_]u8{
|
||||||
|
0x49, 0xBB, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, // mov r11 <imm>
|
||||||
|
0x41, 0xff, 0xd3, // call r11
|
||||||
|
};
|
||||||
|
|
||||||
gpa: mem.Allocator,
|
pub var gpa: mem.Allocator = undefined;
|
||||||
flicken: std.StringArrayHashMapUnmanaged(Flicken) = .empty,
|
pub var flicken_templates: std.StringArrayHashMapUnmanaged(Flicken) = .empty;
|
||||||
address_allocator: AddressAllocator = .empty,
|
pub var address_allocator: AddressAllocator = .empty;
|
||||||
/// Tracks the base addresses of pages we have mmap'd for Flicken.
|
/// Tracks the base addresses of pages we have mmap'd for Flicken.
|
||||||
allocated_pages: std.AutoHashMapUnmanaged(u64, void) = .empty,
|
pub var allocated_pages: std.AutoHashMapUnmanaged(u64, void) = .empty;
|
||||||
|
pub var mutex: std.Thread.Mutex = .{};
|
||||||
|
|
||||||
pub fn init(gpa: mem.Allocator) !Patcher {
|
pub var target_exec_path_buf: [std.fs.max_path_bytes]u8 = @splat(0);
|
||||||
var flicken: std.StringArrayHashMapUnmanaged(Flicken) = .empty;
|
pub var target_exec_path: []const u8 = undefined;
|
||||||
try flicken.ensureTotalCapacity(gpa, 8);
|
|
||||||
flicken.putAssumeCapacity("nop", .{ .name = "nop", .bytes = &.{} });
|
|
||||||
return .{
|
|
||||||
.gpa = gpa,
|
|
||||||
.flicken = flicken,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn deinit(patcher: *Patcher) void {
|
/// Initialize the patcher.
|
||||||
_ = patcher;
|
/// NOTE: This should only be called **once**.
|
||||||
|
pub fn init() !void {
|
||||||
|
gpa = std.heap.page_allocator;
|
||||||
|
|
||||||
|
try flicken_templates.ensureTotalCapacity(
|
||||||
|
std.heap.page_allocator,
|
||||||
|
page_size / @sizeOf(Flicken),
|
||||||
|
);
|
||||||
|
flicken_templates.putAssumeCapacity("nop", .{ .name = "nop", .bytes = &.{} });
|
||||||
|
mem.writeInt(
|
||||||
|
u64,
|
||||||
|
syscall_flicken_bytes[2..][0..8],
|
||||||
|
@intFromPtr(&syscalls.syscallEntry),
|
||||||
|
.little,
|
||||||
|
);
|
||||||
|
flicken_templates.putAssumeCapacity("syscall", .{ .name = "syscall", .bytes = &syscall_flicken_bytes });
|
||||||
|
|
||||||
|
{
|
||||||
|
// Read mmap_min_addr to block the low memory range. This prevents us from allocating
|
||||||
|
// trampolines in the forbidden low address range.
|
||||||
|
var min_addr: u64 = 0x10000; // Default safe fallback (64KB)
|
||||||
|
if (std.fs.openFileAbsolute("/proc/sys/vm/mmap_min_addr", .{})) |file| {
|
||||||
|
defer file.close();
|
||||||
|
var buf: [32]u8 = undefined;
|
||||||
|
if (file.readAll(&buf)) |len| {
|
||||||
|
const trimmed = std.mem.trim(u8, buf[0..len], " \n\r\t");
|
||||||
|
if (std.fmt.parseInt(u64, trimmed, 10)) |val| {
|
||||||
|
min_addr = val;
|
||||||
|
} else |_| {}
|
||||||
|
} else |_| {}
|
||||||
|
} else |_| {}
|
||||||
|
try address_allocator.block(gpa, .{ .start = 0, .end = @intCast(min_addr) }, 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Flicken name and bytes have to be valid for the lifetime it's used. If a trampoline with the
|
/// Flicken name and bytes have to be valid for the lifetime it's used. If a trampoline with the
|
||||||
/// name is already registered it gets overwritten.
|
/// name is already registered it gets overwritten.
|
||||||
/// NOTE: The name "nop" is reserved and always has the ID 0.
|
/// NOTE: The name "nop" is reserved and always has the ID 0.
|
||||||
pub fn addFlicken(patcher: *Patcher, trampoline: Flicken) !FlickenId {
|
pub fn addFlicken(trampoline: Flicken) !FlickenId {
|
||||||
assert(!mem.eql(u8, "nop", trampoline.name));
|
assert(!mem.eql(u8, "nop", trampoline.name));
|
||||||
try patcher.flicken.ensureUnusedCapacity(patcher.gpa, 1);
|
assert(!mem.eql(u8, "syscall", trampoline.name));
|
||||||
|
try flicken_templates.ensureUnusedCapacity(gpa, 1);
|
||||||
errdefer comptime unreachable;
|
errdefer comptime unreachable;
|
||||||
|
|
||||||
const gop = patcher.flicken.getOrPutAssumeCapacity(trampoline.name);
|
const gop = flicken_templates.getOrPutAssumeCapacity(trampoline.name);
|
||||||
if (gop.found_existing) {
|
if (gop.found_existing) {
|
||||||
log.warn("addTrampoline: Overwriting existing trampoline: {s}", .{trampoline.name});
|
log.warn("addTrampoline: Overwriting existing trampoline: {s}", .{trampoline.name});
|
||||||
}
|
}
|
||||||
@@ -93,6 +128,8 @@ pub const FlickenId = enum(u64) {
|
|||||||
/// It also needs special handling when constructing the patches, because it's different for
|
/// It also needs special handling when constructing the patches, because it's different for
|
||||||
/// each instruction.
|
/// each instruction.
|
||||||
nop = 0,
|
nop = 0,
|
||||||
|
/// TODO: docs
|
||||||
|
syscall = 1,
|
||||||
_,
|
_,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -169,18 +206,32 @@ pub const Statistics = struct {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn patchRegion(patcher: *Patcher, region: []align(page_size) u8) !void {
|
/// Scans a memory region for instructions that require patching and applies the patches
|
||||||
|
/// using a hierarchy of tactics (Direct/Punning -> Successor Eviction -> Neighbor Eviction).
|
||||||
|
///
|
||||||
|
/// NOTE: This function leaves the region as R|W and the caller is responsible for changing it to
|
||||||
|
/// the desired protection
|
||||||
|
pub fn patchRegion(region: []align(page_size) u8) !void {
|
||||||
|
log.info(
|
||||||
|
"Patching region: 0x{x} - 0x{x}",
|
||||||
|
.{ @intFromPtr(region.ptr), @intFromPtr(®ion[region.len - 1]) },
|
||||||
|
);
|
||||||
|
// For now just do a coarse lock.
|
||||||
|
// TODO: should we make this more fine grained?
|
||||||
|
mutex.lock();
|
||||||
|
defer mutex.unlock();
|
||||||
|
|
||||||
{
|
{
|
||||||
// Block the region, such that we don't try to allocate there anymore.
|
// Block the region, such that we don't try to allocate there anymore.
|
||||||
const start: i64 = @intCast(@intFromPtr(region.ptr));
|
const start: i64 = @intCast(@intFromPtr(region.ptr));
|
||||||
try patcher.address_allocator.block(
|
try address_allocator.block(
|
||||||
patcher.gpa,
|
gpa,
|
||||||
.{ .start = start, .end = start + @as(i64, @intCast(region.len)) },
|
.{ .start = start, .end = start + @as(i64, @intCast(region.len)) },
|
||||||
page_size,
|
page_size,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
var arena_impl = std.heap.ArenaAllocator.init(patcher.gpa);
|
var arena_impl = std.heap.ArenaAllocator.init(gpa);
|
||||||
const arena = arena_impl.allocator();
|
const arena = arena_impl.allocator();
|
||||||
defer arena_impl.deinit();
|
defer arena_impl.deinit();
|
||||||
|
|
||||||
@@ -200,11 +251,12 @@ pub fn patchRegion(patcher: *Patcher, region: []align(page_size) u8) !void {
|
|||||||
const offset = instruction.address - @intFromPtr(region.ptr);
|
const offset = instruction.address - @intFromPtr(region.ptr);
|
||||||
instruction_starts.set(offset);
|
instruction_starts.set(offset);
|
||||||
|
|
||||||
const should_patch = instruction.instruction.mnemonic == zydis.ZYDIS_MNEMONIC_SYSCALL or
|
const is_syscall = instruction.instruction.mnemonic == zydis.ZYDIS_MNEMONIC_SYSCALL;
|
||||||
|
const should_patch = is_syscall or
|
||||||
instruction.instruction.attributes & zydis.ZYDIS_ATTRIB_HAS_LOCK > 0;
|
instruction.instruction.attributes & zydis.ZYDIS_ATTRIB_HAS_LOCK > 0;
|
||||||
if (should_patch) {
|
if (should_patch) {
|
||||||
const request: PatchRequest = .{
|
const request: PatchRequest = .{
|
||||||
.flicken = .nop,
|
.flicken = if (is_syscall) .syscall else .nop,
|
||||||
.offset = offset,
|
.offset = offset,
|
||||||
.size = instruction.instruction.length,
|
.size = instruction.instruction.length,
|
||||||
.bytes = region[offset..],
|
.bytes = region[offset..],
|
||||||
@@ -234,7 +286,7 @@ pub fn patchRegion(patcher: *Patcher, region: []align(page_size) u8) !void {
|
|||||||
}
|
}
|
||||||
last_offset = request.offset;
|
last_offset = request.offset;
|
||||||
|
|
||||||
if (@as(u64, @intFromEnum(request.flicken)) >= patcher.flicken.count()) {
|
if (@as(u64, @intFromEnum(request.flicken)) >= flicken_templates.count()) {
|
||||||
const fmt = dis.formatBytes(request.bytes[0..request.size]);
|
const fmt = dis.formatBytes(request.bytes[0..request.size]);
|
||||||
log.err(
|
log.err(
|
||||||
"patchRegion: Usage of undefined flicken in request {f} for instruction: {s}",
|
"patchRegion: Usage of undefined flicken in request {f} for instruction: {s}",
|
||||||
@@ -248,8 +300,6 @@ pub fn patchRegion(patcher: *Patcher, region: []align(page_size) u8) !void {
|
|||||||
{
|
{
|
||||||
// Apply patches.
|
// Apply patches.
|
||||||
try posix.mprotect(region, posix.PROT.READ | posix.PROT.WRITE);
|
try posix.mprotect(region, posix.PROT.READ | posix.PROT.WRITE);
|
||||||
defer posix.mprotect(region, posix.PROT.READ | posix.PROT.EXEC) catch
|
|
||||||
@panic("patchRegion: mprotect back to R|X failed. Can't continue");
|
|
||||||
|
|
||||||
var stats = Statistics.empty;
|
var stats = Statistics.empty;
|
||||||
// Used to track which bytes have been modified or used for constraints (punning),
|
// Used to track which bytes have been modified or used for constraints (punning),
|
||||||
@@ -269,7 +319,7 @@ pub fn patchRegion(patcher: *Patcher, region: []align(page_size) u8) !void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (try patcher.attemptDirectOrPunning(
|
if (try attemptDirectOrPunning(
|
||||||
request,
|
request,
|
||||||
arena,
|
arena,
|
||||||
&locked_bytes,
|
&locked_bytes,
|
||||||
@@ -279,7 +329,7 @@ pub fn patchRegion(patcher: *Patcher, region: []align(page_size) u8) !void {
|
|||||||
continue :requests;
|
continue :requests;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (try patcher.attemptSuccessorEviction(
|
if (try attemptSuccessorEviction(
|
||||||
request,
|
request,
|
||||||
arena,
|
arena,
|
||||||
&locked_bytes,
|
&locked_bytes,
|
||||||
@@ -289,7 +339,7 @@ pub fn patchRegion(patcher: *Patcher, region: []align(page_size) u8) !void {
|
|||||||
continue :requests;
|
continue :requests;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (try patcher.attemptNeighborEviction(
|
if (try attemptNeighborEviction(
|
||||||
request,
|
request,
|
||||||
arena,
|
arena,
|
||||||
&locked_bytes,
|
&locked_bytes,
|
||||||
@@ -323,7 +373,6 @@ pub fn patchRegion(patcher: *Patcher, region: []align(page_size) u8) !void {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn attemptDirectOrPunning(
|
fn attemptDirectOrPunning(
|
||||||
patcher: *Patcher,
|
|
||||||
request: PatchRequest,
|
request: PatchRequest,
|
||||||
arena: mem.Allocator,
|
arena: mem.Allocator,
|
||||||
locked_bytes: *std.DynamicBitSetUnmanaged,
|
locked_bytes: *std.DynamicBitSetUnmanaged,
|
||||||
@@ -333,7 +382,7 @@ fn attemptDirectOrPunning(
|
|||||||
const flicken: Flicken = if (request.flicken == .nop)
|
const flicken: Flicken = if (request.flicken == .nop)
|
||||||
.{ .name = "nop", .bytes = request.bytes[0..request.size] }
|
.{ .name = "nop", .bytes = request.bytes[0..request.size] }
|
||||||
else
|
else
|
||||||
patcher.flicken.entries.get(@intFromEnum(request.flicken)).value;
|
flicken_templates.entries.get(@intFromEnum(request.flicken)).value;
|
||||||
|
|
||||||
var pii = PatchInstructionIterator.init(
|
var pii = PatchInstructionIterator.init(
|
||||||
request.bytes,
|
request.bytes,
|
||||||
@@ -346,9 +395,9 @@ fn attemptDirectOrPunning(
|
|||||||
// mapped. While harmless (it becomes an unused executable page), it is technically a
|
// mapped. While harmless (it becomes an unused executable page), it is technically a
|
||||||
// memory leak. A future fix should track "current attempt" pages separately and unmap
|
// memory leak. A future fix should track "current attempt" pages separately and unmap
|
||||||
// them on failure.
|
// them on failure.
|
||||||
while (pii.next(&patcher.address_allocator)) |allocated_range| {
|
while (pii.next(.{ .count = 256 })) |allocated_range| {
|
||||||
try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(allocated_range));
|
try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(allocated_range));
|
||||||
patcher.ensureRangeWritable(
|
ensureRangeWritable(
|
||||||
allocated_range,
|
allocated_range,
|
||||||
pages_made_writable,
|
pages_made_writable,
|
||||||
) catch |err| switch (err) {
|
) catch |err| switch (err) {
|
||||||
@@ -366,7 +415,7 @@ fn attemptDirectOrPunning(
|
|||||||
else => return err,
|
else => return err,
|
||||||
};
|
};
|
||||||
|
|
||||||
try patcher.address_allocator.block(patcher.gpa, allocated_range, 0);
|
try address_allocator.block(gpa, allocated_range, 0);
|
||||||
const lock_size = jump_rel32_size + pii.num_prefixes;
|
const lock_size = jump_rel32_size + pii.num_prefixes;
|
||||||
locked_bytes.setRangeValue(
|
locked_bytes.setRangeValue(
|
||||||
.{ .start = request.offset, .end = request.offset + lock_size },
|
.{ .start = request.offset, .end = request.offset + lock_size },
|
||||||
@@ -374,7 +423,7 @@ fn attemptDirectOrPunning(
|
|||||||
);
|
);
|
||||||
|
|
||||||
if (request.size >= 5) {
|
if (request.size >= 5) {
|
||||||
assert(pii.num_prefixes == 0);
|
// assert(pii.num_prefixes == 0);
|
||||||
stats.jump += 1;
|
stats.jump += 1;
|
||||||
} else {
|
} else {
|
||||||
stats.punning[pii.num_prefixes] += 1;
|
stats.punning[pii.num_prefixes] += 1;
|
||||||
@@ -385,7 +434,6 @@ fn attemptDirectOrPunning(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn attemptSuccessorEviction(
|
fn attemptSuccessorEviction(
|
||||||
patcher: *Patcher,
|
|
||||||
request: PatchRequest,
|
request: PatchRequest,
|
||||||
arena: mem.Allocator,
|
arena: mem.Allocator,
|
||||||
locked_bytes: *std.DynamicBitSetUnmanaged,
|
locked_bytes: *std.DynamicBitSetUnmanaged,
|
||||||
@@ -421,7 +469,7 @@ fn attemptSuccessorEviction(
|
|||||||
succ_request.size,
|
succ_request.size,
|
||||||
succ_flicken.size(),
|
succ_flicken.size(),
|
||||||
);
|
);
|
||||||
while (succ_pii.next(&patcher.address_allocator)) |succ_range| {
|
while (succ_pii.next(.{ .count = 16 })) |succ_range| {
|
||||||
// Ensure bytes match original before retry.
|
// Ensure bytes match original before retry.
|
||||||
assert(mem.eql(
|
assert(mem.eql(
|
||||||
u8,
|
u8,
|
||||||
@@ -430,7 +478,7 @@ fn attemptSuccessorEviction(
|
|||||||
));
|
));
|
||||||
|
|
||||||
try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(succ_range));
|
try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(succ_range));
|
||||||
patcher.ensureRangeWritable(
|
ensureRangeWritable(
|
||||||
succ_range,
|
succ_range,
|
||||||
pages_made_writable,
|
pages_made_writable,
|
||||||
) catch |err| switch (err) {
|
) catch |err| switch (err) {
|
||||||
@@ -452,17 +500,17 @@ fn attemptSuccessorEviction(
|
|||||||
const flicken: Flicken = if (request.flicken == .nop)
|
const flicken: Flicken = if (request.flicken == .nop)
|
||||||
.{ .name = "nop", .bytes = request.bytes[0..request.size] }
|
.{ .name = "nop", .bytes = request.bytes[0..request.size] }
|
||||||
else
|
else
|
||||||
patcher.flicken.entries.get(@intFromEnum(request.flicken)).value;
|
flicken_templates.entries.get(@intFromEnum(request.flicken)).value;
|
||||||
|
|
||||||
var orig_pii = PatchInstructionIterator.init(
|
var orig_pii = PatchInstructionIterator.init(
|
||||||
request.bytes,
|
request.bytes,
|
||||||
request.size,
|
request.size,
|
||||||
flicken.size(),
|
flicken.size(),
|
||||||
);
|
);
|
||||||
while (orig_pii.next(&patcher.address_allocator)) |orig_range| {
|
while (orig_pii.next(.{ .count = 16 })) |orig_range| {
|
||||||
if (succ_range.touches(orig_range)) continue;
|
if (succ_range.touches(orig_range)) continue;
|
||||||
try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(orig_range));
|
try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(orig_range));
|
||||||
patcher.ensureRangeWritable(
|
ensureRangeWritable(
|
||||||
orig_range,
|
orig_range,
|
||||||
pages_made_writable,
|
pages_made_writable,
|
||||||
) catch |err| switch (err) {
|
) catch |err| switch (err) {
|
||||||
@@ -480,8 +528,8 @@ fn attemptSuccessorEviction(
|
|||||||
else => return err,
|
else => return err,
|
||||||
};
|
};
|
||||||
|
|
||||||
try patcher.address_allocator.block(patcher.gpa, succ_range, 0);
|
try address_allocator.block(gpa, succ_range, 0);
|
||||||
try patcher.address_allocator.block(patcher.gpa, orig_range, 0);
|
try address_allocator.block(gpa, orig_range, 0);
|
||||||
const lock_size = request.size + jump_rel32_size + succ_pii.num_prefixes;
|
const lock_size = request.size + jump_rel32_size + succ_pii.num_prefixes;
|
||||||
locked_bytes.setRangeValue(
|
locked_bytes.setRangeValue(
|
||||||
.{ .start = request.offset, .end = request.offset + lock_size },
|
.{ .start = request.offset, .end = request.offset + lock_size },
|
||||||
@@ -501,7 +549,6 @@ fn attemptSuccessorEviction(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn attemptNeighborEviction(
|
fn attemptNeighborEviction(
|
||||||
patcher: *Patcher,
|
|
||||||
request: PatchRequest,
|
request: PatchRequest,
|
||||||
arena: mem.Allocator,
|
arena: mem.Allocator,
|
||||||
locked_bytes: *std.DynamicBitSetUnmanaged,
|
locked_bytes: *std.DynamicBitSetUnmanaged,
|
||||||
@@ -509,56 +556,48 @@ fn attemptNeighborEviction(
|
|||||||
instruction_starts: *const std.DynamicBitSetUnmanaged,
|
instruction_starts: *const std.DynamicBitSetUnmanaged,
|
||||||
stats: *Statistics,
|
stats: *Statistics,
|
||||||
) !bool {
|
) !bool {
|
||||||
// Iterate valid neighbors.
|
// Valid neighbors must be within [-128, 127] range for a short jump.
|
||||||
// Neighbors must be within [-128, 127] range for a short jump.
|
|
||||||
// Since we patch back-to-front, we only look at neighbors *after* the current instruction
|
// Since we patch back-to-front, we only look at neighbors *after* the current instruction
|
||||||
// (higher address) to avoid evicting an instruction we haven't processed/patched yet.
|
// (higher address) to avoid evicting an instruction we haven't processed/patched yet.
|
||||||
// Short jump is 2 bytes (EB xx). Target is IP + 2 + xx.
|
|
||||||
// So min offset is +2 (xx=0). Max offset is +2+127 = +129.
|
|
||||||
const start_offset = request.offset + 2;
|
const start_offset = request.offset + 2;
|
||||||
const end_offset = @min(
|
const end_offset = @min(
|
||||||
start_offset + 128, // 2 + 128
|
start_offset + 128,
|
||||||
request.bytes.len + request.offset,
|
request.bytes.len + request.offset,
|
||||||
);
|
);
|
||||||
|
|
||||||
neighbor: for (start_offset..end_offset) |neighbor_offset| {
|
neighbor: for (start_offset..end_offset) |neighbor_offset| {
|
||||||
if (!instruction_starts.isSet(neighbor_offset)) continue;
|
if (!instruction_starts.isSet(neighbor_offset)) continue;
|
||||||
|
|
||||||
// Found a candidate victim instruction.
|
|
||||||
// We must access it relative to the request bytes slice.
|
|
||||||
const victim_bytes_all = request.bytes[neighbor_offset - request.offset ..];
|
const victim_bytes_all = request.bytes[neighbor_offset - request.offset ..];
|
||||||
|
|
||||||
// Disassemble to get size.
|
|
||||||
// PERF: We could also search for the next set bit in instruction_starts
|
// PERF: We could also search for the next set bit in instruction_starts
|
||||||
const victim_instr = dis.disassembleInstruction(victim_bytes_all) orelse continue;
|
const victim_instr = dis.disassembleInstruction(victim_bytes_all) orelse continue;
|
||||||
const victim_size = victim_instr.instruction.length;
|
const victim_size = victim_instr.instruction.length;
|
||||||
const victim_bytes = victim_bytes_all[0..victim_size];
|
const victim_bytes = victim_bytes_all[0..victim_size];
|
||||||
|
|
||||||
// Check locks for victim.
|
|
||||||
for (0..victim_size) |i| {
|
for (0..victim_size) |i| {
|
||||||
if (locked_bytes.isSet(neighbor_offset + i)) {
|
if (locked_bytes.isSet(neighbor_offset + i)) {
|
||||||
continue :neighbor;
|
continue :neighbor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save original bytes to revert.
|
// Save original bytes to revert if constraints cannot be solved.
|
||||||
var victim_orig_bytes: [15]u8 = undefined;
|
var victim_orig_bytes: [15]u8 = undefined;
|
||||||
@memcpy(victim_orig_bytes[0..victim_size], victim_bytes);
|
@memcpy(victim_orig_bytes[0..victim_size], victim_bytes);
|
||||||
|
|
||||||
// OUTER LOOP: J_Patch
|
// OUTER LOOP: J_Patch
|
||||||
// Iterate possible offsets 'k' inside the victim for the patch jump.
|
// Iterate possible offsets 'k' inside the victim for the patch jump.
|
||||||
// J_Patch is 5 bytes. It can extend beyond victim.
|
var k: u8 = 1;
|
||||||
for (1..victim_size) |k| {
|
while (k < victim_size) : (k += 1) {
|
||||||
// Check if short jump from P reaches V+k
|
|
||||||
const target: i64 = @intCast(neighbor_offset + k);
|
const target: i64 = @intCast(neighbor_offset + k);
|
||||||
const source: i64 = @intCast(request.offset + 2);
|
const source: i64 = @intCast(request.offset + 2);
|
||||||
const disp = target - source;
|
const disp = target - source;
|
||||||
if (disp > 127 or disp < -128) continue; // Should be covered by loop bounds, but be safe.
|
if (disp > 127 or disp < -128) continue;
|
||||||
|
|
||||||
const patch_flicken: Flicken = if (request.flicken == .nop)
|
const patch_flicken: Flicken = if (request.flicken == .nop)
|
||||||
.{ .name = "nop", .bytes = request.bytes[0..request.size] }
|
.{ .name = "nop", .bytes = request.bytes[0..request.size] }
|
||||||
else
|
else
|
||||||
patcher.flicken.entries.get(@intFromEnum(request.flicken)).value;
|
flicken_templates.entries.get(@intFromEnum(request.flicken)).value;
|
||||||
|
|
||||||
// Constraints for J_Patch:
|
// Constraints for J_Patch:
|
||||||
// Bytes [0 .. victim_size - k] are free (inside victim).
|
// Bytes [0 .. victim_size - k] are free (inside victim).
|
||||||
@@ -569,19 +608,18 @@ fn attemptNeighborEviction(
|
|||||||
patch_flicken.size(),
|
patch_flicken.size(),
|
||||||
);
|
);
|
||||||
|
|
||||||
while (patch_pii.next(&patcher.address_allocator)) |patch_range| {
|
while (patch_pii.next(.{ .count = 16 })) |patch_range| {
|
||||||
// J_Patch MUST NOT use prefixes, because it's punned inside J_Victim.
|
// J_Patch MUST NOT use prefixes, because it's punned inside J_Victim.
|
||||||
// Adding prefixes would shift J_Patch relative to J_Victim, making constraints harder.
|
// Adding prefixes would shift J_Patch relative to J_Victim, making constraints harder.
|
||||||
if (patch_pii.num_prefixes > 0) break;
|
if (patch_pii.num_prefixes > 0) break;
|
||||||
|
|
||||||
try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(patch_range));
|
try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(patch_range));
|
||||||
patcher.ensureRangeWritable(patch_range, pages_made_writable) catch |err| switch (err) {
|
ensureRangeWritable(patch_range, pages_made_writable) catch |err| switch (err) {
|
||||||
error.MappingAlreadyExists => continue,
|
error.MappingAlreadyExists => continue,
|
||||||
else => return err,
|
else => return err,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Tentatively write J_Patch to memory to set constraints for J_Victim.
|
// Tentatively write J_Patch to memory to set constraints for J_Victim.
|
||||||
// We must perform the write logic manually because applyPatch assumes request struct.
|
|
||||||
// We only need to write the bytes of J_Patch that land inside the victim.
|
// We only need to write the bytes of J_Patch that land inside the victim.
|
||||||
{
|
{
|
||||||
const jmp_target = patch_range.start;
|
const jmp_target = patch_range.start;
|
||||||
@@ -602,15 +640,15 @@ fn attemptNeighborEviction(
|
|||||||
|
|
||||||
var victim_pii = PatchInstructionIterator.init(
|
var victim_pii = PatchInstructionIterator.init(
|
||||||
victim_bytes_all,
|
victim_bytes_all,
|
||||||
@intCast(k),
|
k,
|
||||||
victim_flicken.size(),
|
victim_flicken.size(),
|
||||||
);
|
);
|
||||||
|
|
||||||
while (victim_pii.next(&patcher.address_allocator)) |victim_range| {
|
while (victim_pii.next(.{ .count = 16 })) |victim_range| {
|
||||||
if (patch_range.touches(victim_range)) continue;
|
if (patch_range.touches(victim_range)) continue;
|
||||||
|
|
||||||
try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(victim_range));
|
try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(victim_range));
|
||||||
patcher.ensureRangeWritable(victim_range, pages_made_writable) catch |err| switch (err) {
|
ensureRangeWritable(victim_range, pages_made_writable) catch |err| switch (err) {
|
||||||
error.MappingAlreadyExists => continue,
|
error.MappingAlreadyExists => continue,
|
||||||
else => return err,
|
else => return err,
|
||||||
};
|
};
|
||||||
@@ -620,55 +658,48 @@ fn attemptNeighborEviction(
     // 1. Write Patch Trampoline (J_Patch target)
     {
         const trampoline: [*]u8 = @ptrFromInt(patch_range.getStart(u64));
-        @memcpy(trampoline, patch_flicken.bytes);
+        var reloc_info: ?RelocInfo = null;
         if (request.flicken == .nop) {
-            const instr = dis.disassembleInstruction(patch_flicken.bytes).?;
-            try relocateInstruction(
-                instr,
-                @intCast(patch_range.start),
-                trampoline[0..patch_flicken.bytes.len],
-            );
+            reloc_info = .{
+                .instr = dis.disassembleInstruction(patch_flicken.bytes).?,
+                .old_addr = @intFromPtr(request.bytes.ptr),
+            };
         }
-        // Jmp back from Patch Trampoline to original code (after request)
-        trampoline[patch_flicken.bytes.len] = jump_rel32;
-        const ret_addr: i64 = @intCast(@intFromPtr(&request.bytes[request.size]));
-        const from = patch_range.end;
-        const jmp_back_disp: i32 = @intCast(ret_addr - from);
-        mem.writeInt(i32, trampoline[patch_flicken.bytes.len + 1 ..][0..4], jmp_back_disp, .little);
+        commitTrampoline(
+            trampoline,
+            patch_flicken.bytes,
+            reloc_info,
+            @intFromPtr(request.bytes.ptr) + request.size,
+        ) catch |err| switch (err) {
+            error.RelocationOverflow => continue,
+            else => return err,
+        };
     }
 
     // 2. Write Victim Trampoline (J_Victim target)
     {
         const trampoline: [*]u8 = @ptrFromInt(victim_range.getStart(u64));
-        @memcpy(trampoline, victim_orig_bytes[0..victim_size]);
-        // Relocate victim instruction
-        const instr = dis.disassembleInstruction(victim_orig_bytes[0..victim_size]).?;
-        try relocateInstruction(
-            instr,
-            @intCast(victim_range.start),
-            trampoline[0..victim_size],
-        );
-        // Jmp back from Victim Trampoline to original code (after victim)
-        trampoline[victim_size] = jump_rel32;
-        const ret_addr: i64 = @intCast(@intFromPtr(&victim_bytes_all[victim_size]));
-        const from = victim_range.end;
-        const jmp_back_disp: i32 = @intCast(ret_addr - from);
-        mem.writeInt(i32, trampoline[victim_size + 1 ..][0..4], jmp_back_disp, .little);
+        commitTrampoline(
+            trampoline,
+            victim_orig_bytes[0..victim_size],
+            .{
+                .instr = dis.disassembleInstruction(victim_orig_bytes[0..victim_size]).?,
+                .old_addr = @intFromPtr(victim_bytes_all.ptr),
+            },
+            @intFromPtr(victim_bytes_all.ptr) + victim_size,
+        ) catch |err| switch (err) {
+            error.RelocationOverflow => continue,
+            else => return err,
+        };
     }
 
-    // 3. Write J_Victim (overwrites head of J_Patch which is fine, we just used it for constraints)
-    applyPatch(
-        // Create a fake request for the victim part
-        .{
-            .flicken = .nop, // Irrelevant, unused by applyPatch for jump writing
-            .offset = neighbor_offset,
-            .size = @intCast(victim_size),
-            .bytes = victim_bytes_all,
-        },
-        victim_flicken, // Unused by applyPatch for jump writing
-        victim_range,
+    // 3. Write J_Victim (overwrites head of J_Patch which is fine)
+    commitJump(
+        victim_bytes_all.ptr,
+        @intCast(victim_range.start),
         victim_pii.num_prefixes,
-    ) catch unreachable; // Should fit because we allocated it
+        k, // Total size for padding is limited to k to preserve J_Patch tail
+    );
 
     // 4. Write J_Short at request
     request.bytes[0] = jump_rel8;
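Step 4 above replaces the head of the original instruction with a two-byte short jump (`jump_rel8`). Its 8-bit displacement is measured from the end of those two bytes, which is what limits J_Short to targets within roughly ±127 bytes — and why the evicted neighbor is needed to host the full rel32 jump. A hedged sketch of the encoding (the helper name and error are ours, not the project's):

```zig
/// Encode `EB disp8` at `at`, jumping to `target`.
/// Returns error.OutOfRange if the target is not reachable with 8 bits.
fn writeJumpRel8(at: [*]u8, target: usize) !void {
    // rel8 is measured from the address of the *next* instruction (at + 2).
    const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(@intFromPtr(at) + 2));
    if (disp < -128 or disp > 127) return error.OutOfRange;
    at[0] = 0xEB; // JMP rel8
    at[1] = @bitCast(@as(i8, @intCast(disp)));
}
```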
@@ -678,8 +709,8 @@ fn attemptNeighborEviction(
     }
 
     // 5. Locking
-    try patcher.address_allocator.block(patcher.gpa, patch_range, 0);
-    try patcher.address_allocator.block(patcher.gpa, victim_range, 0);
+    try address_allocator.block(gpa, patch_range, 0);
+    try address_allocator.block(gpa, victim_range, 0);
 
     locked_bytes.setRangeValue(
         .{ .start = request.offset, .end = request.offset + request.size },
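Step 5 records which bytes are now owned by a patch so later requests can cheaply detect collisions. Assuming `locked_bytes` is a std bit set (the exact type is not shown in this hunk), the bookkeeping looks roughly like this:

```zig
const std = @import("std");

test "locked bytes bookkeeping" {
    const gpa = std.testing.allocator;
    var locked = try std.DynamicBitSet.initEmpty(gpa, 64);
    defer locked.deinit();

    // Mark bytes [8, 13) as consumed by a patch.
    locked.setRangeValue(.{ .start = 8, .end = 13 }, true);

    try std.testing.expect(locked.isSet(8));
    try std.testing.expect(!locked.isSet(13)); // end is exclusive
}
```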
@@ -706,6 +737,10 @@ fn attemptNeighborEviction(
     return false;
 }
 
+/// Applies a standard patch (T1/B1/B2) where the instruction is replaced by a jump to a trampoline.
+///
+/// This handles the logic of writing the trampoline content (including relocation) and
+/// overwriting the original instruction with a `JMP` (plus prefixes/padding).
 fn applyPatch(
     request: PatchRequest,
     flicken: Flicken,
@@ -713,51 +748,78 @@ fn applyPatch(
     num_prefixes: u8,
 ) !void {
     const flicken_addr: [*]u8 = @ptrFromInt(allocated_range.getStart(u64));
-    const flicken_slice = flicken_addr[0..flicken.size()];
 
-    const jump_to_offset: i32 = blk: {
-        const from: i64 = @intCast(@intFromPtr(&request.bytes[
-            num_prefixes + jump_rel32_size
-        ]));
-        const to = allocated_range.start;
-        break :blk @intCast(to - from);
-    };
-    const jump_back_offset: i32 = blk: {
-        const from = allocated_range.end;
-        const to: i64 = @intCast(@intFromPtr(&request.bytes[request.size]));
-        break :blk @intCast(to - from);
-    };
-    // The jumps have to be in the opposite direction.
-    assert(math.sign(jump_to_offset) * math.sign(jump_back_offset) < 0);
+    // Commit Trampoline
+    var reloc_info: ?RelocInfo = null;
 
-    // Write to the trampoline first, because for the `nop` flicken `flicken.bytes` points to
-    // `request.bytes` which we overwrite in the next step.
-    @memcpy(flicken_addr, flicken.bytes);
     if (request.flicken == .nop) {
-        const instr_bytes = request.bytes[0..request.size];
-        const instr = dis.disassembleInstruction(instr_bytes).?;
+        reloc_info = .{
+            .instr = dis.disassembleInstruction(request.bytes[0..request.size]).?,
+            .old_addr = @intFromPtr(request.bytes.ptr),
+        };
+    }
+
+    const ret_addr = @intFromPtr(request.bytes.ptr) + request.size;
+    try commitTrampoline(flicken_addr, flicken.bytes, reloc_info, ret_addr);
+
+    // Commit Jump (Patch)
+    commitJump(request.bytes.ptr, @intCast(allocated_range.start), num_prefixes, request.size);
+}
+
+const RelocInfo = struct {
+    instr: dis.BundledInstruction,
+    old_addr: u64,
+};
+
+/// Helper to write code into a trampoline.
+///
+/// It copies the original bytes (or flicken content), relocates any RIP-relative instructions
+/// to be valid at the new address, and appends a jump back to the instruction stream.
+fn commitTrampoline(
+    trampoline_ptr: [*]u8,
+    content: []const u8,
+    reloc_info: ?RelocInfo,
+    return_addr: u64,
+) !void {
+    @memcpy(trampoline_ptr[0..content.len], content);
+
+    if (reloc_info) |info| {
         try relocateInstruction(
-            instr,
-            @intCast(allocated_range.start),
-            flicken_slice[0..request.size],
+            info.instr,
+            @intFromPtr(trampoline_ptr),
+            trampoline_ptr[0..content.len],
         );
     }
-    flicken_slice[flicken.bytes.len] = jump_rel32;
-    const jump_back_location = flicken_slice[flicken.bytes.len + 1 ..][0..4];
-    mem.writeInt(i32, jump_back_location, jump_back_offset, .little);
 
-    @memcpy(request.bytes[0..num_prefixes], prefixes[0..num_prefixes]);
-    request.bytes[num_prefixes] = jump_rel32;
-    mem.writeInt(
-        i32,
-        request.bytes[num_prefixes + 1 ..][0..4],
-        jump_to_offset,
-        .little,
-    );
-    // Pad remaining with int3.
+    // Write jump back
+    trampoline_ptr[content.len] = jump_rel32;
+    const jump_src = @intFromPtr(trampoline_ptr) + content.len + jump_rel32_size;
+    const jump_disp: i32 = @intCast(@as(i64, @intCast(return_addr)) - @as(i64, @intCast(jump_src)));
+    mem.writeInt(i32, trampoline_ptr[content.len + 1 ..][0..4], jump_disp, .little);
+}
+
+/// Helper to overwrite an instruction with a jump to a trampoline.
+///
+/// It handles writing optional prefixes (padding), the `0xE9` opcode, the relative offset,
+/// and fills any remaining bytes of the original instruction with `INT3` to prevent
+/// execution of garbage bytes.
+fn commitJump(
+    from_ptr: [*]u8,
+    to_addr: u64,
+    num_prefixes: u8,
+    total_size: usize,
+) void {
+    const prefixes_slice = from_ptr[0..num_prefixes];
+    @memcpy(prefixes_slice, prefixes[0..num_prefixes]);
+
+    from_ptr[num_prefixes] = jump_rel32;
+
+    const jump_src = @intFromPtr(from_ptr) + num_prefixes + jump_rel32_size;
+    const jump_disp: i32 = @intCast(@as(i64, @intCast(to_addr)) - @as(i64, @intCast(jump_src)));
+    mem.writeInt(i32, from_ptr[num_prefixes + 1 ..][0..4], jump_disp, .little);
+
     const patch_end_index = num_prefixes + jump_rel32_size;
-    if (patch_end_index < request.size) {
-        @memset(request.bytes[patch_end_index..request.size], int3);
+    if (patch_end_index < total_size) {
+        @memset(from_ptr[patch_end_index..total_size], int3);
     }
 }
 
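Both helpers above encode the same near-jump arithmetic: the 32-bit displacement is relative to the end of the 5-byte `E9` instruction, and leftover bytes of the clobbered instruction are filled with `INT3` (`0xCC`). A self-contained check of that math (a sketch, not project code):

```zig
const std = @import("std");

const jump_rel32: u8 = 0xE9;
const jump_rel32_size = 5; // opcode + 4 displacement bytes
const int3: u8 = 0xCC;

/// Encode `E9 disp32` into `buf` as if `buf` lived at address `from`,
/// then pad the rest of the clobbered instruction with INT3.
fn encodeJump(buf: []u8, from: u64, to: u64) void {
    buf[0] = jump_rel32;
    const disp: i32 = @intCast(@as(i64, @intCast(to)) - @as(i64, @intCast(from + jump_rel32_size)));
    std.mem.writeInt(i32, buf[1..5], disp, .little);
    @memset(buf[jump_rel32_size..], int3);
}

test "jump displacement" {
    var buf: [7]u8 = undefined;
    // A jump at 0x1000 to 0x2000 must encode 0x2000 - 0x1005 = 0xFFB.
    encodeJump(&buf, 0x1000, 0x2000);
    try std.testing.expectEqual(@as(i32, 0xFFB), std.mem.readInt(i32, buf[1..5], .little));
    try std.testing.expectEqual(int3, buf[5]);
}
```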
@@ -780,7 +842,6 @@ fn touchedPageCount(range: Range) u32 {
 
 /// Ensure `range` is mapped R|W. Assumes `pages_made_writable` has enough free capacity.
 fn ensureRangeWritable(
-    patcher: *Patcher,
     range: Range,
     pages_made_writable: *std.AutoHashMapUnmanaged(u64, void),
 ) !void {
@@ -792,10 +853,10 @@ fn ensureRangeWritable(
         // If the page is already writable, skip it.
         if (pages_made_writable.get(page_addr)) |_| continue;
         // If we mapped it already we have to do mprotect, else mmap.
-        const gop = try patcher.allocated_pages.getOrPut(patcher.gpa, page_addr);
+        const gop = try allocated_pages.getOrPut(gpa, page_addr);
         if (gop.found_existing) {
             const ptr: [*]align(page_size) u8 = @ptrFromInt(page_addr);
-            try posix.mprotect(ptr[0..page_addr], protection);
+            try posix.mprotect(ptr[0..page_size], protection);
         } else {
             const addr = posix.mmap(
                 @ptrFromInt(page_addr),
@@ -810,8 +871,8 @@ fn ensureRangeWritable(
             // (executable, OS, dynamic loader,...) allocated something there.
             // We block this so we don't try this page again in the future,
             // saving a bunch of syscalls.
-            try patcher.address_allocator.block(
-                patcher.gpa,
+            try address_allocator.block(
+                gpa,
                 .{ .start = @intCast(page_addr), .end = @intCast(page_addr + page_size) },
                 page_size,
             );
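`touchedPageCount` (named in the hunk header above) has to widen a byte range to page granularity before counting, since even a small range can straddle a boundary. A sketch of the computation, assuming 4 KiB pages:

```zig
const std = @import("std");

const page_size = 4096; // assumption for this sketch

/// Number of distinct pages a byte range [start, end) touches.
fn touchedPageCount(start: u64, end: u64) u64 {
    const first = std.mem.alignBackward(u64, start, page_size);
    const last = std.mem.alignForward(u64, end, page_size);
    return (last - first) / page_size;
}

test "touchedPageCount" {
    // 16 bytes straddling a page boundary touch two pages.
    try std.testing.expectEqual(@as(u64, 2), touchedPageCount(4090, 4106));
    try std.testing.expectEqual(@as(u64, 1), touchedPageCount(0, 1));
}
```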
@@ -835,6 +896,7 @@ const PatchInstructionIterator = struct {
     num_prefixes: u8,
     pli: PatchLocationIterator,
     valid_range: Range,
+    allocated_count: u64,
 
     fn init(
         bytes: []const u8,
@@ -851,12 +913,26 @@ const PatchInstructionIterator = struct {
             .num_prefixes = 0,
             .pli = pli,
             .valid_range = valid_range,
+            .allocated_count = 0,
         };
     }
 
+    pub const Strategy = union(enum) {
+        /// Iterates through all possible ranges.
+        /// Useful for finding the optimal allocation (fewest prefixes).
+        exhaustive: void,
+        /// Limits the search to `count` allocation attempts per valid constraint range found by the
+        /// PatchLocationIterator.
+        ///
+        /// This acts as a heuristic to prevent worst-case performance (scanning every byte of a 2GB
+        /// gap) while still offering better density than a purely greedy approach. A count of 1 is
+        /// equivalent to a greedy strategy.
+        count: u64,
+    };
+
     fn next(
         pii: *PatchInstructionIterator,
-        address_allocator: *AddressAllocator,
+        strategy: Strategy,
     ) ?Range {
         const State = enum {
             allocation,
@@ -870,11 +946,23 @@ const PatchInstructionIterator = struct {
                 pii.valid_range,
             )) |allocated_range| {
                 assert(allocated_range.size() == pii.flicken_size);
+                pii.allocated_count += 1;
                 // Advancing the valid range, such that the next call to `findAllocation` won't
                 // find the same range again.
-                pii.valid_range.start = allocated_range.start + 1;
+                switch (strategy) {
+                    .exhaustive => pii.valid_range.start = allocated_range.start + 1,
+                    .count => |c| {
+                        if (pii.allocated_count >= c) {
+                            pii.valid_range.start = pii.valid_range.end;
+                            pii.allocated_count = 0;
+                        } else {
+                            pii.valid_range.start = allocated_range.start + 1;
+                        }
+                    },
+                }
                 return allocated_range;
             } else {
+                pii.allocated_count = 0;
                 continue :blk .range;
             }
         },
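The `Strategy` union above decouples the iterator from the address allocator's scan policy: the neighbor-eviction path uses `.{ .count = 16 }`, while exhaustive enumeration stays available for callers that want the densest fit. A toy model of the advance rule it implements (illustrative only):

```zig
const std = @import("std");

const Strategy = union(enum) {
    exhaustive: void,
    count: u64,
};

/// Mirrors the iterator's advance rule: exhaustive keeps scanning byte by
/// byte; a bounded count gives up on the current constraint range once
/// `seen` reaches the limit.
fn keepScanning(strategy: Strategy, seen: u64) bool {
    return switch (strategy) {
        .exhaustive => true,
        .count => |c| seen < c,
    };
}

test "strategy" {
    try std.testing.expect(keepScanning(.exhaustive, 1_000_000));
    try std.testing.expect(keepScanning(.{ .count = 16 }, 15));
    try std.testing.expect(!keepScanning(.{ .count = 1 }, 1)); // count = 1 is greedy
}
```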
@@ -6,10 +6,14 @@ const log = std.log.scoped(.disassembler);
 const assert = std.debug.assert;
 
 pub const InstructionIterator = struct {
+    /// Maximum number of warnings to print per iterator before suppressing.
+    pub var max_warnings: u64 = 3;
+
     decoder: zydis.ZydisDecoder,
     bytes: []const u8,
     instruction: zydis.ZydisDecodedInstruction,
     operands: [zydis.ZYDIS_MAX_OPERAND_COUNT]zydis.ZydisDecodedOperand,
+    warnings: usize = 0,
 
     pub fn init(bytes: []const u8) InstructionIterator {
         var decoder: zydis.ZydisDecoder = undefined;
@@ -38,27 +42,33 @@ pub const InstructionIterator = struct {
         var address: u64 = @intFromPtr(iterator.bytes.ptr);
 
         while (!zydis.ZYAN_SUCCESS(status)) {
-            // TODO: handle common padding bytes
-            switch (status) {
-                zydis.ZYDIS_STATUS_NO_MORE_DATA => {
-                    log.info("next: Got status: NO_MORE_DATA. Iterator completed.", .{});
-                    return null;
-                },
-                zydis.ZYDIS_STATUS_ILLEGAL_LOCK => log.warn("next: Got status: ILLEGAL_LOCK. " ++
-                    "Byte stepping, to find next valid instruction begin", .{}),
-                zydis.ZYDIS_STATUS_DECODING_ERROR => log.warn("next: Got status: DECODING_ERROR. " ++
-                    "Byte stepping, to find next valid instruction begin", .{}),
-                else => log.warn("next: Got unknown status: 0x{x}. Byte stepping, to find next " ++
-                    "valid instruction begin", .{status}),
+            if (status == zydis.ZYDIS_STATUS_NO_MORE_DATA) {
+                log.debug("next: Got status: NO_MORE_DATA. Iterator completed.", .{});
+                return null;
             }
 
+            // TODO: handle common padding bytes
             // TODO: add a flag to instead return an error
+            iterator.warnings += 1;
+            if (iterator.warnings <= max_warnings) {
+                const err_desc = switch (status) {
+                    zydis.ZYDIS_STATUS_ILLEGAL_LOCK => "ILLEGAL_LOCK",
+                    zydis.ZYDIS_STATUS_DECODING_ERROR => "DECODING_ERROR",
+                    zydis.ZYDIS_STATUS_INVALID_MAP => "INVALID_MAP",
+                    else => "UNKNOWN",
+                };
+                log.warn(
+                    "next: Got status: {s} (0x{x}). Byte stepping to find the next valid instruction start",
+                    .{ err_desc, status },
+                );
+                if (iterator.warnings == max_warnings) {
+                    log.warn("next: Suppressing further warnings for this disassembly.", .{});
+                }
+            }
 
             log.debug(
-                "next: instruction length: {}, address: 0x{x}, bytes: 0x{x}",
-                .{
-                    iterator.instruction.length,
-                    address,
-                    iterator.bytes[0..iterator.instruction.length],
-                },
+                "next: skipping byte at address: 0x{x}, byte: 0x{x}",
+                .{ address, iterator.bytes[0] },
             );
 
             iterator.bytes = iterator.bytes[1..];
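The reworked loop keeps the classic linear-scan recovery: whenever a byte fails to decode, warn (now rate-limited), skip one byte, and try again. A self-contained sketch of that byte-stepping loop, with a toy decoder standing in for Zydis:

```zig
const std = @import("std");

/// Toy decoder: treat 0x90 (NOP) as the only valid 1-byte instruction.
fn decode(bytes: []const u8) ?usize {
    if (bytes.len == 0) return null;
    return if (bytes[0] == 0x90) 1 else null;
}

/// Linear scan with byte-stepping recovery: skip bytes until something decodes.
fn nextInstruction(bytes: *[]const u8) ?usize {
    while (bytes.len > 0) {
        if (decode(bytes.*)) |len| return len;
        bytes.* = bytes.*[1..]; // byte step past the undecodable byte
    }
    return null;
}

test "byte stepping" {
    var stream: []const u8 = &.{ 0xFF, 0xFF, 0x90 };
    try std.testing.expectEqual(@as(?usize, 1), nextInstruction(&stream));
    try std.testing.expectEqual(@as(usize, 1), stream.len); // two bytes were skipped
}
```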
src/main.zig (242 changed lines)
@@ -32,8 +32,6 @@ const help =
 
 const UnfinishedReadError = error{UnfinishedRead};
 
-var patcher: Patcher = undefined;
-
 pub fn main() !void {
     // Parse arguments
     var arg_index: u64 = 1; // Skip own name
@@ -51,14 +49,21 @@ pub fn main() !void {
         return;
     }
 
-    // Initialize patcher
-    patcher = try Patcher.init(std.heap.page_allocator); // TODO: allocator
-    // Block the first 64k to avoid mmap_min_addr (EPERM) issues on Linux.
-    // TODO: read it from `/proc/sys/vm/mmap_min_addr` instead.
-    try patcher.address_allocator.block(patcher.gpa, .{ .start = 0, .end = 0x10000 }, 0);
+    const file = try lookupFile(mem.sliceTo(std.os.argv[arg_index], 0));
+
+    {
+        // Initialize patcher
+        try Patcher.init();
+
+        // Resolve the absolute path of the target executable. This is needed for the
+        // readlink("/proc/self/exe") interception. We use the file descriptor to get the
+        // authoritative path.
+        var self_buf: [128]u8 = undefined;
+        const fd_path = try std.fmt.bufPrint(&self_buf, "/proc/self/fd/{d}", .{file.handle});
+        Patcher.target_exec_path = try std.fs.readLinkAbsolute(fd_path, &Patcher.target_exec_path_buf);
+        log.debug("Resolved target executable path: {s}", .{Patcher.target_exec_path});
+    }
 
     // Map file into memory
-    const file = try lookupFile(mem.sliceTo(std.os.argv[arg_index], 0));
     var file_buffer: [128]u8 = undefined;
     var file_reader = file.reader(&file_buffer);
     log.info("--- Loading executable: {s} ---", .{std.os.argv[arg_index]});
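The new initialization block resolves the target's canonical path by readlink-ing `/proc/self/fd/<fd>` for the already-opened file, instead of trusting the argv spelling. A standalone sketch of the same trick (the helper and the example path are ours):

```zig
const std = @import("std");

/// Resolve the canonical absolute path of an open file via procfs.
fn resolveFdPath(fd: std.posix.fd_t, out: *[std.fs.max_path_bytes]u8) ![]u8 {
    var link_buf: [64]u8 = undefined;
    const link = try std.fmt.bufPrint(&link_buf, "/proc/self/fd/{d}", .{fd});
    return std.fs.readLinkAbsolute(link, out);
}

pub fn main() !void {
    const file = try std.fs.cwd().openFile("/etc/hostname", .{});
    defer file.close();
    var buf: [std.fs.max_path_bytes]u8 = undefined;
    // Prints the canonical path even if the file was opened through symlinks.
    std.debug.print("{s}\n", .{try resolveFdPath(file.handle, &buf)});
}
```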
@@ -66,6 +71,7 @@ pub fn main() !void {
     const base = try loadStaticElf(ehdr, &file_reader);
     const entry = ehdr.entry + if (ehdr.type == .DYN) base else 0;
     log.info("Executable loaded: base=0x{x}, entry=0x{x}", .{ base, entry });
+    try patchLoadedElf(base);
 
     // Check for dynamic linker
     var maybe_interp_base: ?usize = null;
@@ -97,13 +103,13 @@ pub fn main() !void {
             "Interpreter loaded: base=0x{x}, entry=0x{x}",
             .{ interp_base, maybe_interp_entry.? },
         );
+        try patchLoadedElf(interp_base);
         interp.close();
     }
 
     var i: usize = 0;
     const auxv = std.os.linux.elf_aux_maybe.?;
     while (auxv[i].a_type != elf.AT_NULL) : (i += 1) {
-        // TODO: look at other auxv types and check if we need to change them.
         auxv[i].a_un.a_val = switch (auxv[i].a_type) {
             elf.AT_PHDR => base + ehdr.phoff,
             elf.AT_PHENT => ehdr.phentsize,
@@ -111,6 +117,21 @@ pub fn main() !void {
             elf.AT_BASE => maybe_interp_base orelse auxv[i].a_un.a_val,
             elf.AT_ENTRY => entry,
             elf.AT_EXECFN => @intFromPtr(std.os.argv[arg_index]),
+            elf.AT_SYSINFO_EHDR => blk: {
+                log.info("Found vDSO at 0x{x}", .{auxv[i].a_un.a_val});
+                try patchLoadedElf(auxv[i].a_un.a_val);
+                break :blk auxv[i].a_un.a_val;
+            },
+            elf.AT_EXECFD => {
+                @panic("Got AT_EXECFD auxv value");
+                // TODO: handle AT_EXECFD, when needed
+                // The SysV ABI Specification says:
+                // > At process creation the system may pass control to an interpreter program. When
+                // > this happens, the system places either an entry of type AT_EXECFD or one of
+                // > type AT_PHDR in the auxiliary vector. The entry for type AT_EXECFD uses the
+                // > a_val member to contain a file descriptor open to read the application
+                // > program's object file.
+            },
             else => auxv[i].a_un.a_val,
         };
     }
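The hunk below moves patching out of `loadStaticElf` into a new `patchLoadedElf` pass over the program headers of an already-mapped ELF. Since `mprotect` and the patcher operate on whole pages, each executable segment is first widened to page boundaries; a minimal sketch of that rounding:

```zig
const std = @import("std");

/// Widen [vaddr, vaddr + memsz) to whole pages, as mprotect requires.
fn pageBounds(vaddr: usize, memsz: usize, page_size: usize) struct { start: usize, len: usize } {
    const start = std.mem.alignBackward(usize, vaddr, page_size);
    const end = std.mem.alignForward(usize, vaddr + memsz, page_size);
    return .{ .start = start, .len = end - start };
}

test "pageBounds" {
    const b = pageBounds(0x401234, 0x10, 4096);
    try std.testing.expectEqual(@as(usize, 0x401000), b.start);
    try std.testing.expectEqual(@as(usize, 4096), b.len);
}
```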
@@ -205,16 +226,45 @@ fn loadStaticElf(ehdr: elf.Header, file_reader: *std.fs.File.Reader) !usize {
             return UnfinishedReadError.UnfinishedRead;
 
         const protections = elfToMmapProt(phdr.p_flags);
-        if (protections & posix.PROT.EXEC > 0) {
-            log.info("Patching executable segment", .{});
-            try patcher.patchRegion(ptr);
-        }
         try posix.mprotect(ptr, protections);
     }
     log.debug("loadElf returning base: 0x{x}", .{@intFromPtr(base.ptr)});
     return @intFromPtr(base.ptr);
 }
 
+fn patchLoadedElf(base: usize) !void {
+    const ehdr = @as(*const elf.Ehdr, @ptrFromInt(base));
+    if (!mem.eql(u8, ehdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic;
+
+    const phoff = ehdr.e_phoff;
+    const phnum = ehdr.e_phnum;
+    const phentsize = ehdr.e_phentsize;
+
+    var i: usize = 0;
+    while (i < phnum) : (i += 1) {
+        const phdr_ptr = base + phoff + (i * phentsize);
+        const phdr = @as(*const elf.Phdr, @ptrFromInt(phdr_ptr));
+
+        if (phdr.p_type != elf.PT_LOAD) continue;
+        if ((phdr.p_flags & elf.PF_X) == 0) continue;
+
+        // Determine VMA
+        // For ET_EXEC, p_vaddr is absolute.
+        // For ET_DYN, p_vaddr is offset from base.
+        const vaddr = if (ehdr.e_type == elf.ET.DYN) base + phdr.p_vaddr else phdr.p_vaddr;
+        const memsz = phdr.p_memsz;
+
+        const page_start = mem.alignBackward(usize, vaddr, page_size);
+        const page_end = mem.alignForward(usize, vaddr + memsz, page_size);
+        const size = page_end - page_start;
+
+        const region = @as([*]align(page_size) u8, @ptrFromInt(page_start))[0..size];
+
+        try Patcher.patchRegion(region);
+        try posix.mprotect(region, elfToMmapProt(phdr.p_flags));
+    }
+}
+
 /// Converts ELF program header protection flags to mmap protection flags.
 fn elfToMmapProt(elf_prot: u64) u32 {
     var result: u32 = posix.PROT.NONE;
@@ -269,3 +319,169 @@ test {
     _ = @import("Range.zig");
     _ = @import("PatchLocationIterator.zig");
 }
+
+// TODO: make this be passed in from the build system
+const bin_path = "zig-out/bin/";
+fn getTestExePath(comptime name: []const u8) []const u8 {
+    return bin_path ++ "test_" ++ name;
+}
+const flicker_path = bin_path ++ "flicker";
+
+test "nolibc_nopie_exit" {
+    try testHelper(&.{ flicker_path, getTestExePath("nolibc_nopie_exit") }, "");
+}
+test "nolibc_pie_exit" {
+    try testHelper(&.{ flicker_path, getTestExePath("nolibc_pie_exit") }, "");
+}
+test "libc_pie_exit" {
+    try testHelper(&.{ flicker_path, getTestExePath("libc_pie_exit") }, "");
+}
+
+test "nolibc_nopie_helloWorld" {
+    try testHelper(&.{ flicker_path, getTestExePath("nolibc_nopie_helloWorld") }, "Hello World!\n");
+}
+test "nolibc_pie_helloWorld" {
+    try testHelper(&.{ flicker_path, getTestExePath("nolibc_pie_helloWorld") }, "Hello World!\n");
+}
+test "libc_pie_helloWorld" {
+    try testHelper(&.{ flicker_path, getTestExePath("libc_pie_helloWorld") }, "Hello World!\n");
+}
+
+test "nolibc_nopie_printArgs" {
+    try testPrintArgs("nolibc_nopie_printArgs");
+}
+test "nolibc_pie_printArgs" {
+    try testPrintArgs("nolibc_pie_printArgs");
+}
+test "libc_pie_printArgs" {
+    try testPrintArgs("libc_pie_printArgs");
+}
+
+test "nolibc_nopie_readlink" {
+    try testReadlink("nolibc_nopie_readlink");
+}
+test "nolibc_pie_readlink" {
+    try testReadlink("nolibc_pie_readlink");
+}
+test "libc_pie_readlink" {
+    try testReadlink("libc_pie_readlink");
+}
+
+test "nolibc_nopie_clone_raw" {
+    try testHelper(
+        &.{ flicker_path, getTestExePath("nolibc_nopie_clone_raw") },
+        "Child: Hello\nParent: Goodbye\n",
+    );
+}
+test "nolibc_pie_clone_raw" {
+    try testHelper(
+        &.{ flicker_path, getTestExePath("nolibc_pie_clone_raw") },
+        "Child: Hello\nParent: Goodbye\n",
+    );
+}
+
+test "nolibc_nopie_clone_no_new_stack" {
+    try testHelper(
+        &.{ flicker_path, getTestExePath("nolibc_nopie_clone_no_new_stack") },
+        "Child: Hello\nParent: Goodbye\n",
+    );
+}
+test "nolibc_pie_clone_no_new_stack" {
+    try testHelper(
+        &.{ flicker_path, getTestExePath("nolibc_pie_clone_no_new_stack") },
+        "Child: Hello\nParent: Goodbye\n",
+    );
+}
+
+test "nolibc_nopie_fork" {
+    try testHelper(
+        &.{ flicker_path, getTestExePath("nolibc_nopie_fork") },
+        "Child: I'm alive!\nParent: Child died.\n",
+    );
+}
+test "nolibc_pie_fork" {
+    try testHelper(
+        &.{ flicker_path, getTestExePath("nolibc_pie_fork") },
+        "Child: I'm alive!\nParent: Child died.\n",
+    );
+}
+test "libc_pie_fork" {
+    try testHelper(
+        &.{ flicker_path, getTestExePath("libc_pie_fork") },
+        "Child: I'm alive!\nParent: Child died.\n",
+    );
+}
+
+test "nolibc_nopie_signal_handler" {
+    try testHelper(
+        &.{ flicker_path, getTestExePath("nolibc_nopie_signal_handler") },
+        "In signal handler\nSignal handled successfully\n",
+    );
+}
+test "nolibc_pie_signal_handler" {
+    try testHelper(
+        &.{ flicker_path, getTestExePath("nolibc_pie_signal_handler") },
+        "In signal handler\nSignal handled successfully\n",
+    );
+}
+
+test "nolibc_nopie_vdso_clock" {
+    try testHelper(
+        &.{ flicker_path, getTestExePath("nolibc_nopie_vdso_clock") },
+        "Time gotten\n",
+    );
+}
+test "nolibc_pie_vdso_clock" {
+    try testHelper(
+        &.{ flicker_path, getTestExePath("nolibc_pie_vdso_clock") },
+        "Time gotten\n",
+    );
+}
+test "libc_pie_vdso_clock" {
+    try testHelper(
+        &.{ flicker_path, getTestExePath("libc_pie_vdso_clock") },
+        "Time gotten\n",
+    );
+}
+
+test "echo" {
+    try testHelper(&.{ "echo", "Hello", "There" }, "Hello There\n");
+}
+
+fn testPrintArgs(comptime name: []const u8) !void {
+    const exe_path = getTestExePath(name);
+    const loader_argv: []const []const u8 = &.{ flicker_path, exe_path, "foo", "bar", "baz hi" };
+    const target_argv = loader_argv[1..];
+    const expected_stdout = try mem.join(testing.allocator, " ", target_argv);
+    defer testing.allocator.free(expected_stdout);
+    try testHelper(loader_argv, expected_stdout);
+}
+
+fn testReadlink(comptime name: []const u8) !void {
+    const exe_path = getTestExePath(name);
+    const loader_argv: []const []const u8 = &.{ flicker_path, exe_path };
+    const cwd_path = try std.fs.cwd().realpathAlloc(testing.allocator, ".");
+    defer testing.allocator.free(cwd_path);
+    const expected_path = try std.fs.path.join(testing.allocator, &.{ cwd_path, exe_path });
+    defer testing.allocator.free(expected_path);
+    try testHelper(loader_argv, expected_path);
+}
+
+fn testHelper(
+    argv: []const []const u8,
+    expected_stdout: []const u8,
+) !void {
+    const result = try std.process.Child.run(.{
+        .allocator = testing.allocator,
+        .argv = argv,
+    });
+    defer testing.allocator.free(result.stdout);
+    defer testing.allocator.free(result.stderr);
+    errdefer std.log.err("term: {}", .{result.term});
+    errdefer std.log.err("stdout: {s}", .{result.stdout});
+    errdefer std.log.err("stderr: {s}", .{result.stderr});
+
+    try testing.expectEqualStrings(expected_stdout, result.stdout);
+    try testing.expect(result.term == .Exited);
+    try testing.expectEqual(0, result.term.Exited);
+}
src/syscalls.zig (new file, 421 lines)
const std = @import("std");
const linux = std.os.linux;
const posix = std.posix;
const Patcher = @import("Patcher.zig");
const assert = std.debug.assert;

const page_size = std.heap.pageSize();

const log = std.log.scoped(.syscalls);

/// Represents the stack layout pushed by `syscallEntry` before calling the handler.
pub const SavedContext = extern struct {
    padding: u64, // Result of `sub $8, %rsp` for alignment
    rflags: u64,
    rax: u64,
    rbx: u64,
    rcx: u64,
    rdx: u64,
    rsi: u64,
    rdi: u64,
    rbp: u64,
    r8: u64,
    r9: u64,
    r10: u64,
    r11: u64,
    r12: u64,
    r13: u64,
    r14: u64,
    r15: u64,
    /// Pushed automatically by the `call r11` instruction when entering `syscallEntry`.
    /// Crucially we copy this onto the child stack (if needed) because then we can just return at
    /// the end of the child handler inside `handleClone`.
    return_address: u64,
};

/// The main entry point for intercepted syscalls.
///
/// This function is called from `syscallEntry` with a pointer to the saved context.
/// It dispatches specific syscalls to handlers or executes them directly.
export fn syscall_handler(ctx: *SavedContext) callconv(.c) void {
    // TODO: Handle signals (masking) to prevent re-entrancy issues if we touch global state.

    const sys: linux.SYS = @enumFromInt(ctx.rax);

    switch (sys) {
        .readlink => {
            // readlink(const char *path, char *buf, size_t bufsiz)
            const path_ptr = @as([*:0]const u8, @ptrFromInt(ctx.rdi));
            // TODO: handle relative paths with cwd
            if (isProcSelfExe(path_ptr)) {
                handleReadlink(ctx.rsi, ctx.rdx, ctx);
                return;
            }
        },
        .readlinkat => {
            // readlinkat(int dirfd, const char *pathname, char *buf, size_t bufsiz)
            // We only intercept if pathname is absolute "/proc/self/exe".
            // TODO: handle relative paths with dirfd pointing to /proc/self
            // TODO: handle relative paths with dirfd == AT_FDCWD (like readlink)
            // TODO: handle empty pathname
            const path_ptr = @as([*:0]const u8, @ptrFromInt(ctx.rsi));
            if (isProcSelfExe(path_ptr)) {
                handleReadlink(ctx.rdx, ctx.r10, ctx);
                return;
            }
        },
        .clone, .clone3 => {
            handleClone(ctx);
            return;
        },
        .rt_sigreturn => {
            // The kernel expects the stack pointer to point to the `ucontext` structure. But in our
            // case `syscallEntry` pushed the `SavedContext` onto the stack.
            // So we just need to reset the stack pointer to what it was before `syscallEntry` was
            // called. The `SavedContext` includes the return address pushed by the trampoline, so
            // the original stack pointer is exactly at the end of `SavedContext`.
            const rsp_orig = @intFromPtr(ctx) + @sizeOf(SavedContext);

            asm volatile (
                \\ mov %[rsp], %%rsp
                \\ syscall
                :
                : [rsp] "r" (rsp_orig),
                  [number] "{rax}" (ctx.rax),
                : .{ .memory = true });
            unreachable;
        },
        .mmap => {
            // mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)

            const prot: u32 = @intCast(ctx.rdx);
            // Execute the syscall first to get the address (rax)
            ctx.rax = executeSyscall(ctx);
            const addr = ctx.rax;
            var len = ctx.rsi;
            const flags: linux.MAP = @bitCast(@as(u32, @intCast(ctx.r10)));
            const fd: linux.fd_t = @bitCast(@as(u32, @truncate(ctx.r8)));
            const offset = ctx.r9;

            const is_error = @as(i64, @bitCast(ctx.rax)) < 0;
            if (is_error) return;
            if ((prot & posix.PROT.EXEC) == 0) return;

            // If file-backed (not anonymous), clamp len to file size to avoid SIGBUS
            if (!flags.ANONYMOUS) {
                var stat: linux.Stat = undefined;
                if (0 == linux.fstat(fd, &stat) and linux.S.ISREG(stat.mode)) {
                    const file_size: u64 = @intCast(stat.size);
                    len = if (offset >= file_size) 0 else @min(len, file_size - offset);
                }
            }

            if (len == 0) return;
            // mmap addresses are always page aligned
            const ptr = @as([*]align(page_size) u8, @ptrFromInt(addr));
            // Check if we can patch it
            Patcher.patchRegion(ptr[0..len]) catch |err| {
                std.log.warn("JIT Patching failed: {}", .{err});
            };

            // patchRegion leaves it as RW. We need to restore to requested prot.
            _ = linux.syscall3(.mprotect, addr, len, prot);
            return;
        },
        .mprotect => {
            // mprotect(void *addr, size_t len, int prot)
            // TODO: cleanup trampolines, when removing X
            const prot: u32 = @intCast(ctx.rdx);
            if ((prot & posix.PROT.EXEC) != 0) {
                const addr = ctx.rdi;
                const len = ctx.rsi;
                // mprotect requires addr to be page aligned.
                if (len > 0 and std.mem.isAligned(addr, page_size)) {
                    const ptr = @as([*]align(page_size) u8, @ptrFromInt(addr));
                    Patcher.patchRegion(ptr[0..len]) catch |err| {
                        std.log.warn("mprotect Patching failed: {}", .{err});
                    };
                    // patchRegion leaves it R|W.
                }
            }
            ctx.rax = executeSyscall(ctx);
            return;
        },
        .execve, .execveat => {
            // TODO: option to persist across new processes
            ctx.rax = executeSyscall(ctx);
            return;
        },
        .prctl, .arch_prctl, .set_tid_address => {
            // TODO: what do we need to handle from these?
            // process name
            // fs base(gs?)
            // thread id pointers
            ctx.rax = executeSyscall(ctx);
            return;
        },
        .munmap, .mremap => {
            // TODO: cleanup
            ctx.rax = executeSyscall(ctx);
            return;
        },
        else => {
            // Write result back to the saved RAX so it is restored to the application.
            ctx.rax = executeSyscall(ctx);
            return;
        },
    }
    unreachable;
}

inline fn executeSyscall(ctx: *SavedContext) u64 {
    return linux.syscall6(
        @enumFromInt(ctx.rax),
        ctx.rdi,
        ctx.rsi,
        ctx.rdx,
        ctx.r10,
        ctx.r8,
        ctx.r9,
    );
}

/// Assembly trampoline that saves state and calls the Zig handler.
/// This is the target of the `call r11` instruction in the syscall flicken.
pub fn syscallEntry() callconv(.naked) void {
    asm volatile (
        \\ # Save all GPRs that must be preserved or are arguments
        \\ push %r15
        \\ push %r14
        \\ push %r13
        \\ push %r12
        \\ push %r11
        \\ push %r10
        \\ push %r9
        \\ push %r8
        \\ push %rbp
        \\ push %rdi
        \\ push %rsi
        \\ push %rdx
        \\ push %rcx
        \\ push %rbx
        \\ push %rax
        \\ pushfq # Save Flags
        \\
        \\ # Align stack
        \\ # Current pushes: 16 * 8 = 128 bytes.
        \\ # Red zone sub: 128 bytes.
        \\ # Trampoline call pushed ret addr: 8 bytes.
        \\ # Total misalign: 8 bytes. We need 16-byte alignment for 'call'.
        \\ sub $8, %rsp
        \\
        \\ # Pass pointer to ctx (current rsp) as 1st argument (rdi) and call handler.
        \\ mov %rsp, %rdi
        \\ call syscall_handler
        \\
        \\ # Restore State
        \\ add $8, %rsp
        \\ popfq
        \\ pop %rax
        \\ pop %rbx
        \\ pop %rcx
        \\ pop %rdx
        \\ pop %rsi
        \\ pop %rdi
        \\ pop %rbp
        \\ pop %r8
        \\ pop %r9
        \\ pop %r10
        \\ pop %r11
        \\ pop %r12
        \\ pop %r13
        \\ pop %r14
        \\ pop %r15
        \\
        \\ ret
        :
        // TODO: can we somehow use %[handler] in the assembly instead?
        // Right now this is just here such that lto does not discard the `syscall_handler` function
        : [handler] "i" (syscall_handler),
    );
}

fn isProcSelfExe(path: [*:0]const u8) bool {
    const needle = "/proc/self/exe";
    var i: usize = 0;
    while (i < needle.len) : (i += 1) {
        if (path[i] != needle[i]) return false;
    }
    return path[i] == 0;
}

fn handleReadlink(buf_addr: u64, buf_size: u64, ctx: *SavedContext) void {
    const target = Patcher.target_exec_path;
    const len = @min(target.len, buf_size);
    const dest = @as([*]u8, @ptrFromInt(buf_addr));
    @memcpy(dest[0..len], target[0..len]);

    // readlink does not null-terminate if the buffer is full, it just returns the length.
    ctx.rax = len;
}

const CloneArgs = extern struct {
    flags: u64,
    pidfd: u64,
    child_tid: u64,
    parent_tid: u64,
    exit_signal: u64,
    stack: u64,
    stack_size: u64,
    tls: u64,
    set_tid: u64,
    set_tid_size: u64,
    cgroup: u64,
};

/// Handles `clone` and `clone3` syscalls, which are used for thread and process creation.
///
/// **The Stack Switching Problem:**
/// When a thread is created, the caller provides a pointer to a new, empty stack (`child_stack`).
/// 1. The parent enters the kernel via `syscallEntry` (the trampoline).
/// 2. `syscallEntry` saves all registers and the return address onto the **parent's stack**.
/// 3. The kernel creates the child thread and switches its stack pointer (`RSP`) to `child_stack`.
/// 4. The child wakes up. If we simply let it return to `syscallEntry`, it would try to `pop`
///    registers from its `child_stack`. But that stack is empty! It would pop garbage and crash.
///
/// **The Solution:**
/// We manually replicate the parent's saved state onto the child's new stack *before* the syscall.
///
/// For that the following steps occur:
/// 1. We decode the arguments to determine if this is `clone` or `clone3` and locate the target
///    `child_stack`.
/// 2. If `child_stack` is 0 (e.g., `fork`), no stack switching occurs. The function simply executes
///    the syscall and handles the return value normally.
/// 3. Else we need to stack switch:
///    a. We calculate where `SavedContext` (registers + return addr) would sit on the top of the
///       *new* `child_stack`. We then `memcpy` the current `ctx` (from the parent's stack) to this
///       new location.
///    b. We set `rax = 0` in the *copied* context, so the child sees itself as the child.
///    c. We modify the syscall argument (the stack pointer passed to the kernel) to point to the
///       *start* of our copied context on the new stack, rather than the raw top. This ensures that
///       when the child wakes up, its `RSP` points exactly at the saved registers we just copied.
///    d. We execute the raw syscall inline.
///       - **Parent:** Returns from the syscall, updates `ctx.rax` with the Child PID, and returns
///         to the trampoline normally.
///       - **Child:** Wakes up on the new stack. It executes `postCloneChild`, restores all
///         registers from the *new* stack (popping the values we copied in step 3a), and finally
///         executes `ret`. This `ret` pops the `return_address` we copied, jumping directly back
///         to the user code, effectively bypassing the `syscallEntry` epilogue.
fn handleClone(ctx: *SavedContext) void {
    const sys: linux.syscalls.X64 = @enumFromInt(ctx.rax);
    var child_stack: u64 = 0;

    // Determine stack
    if (sys == .clone) {
        // clone(flags, stack, ...)
        child_stack = ctx.rsi;
    } else {
        // clone3(struct clone_args *args, size_t size)
        const args = @as(*const CloneArgs, @ptrFromInt(ctx.rdi));
        if (args.stack != 0) {
            child_stack = args.stack + args.stack_size;
        }
    }

    // If no new stack, just execute (like fork)
    if (child_stack == 0) {
        ctx.rax = executeSyscall(ctx);
        if (ctx.rax == 0) {
            postCloneChild(ctx);
        } else {
            assert(ctx.rax > 0); // TODO: error handling
            postCloneParent(ctx);
        }
        return;
    }

    // Prepare child stack by copying SavedContext.
    // TODO: test alignment
    child_stack &= ~@as(u64, 0xf); // align down to 16 bytes
    const child_ctx_addr = child_stack - @sizeOf(SavedContext);
    const child_ctx = @as(*SavedContext, @ptrFromInt(child_ctx_addr));
    child_ctx.* = ctx.*;
    child_ctx.rax = 0;

    // Prepare arguments for syscall
    var new_rsi = ctx.rsi;
    var new_rdi = ctx.rdi;
    var clone3_args_copy: CloneArgs = undefined;

    if (sys == .clone) {
        new_rsi = child_ctx_addr;
    } else {
        const args = @as(*const CloneArgs, @ptrFromInt(ctx.rdi));
        clone3_args_copy = args.*;
        clone3_args_copy.stack = child_ctx_addr;
        clone3_args_copy.stack_size = 0; // TODO:
        new_rdi = @intFromPtr(&clone3_args_copy);
    }

    // Execute clone/clone3 via inline assembly
    // We handle the child path entirely in assembly to avoid stack frame issues.
    const ret = asm volatile (
        \\ syscall
        \\ test %rax, %rax
        \\ jnz 1f
        \\
        \\ # --- CHILD PATH ---
        \\ # We are now on the new stack and %rsp points to child_ctx_addr
        \\
        \\ # Run Child Hook
        \\ # Argument 1 (rdi): Pointer to SavedContext (which is current rsp)
        \\ mov %rsp, %rdi
        \\ call postCloneChild
        \\
        \\ # Restore Context
        \\ add $8, %rsp # Skip padding
        \\ popfq
        \\ pop %rax
        \\ pop %rbx
        \\ pop %rcx
        \\ pop %rdx
        \\ pop %rsi
        \\ pop %rdi
        \\ pop %rbp
        \\ pop %r8
        \\ pop %r9
        \\ pop %r10
        \\ pop %r11
        \\ pop %r12
        \\ pop %r13
        \\ pop %r14
        \\ pop %r15
        \\
        \\ # %rsp now points to `return_address` so we can just return.
        \\ ret
        \\
        \\ 1:
        \\ # --- PARENT PATH ---
        : [ret] "={rax}" (-> usize),
        : [number] "{rax}" (ctx.rax),
          [arg1] "{rdi}" (new_rdi),
          [arg2] "{rsi}" (new_rsi),
          [arg3] "{rdx}" (ctx.rdx),
          [arg4] "{r10}" (ctx.r10),
          [arg5] "{r8}" (ctx.r8),
          [arg6] "{r9}" (ctx.r9),
          [child_hook] "i" (postCloneChild),
        : .{ .rcx = true, .r11 = true, .memory = true });

    // Parent continues here
    ctx.rax = ret;
    postCloneParent(ctx);
}

export fn postCloneChild(ctx: *SavedContext) callconv(.c) void {
    _ = ctx;
}

fn postCloneParent(ctx: *SavedContext) void {
    _ = ctx;
}
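`SavedContext` only works if it mirrors the trampoline's push sequence exactly: 15 GPR pushes plus `pushfq` (128 bytes), the 8-byte alignment slot, and the return address pushed by `call r11` total 144 bytes, with `padding` at the lowest address. A hedged sketch of a comptime guard one could keep beside such a struct (the struct is copied from above; the asserts are ours):

```zig
const std = @import("std");

const SavedContext = extern struct {
    padding: u64, // lowest address: the `sub $8, %rsp` slot
    rflags: u64,
    rax: u64, rbx: u64, rcx: u64, rdx: u64, rsi: u64, rdi: u64, rbp: u64,
    r8: u64, r9: u64, r10: u64, r11: u64, r12: u64, r13: u64, r14: u64, r15: u64,
    return_address: u64, // highest address: pushed first by `call`
};

comptime {
    // 16 pushes + alignment slot + return address = 18 slots of 8 bytes.
    std.debug.assert(@sizeOf(SavedContext) == 18 * 8);
    // The first field must sit at the final stack pointer (offset 0).
    std.debug.assert(@offsetOf(SavedContext, "padding") == 0);
    std.debug.assert(@offsetOf(SavedContext, "return_address") == 17 * 8);
}
```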
src/test/clone_no_new_stack.zig (new file, 58 lines)
const std = @import("std");
const linux = std.os.linux;
const clone = linux.CLONE;

pub fn main() !void {
    // SIGCHLD: Send signal to parent on exit (required for waitpid)
    const flags = clone.FILES | clone.FS | linux.SIG.CHLD;

    const msg = "Child: Hello\n";
    const msg_len = msg.len;

    // We use inline assembly to perform the clone syscall and handle the child path completely to
    // avoid the compiler generating code that relies on the parent's stack frame in the child
    // process (where the stack is empty).
    const ret = asm volatile (
        \\ syscall
        \\ test %%rax, %%rax
        \\ jnz 1f
        \\
        \\ # Child Path
        \\ # Write to stdout
        \\ mov $1, %%rdi # fd = 1 (stdout)
        \\ mov %[msg], %%rsi # buffer
        \\ mov %[len], %%rdx # length
        \\ mov $1, %%rax # SYS_write
        \\ syscall
        \\
        \\ # Exit
        \\ mov $0, %%rdi # code = 0
        \\ mov $60, %%rax # SYS_exit
        \\ syscall
        \\
        \\ 1:
        \\ # Parent Path continues
        : [ret] "={rax}" (-> usize),
        : [number] "{rax}" (@intFromEnum(linux.syscalls.X64.clone)),
          [arg1] "{rdi}" (flags),
          [arg2] "{rsi}" (0),
          [arg3] "{rdx}" (0),
          [arg4] "{r10}" (0),
          [arg5] "{r8}" (0),
          [msg] "r" (msg.ptr),
          [len] "r" (msg_len),
        : .{ .rcx = true, .r11 = true, .memory = true });

    // Parent Process
    const child_pid: i64 = @bitCast(ret); // bitCast, not intCast: a failed clone encodes -errno in the u64
    if (child_pid < 0) {
        _ = linux.syscall3(.write, 1, @intFromPtr("Parent: Clone failed\n"), 21);
        return;
    }

    var status: u32 = 0;
    // wait4 for the child to exit
    _ = linux.syscall4(.wait4, @as(usize, @intCast(child_pid)), @intFromPtr(&status), 0, 0);

    _ = linux.syscall3(.write, 1, @intFromPtr("Parent: Goodbye\n"), 16);
}
src/test/clone_raw.zig (new file, 65 lines)
const std = @import("std");
const linux = std.os.linux;
const clone = linux.CLONE;

var child_stack: [4096 * 4]u8 align(16) = undefined;
pub fn main() !void {
    // SIGCHLD: Send signal to parent on exit (required for waitpid)
    const flags = clone.VM | clone.FILES | clone.FS | clone.SIGHAND | linux.SIG.CHLD;

    // Stack grows downwards. Point to the end.
    const stack_top = @intFromPtr(&child_stack) + child_stack.len;

    const msg = "Child: Hello\n";
    const msg_len = msg.len;

    // We use inline assembly to perform the clone syscall and handle the child path completely to
    // avoid the compiler generating code that relies on the parent's stack frame in the child
    // process (where the stack is empty).
    const ret = asm volatile (
        \\ syscall
        \\ test %%rax, %%rax
        \\ jnz 1f
        \\
        \\ # Child Path
        \\ # Write to stdout
        \\ mov $1, %%rdi # fd = 1 (stdout)
        \\ mov %[msg], %%rsi # buffer
        \\ mov %[len], %%rdx # length
        \\ mov $1, %%rax # SYS_write
        \\ syscall
        \\
        \\ # Exit
        \\ mov $0, %%rdi # code = 0
        \\ mov $60, %%rax # SYS_exit
        \\ syscall
        \\
        \\ 1:
        \\ # Parent Path continues
        : [ret] "={rax}" (-> usize),
        : [number] "{rax}" (@intFromEnum(linux.syscalls.X64.clone)),
          [arg1] "{rdi}" (flags),
          [arg2] "{rsi}" (stack_top),
          [arg3] "{rdx}" (0),
          [arg4] "{r10}" (0),
          [arg5] "{r8}" (0),
          [msg] "r" (msg.ptr),
          [len] "r" (msg_len),
        : .{ .rcx = true, .r11 = true, .memory = true });

    // Parent Process
    const child_pid: i64 = @bitCast(ret);
    if (child_pid < 0) {
        std.debug.print(
            "Parent: Clone failed with: {}\n",
            .{@as(linux.E, @enumFromInt(-child_pid))},
        );
        return;
    }

    var status: u32 = 0;
    // wait4 for the child to exit
    _ = linux.syscall4(.wait4, @as(usize, @intCast(child_pid)), @intFromPtr(&status), 0, 0);

    _ = linux.syscall3(.write, 1, @intFromPtr("Parent: Goodbye\n"), 16);
}
src/test/exit.zig (new file, 3 lines)
pub fn main() void {
    return;
}
src/test/fork.zig (new file, 23 lines)
const std = @import("std");
const linux = std.os.linux;

pub fn main() !void {
    const ret = linux.syscall0(.fork);
    const pid: i32 = @intCast(ret);

    if (pid == 0) {
        // --- Child ---
        const msg = "Child: I'm alive!\n";
        _ = linux.syscall3(.write, 1, @intFromPtr(msg.ptr), msg.len);
        linux.exit(0);
    } else if (pid > 0) {
        // --- Parent ---
        var status: u32 = 0;
        _ = linux.syscall4(.wait4, @intCast(pid), @intFromPtr(&status), 0, 0);
        const msg = "Parent: Child died.\n";
        _ = linux.syscall3(.write, 1, @intFromPtr(msg.ptr), msg.len);
    } else {
        const msg = "Fork failed!\n";
        _ = linux.syscall3(.write, 1, @intFromPtr(msg.ptr), msg.len);
    }
}
src/test/helloWorld.zig (new file, 9 lines)
const std = @import("std");

pub fn main() !void {
    var stdout_buffer: [64]u8 = undefined;
    var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
    const stdout = &stdout_writer.interface;
    try stdout.print("Hello World!\n", .{});
    try stdout.flush();
}
src/test/printArgs.zig (new file, 17 lines)
const std = @import("std");

pub fn main() !void {
    var stdout_buffer: [64]u8 = undefined;
    var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
    const stdout = &stdout_writer.interface;

    // Printing the first argument separately avoids the trailing space a naive join would produce.
    var args = std.process.args();
    if (args.next()) |arg| {
        try stdout.print("{s}", .{arg});
    }
    while (args.next()) |arg| {
        try stdout.print(" {s}", .{arg});
    }
    try stdout.flush();
}
src/test/readlink.zig (new file, 13 lines)
const std = @import("std");

pub fn main() !void {
    var buf: [std.fs.max_path_bytes]u8 = undefined;
    // We use /proc/self/exe to test if the loader interception works.
    // const path = try std.posix.readlink("/proc/self/exe", &buf);
    const size = std.posix.system.readlink("/proc/self/exe", &buf, buf.len);
    var stdout_buffer: [64]u8 = undefined;
    var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
    const stdout = &stdout_writer.interface;
    try stdout.print("{s}", .{buf[0..@intCast(size)]});
    try stdout.flush();
}
src/test/signal_handler.zig (new file, 35 lines)
const std = @import("std");
const linux = std.os.linux;

var handled = false;

fn handler(sig: i32, _: *const linux.siginfo_t, _: ?*anyopaque) callconv(.c) void {
    if (sig == linux.SIG.USR1) {
        handled = true;
        const msg = "In signal handler\n";
        _ = linux.syscall3(.write, 1, @intFromPtr(msg.ptr), msg.len);
    }
}

pub fn main() !void {
    const act = linux.Sigaction{
        .handler = .{ .sigaction = handler },
        .mask = std.mem.zeroes(linux.sigset_t),
        .flags = linux.SA.SIGINFO | linux.SA.RESTART,
    };

    if (linux.sigaction(linux.SIG.USR1, &act, null) != 0) {
        return error.SigactionFailed;
    }

    _ = linux.kill(linux.getpid(), linux.SIG.USR1);

    if (handled) {
        const msg = "Signal handled successfully\n";
        _ = linux.syscall3(.write, 1, @intFromPtr(msg.ptr), msg.len);
    } else {
        const msg = "Signal NOT handled\n";
        _ = linux.syscall3(.write, 1, @intFromPtr(msg.ptr), msg.len);
        std.process.exit(1);
    }
}
src/test/vdso_clock.zig (new file, 8 lines)
const std = @import("std");

pub fn main() !void {
    _ = try std.posix.clock_gettime(std.posix.CLOCK.MONOTONIC);

    const msg = "Time gotten\n";
    _ = try std.posix.write(1, msg);
}