Compare commits
9 Commits
ef6cd851f7
...
403301a06e
| Author | SHA1 | Date | |
|---|---|---|---|
| 403301a06e | |||
| 1b109ab5aa | |||
| d0c227faa8 | |||
| f4064aff89 | |||
| d3271963a8 | |||
| b73ac766bf | |||
| 3211a7705b | |||
| da69c60ffd | |||
| 9ac107b398 |
49
build.zig
49
build.zig
@@ -33,6 +33,7 @@ pub fn build(b: *std.Build) !void {
|
|||||||
const exe = b.addExecutable(.{
|
const exe = b.addExecutable(.{
|
||||||
.name = "flicker",
|
.name = "flicker",
|
||||||
.root_module = mod,
|
.root_module = mod,
|
||||||
|
.use_llvm = true,
|
||||||
});
|
});
|
||||||
exe.pie = true;
|
exe.pie = true;
|
||||||
exe.lto = if (optimize == .Debug) .none else .full;
|
exe.lto = if (optimize == .Debug) .none else .full;
|
||||||
@@ -46,8 +47,56 @@ pub fn build(b: *std.Build) !void {
|
|||||||
run_cmd.addArgs(args);
|
run_cmd.addArgs(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try compileTestApplications(b, target, optimize, false, false);
|
||||||
|
try compileTestApplications(b, target, optimize, false, true);
|
||||||
|
try compileTestApplications(b, target, optimize, true, true);
|
||||||
|
|
||||||
const exe_tests = b.addTest(.{ .root_module = mod });
|
const exe_tests = b.addTest(.{ .root_module = mod });
|
||||||
const run_exe_tests = b.addRunArtifact(exe_tests);
|
const run_exe_tests = b.addRunArtifact(exe_tests);
|
||||||
const test_step = b.step("test", "Run tests");
|
const test_step = b.step("test", "Run tests");
|
||||||
|
test_step.dependOn(b.getInstallStep());
|
||||||
test_step.dependOn(&run_exe_tests.step);
|
test_step.dependOn(&run_exe_tests.step);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds and installs one executable per `.zig` file found directly in `src/test/`.
///
/// Each artifact is named `test_<libc|nolibc>_<pie|nopie>_<file name without .zig>`, so calling
/// this once per flag combination yields distinct binaries from the same source file.
///
/// - `link_libc`: link libc and produce a dynamically linked executable; otherwise static.
/// - `pie`: compile position-independent code and mark the executable as PIE.
///
/// Returns an error if the test directory cannot be opened or iterated, or if allocating the
/// artifact name fails.
pub fn compileTestApplications(
    b: *std.Build,
    target: std.Build.ResolvedTarget,
    optimize: std.builtin.OptimizeMode,
    comptime link_libc: bool,
    comptime pie: bool,
) !void {
    const test_path = "src/test/";
    const source_suffix = ".zig";
    const test_prefix = "test_" ++
        (if (link_libc) "libc_" else "nolibc_") ++
        (if (pie) "pie_" else "nopie_");

    // Open the directory relative to the build root rather than the process cwd, so the
    // directory scanned here is the same one `b.path` resolves the source files against.
    var test_dir = try b.build_root.handle.openDir(test_path, .{ .iterate = true });
    defer test_dir.close();

    var iterator = test_dir.iterate();
    while (try iterator.next()) |entry| {
        if (entry.kind != .file) continue;
        if (!std.mem.endsWith(u8, entry.name, source_suffix)) continue;

        // `entry.name` is only valid until the next `iterator.next()`, so everything derived
        // from it is copied into build-owned memory (`concat`, `pathJoin`).
        const name = try std.mem.concat(b.allocator, u8, &.{
            test_prefix, entry.name[0 .. entry.name.len - source_suffix.len],
        });
        const test_executable = b.addExecutable(.{
            .name = name,
            .root_module = b.createModule(.{
                .root_source_file = b.path(b.pathJoin(&.{ test_path, entry.name })),
                .optimize = optimize,
                .target = target,
                .link_libc = link_libc,
                .link_libcpp = false,
                .pic = pie,
            }),
            .linkage = if (link_libc) .dynamic else .static,
            .use_llvm = true,
            .use_lld = true,
        });
        test_executable.pie = pie;
        b.installArtifact(test_executable);
    }
}
|
||||||
|
|||||||
120
docs/use_cases.md
Normal file
120
docs/use_cases.md
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
# Use Cases for Flicker
|
||||||
|
|
||||||
|
Flicker's architecture, load-time binary rewriting without control-flow recovery, uniquely positions
|
||||||
|
it to handle scenarios where source code is unavailable (legacy/commercial software) and performance
|
||||||
|
is critical. Unlike Dynamic Binary Translation (DBT) tools like Valgrind or QEMU, which incur high
|
||||||
|
overhead due to JIT compilation/emulation, Flicker patches code to run natively.
|
||||||
|
|
||||||
|
Below are possible use cases categorized by domain.
|
||||||
|
|
||||||
|
## High Performance Computing (HPC) & Optimization
|
||||||
|
|
||||||
|
### Approximate Computing and Mixed-Precision Analysis
|
||||||
|
|
||||||
|
Scientific simulations often default to double precision (64-bit) for safety, even when single
|
||||||
|
(32-bit) or half (16-bit) precision would yield accurate results with significantly higher
|
||||||
|
performance. But rewriting massive legacy Fortran/C++ codebases to test precision sensitivity is
|
||||||
|
impractical.
|
||||||
|
|
||||||
|
Flicker could instrument floating-point instructions to perform "Shadow Execution," running
|
||||||
|
operations in both double and single precision to log divergence. Alternatively, it can mask lower
|
||||||
|
bits of registers to simulate low-precision hardware.
|
||||||
|
|
||||||
|
Unlike compiler-based approaches that change the whole binary, Flicker can apply these patches
|
||||||
|
selectively to specific "hot" functions at load-time, preserving accuracy in sensitive setup/solver
|
||||||
|
phases while optimizing the bulk computation.
|
||||||
|
|
||||||
|
### Profiling Memory Access Patterns (False Sharing)
|
||||||
|
|
||||||
|
In multi-threaded HPC applications, performance often degrades due to "False Sharing", where multiple
|
||||||
|
threads modify independent variables that happen to reside on the same CPU cache line, causing cache
|
||||||
|
thrashing.
|
||||||
|
|
||||||
|
Sampling profilers (like `perf`) provide statistical approximations but often miss precise
|
||||||
|
interaction timings. Source-level instrumentation disrupts compiler optimizations.
|
||||||
|
|
||||||
|
Flicker could instrument memory store instructions (`MOV` etc.) to record effective addresses. By
|
||||||
|
aggregating this data, it can generate heatmaps of cache line access density, precisely identifying
|
||||||
|
false sharing or inefficient strided access patterns in optimized binaries.
|
||||||
|
|
||||||
|
### Low-Overhead I/O Tracing
|
||||||
|
|
||||||
|
Parallel MPI jobs often inadvertently stress parallel filesystems (Lustre, GPFS) by performing
|
||||||
|
excessive small writes or metadata operations.
|
||||||
|
|
||||||
|
Tools like `strace` force a context switch for every syscall, slowing down the application so much
|
||||||
|
that the race conditions or I/O storms disappear (Heisenbugs).
|
||||||
|
|
||||||
|
By intercepting I/O syscalls (`write`, `read`, `open`, ...) inside the process memory, Flicker could
|
||||||
|
aggregate I/O statistics (e.g., "Rank 7 performed 50,000 writes of 4 bytes") with negligible
|
||||||
|
overhead, providing a lightweight alternative to `strace` for high-throughput jobs.
|
||||||
|
|
||||||
|
### MPI Communication Profiling
|
||||||
|
|
||||||
|
HPC performance is often bound by network latency between nodes. Profiling tools like Vampir are
|
||||||
|
heavy and costly. Flicker can patch shared library exports (like `MPI_Send` or `MPI_Recv`) at load-time.
|
||||||
|
This allows lightweight logging of message sizes and latencies without recompiling the application
|
||||||
|
or linking against special profiling libraries.
|
||||||
|
|
||||||
|
## Security and Hardening
|
||||||
|
|
||||||
|
### Coverage-Guided Fuzzing (Closed Source)
|
||||||
|
|
||||||
|
Fuzzing requires feedback on which code paths are executed to be effective. But for closed-source
|
||||||
|
software, researchers typically use QEMU-mode in AFL. QEMU translates instructions dynamically,
|
||||||
|
resulting in slow execution speeds (often 2-10x slower than native).
|
||||||
|
|
||||||
|
Flicker could inject coverage instrumentation (updating a shared memory bitmap on branch targets)
|
||||||
|
directly into the binary at load time. This would allow closed-source binaries to be fuzzed at
|
||||||
|
near-native speeds, significantly increasing the number of test cases run per second.
|
||||||
|
|
||||||
|
### Software Shadow Stacks
|
||||||
|
|
||||||
|
Return-Oriented Programming (ROP) attacks exploit buffer overflows to overwrite return addresses on
|
||||||
|
the stack.
|
||||||
|
|
||||||
|
Hardware enforcement (Intel CET/AMD Shadow Stack) requires modern CPUs (Intel 11th Gen+, Zen 3+) and
|
||||||
|
recent kernels (Linux 6.6+). Older systems remain vulnerable.
|
||||||
|
|
||||||
|
Flicker could instrument `CALL` and `RET` instructions to implement a Software Shadow Stack. On
|
||||||
|
`CALL`, the return address is pushed to a secure, isolated stack region. On `RET`, the address on
|
||||||
|
the stack is compared against the shadow stack. If they mismatch, the program terminates, preventing
|
||||||
|
ROP chains.
|
||||||
|
|
||||||
|
### Binary-Only Address Sanitizer (ASan)
|
||||||
|
|
||||||
|
Memory safety errors (buffer overflows, use-after-free) in C/C++ are often found with ASan or
|
||||||
|
Valgrind. ASan requires recompilation. Valgrind works on binaries but slows execution by 20x-50x,
|
||||||
|
making it unusable for large datasets.
|
||||||
|
|
||||||
|
Flicker could intercept allocator calls (`malloc`/`free`) to poison "red zones" around memory and
|
||||||
|
instrument memory access instructions to check these zones. This provides ASan-like capabilities for
|
||||||
|
legacy binaries with significantly lower overhead than Valgrind.
|
||||||
|
|
||||||
|
## Systems and Maintenance
|
||||||
|
|
||||||
|
### Hardware Feature Emulation (Forward Compatibility)
|
||||||
|
|
||||||
|
HPC clusters are often heterogeneous, with older nodes lacking newer instruction sets (e.g.,
|
||||||
|
AVX-512, AMX). A binary compiled for a newer architecture will crash with `SIGILL` on an older node.
|
||||||
|
|
||||||
|
Flicker could detect these instructions and patch them to jump to a software emulation routine or a
|
||||||
|
scalar fallback implementation. This allows binaries optimized for the latest hardware to run
|
||||||
|
(albeit slower) on legacy nodes for testing or resource-filling purposes.
|
||||||
|
|
||||||
|
### Fault Injection
|
||||||
|
|
||||||
|
To certify software for mission-critical environments, developers must verify how it handles
|
||||||
|
hardware errors.
|
||||||
|
|
||||||
|
Flicker could instrument instructions to probabilistically flip bits in registers or memory
|
||||||
|
("Bit-flip injection"), or intercept syscalls to return error codes (e.g., returning `ENOSPC` on
|
||||||
|
`write`). It can also simulate malfunctioning or intermittent devices by corrupting buffers returned
|
||||||
|
by `read`. This allows testing error recovery paths without physical hardware damage.
|
||||||
|
|
||||||
|
### Record/Replay Engine
|
||||||
|
|
||||||
|
Debugging non-deterministic bugs (race conditions) is difficult because they are hard to reproduce.
|
||||||
|
By intercepting all sources of non-determinism (syscalls, `rdtsc`, atomic instructions, signals),
|
||||||
|
Flicker could record a trace of an execution. This trace can be replayed later to force the exact
|
||||||
|
same execution path, allowing developers to debug the error state interactively.
|
||||||
@@ -39,9 +39,12 @@ const prefixes = [_]u8{
|
|||||||
0x36,
|
0x36,
|
||||||
};
|
};
|
||||||
|
|
||||||
var syscall_flicken_bytes = [13]u8{
|
/// Per the SysV ABI: "The kernel destroys registers %rcx and %r11."
|
||||||
0x49, 0xBB, // mov r11
|
/// So we put the address of the function to call into %r11.
|
||||||
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, // 8byte immediate
|
// TODO: Don't we need to save the red zone here, because we push the return address onto the stack
|
||||||
|
// with the `call r11` instruction?
|
||||||
|
var syscall_flicken_bytes = [_]u8{
|
||||||
|
0x49, 0xBB, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, // mov r11 <imm>
|
||||||
0x41, 0xff, 0xd3, // call r11
|
0x41, 0xff, 0xd3, // call r11
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -52,19 +55,43 @@ pub var address_allocator: AddressAllocator = .empty;
|
|||||||
pub var allocated_pages: std.AutoHashMapUnmanaged(u64, void) = .empty;
|
pub var allocated_pages: std.AutoHashMapUnmanaged(u64, void) = .empty;
|
||||||
pub var mutex: std.Thread.Mutex = .{};
|
pub var mutex: std.Thread.Mutex = .{};
|
||||||
|
|
||||||
var init_once = std.once(initInner);
|
pub var target_exec_path_buf: [std.fs.max_path_bytes]u8 = @splat(0);
|
||||||
pub fn init() void {
|
pub var target_exec_path: []const u8 = undefined;
|
||||||
init_once.call();
|
|
||||||
}
|
/// Initialize the patcher.
|
||||||
fn initInner() void {
|
/// NOTE: This should only be called **once**.
|
||||||
|
pub fn init() !void {
|
||||||
gpa = std.heap.page_allocator;
|
gpa = std.heap.page_allocator;
|
||||||
flicken_templates.ensureTotalCapacity(
|
|
||||||
|
try flicken_templates.ensureTotalCapacity(
|
||||||
std.heap.page_allocator,
|
std.heap.page_allocator,
|
||||||
page_size / @sizeOf(Flicken),
|
page_size / @sizeOf(Flicken),
|
||||||
) catch @panic("failed initializing patcher");
|
);
|
||||||
flicken_templates.putAssumeCapacity("nop", .{ .name = "nop", .bytes = &.{} });
|
flicken_templates.putAssumeCapacity("nop", .{ .name = "nop", .bytes = &.{} });
|
||||||
mem.writeInt(u64, syscall_flicken_bytes[2..][0..8], @intFromPtr(&syscalls.syscall_entry), .little);
|
mem.writeInt(
|
||||||
|
u64,
|
||||||
|
syscall_flicken_bytes[2..][0..8],
|
||||||
|
@intFromPtr(&syscalls.syscall_entry),
|
||||||
|
.little,
|
||||||
|
);
|
||||||
flicken_templates.putAssumeCapacity("syscall", .{ .name = "syscall", .bytes = &syscall_flicken_bytes });
|
flicken_templates.putAssumeCapacity("syscall", .{ .name = "syscall", .bytes = &syscall_flicken_bytes });
|
||||||
|
|
||||||
|
{
|
||||||
|
// Read mmap_min_addr to block the low memory range. This prevents us from allocating
|
||||||
|
// trampolines in the forbidden low address range.
|
||||||
|
var min_addr: u64 = 0x10000; // Default safe fallback (64KB)
|
||||||
|
if (std.fs.openFileAbsolute("/proc/sys/vm/mmap_min_addr", .{})) |file| {
|
||||||
|
defer file.close();
|
||||||
|
var buf: [32]u8 = undefined;
|
||||||
|
if (file.readAll(&buf)) |len| {
|
||||||
|
const trimmed = std.mem.trim(u8, buf[0..len], " \n\r\t");
|
||||||
|
if (std.fmt.parseInt(u64, trimmed, 10)) |val| {
|
||||||
|
min_addr = val;
|
||||||
|
} else |_| {}
|
||||||
|
} else |_| {}
|
||||||
|
} else |_| {}
|
||||||
|
try address_allocator.block(gpa, .{ .start = 0, .end = @intCast(min_addr) }, 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Flicken name and bytes have to be valid for the lifetime it's used. If a trampoline with the
|
/// Flicken name and bytes have to be valid for the lifetime it's used. If a trampoline with the
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ pub fn touches(range: Range, other: Range) bool {
|
|||||||
pub fn compare(lhs: Range, rhs: Range) std.math.Order {
|
pub fn compare(lhs: Range, rhs: Range) std.math.Order {
|
||||||
assert(lhs.end >= lhs.start);
|
assert(lhs.end >= lhs.start);
|
||||||
assert(rhs.end >= rhs.start);
|
assert(rhs.end >= rhs.start);
|
||||||
return if (lhs.start >= rhs.end) .gt else if (lhs.end <= rhs.start) .lt else .eq;
|
return if (lhs.start > rhs.end) .gt else if (lhs.end < rhs.start) .lt else .eq;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn getStart(range: Range, T: type) T {
|
pub fn getStart(range: Range, T: type) T {
|
||||||
|
|||||||
138
src/main.zig
138
src/main.zig
@@ -49,14 +49,21 @@ pub fn main() !void {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize patcher
|
const file = try lookupFile(mem.sliceTo(std.os.argv[arg_index], 0));
|
||||||
Patcher.init();
|
|
||||||
// Block the first 64k to avoid mmap_min_addr (EPERM) issues on Linux.
|
{
|
||||||
// TODO: read it from `/proc/sys/vm/mmap_min_addr` instead.
|
// Initialize patcher
|
||||||
try Patcher.address_allocator.block(Patcher.gpa, .{ .start = 0, .end = 0x10000 }, 0);
|
try Patcher.init();
|
||||||
|
// Resolve the absolute path of the target executable. This is needed for the
|
||||||
|
// readlink("/proc/self/exe") interception. We use the file descriptor to get the
|
||||||
|
// authoritative path.
|
||||||
|
var self_buf: [128]u8 = undefined;
|
||||||
|
const fd_path = try std.fmt.bufPrint(&self_buf, "/proc/self/fd/{d}", .{file.handle});
|
||||||
|
Patcher.target_exec_path = try std.fs.readLinkAbsolute(fd_path, &Patcher.target_exec_path_buf);
|
||||||
|
log.debug("Resolved target executable path: {s}", .{Patcher.target_exec_path});
|
||||||
|
}
|
||||||
|
|
||||||
// Map file into memory
|
// Map file into memory
|
||||||
const file = try lookupFile(mem.sliceTo(std.os.argv[arg_index], 0));
|
|
||||||
var file_buffer: [128]u8 = undefined;
|
var file_buffer: [128]u8 = undefined;
|
||||||
var file_reader = file.reader(&file_buffer);
|
var file_reader = file.reader(&file_buffer);
|
||||||
log.info("--- Loading executable: {s} ---", .{std.os.argv[arg_index]});
|
log.info("--- Loading executable: {s} ---", .{std.os.argv[arg_index]});
|
||||||
@@ -267,3 +274,122 @@ test {
|
|||||||
_ = @import("Range.zig");
|
_ = @import("Range.zig");
|
||||||
_ = @import("PatchLocationIterator.zig");
|
_ = @import("PatchLocationIterator.zig");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: make this be passed in from the build system
|
||||||
|
const bin_path = "zig-out/bin/";
|
||||||
|
fn getTestExePath(comptime name: []const u8) []const u8 {
|
||||||
|
return bin_path ++ "test_" ++ name;
|
||||||
|
}
|
||||||
|
const flicker_path = bin_path ++ "flicker";
|
||||||
|
|
||||||
|
test "nolibc_nopie_exit" {
|
||||||
|
try testHelper(&.{ flicker_path, getTestExePath("nolibc_nopie_exit") }, "");
|
||||||
|
}
|
||||||
|
test "nolibc_pie_exit" {
|
||||||
|
try testHelper(&.{ flicker_path, getTestExePath("nolibc_pie_exit") }, "");
|
||||||
|
}
|
||||||
|
// BUG: This one is flaky
|
||||||
|
// test "libc_pie_exit" {
|
||||||
|
// try testHelper(&.{ flicker_path, getTestExePath("libc_pie_exit") }, "");
|
||||||
|
// }
|
||||||
|
|
||||||
|
test "nolibc_nopie_helloWorld" {
|
||||||
|
try testHelper(&.{ flicker_path, getTestExePath("nolibc_nopie_helloWorld") }, "Hello World!\n");
|
||||||
|
}
|
||||||
|
test "nolibc_pie_helloWorld" {
|
||||||
|
try testHelper(&.{ flicker_path, getTestExePath("nolibc_pie_helloWorld") }, "Hello World!\n");
|
||||||
|
}
|
||||||
|
// BUG: This one is flaky
|
||||||
|
// test "libc_pie_helloWorld" {
|
||||||
|
// try testHelper(&.{ flicker_path, getTestExePath("libc_pie_helloWorld") }, "Hello World!\n");
|
||||||
|
// }
|
||||||
|
|
||||||
|
test "nolibc_nopie_printArgs" {
|
||||||
|
try testPrintArgs("nolibc_nopie_printArgs");
|
||||||
|
}
|
||||||
|
test "nolibc_pie_printArgs" {
|
||||||
|
try testPrintArgs("nolibc_pie_printArgs");
|
||||||
|
}
|
||||||
|
// BUG: This one is flaky
|
||||||
|
// test "libc_pie_printArgs" {
|
||||||
|
// try testPrintArgs("libc_pie_printArgs");
|
||||||
|
// }
|
||||||
|
|
||||||
|
test "nolibc_nopie_readlink" {
|
||||||
|
try testReadlink("nolibc_nopie_readlink");
|
||||||
|
}
|
||||||
|
test "nolibc_pie_readlink" {
|
||||||
|
try testReadlink("nolibc_pie_readlink");
|
||||||
|
}
|
||||||
|
// BUG: This one just outputs the path to the flicker executable and is likely also flaky
|
||||||
|
// test "libc_pie_readlink" {
|
||||||
|
// try testReadlink("libc_pie_readlink");
|
||||||
|
// }
|
||||||
|
|
||||||
|
test "nolibc_nopie_clone_raw" {
|
||||||
|
try testHelper(
|
||||||
|
&.{ flicker_path, getTestExePath("nolibc_nopie_clone_raw") },
|
||||||
|
"Child: Hello\nParent: Goodbye\n",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
test "nolibc_pie_clone_raw" {
|
||||||
|
try testHelper(
|
||||||
|
&.{ flicker_path, getTestExePath("nolibc_pie_clone_raw") },
|
||||||
|
"Child: Hello\nParent: Goodbye\n",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
test "nolibc_nopie_clone_no_new_stack" {
|
||||||
|
try testHelper(
|
||||||
|
&.{ flicker_path, getTestExePath("nolibc_nopie_clone_no_new_stack") },
|
||||||
|
"Child: Hello\nParent: Goodbye\n",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
test "nolibc_pie_clone_no_new_stack" {
|
||||||
|
try testHelper(
|
||||||
|
&.{ flicker_path, getTestExePath("nolibc_pie_clone_no_new_stack") },
|
||||||
|
"Child: Hello\nParent: Goodbye\n",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
test "echo" {
|
||||||
|
try testHelper(&.{ "echo", "Hello", "There" }, "Hello There\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Runs the `printArgs` test binary `name` under flicker with a fixed argument list and expects
/// it to print its own argv (everything after the loader path) joined by single spaces.
fn testPrintArgs(comptime name: []const u8) !void {
    const exe_path = getTestExePath(name);
    const loader_argv: []const []const u8 = &.{ flicker_path, exe_path, "foo", "bar", "baz hi" };

    // The target sees the loader's argv minus the loader itself.
    const expected_stdout = try mem.join(testing.allocator, " ", loader_argv[1..]);
    defer testing.allocator.free(expected_stdout);

    try testHelper(loader_argv, expected_stdout);
}
|
||||||
|
|
||||||
|
/// Runs the `readlink` test binary `name` under flicker and expects it to print its own absolute
/// path, i.e. the resolved current directory joined with the binary's relative path.
fn testReadlink(comptime name: []const u8) !void {
    const exe_path = getTestExePath(name);

    const cwd_path = try std.fs.cwd().realpathAlloc(testing.allocator, ".");
    defer testing.allocator.free(cwd_path);

    const expected_path = try std.fs.path.join(testing.allocator, &.{ cwd_path, exe_path });
    defer testing.allocator.free(expected_path);

    try testHelper(&.{ flicker_path, exe_path }, expected_path);
}
|
||||||
|
|
||||||
|
/// Spawns `argv`, waits for it to finish, and checks that it wrote exactly `expected_stdout` to
/// stdout and exited normally with code 0.
///
/// If any check fails, the child's stderr, stdout and termination status are logged before the
/// error is returned.
fn testHelper(
    argv: []const []const u8,
    expected_stdout: []const u8,
) !void {
    const result = try std.process.Child.run(.{
        .allocator = testing.allocator,
        .argv = argv,
    });
    defer testing.allocator.free(result.stdout);
    defer testing.allocator.free(result.stderr);
    // Declared after the frees so it runs before them and can still read the captured output.
    errdefer {
        std.log.err("stderr: {s}", .{result.stderr});
        std.log.err("stdout: {s}", .{result.stdout});
        std.log.err("term: {}", .{result.term});
    }

    try testing.expectEqualStrings(expected_stdout, result.stdout);
    switch (result.term) {
        .Exited => |code| try testing.expectEqual(0, code),
        else => return error.TestUnexpectedResult,
    }
}
|
||||||
|
|||||||
170
src/syscalls.zig
170
src/syscalls.zig
@@ -1,5 +1,7 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const linux = std.os.linux;
|
const linux = std.os.linux;
|
||||||
|
const Patcher = @import("Patcher.zig");
|
||||||
|
const assert = std.debug.assert;
|
||||||
|
|
||||||
/// Represents the stack layout pushed by `syscall_entry` before calling the handler.
|
/// Represents the stack layout pushed by `syscall_entry` before calling the handler.
|
||||||
pub const UserRegs = extern struct {
|
pub const UserRegs = extern struct {
|
||||||
@@ -20,41 +22,94 @@ pub const UserRegs = extern struct {
|
|||||||
r13: u64,
|
r13: u64,
|
||||||
r14: u64,
|
r14: u64,
|
||||||
r15: u64,
|
r15: u64,
|
||||||
|
/// This one isn't pushed on the stack by `syscall_entry`. It's pushed by the `call r11` to get
|
||||||
|
/// to the `syscall_entry`
|
||||||
|
return_address: u64,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// The main entry point for intercepted syscalls.
|
/// The main entry point for intercepted syscalls.
|
||||||
///
|
///
|
||||||
/// This function is called from `syscall_entry` with a pointer to the saved registers.
|
/// This function is called from `syscall_entry` with a pointer to the saved registers.
|
||||||
/// It effectively emulates the syscall instruction while allowing for interception.
|
/// It effectively emulates the syscall instruction while allowing for interception.
|
||||||
export fn syscall_handler(regs: *UserRegs) void {
|
export fn syscall_handler(regs: *UserRegs) callconv(.c) void {
|
||||||
// TODO: Handle signals (masking) to prevent re-entrancy issues if we touch global state.
|
// TODO: Handle signals (masking) to prevent re-entrancy issues if we touch global state.
|
||||||
// TODO: Handle `clone` specially because the child thread wakes up with a fresh stack
|
|
||||||
// and cannot pop the registers we saved here.
|
|
||||||
|
|
||||||
const sys_nr = regs.rax;
|
const sys: linux.SYS = @enumFromInt(regs.rax);
|
||||||
const sys: linux.SYS = @enumFromInt(sys_nr);
|
|
||||||
const arg1 = regs.rdi;
|
|
||||||
const arg2 = regs.rsi;
|
|
||||||
const arg3 = regs.rdx;
|
|
||||||
const arg4 = regs.r10;
|
|
||||||
const arg5 = regs.r8;
|
|
||||||
const arg6 = regs.r9;
|
|
||||||
|
|
||||||
std.debug.print("Got syscall {s}\n", .{@tagName(sys)});
|
switch (sys) {
|
||||||
// For now, we just pass through everything.
|
.readlink => {
|
||||||
// In the future, we will switch on `sys` to handle mmap, mprotect, etc.
|
// readlink(const char *path, char *buf, size_t bufsiz)
|
||||||
const result = std.os.linux.syscall6(sys, arg1, arg2, arg3, arg4, arg5, arg6);
|
const path_ptr = @as([*:0]const u8, @ptrFromInt(regs.rdi));
|
||||||
|
// TODO: handle relative paths with cwd
|
||||||
|
if (isProcSelfExe(path_ptr)) {
|
||||||
|
handleReadlink(regs.rsi, regs.rdx, regs);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
.readlinkat => {
|
||||||
|
// readlinkat(int dirfd, const char *pathname, char *buf, size_t bufsiz)
|
||||||
|
// We only intercept if pathname is absolute "/proc/self/exe".
|
||||||
|
// TODO: handle relative paths with dirfd pointing to /proc/self
|
||||||
|
// TODO: handle relative paths with dirfd == AT_FDCWD (like readlink)
|
||||||
|
// TODO: handle empty pathname
|
||||||
|
const path_ptr = @as([*:0]const u8, @ptrFromInt(regs.rsi));
|
||||||
|
if (isProcSelfExe(path_ptr)) {
|
||||||
|
handleReadlink(regs.rdx, regs.r10, regs);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
.clone, .clone3 => {
|
||||||
|
handleClone(regs);
|
||||||
|
return;
|
||||||
|
},
|
||||||
|
.fork, .vfork => {
|
||||||
|
// fork/vfork duplicate the stack (or share it until exec), so the return path via
|
||||||
|
// syscall_entry works fine.
|
||||||
|
},
|
||||||
|
.rt_sigreturn => {
|
||||||
|
@panic("sigreturn is not supported yet");
|
||||||
|
},
|
||||||
|
.execve, .execveat => |s| {
|
||||||
|
// TODO: option to persist across new processes
|
||||||
|
std.debug.print("syscall {} called\n", .{s});
|
||||||
|
},
|
||||||
|
.prctl, .arch_prctl, .set_tid_address => |s| {
|
||||||
|
// TODO: what do we need to handle from these?
|
||||||
|
// process name
|
||||||
|
// fs base(gs?)
|
||||||
|
// thread id pointers
|
||||||
|
std.debug.print("syscall {} called\n", .{s});
|
||||||
|
},
|
||||||
|
.mmap, .mprotect => {
|
||||||
|
// TODO: JIT support
|
||||||
|
// TODO: cleanup
|
||||||
|
},
|
||||||
|
.munmap, .mremap => {
|
||||||
|
// TODO: cleanup
|
||||||
|
},
|
||||||
|
|
||||||
|
else => {},
|
||||||
|
}
|
||||||
|
|
||||||
// Write result back to the saved RAX so it is restored to the application.
|
// Write result back to the saved RAX so it is restored to the application.
|
||||||
regs.rax = result;
|
regs.rax = executeSyscall(regs);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline fn executeSyscall(regs: *UserRegs) u64 {
|
||||||
|
return linux.syscall6(
|
||||||
|
@enumFromInt(regs.rax),
|
||||||
|
regs.rdi,
|
||||||
|
regs.rsi,
|
||||||
|
regs.rdx,
|
||||||
|
regs.r10,
|
||||||
|
regs.r8,
|
||||||
|
regs.r9,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Assembly trampoline that saves state and calls the Zig handler.
|
/// Assembly trampoline that saves state and calls the Zig handler.
|
||||||
pub fn syscall_entry() callconv(.naked) void {
|
pub fn syscall_entry() callconv(.naked) void {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
\\ # Respect the Red Zone (128 bytes)
|
|
||||||
\\ sub $128, %rsp
|
|
||||||
\\
|
|
||||||
\\ # Save all GPRs that must be preserved or are arguments
|
\\ # Save all GPRs that must be preserved or are arguments
|
||||||
\\ push %r15
|
\\ push %r15
|
||||||
\\ push %r14
|
\\ push %r14
|
||||||
@@ -103,8 +158,6 @@ pub fn syscall_entry() callconv(.naked) void {
|
|||||||
\\ pop %r14
|
\\ pop %r14
|
||||||
\\ pop %r15
|
\\ pop %r15
|
||||||
\\
|
\\
|
||||||
\\ # Restore Red Zone and Return
|
|
||||||
\\ add $128, %rsp
|
|
||||||
\\ ret
|
\\ ret
|
||||||
:
|
:
|
||||||
// TODO: can we somehow use %[handler] in the assembly instead?
|
// TODO: can we somehow use %[handler] in the assembly instead?
|
||||||
@@ -112,3 +165,78 @@ pub fn syscall_entry() callconv(.naked) void {
|
|||||||
: [handler] "i" (syscall_handler),
|
: [handler] "i" (syscall_handler),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports whether the NUL-terminated string at `path` is exactly "/proc/self/exe".
///
/// Bytes are compared one at a time and the scan stops at the first mismatch, so nothing past
/// the first differing byte (or the terminator) is read.
fn isProcSelfExe(path: [*:0]const u8) bool {
    const needle = "/proc/self/exe";
    for (needle, 0..) |expected, idx| {
        if (path[idx] != expected) return false;
    }
    // A longer path that merely starts with the needle must not match.
    return path[needle.len] == 0;
}
|
||||||
|
|
||||||
|
/// Emulates `readlink`/`readlinkat` on "/proc/self/exe" by copying the recorded target
/// executable path (`Patcher.target_exec_path`) into the caller's buffer.
///
/// - `buf_addr`: address of the caller's destination buffer.
/// - `buf_size`: capacity of that buffer in bytes.
/// - `regs`: saved registers; `rax` receives the syscall result.
///
/// On success `rax` is the number of bytes copied: the path is truncated to `buf_size` and no
/// NUL terminator is written. A zero `buf_size` yields `-EINVAL` and a null `buf_addr` yields
/// `-EFAULT`, both encoded as a negated errno.
fn handleReadlink(buf_addr: u64, buf_size: u64, regs: *UserRegs) void {
    if (buf_size == 0) {
        const errno: i64 = @intFromEnum(linux.E.INVAL);
        regs.rax = @bitCast(-errno);
        return;
    }
    // `@ptrFromInt(0)` to a non-optional pointer is illegal behaviour, so reject a null buffer
    // explicitly instead of tripping a safety check inside the handler.
    if (buf_addr == 0) {
        const errno: i64 = @intFromEnum(linux.E.FAULT);
        regs.rax = @bitCast(-errno);
        return;
    }

    const target = Patcher.target_exec_path;
    const len = @min(target.len, buf_size);
    const dest = @as([*]u8, @ptrFromInt(buf_addr));
    @memcpy(dest[0..len], target[0..len]);

    // readlink does not null-terminate if the buffer is full, it just returns length.
    regs.rax = len;
}
|
||||||
|
|
||||||
|
/// Argument block for `clone3`; `handleClone` reads it through the pointer passed in `rdi`.
///
/// NOTE(review): field order and widths presumably mirror the kernel's `struct clone_args` —
/// confirm against clone3(2) before adding or reordering fields.
const CloneArgs = extern struct {
    flags: u64,
    pidfd: u64,
    child_tid: u64,
    parent_tid: u64,
    exit_signal: u64,
    /// `handleClone` treats 0 as "no separate stack for the child".
    stack: u64,
    /// Added to `stack` by `handleClone` to obtain the child's stack top.
    stack_size: u64,
    tls: u64,
    set_tid: u64,
    set_tid_size: u64,
    cgroup: u64,
};
|
||||||
|
|
||||||
|
/// Handles intercepted `clone` and `clone3` syscalls.
///
/// Only the case where the child does not get a new stack is supported. The syscall is then
/// executed directly and its result is stored in `regs.rax`; afterwards `postCloneChild` runs in
/// the child (result 0) and `postCloneParent` runs in the parent (result > 0). A failed clone
/// leaves the negated errno in `regs.rax` and runs neither hook.
///
/// Panics if the caller requested a separate child stack.
fn handleClone(regs: *UserRegs) void {
    const sys: linux.syscalls.X64 = @enumFromInt(regs.rax);
    std.debug.print("got: {}\n", .{sys});

    // Determine the stack the child would start on (0 means "none requested").
    const child_stack: u64 = if (sys == .clone)
        // clone(flags, stack, ...)
        regs.rsi
    else stack: {
        // clone3(struct clone_args *args, size_t size)
        const args = @as(*const CloneArgs, @ptrFromInt(regs.rdi));
        break :stack if (args.stack != 0) args.stack + args.stack_size else 0;
    };
    std.debug.print("child_stack: {x}\n", .{child_stack});

    // If no new stack, just execute (like fork)
    if (child_stack == 0) {
        regs.rax = executeSyscall(regs);
        // The raw result is 0 in the child, the child's PID in the parent, or a negated errno.
        // It must be interpreted as signed: as a `u64`, an error looks like a huge "PID".
        const ret: i64 = @bitCast(regs.rax);
        if (ret == 0) {
            postCloneChild(regs);
        } else if (ret > 0) {
            postCloneParent(regs);
        }
        // ret < 0: nothing was created; the error is handed back to the application via rax.
        return;
    }

    @panic("case with a different stack is not handled yet");
}
|
||||||
|
|
||||||
|
/// Hook run in the child after a clone without a new stack returned 0. Currently it only logs;
/// the saved registers are not used.
fn postCloneChild(regs: *UserRegs) void {
    std.debug.print("Child: post clone\n", .{});
    _ = regs;
}
|
||||||
|
|
||||||
|
/// Hook run in the parent after a clone without a new stack. Logs the value left in `regs.rax`
/// by the syscall as the child's PID.
fn postCloneParent(regs: *UserRegs) void {
    const child_pid = regs.rax;
    std.debug.print("Parent: post clone; Child PID: {}\n", .{child_pid});
}
|
||||||
|
|||||||
65
src/test/clone_raw.zig
Normal file
65
src/test/clone_raw.zig
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
const linux = std.os.linux;
|
||||||
|
const clone = linux.CLONE;
|
||||||
|
|
||||||
|
var child_stack: [4096 * 4]u8 align(16) = undefined;
|
||||||
|
/// Test program: issues a raw `clone` with a separate child stack. The child writes
/// "Child: Hello\n" and exits; the parent waits for it and then writes "Parent: Goodbye\n".
/// If the clone fails the parent writes "Parent: Clone failed\n" instead.
pub fn main() !void {
    // SIGCHLD: Send signal to parent on exit (required for waitpid)
    const flags = clone.VM | clone.FILES | clone.FS | clone.SIGHAND | linux.SIG.CHLD;

    // Stack grows downwards. Point to the end.
    const stack_top = @intFromPtr(&child_stack) + child_stack.len;

    const msg = "Child: Hello\n";
    const msg_len = msg.len;

    // We use inline assembly to perform the clone syscall and handle the child path completely to
    // avoid the compiler generating code that relies on the parent's stack frame in the child
    // process (where the stack is empty).
    const ret = asm volatile (
        \\ syscall
        \\ test %%rax, %%rax
        \\ jnz 1f
        \\
        \\ # Child Path
        \\ # Write to stdout
        \\ mov $1, %%rdi # fd = 1 (stdout)
        \\ mov %[msg], %%rsi # buffer
        \\ mov %[len], %%rdx # length
        \\ mov $1, %%rax # SYS_write
        \\ syscall
        \\
        \\ # Exit
        \\ mov $0, %%rdi # code = 0
        \\ mov $60, %%rax # SYS_exit
        \\ syscall
        \\
        \\ # Should not be reached
        \\ ud2
        \\
        \\ 1:
        \\ # Parent Path continues
        : [ret] "={rax}" (-> usize),
        : [number] "{rax}" (@intFromEnum(linux.syscalls.X64.clone)),
          [arg1] "{rdi}" (flags),
          [arg2] "{rsi}" (stack_top),
          [arg3] "{rdx}" (0),
          [arg4] "{r10}" (0),
          [arg5] "{r8}" (0),
          [msg] "r" (msg.ptr),
          [len] "r" (msg_len),
        : .{ .rcx = true, .r11 = true, .memory = true });

    // Parent Process
    // A failed syscall returns a negated errno. Reinterpret the bits as signed rather than
    // `@intCast`ing to a narrower signed type, which would trap on exactly that value.
    const signed_ret: isize = @bitCast(ret);
    if (signed_ret < 0) {
        _ = linux.syscall3(.write, 1, @intFromPtr("Parent: Clone failed\n"), 21);
        return;
    }

    var status: u32 = 0;
    // wait4 for the child to exit
    _ = linux.syscall4(.wait4, ret, @intFromPtr(&status), 0, 0);

    _ = linux.syscall3(.write, 1, @intFromPtr("Parent: Goodbye\n"), 16);
}
|
||||||
3
src/test/exit.zig
Normal file
3
src/test/exit.zig
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
/// Test program that does nothing and returns immediately.
pub fn main() void {}
|
||||||
9
src/test/helloWorld.zig
Normal file
9
src/test/helloWorld.zig
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
/// Test program that writes "Hello World!\n" to stdout through a small buffered writer.
pub fn main() !void {
    var out_buf: [64]u8 = undefined;
    var file_writer = std.fs.File.stdout().writer(&out_buf);
    const out = &file_writer.interface;

    try out.writeAll("Hello World!\n");
    // The writer is buffered; nothing reaches stdout until it is flushed.
    try out.flush();
}
|
||||||
17
src/test/printArgs.zig
Normal file
17
src/test/printArgs.zig
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
/// Test program that prints its command-line arguments to stdout, separated by single spaces,
/// with no trailing space or newline.
pub fn main() !void {
    var out_buf: [64]u8 = undefined;
    var file_writer = std.fs.File.stdout().writer(&out_buf);
    const out = &file_writer.interface;

    // The separator goes in front of every argument except the first, so the output never ends
    // with a stray space.
    var arg_iter = std.process.args();
    var is_first = true;
    while (arg_iter.next()) |arg| : (is_first = false) {
        if (is_first) try out.print("{s}", .{arg}) else try out.print(" {s}", .{arg});
    }
    try out.flush();
}
|
||||||
13
src/test/readlink.zig
Normal file
13
src/test/readlink.zig
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
/// Test program that resolves "/proc/self/exe" with the `readlink` syscall wrapper and prints the
/// result to stdout without a trailing newline.
///
/// Returns `error.ReadLinkFailed` if the call reports an error.
pub fn main() !void {
    var buf: [std.fs.max_path_bytes]u8 = undefined;
    // We use /proc/self/exe to test if the loader interception works.
    // NOTE(review): the low-level wrapper appears to be used deliberately in place of the
    // higher-level call kept below for reference — confirm before switching back.
    // const path = try std.posix.readlink("/proc/self/exe", &buf);
    const size = std.posix.system.readlink("/proc/self/exe", &buf, buf.len);
    // The wrapper signals failure through its return value; using that value as a slice length
    // unchecked would trap on the cast or index out of bounds.
    if (std.posix.errno(size) != .SUCCESS) return error.ReadLinkFailed;

    var stdout_buffer: [64]u8 = undefined;
    var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
    const stdout = &stdout_writer.interface;
    try stdout.print("{s}", .{buf[0..@intCast(size)]});
    try stdout.flush();
}
|
||||||
Reference in New Issue
Block a user