1092 lines
41 KiB
Zig
1092 lines
41 KiB
Zig
const std = @import("std");
|
|
const builtin = @import("builtin");
|
|
const testing = std.testing;
|
|
const math = std.math;
|
|
const mem = std.mem;
|
|
const posix = std.posix;
|
|
const zydis = @import("zydis").zydis;
|
|
const dis = @import("disassembler.zig");
|
|
const syscalls = @import("syscalls.zig");
|
|
|
|
const log = std.log.scoped(.patcher);
|
|
const AddressAllocator = @import("AddressAllocator.zig");
|
|
const InstructionFormatter = dis.InstructionFormatter;
|
|
const InstructionIterator = dis.InstructionIterator;
|
|
const PatchLocationIterator = @import("PatchLocationIterator.zig");
|
|
const PatchByte = PatchLocationIterator.PatchByte;
|
|
const Range = @import("Range.zig");
|
|
|
|
const assert = std.debug.assert;
|
|
|
|
const page_size = std.heap.pageSize();
|
|
/// Opcode byte of the x86 `jmp rel32` instruction (near jump, 32-bit displacement).
const jump_rel32: u8 = 0xe9;
/// Encoded length of `jmp rel32`: one opcode byte followed by a 4-byte displacement.
const jump_rel32_size = 5;
/// Opcode byte of the x86 `jmp rel8` instruction (short jump, 8-bit displacement).
const jump_rel8: u8 = 0xeb;
/// Encoded length of `jmp rel8`: one opcode byte followed by a 1-byte displacement.
const jump_rel8_size = 2;

// TODO: Find an invalid instruction to use.
// const invalid: u8 = 0xaa;
/// Opcode byte of `int3`. Used as filler for instruction bytes that are left over after a
/// jump has been written over an instruction.
const int3: u8 = 0xcc;
/// Opcode byte of the single-byte `nop` instruction.
const nop: u8 = 0x90;
|
|
|
|
/// Prefix bytes used to pad a jump (tactic T1). A jump that is padded with `n` prefixes is
/// preceded by the first `n` entries of this table, in this order.
const prefixes = [_]u8{
    0x64, // prefix_fs
    0x65, // prefix_gs
    0x36, // prefix_ss
};
|
|
|
|
// TODO: Don't we need to save the red zone here, because we push the return address onto the stack
// with the `call r11` instruction?
/// Machine code of the built-in "syscall" flicken: load a 64-bit immediate into %r11 and call it.
///
/// According to the SysV ABI "the kernel destroys registers %rcx and %r11", so %r11 is used to
/// hold the address of the function to call. The eight `0xaa` placeholder bytes are overwritten
/// with the real address by `init`.
var syscall_flicken_bytes = [_]u8{
    // mov r11, <imm64>
    0x49, 0xBB, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
    // call r11
    0x41, 0xff, 0xd3,
};
|
|
|
|
/// Allocator used for the patcher's long-lived bookkeeping. Assigned by `init`.
pub var gpa: mem.Allocator = undefined;
/// Registered flicken, keyed by name. The index of an entry in this map is its `FlickenId`.
pub var flicken_templates: std.StringArrayHashMapUnmanaged(Flicken) = .empty;
/// Bookkeeping of address ranges that must not be handed out for new trampolines.
pub var address_allocator: AddressAllocator = .empty;
/// Tracks the base addresses of pages we have mmap'd for Flicken.
pub var allocated_pages: std.AutoHashMapUnmanaged(u64, void) = .empty;
/// Coarse lock taken by `patchRegion` for the whole duration of a call.
pub var mutex: std.Thread.Mutex = .{};

// NOTE(review): presumably the backing storage for `target_exec_path`; neither variable is
// written in this part of the file, so confirm against the code that sets them.
pub var target_exec_path_buf: [std.fs.max_path_bytes]u8 = @splat(0);
pub var target_exec_path: []const u8 = undefined;
|
|
|
|
/// Initialize the patcher: select the allocator, register the two built-in flicken ("nop" at
/// index 0 and "syscall" at index 1) and block the low address range below `mmap_min_addr`.
/// NOTE: This should only be called **once**.
pub fn init() !void {
    gpa = std.heap.page_allocator;

    // Reserve capacity up front so that the two built-in registrations below cannot fail.
    const initial_capacity = page_size / @sizeOf(Flicken);
    try flicken_templates.ensureTotalCapacity(std.heap.page_allocator, initial_capacity);

    flicken_templates.putAssumeCapacity("nop", .{ .name = "nop", .bytes = &.{} });

    // Fill in the immediate operand of the `mov r11, <imm64>` placeholder.
    mem.writeInt(
        u64,
        syscall_flicken_bytes[2..][0..8],
        @intFromPtr(&syscalls.syscallEntry),
        .little,
    );
    flicken_templates.putAssumeCapacity("syscall", .{ .name = "syscall", .bytes = &syscall_flicken_bytes });

    // Read mmap_min_addr to block the low memory range. This prevents us from allocating
    // trampolines in the forbidden low address range. Any failure to open, read or parse the
    // file silently falls back to the default.
    const fallback_min_addr: u64 = 0x10000; // Default safe fallback (64KB)
    const min_addr: u64 = blk: {
        const file = std.fs.openFileAbsolute("/proc/sys/vm/mmap_min_addr", .{}) catch
            break :blk fallback_min_addr;
        defer file.close();

        var buf: [32]u8 = undefined;
        const len = file.readAll(&buf) catch break :blk fallback_min_addr;
        const trimmed = std.mem.trim(u8, buf[0..len], " \n\r\t");
        break :blk std.fmt.parseInt(u64, trimmed, 10) catch fallback_min_addr;
    };
    try address_allocator.block(gpa, .{ .start = 0, .end = @intCast(min_addr) }, 0);
}
|
|
|
|
/// Registers `trampoline` under its name and returns the id (its index in `flicken_templates`).
///
/// Flicken name and bytes have to be valid for the lifetime it's used. If a trampoline with the
/// name is already registered it gets overwritten (a warning is logged) and keeps its id.
/// NOTE: The names "nop" and "syscall" are reserved for the built-in flicken registered by
/// `init`; passing either of them trips an assertion.
///
/// Returns an error only if growing `flicken_templates` fails.
pub fn addFlicken(trampoline: Flicken) !FlickenId {
    assert(!mem.eql(u8, "nop", trampoline.name));
    assert(!mem.eql(u8, "syscall", trampoline.name));
    try flicken_templates.ensureUnusedCapacity(gpa, 1);
    // Nothing below this point is allowed to fail.
    errdefer comptime unreachable;

    const gop = flicken_templates.getOrPutAssumeCapacity(trampoline.name);
    if (gop.found_existing) {
        log.warn("addFlicken: Overwriting existing trampoline: {s}", .{trampoline.name});
    }
    // Also refresh the key so that it points at the caller's (newest) name slice.
    gop.key_ptr.* = trampoline.name;
    gop.value_ptr.* = trampoline;
    return @enumFromInt(gop.index);
}
|
|
|
|
/// A named piece of machine code that gets placed into a trampoline.
pub const Flicken = struct {
    /// Name under which the flicken is registered.
    name: []const u8,
    /// The machine code itself, without the trailing jump back.
    bytes: []const u8,

    /// Number of bytes the flicken occupies inside a trampoline: its code plus the `jmp rel32`
    /// that is appended after it.
    pub fn size(flicken: *const Flicken) u64 {
        const code_len = flicken.bytes.len;
        return code_len + jump_rel32_size;
    }
};
|
|
|
|
/// Identifies a registered flicken. The integer value is the index of the flicken inside
/// `flicken_templates`; values other than the two named ones belong to user-registered flicken.
pub const FlickenId = enum(u64) {
    /// The nop flicken is special: it only executes the patched instruction and then jumps
    /// straight back into the normal instruction stream. It **cannot** be changed.
    /// Its registered bytes are always empty (`bytes.len == 0`). Constructing a patch for it
    /// needs special handling, because the trampoline content differs for every instruction.
    nop = 0,
    /// The built-in "syscall" flicken, which `init` registers right after "nop".
    syscall = 1,
    _,
};
|
|
|
|
/// A request to patch a single instruction. Must point to first byte of an instruction.
pub const PatchRequest = struct {
    /// What to patch with.
    flicken: FlickenId,
    /// Offset within the region.
    offset: u64,
    /// Number of bytes of instruction.
    size: u8,
    /// A byte slice from the start of the offset to the end of the region. This isn't necessary to
    /// have but makes things more accessible.
    bytes: []u8,

    /// Sort predicate for descending address order: true if `lhs` lies at a higher address
    /// than `rhs`.
    pub fn desc(_: void, lhs: PatchRequest, rhs: PatchRequest) bool {
        const lhs_addr = @intFromPtr(lhs.bytes.ptr);
        const rhs_addr = @intFromPtr(rhs.bytes.ptr);
        return rhs_addr < lhs_addr;
    }

    /// Writes the address, the instruction bytes and the numeric flicken id of the request.
    pub fn format(
        self: @This(),
        writer: *std.Io.Writer,
    ) std.Io.Writer.Error!void {
        const address = @intFromPtr(self.bytes.ptr);
        const instruction_bytes = self.bytes[0..self.size];
        const flicken_id = @intFromEnum(self.flicken);
        try writer.print(
            ".{{ .address = 0x{x}, .bytes = 0x{x}, .flicken = {} }}",
            .{ address, instruction_bytes, flicken_id },
        );
    }
};
|
|
|
|
/// Counters describing how the patch requests of a region were resolved.
pub const Statistics = struct {
    /// Direct jumps
    jump: u64,
    /// Punning - index represents number of prefixes used
    punning: [4]u64,
    /// Successor Eviction
    successor_eviction: u64,
    /// Neighbor Eviction
    neighbor_eviction: u64,
    /// Failed to patch
    failed: u64,

    /// All counters set to zero.
    pub const empty: Statistics = .{
        .jump = 0,
        .punning = @splat(0),
        .successor_eviction = 0,
        .neighbor_eviction = 0,
        .failed = 0,
    };

    /// Number of punned patches, summed over all prefix counts.
    pub fn punningSum(stats: *const Statistics) u64 {
        var sum: u64 = 0;
        for (stats.punning) |count| sum += count;
        return sum;
    }

    /// Number of requests that were patched by any tactic.
    pub fn successful(stats: *const Statistics) u64 {
        const direct_or_punned = stats.jump + stats.punningSum();
        return direct_or_punned + stats.successor_eviction + stats.neighbor_eviction;
    }

    /// Number of requests that were counted, successful or not.
    pub fn total(stats: *const Statistics) u64 {
        return stats.successful() + stats.failed;
    }

    /// Fraction of successful requests in the range [0, 1]. Returns 1 when nothing was counted.
    pub fn percentage(stats: *const Statistics) f64 {
        const all = stats.total();
        if (all == 0) return 1;
        const numerator: f64 = @floatFromInt(stats.successful());
        const denominator: f64 = @floatFromInt(all);
        return numerator / denominator;
    }

    /// Adds every counter of `other` onto the matching counter of `self`.
    pub fn add(self: *Statistics, other: *const Statistics) void {
        self.jump += other.jump;
        for (&self.punning, other.punning) |*mine, theirs| {
            mine.* += theirs;
        }
        self.successor_eviction += other.successor_eviction;
        self.neighbor_eviction += other.neighbor_eviction;
        self.failed += other.failed;
    }
};
|
|
|
|
/// Scans a memory region for instructions that require patching and applies the patches
/// using a hierarchy of tactics (Direct/Punning -> Successor Eviction -> Neighbor Eviction).
///
/// The region is processed Back-to-Front to ensure that modifications (punning) only
/// constrain instructions that have already been processed or are locked.
///
/// Currently `syscall` instructions are patched with the "syscall" flicken and instructions
/// carrying a LOCK prefix with the "nop" flicken. While patching, the region is switched to R|W
/// and back to R|X afterwards; failing to switch back panics. Pages that were made writable for
/// trampolines are switched back to R|X on success and, best effort, on error as well.
///
/// Errors: `error.DuplicatePatchRequest`, `error.undefinedFlicken`, and whatever allocation,
/// `mmap` or `mprotect` report.
pub fn patchRegion(region: []align(page_size) u8) !void {
    // For now just do a coarse lock.
    // TODO: should we make this more fine grained?
    mutex.lock();
    defer mutex.unlock();

    {
        // Block the region, such that we don't try to allocate there anymore.
        const start: i64 = @intCast(@intFromPtr(region.ptr));
        try address_allocator.block(
            gpa,
            .{ .start = start, .end = start + @as(i64, @intCast(region.len)) },
            page_size,
        );
    }

    var arena_impl = std.heap.ArenaAllocator.init(gpa);
    const arena = arena_impl.allocator();
    defer arena_impl.deinit();

    var patch_requests: std.ArrayListUnmanaged(PatchRequest) = .empty;
    // We save the bytes where instructions start to be able to disassemble them on the fly. This is
    // necessary for the neighbor eviction, since we can't just iterate forwards from a target
    // instruction and disassemble happily. This is because some bytes may already be the patched
    // ones which means that we might disassemble garbage or something different that wasn't there
    // before. This means that we would need to stop disassembling on the first byte that is locked,
    // which kind of defeats the purpose of neighbor eviction.
    var instruction_starts = try std.DynamicBitSetUnmanaged.initEmpty(arena, region.len);

    {
        // Get where to patch.
        var instruction_iterator = InstructionIterator.init(region);
        while (instruction_iterator.next()) |instruction| {
            const offset = instruction.address - @intFromPtr(region.ptr);
            instruction_starts.set(offset);

            const is_syscall = instruction.instruction.mnemonic == zydis.ZYDIS_MNEMONIC_SYSCALL;
            const should_patch = is_syscall or
                instruction.instruction.attributes & zydis.ZYDIS_ATTRIB_HAS_LOCK > 0;
            if (should_patch) {
                const request: PatchRequest = .{
                    .flicken = if (is_syscall) .syscall else .nop,
                    .offset = offset,
                    .size = instruction.instruction.length,
                    .bytes = region[offset..],
                };
                try patch_requests.append(arena, request);
            }
        }
        log.info("patchRegion: Got {} patch requests", .{patch_requests.items.len});
    }

    // Sort patch requests in descending order by address, such that we patch from back to front.
    mem.sortUnstable(PatchRequest, patch_requests.items, {}, PatchRequest.desc);

    {
        // Check for duplicate patch requests and undefined IDs
        var last_offset: ?u64 = null;
        for (patch_requests.items, 0..) |request, i| {
            if (last_offset != null and last_offset.? == request.offset) {
                // Only format the instruction itself; `request.bytes` reaches to the end of
                // the region.
                const fmt = dis.formatBytes(request.bytes[0..request.size]);
                log.err(
                    "patchRegion: Found duplicate patch requests for instruction: {s}",
                    .{fmt},
                );
                log.err("patchRegion: request 1: {f}", .{patch_requests.items[i - 1]});
                log.err("patchRegion: request 2: {f}", .{patch_requests.items[i]});
                return error.DuplicatePatchRequest;
            }
            last_offset = request.offset;

            if (@as(u64, @intFromEnum(request.flicken)) >= flicken_templates.count()) {
                const fmt = dis.formatBytes(request.bytes[0..request.size]);
                log.err(
                    "patchRegion: Usage of undefined flicken in request {f} for instruction: {s}",
                    .{ request, fmt },
                );
                return error.undefinedFlicken;
            }
        }
    }

    {
        // Apply patches.
        const exec_protection = posix.PROT.READ | posix.PROT.EXEC;
        try posix.mprotect(region, posix.PROT.READ | posix.PROT.WRITE);
        defer posix.mprotect(region, exec_protection) catch
            @panic("patchRegion: mprotect back to R|X failed. Can't continue");

        var stats = Statistics.empty;
        // Used to track which bytes have been modified or used for constraints (punning),
        // to prevent future patches (from neighbor/successor eviction) from corrupting them.
        var locked_bytes = try std.DynamicBitSetUnmanaged.initEmpty(arena, region.len);
        // PERF: A set of the pages for the patches/flicken we made writable. This way we don't
        // repeatedly call `mprotect` on the same page to switch it from R|W to R|X and back.
        // At the end we `mprotect` all pages in this set back to being R|X.
        var pages_made_writable: std.AutoHashMapUnmanaged(u64, void) = .empty;
        // If anything below fails, patches that were already applied still jump into these
        // pages, so make them executable again (best effort) before propagating the error.
        errdefer {
            var leftover = pages_made_writable.keyIterator();
            while (leftover.next()) |page_addr| {
                const ptr: [*]align(page_size) u8 = @ptrFromInt(page_addr.*);
                posix.mprotect(ptr[0..page_size], exec_protection) catch |err| {
                    log.err(
                        "patchRegion: Failed to restore R|X on page 0x{x}: {s}",
                        .{ page_addr.*, @errorName(err) },
                    );
                };
            }
        }

        requests: for (patch_requests.items) |request| {
            // A previous patch may have claimed bytes of this instruction already.
            for (0..request.size) |i| {
                if (locked_bytes.isSet(request.offset + i)) {
                    log.warn("patchRegion: Skipping request at offset 0x{x} because it is locked", .{request.offset});
                    stats.failed += 1;
                    continue :requests;
                }
            }

            if (try attemptDirectOrPunning(
                request,
                arena,
                &locked_bytes,
                &pages_made_writable,
                &stats,
            )) {
                continue :requests;
            }

            if (try attemptSuccessorEviction(
                request,
                arena,
                &locked_bytes,
                &pages_made_writable,
                &stats,
            )) {
                continue :requests;
            }

            if (try attemptNeighborEviction(
                request,
                arena,
                &locked_bytes,
                &pages_made_writable,
                &instruction_starts,
                &stats,
            )) {
                continue :requests;
            }

            stats.failed += 1;
        }

        // Change pages back to R|X.
        var iter = pages_made_writable.keyIterator();
        while (iter.next()) |page_addr| {
            const ptr: [*]align(page_size) u8 = @ptrFromInt(page_addr.*);
            try posix.mprotect(ptr[0..page_size], exec_protection);
        }

        assert(stats.total() == patch_requests.items.len);
        log.info("{}", .{stats});
        log.info("patched: {}/{}: {:2.2}%", .{
            stats.successful(),
            stats.total(),
            stats.percentage() * 100,
        });
        log.info("patchRegion: Finished applying patches", .{});
    }
}
|
|
|
|
/// First tactic: replace the requested instruction itself with a (possibly prefix-padded)
/// `jmp rel32` to a freshly placed trampoline.
///
/// Candidate trampoline locations come from a `PatchInstructionIterator`. The first candidate
/// that can be made writable and whose trampoline can be committed wins: its range is blocked in
/// `address_allocator`, the bytes of the written jump are marked in `locked_bytes` and `stats`
/// is updated. Returns `true` if the request was patched, `false` if no candidate worked.
fn attemptDirectOrPunning(
    request: PatchRequest,
    arena: mem.Allocator,
    locked_bytes: *std.DynamicBitSetUnmanaged,
    pages_made_writable: *std.AutoHashMapUnmanaged(u64, void),
    stats: *Statistics,
) !bool {
    // The nop flicken has no template bytes; its content is the patched instruction itself.
    const flicken: Flicken = if (request.flicken != .nop)
        flicken_templates.entries.get(@intFromEnum(request.flicken)).value
    else
        .{ .name = "nop", .bytes = request.bytes[0..request.size] };

    var candidates = PatchInstructionIterator.init(
        request.bytes,
        request.size,
        flicken.size(),
    );
    // TODO: There is a "Ghost Page" edge case here. If `candidates.next()` returns a range that
    // spans multiple pages (Pages A and B), we might successfully mmap Page A but fail to
    // mmap Page B. The loop will `continue` to the next candidate range, leaving Page A
    // mapped. While harmless (it becomes an unused executable page), it is technically a
    // memory leak. A future fix should track "current attempt" pages separately and unmap
    // them on failure.
    while (candidates.next(.{ .count = 256 })) |trampoline_range| {
        const page_count = touchedPageCount(trampoline_range);
        try pages_made_writable.ensureUnusedCapacity(arena, page_count);

        ensureRangeWritable(trampoline_range, pages_made_writable) catch |err| switch (err) {
            // Somebody else owns that page; try the next candidate.
            error.MappingAlreadyExists => continue,
            else => return err,
        };

        applyPatch(request, flicken, trampoline_range, candidates.num_prefixes) catch |err| switch (err) {
            // The instruction cannot be relocated to this address; try the next candidate.
            error.RelocationOverflow => continue,
            else => return err,
        };

        try address_allocator.block(gpa, trampoline_range, 0);

        // Lock every byte the written jump (prefixes + opcode + displacement) occupies.
        const locked_len = jump_rel32_size + candidates.num_prefixes;
        locked_bytes.setRangeValue(
            .{ .start = request.offset, .end = request.offset + locked_len },
            true,
        );

        // Instructions with at least 5 bytes are counted as direct jumps, shorter ones as
        // punning with the number of prefixes that were used.
        // assert(candidates.num_prefixes == 0);
        const counter: *u64 = if (request.size >= 5)
            &stats.jump
        else
            &stats.punning[candidates.num_prefixes];
        counter.* += 1;
        return true;
    }
    return false;
}
|
|
|
|
/// Second tactic: first move the instruction that directly follows the request (the successor)
/// into its own trampoline, then patch the request against the bytes of the successor's jump.
///
/// The successor is always moved with the nop flicken. If the request still cannot be patched
/// for a given successor placement, the successor's original bytes are restored and the next
/// placement is tried. On success both trampoline ranges are blocked, the touched code bytes
/// are locked and `stats.successor_eviction` is incremented.
///
/// Returns `false` if there is no decodable successor, if any of its bytes is locked, or if no
/// combination of placements works.
fn attemptSuccessorEviction(
    request: PatchRequest,
    arena: mem.Allocator,
    locked_bytes: *std.DynamicBitSetUnmanaged,
    pages_made_writable: *std.AutoHashMapUnmanaged(u64, void),
    stats: *Statistics,
) !bool {
    // Describe the successor as a nop patch request of its own.
    const after_request = request.bytes[request.size..];
    const decoded = dis.disassembleInstruction(after_request) orelse return false;
    const succ_request: PatchRequest = .{
        .flicken = .nop,
        .offset = request.offset + request.size,
        .size = decoded.instruction.length,
        .bytes = after_request,
    };
    const succ_code = succ_request.bytes[0..succ_request.size];
    const succ_flicken: Flicken = .{ .name = "nop", .bytes = succ_code };

    // The successor must be completely untouched so far.
    for (0..succ_request.size) |i| {
        if (locked_bytes.isSet(succ_request.offset + i)) return false;
    }

    // Keep a copy of the successor so a failed attempt can be undone.
    var saved_succ: [15]u8 = undefined;
    @memcpy(saved_succ[0..succ_request.size], succ_code);

    var succ_candidates = PatchInstructionIterator.init(
        succ_request.bytes,
        succ_request.size,
        succ_flicken.size(),
    );
    while (succ_candidates.next(.{ .count = 16 })) |succ_range| {
        // Every attempt has to start from the unmodified successor.
        assert(mem.eql(u8, succ_code, saved_succ[0..succ_request.size]));

        try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(succ_range));
        ensureRangeWritable(succ_range, pages_made_writable) catch |err| switch (err) {
            error.MappingAlreadyExists => continue,
            else => return err,
        };

        applyPatch(succ_request, succ_flicken, succ_range, succ_candidates.num_prefixes) catch |err| switch (err) {
            error.RelocationOverflow => continue,
            else => return err,
        };

        // The successor now is a jump; try to patch the original request against it.
        const flicken: Flicken = if (request.flicken != .nop)
            flicken_templates.entries.get(@intFromEnum(request.flicken)).value
        else
            .{ .name = "nop", .bytes = request.bytes[0..request.size] };

        var orig_candidates = PatchInstructionIterator.init(
            request.bytes,
            request.size,
            flicken.size(),
        );
        while (orig_candidates.next(.{ .count = 16 })) |orig_range| {
            // The two trampolines must not share any space.
            if (succ_range.touches(orig_range)) continue;

            try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(orig_range));
            ensureRangeWritable(orig_range, pages_made_writable) catch |err| switch (err) {
                error.MappingAlreadyExists => continue,
                else => return err,
            };

            applyPatch(request, flicken, orig_range, orig_candidates.num_prefixes) catch |err| switch (err) {
                error.RelocationOverflow => continue,
                else => return err,
            };

            // Both patches are in place: reserve the trampolines and lock the code bytes from
            // the start of the request up to the end of the successor's jump.
            try address_allocator.block(gpa, succ_range, 0);
            try address_allocator.block(gpa, orig_range, 0);
            const locked_len = request.size + jump_rel32_size + succ_candidates.num_prefixes;
            locked_bytes.setRangeValue(
                .{ .start = request.offset, .end = request.offset + locked_len },
                true,
            );
            stats.successor_eviction += 1;
            return true;
        }

        // We couldn't patch with the bytes. So revert to original ones.
        @memcpy(succ_code, saved_succ[0..succ_request.size]);
    }
    return false;
}
|
|
|
|
/// Third tactic: replace the request with a short jump (`jmp rel8`) that lands *inside* a nearby
/// instruction (the victim). A `jmp rel32` to the request's trampoline (J_Patch) is placed at
/// offset `k` inside the victim, and the victim itself is replaced by a `jmp rel32` to its own
/// trampoline (J_Victim) whose bytes overlap J_Patch.
///
/// `instruction_starts` marks the offsets at which original instructions begin; only those
/// offsets are considered as victims. On success the request bytes and the victim bytes
/// (including any part of J_Patch that extends past the victim) are locked, both trampoline
/// ranges are blocked and `stats.neighbor_eviction` is incremented. Returns `false` if no
/// victim / placement combination works.
fn attemptNeighborEviction(
    request: PatchRequest,
    arena: mem.Allocator,
    locked_bytes: *std.DynamicBitSetUnmanaged,
    pages_made_writable: *std.AutoHashMapUnmanaged(u64, void),
    instruction_starts: *const std.DynamicBitSetUnmanaged,
    stats: *Statistics,
) !bool {
    // Valid neighbors must be within [-128, 127] range for a short jump.
    // Since we patch back-to-front, we only look at neighbors *after* the current instruction
    // (higher address) to avoid evicting an instruction we haven't processed/patched yet.
    const scan_begin = request.offset + 2;
    const scan_end = @min(
        scan_begin + 128,
        request.bytes.len + request.offset,
    );

    candidates: for (scan_begin..scan_end) |victim_offset| {
        if (!instruction_starts.isSet(victim_offset)) continue;

        const victim_tail = request.bytes[victim_offset - request.offset ..];

        // PERF: We could also search for the next set bit in instruction_starts
        const decoded = dis.disassembleInstruction(victim_tail) orelse continue;
        const victim_len = decoded.instruction.length;
        const victim_code = victim_tail[0..victim_len];

        // A victim that is (partly) locked cannot be modified.
        for (0..victim_len) |i| {
            if (locked_bytes.isSet(victim_offset + i)) {
                continue :candidates;
            }
        }

        // Save original bytes to revert if constraints cannot be solved.
        var saved_victim: [15]u8 = undefined;
        @memcpy(saved_victim[0..victim_len], victim_code);

        // OUTER LOOP: J_Patch
        // Iterate possible offsets 'k' inside the victim for the patch jump.
        var k: u8 = 1;
        while (k < victim_len) : (k += 1) {
            // Displacement of the short jump, measured from the end of the 2-byte `jmp rel8`.
            const landing: i64 = @intCast(victim_offset + k);
            const short_jump_end: i64 = @intCast(request.offset + 2);
            const disp = landing - short_jump_end;
            if (disp > 127 or disp < -128) continue;

            const patch_flicken: Flicken = if (request.flicken != .nop)
                flicken_templates.entries.get(@intFromEnum(request.flicken)).value
            else
                .{ .name = "nop", .bytes = request.bytes[0..request.size] };

            // Constraints for J_Patch:
            // Bytes [0 .. victim_len - k] are free (inside victim).
            // Bytes [victim_len - k .. ] are used (outside victim, immutable).
            var patch_candidates = PatchInstructionIterator.init(
                victim_tail[k..],
                @intCast(victim_len - k),
                patch_flicken.size(),
            );

            while (patch_candidates.next(.{ .count = 16 })) |patch_range| {
                // J_Patch MUST NOT use prefixes, because it's punned inside J_Victim.
                // Adding prefixes would shift J_Patch relative to J_Victim, making constraints harder.
                if (patch_candidates.num_prefixes > 0) break;

                try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(patch_range));
                ensureRangeWritable(patch_range, pages_made_writable) catch |err| switch (err) {
                    error.MappingAlreadyExists => continue,
                    else => return err,
                };

                // Tentatively write J_Patch to memory to set constraints for J_Victim.
                {
                    const jump_end: i64 = @intCast(@intFromPtr(&victim_tail[k]) + 5);
                    const rel32: i32 = @intCast(patch_range.start - jump_end);
                    victim_tail[k] = jump_rel32;
                    mem.writeInt(i32, victim_tail[k + 1 ..][0..4], rel32, .little);
                }

                // INNER LOOP: J_Victim
                // Constraints:
                // Bytes [0 .. k] are free (before J_Patch).
                // Bytes [k .. ] are used (overlap J_Patch).
                const victim_flicken: Flicken = .{
                    .name = "nop",
                    .bytes = saved_victim[0..victim_len],
                };

                var victim_candidates = PatchInstructionIterator.init(
                    victim_tail,
                    k,
                    victim_flicken.size(),
                );

                while (victim_candidates.next(.{ .count = 16 })) |victim_range| {
                    // The two trampolines must not share any space.
                    if (patch_range.touches(victim_range)) continue;

                    try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(victim_range));
                    ensureRangeWritable(victim_range, pages_made_writable) catch |err| switch (err) {
                        error.MappingAlreadyExists => continue,
                        else => return err,
                    };

                    // SUCCESS! Commit everything.

                    // 1. Write Patch Trampoline (J_Patch target)
                    {
                        const trampoline: [*]u8 = @ptrFromInt(patch_range.getStart(u64));
                        // Only the nop flicken carries the original instruction, which has to
                        // be relocated to its new address.
                        const reloc_info: ?RelocInfo = if (request.flicken == .nop) .{
                            .instr = dis.disassembleInstruction(patch_flicken.bytes).?,
                            .old_addr = @intFromPtr(request.bytes.ptr),
                        } else null;
                        commitTrampoline(
                            trampoline,
                            patch_flicken.bytes,
                            reloc_info,
                            @intFromPtr(request.bytes.ptr) + request.size,
                        ) catch |err| switch (err) {
                            error.RelocationOverflow => continue,
                            else => return err,
                        };
                    }

                    // 2. Write Victim Trampoline (J_Victim target)
                    {
                        const trampoline: [*]u8 = @ptrFromInt(victim_range.getStart(u64));
                        const victim_original = saved_victim[0..victim_len];
                        commitTrampoline(
                            trampoline,
                            victim_original,
                            .{
                                .instr = dis.disassembleInstruction(victim_original).?,
                                .old_addr = @intFromPtr(victim_tail.ptr),
                            },
                            @intFromPtr(victim_tail.ptr) + victim_len,
                        ) catch |err| switch (err) {
                            error.RelocationOverflow => continue,
                            else => return err,
                        };
                    }

                    // 3. Write J_Victim (overwrites head of J_Patch which is fine)
                    commitJump(
                        victim_tail.ptr,
                        @intCast(victim_range.start),
                        victim_candidates.num_prefixes,
                        k, // Total size for padding is limited to k to preserve J_Patch tail
                    );

                    // 4. Write J_Short at request
                    request.bytes[0] = jump_rel8;
                    request.bytes[1] = @intCast(disp);
                    if (request.size > 2) {
                        @memset(request.bytes[2..request.size], int3);
                    }

                    // 5. Locking
                    try address_allocator.block(gpa, patch_range, 0);
                    try address_allocator.block(gpa, victim_range, 0);

                    locked_bytes.setRangeValue(
                        .{ .start = request.offset, .end = request.offset + request.size },
                        true,
                    );
                    // Lock victim range + any extension of J_Patch
                    const patch_jump_end = victim_offset + k + 5;
                    const lock_end = @max(victim_offset + victim_len, patch_jump_end);
                    locked_bytes.setRangeValue(
                        .{ .start = victim_offset, .end = lock_end },
                        true,
                    );

                    stats.neighbor_eviction += 1;
                    return true;
                }

                // Revert J_Patch write for next iteration
                @memcpy(victim_code, saved_victim[0..victim_len]);
            }
        }
    }

    return false;
}
|
|
|
|
/// Applies a standard patch (T1/B1/B2) where the instruction is replaced by a jump to a trampoline.
///
/// The trampoline at the start of `allocated_range` is written first: the flicken content
/// (relocated if the request uses the nop flicken) followed by a jump back to the instruction
/// after the patched one. Only then is the original instruction overwritten with a `JMP`
/// (plus `num_prefixes` prefixes and padding), so a failing trampoline write leaves the
/// instruction untouched.
fn applyPatch(
    request: PatchRequest,
    flicken: Flicken,
    allocated_range: Range,
    num_prefixes: u8,
) !void {
    const instruction_addr = @intFromPtr(request.bytes.ptr);
    const trampoline: [*]u8 = @ptrFromInt(allocated_range.getStart(u64));

    // Only the nop flicken contains the original instruction, which must be relocated.
    const reloc_info: ?RelocInfo = if (request.flicken == .nop) .{
        .instr = dis.disassembleInstruction(request.bytes[0..request.size]).?,
        .old_addr = instruction_addr,
    } else null;

    // Commit Trampoline; execution resumes right behind the patched instruction.
    try commitTrampoline(trampoline, flicken.bytes, reloc_info, instruction_addr + request.size);

    // Commit Jump (Patch)
    commitJump(request.bytes.ptr, @intCast(allocated_range.start), num_prefixes, request.size);
}
|
|
|
|
/// Describes an instruction that is copied into a trampoline and has to be relocated there.
const RelocInfo = struct {
    /// The decoded instruction that gets relocated.
    instr: dis.BundledInstruction,
    /// Address the instruction had before it was moved.
    old_addr: u64,
};
|
|
|
|
/// Helper to write code into a trampoline.
///
/// Copies `content` to `trampoline_ptr`, relocates it for its new address when `reloc_info` is
/// given, and appends a `jmp rel32` that continues at `return_addr`. In total
/// `content.len + jump_rel32_size` bytes are written.
///
/// Fails if the relocation fails; in that case no jump back is written.
fn commitTrampoline(
    trampoline_ptr: [*]u8,
    content: []const u8,
    reloc_info: ?RelocInfo,
    return_addr: u64,
) !void {
    const body = trampoline_ptr[0..content.len];
    @memcpy(body, content);

    if (reloc_info) |info| {
        try relocateInstruction(info.instr, @intFromPtr(trampoline_ptr), body);
    }

    // Write jump back
    const jump_back = trampoline_ptr[content.len..][0..jump_rel32_size];
    jump_back[0] = jump_rel32;
    // The displacement is relative to the address directly after the jump.
    const next_ip: i64 = @intCast(@intFromPtr(trampoline_ptr) + content.len + jump_rel32_size);
    const target: i64 = @intCast(return_addr);
    const rel32: i32 = @intCast(target - next_ip);
    mem.writeInt(i32, jump_back[1..5], rel32, .little);
}
|
|
|
|
/// Helper to overwrite an instruction with a jump to a trampoline.
///
/// Layout written at `from_ptr`: the first `num_prefixes` entries of `prefixes` (padding), the
/// `0xE9` opcode, the 32-bit displacement to `to_addr`, and then `INT3` for every remaining
/// byte up to `total_size` to prevent execution of garbage bytes. If the jump is longer than
/// `total_size`, no filler is written.
fn commitJump(
    from_ptr: [*]u8,
    to_addr: u64,
    num_prefixes: u8,
    total_size: usize,
) void {
    @memcpy(from_ptr[0..num_prefixes], prefixes[0..num_prefixes]);
    from_ptr[num_prefixes] = jump_rel32;

    // The displacement is relative to the address directly after the jump.
    const jump_end = num_prefixes + jump_rel32_size;
    const next_ip: i64 = @intCast(@intFromPtr(from_ptr) + jump_end);
    const target: i64 = @intCast(to_addr);
    const rel32: i32 = @intCast(target - next_ip);
    mem.writeInt(i32, from_ptr[num_prefixes + 1 ..][0..4], rel32, .little);

    if (jump_end < total_size) {
        @memset(from_ptr[jump_end..total_size], int3);
    }
}
|
|
|
|
/// Only used for debugging. Prints the content of `/proc/self/maps` to stderr.
///
/// At most 1 MiB of the file is read into a stack buffer; anything beyond that is not printed.
/// Returns an error if the file cannot be opened or read.
fn printMaps() !void {
    const path = "/proc/self/maps";
    const file = try std.fs.cwd().openFile(path, .{});
    // Release the file descriptor again; this function may be called repeatedly.
    defer file.close();
    var buffer: [1024 * 1024]u8 = undefined;
    const size = try file.readAll(&buffer);
    std.debug.print("\n{s}\n", .{buffer[0..size]});
}
|
|
|
|
/// Returns the number of pages that the given range touches.
///
/// The end of the range is treated as exclusive, so a range that ends exactly on a page
/// boundary does not count the page that starts there.
fn touchedPageCount(range: Range) u32 {
    const first_page = mem.alignBackward(u64, range.getStart(u64), page_size);
    // The last byte that belongs to the range is `end - 1`.
    const last_byte = range.getEnd(u64) - 1;
    const last_page = mem.alignBackward(u64, last_byte, page_size);
    const pages_between = (last_page - first_page) / page_size;
    return @intCast(pages_between + 1);
}
|
|
|
|
/// Ensure `range` is mapped R|W. Assumes `pages_made_writable` has enough free capacity.
///
/// For every page touched by `range` that is not yet in `pages_made_writable`:
/// - a page we mapped earlier (present in `allocated_pages`) is switched to R|W with `mprotect`;
/// - any other page is freshly mapped R|W at exactly that address and recorded in
///   `allocated_pages`.
/// Each processed page is then added to `pages_made_writable`.
///
/// Returns `error.MappingAlreadyExists` if somebody else already has a mapping at one of the
/// pages; that page is blocked in `address_allocator` first. Pages handled before the failing
/// one stay mapped and recorded.
fn ensureRangeWritable(
    range: Range,
    pages_made_writable: *std.AutoHashMapUnmanaged(u64, void),
) !void {
    const start_page = mem.alignBackward(u64, range.getStart(u64), page_size);
    // `end` is exclusive, so the last touched page is the one containing `end - 1`.
    const end_page = mem.alignBackward(u64, range.getEnd(u64) - 1, page_size);
    const protection = posix.PROT.READ | posix.PROT.WRITE;
    var page_addr = start_page;
    while (page_addr <= end_page) : (page_addr += page_size) {
        // If the page is already writable, skip it.
        if (pages_made_writable.contains(page_addr)) continue;
        // If we mapped it already we have to do mprotect, else mmap.
        const gop = try allocated_pages.getOrPut(gpa, page_addr);
        if (gop.found_existing) {
            const ptr: [*]align(page_size) u8 = @ptrFromInt(page_addr);
            // Exactly one page has to be changed: the length is `page_size`, not the address.
            try posix.mprotect(ptr[0..page_size], protection);
        } else {
            // The entry was inserted before we know whether the mapping succeeds. Drop it
            // again on failure so `allocated_pages` only ever contains pages we really mapped.
            errdefer _ = allocated_pages.remove(page_addr);
            const addr = posix.mmap(
                @ptrFromInt(page_addr),
                page_size,
                protection,
                .{ .TYPE = .PRIVATE, .ANONYMOUS = true, .FIXED_NOREPLACE = true },
                -1,
                0,
            ) catch |err| switch (err) {
                error.MappingAlreadyExists => {
                    // If the mapping exists this means that someone else
                    // (executable, OS, dynamic loader,...) allocated something there.
                    // We block this so we don't try this page again in the future,
                    // saving a bunch of syscalls.
                    try address_allocator.block(
                        gpa,
                        .{ .start = @intCast(page_addr), .end = @intCast(page_addr + page_size) },
                        page_size,
                    );
                    return err;
                },
                else => return err,
            };
            assert(@as(u64, @intFromPtr(addr.ptr)) == page_addr);
            // `gop.value_ptr.* = {};` not needed because it's void.
        }
        pages_made_writable.putAssumeCapacityNoClobber(page_addr, {});
    }
}
|
|
|
|
/// Enumerates candidate allocations for a Flicken that is reached by overwriting one instruction
/// with a `jmp rel32`, optionally padded with prefixes (Tactic T1).
///
/// The offset bytes of the jump that lie inside the patched instruction can be chosen freely;
/// the ones that spill over into the following bytes must keep their current value. These
/// constraints are handed to a `PatchLocationIterator`, and each range it yields is searched via
/// `address_allocator.findAllocation`.
/// NOTE(review): presumably the ranges yielded by `PatchLocationIterator` are the jump targets
/// reachable under those constraints - confirm against PatchLocationIterator.zig.
const PatchInstructionIterator = struct {
    /// Code bytes; the first byte is the first byte of the instruction to patch.
    /// Must contain at least `num_prefixes + 5` bytes for every prefix count that is tried,
    /// because `getPatchBytes` reads the four offset bytes of the (padded) jump.
    bytes: []const u8,
    /// Size in bytes of the instruction being patched.
    instruction_size: u8,
    /// Size in bytes of the Flicken that has to be allocated.
    flicken_size: u64,

    // Internal state
    /// Number of prefixes the jump is currently padded with.
    num_prefixes: u8,
    /// Yields the constraint ranges for the current `num_prefixes`.
    pli: PatchLocationIterator,
    /// The part of the current constraint range that has not been searched yet.
    valid_range: Range,
    /// Number of allocations returned from the current constraint range so far.
    allocated_count: u64,

    /// Size in bytes of an unpadded `jmp rel32` (the `e9` opcode plus a 32-bit offset).
    const jump_size = 5;

    /// Creates an iterator that starts with an unpadded jump (zero prefixes).
    /// If the `PatchLocationIterator` yields nothing for zero prefixes, the search starts with
    /// an empty range and the first call to `next` moves on to padded jumps.
    fn init(
        bytes: []const u8,
        instruction_size: u8,
        flicken_size: u64,
    ) PatchInstructionIterator {
        const patch_bytes = getPatchBytes(bytes, instruction_size, 0);
        var pli = PatchLocationIterator.init(patch_bytes, jumpEndAddress(bytes, 0));
        const valid_range = pli.next() orelse Range{ .start = 0, .end = 0 };
        return .{
            .bytes = bytes,
            .instruction_size = instruction_size,
            .flicken_size = flicken_size,
            .num_prefixes = 0,
            .pli = pli,
            .valid_range = valid_range,
            .allocated_count = 0,
        };
    }

    pub const Strategy = union(enum) {
        /// Iterates through all possible ranges.
        /// Useful for finding the optimal allocation (fewest prefixes).
        exhaustive: void,
        /// Limits the search to `count` allocation attempts per valid constraint range found by the
        /// PatchLocationIterator.
        ///
        /// This acts as a heuristic to prevent worst-case performance (scanning every byte of a 2GB
        /// gap) while still offering better density than a purely greedy approach. A count of 1 is
        /// equivalent to a greedy strategy.
        count: u64,
    };

    /// Returns the next candidate range of exactly `flicken_size` bytes, or `null` once every
    /// constraint range of every allowed prefix count has been exhausted.
    ///
    /// The search proceeds in three nested levels: allocations inside the current range, then
    /// the next range of the `PatchLocationIterator`, then one more prefix.
    fn next(
        pii: *PatchInstructionIterator,
        strategy: Strategy,
    ) ?Range {
        const State = enum {
            allocation,
            range,
            prefix,
        };
        blk: switch (State.allocation) {
            .allocation => {
                if (address_allocator.findAllocation(
                    pii.flicken_size,
                    pii.valid_range,
                )) |allocated_range| {
                    assert(allocated_range.size() == pii.flicken_size);
                    pii.allocated_count += 1;
                    // Advancing the valid range, such that the next call to `findAllocation` won't
                    // find the same range again.
                    switch (strategy) {
                        .exhaustive => pii.valid_range.start = allocated_range.start + 1,
                        .count => |c| {
                            if (pii.allocated_count >= c) {
                                // Budget for this range is used up: empty the range so the next
                                // call falls through to the next constraint range.
                                pii.valid_range.start = pii.valid_range.end;
                                pii.allocated_count = 0;
                            } else {
                                pii.valid_range.start = allocated_range.start + 1;
                            }
                        },
                    }
                    return allocated_range;
                } else {
                    pii.allocated_count = 0;
                    continue :blk .range;
                }
            },
            .range => {
                // Valid range is used up, so get a new one from the pli.
                if (pii.pli.next()) |valid_range| {
                    pii.valid_range = valid_range;
                    continue :blk .allocation;
                } else {
                    continue :blk .prefix;
                }
            },
            .prefix => {
                // A padded jump consists of `num_prefixes` prefix bytes followed by the `e9`
                // opcode. The opcode has to stay inside the patched instruction, otherwise it
                // would overwrite the first byte of the following instruction. Hence at most
                // `instruction_size - 1` prefixes are possible.
                const max_prefixes = @min(pii.instruction_size -| 1, prefixes.len);
                if (pii.num_prefixes < max_prefixes) {
                    pii.num_prefixes += 1;
                    const patch_bytes = getPatchBytes(pii.bytes, pii.instruction_size, pii.num_prefixes);
                    pii.pli = PatchLocationIterator.init(
                        patch_bytes,
                        jumpEndAddress(pii.bytes, pii.num_prefixes),
                    );
                    continue :blk .range;
                } else {
                    return null;
                }
            },
        }
        comptime unreachable;
    }

    /// Address of the first byte after a jump that starts at `bytes[0]` and is padded with
    /// `num_prefixes` prefixes.
    ///
    /// Computed with pointer arithmetic instead of `&bytes[index]`, because the one-past-the-jump
    /// byte is never accessed and does not need to be part of the slice.
    fn jumpEndAddress(bytes: []const u8, num_prefixes: u8) usize {
        return @intFromPtr(bytes.ptr) + num_prefixes + jump_size;
    }

    /// Classifies the four offset bytes of a jump padded with `num_prefixes` prefixes.
    ///
    /// An offset byte that still lies inside the patched instruction (index below
    /// `instruction_size`) is `.free`; any other byte is `.used` and carries the value currently
    /// stored at that position.
    fn getPatchBytes(instruction_bytes: []const u8, instruction_size: u8, num_prefixes: u8) [4]PatchByte {
        const offset_location = instruction_bytes[num_prefixes + 1 ..][0..4]; // +1 for e9
        var patch_bytes: [4]PatchByte = undefined;
        for (&patch_bytes, offset_location, num_prefixes + 1..) |*patch_byte, offset_byte, i| {
            if (i < instruction_size) {
                patch_byte.* = .free;
            } else {
                patch_byte.* = .{ .used = offset_byte };
            }
        }
        return patch_bytes;
    }
};
|
|
|
|
/// Fixes RIP-relative operands in an instruction that has been moved to a new address.
///
/// `instruction` is the decoded instruction at its original location (`instruction.address`),
/// `address` is the location it has been moved to, and `buffer` contains the instruction bytes
/// (starting at the first byte of the instruction), which are patched in place. The instruction
/// length stays the same; only the displacement/immediate field is rewritten so that the
/// instruction still refers to the same absolute address.
///
/// Returns `error.RelocationFail` if the field to rewrite does not lie inside `buffer` and
/// `error.RelocationOverflow` if the new displacement does not fit into the field.
fn relocateInstruction(
    instruction: dis.BundledInstruction,
    address: u64,
    buffer: []u8,
) !void {
    const instr = instruction.instruction;
    // Iterate all operands
    for (0..instr.operand_count) |i| {
        const operand = &instruction.operands[i];

        // Check for RIP-relative memory operand
        const is_rip_rel = operand.type == zydis.ZYDIS_OPERAND_TYPE_MEMORY and
            operand.unnamed_0.mem.base == zydis.ZYDIS_REGISTER_RIP;
        // Check for relative immediate (e.g. JMP rel32)
        const is_rel_imm = operand.type == zydis.ZYDIS_OPERAND_TYPE_IMMEDIATE and
            operand.unnamed_0.imm.is_relative == zydis.ZYAN_TRUE;
        if (!is_rip_rel and !is_rel_imm) continue;

        // We have to apply a relocation: find the absolute address the operand referred to at
        // the original location.
        var result_address: u64 = 0;
        const status = zydis.ZydisCalcAbsoluteAddress(
            instr,
            operand,
            instruction.address,
            &result_address,
        );
        assert(zydis.ZYAN_SUCCESS(status)); // TODO: maybe return an error instead

        // Calculate new displacement relative to the new address
        // The instruction length remains the same.
        // Address arithmetic is done modulo 2^64 and then reinterpreted as signed. This yields
        // the same value as a signed subtraction whenever that would not overflow, but cannot
        // trap for addresses at or above 2^63.
        const next_rip = address +% instr.length;
        const new_disp: i64 = @bitCast(result_address -% next_rip);

        // Locate the field inside the instruction encoding that has to be rewritten.
        const Field = struct { offset: u16, size_bits: u8 };
        const field: Field = if (is_rip_rel) .{
            .offset = instr.raw.disp.offset,
            .size_bits = instr.raw.disp.size,
        } else blk: {
            // For relative immediate, find the matching raw immediate.
            for (&instr.raw.imm) |*imm| {
                if (imm.is_relative == zydis.ZYAN_TRUE) {
                    break :blk .{ .offset = imm.offset, .size_bits = imm.size };
                }
            }
            // A relative immediate operand always has a relative raw immediate.
            unreachable;
        };

        assert(field.offset != 0);
        assert(field.size_bits != 0);
        const size_bytes = field.size_bits / 8;

        if (@as(usize, field.offset) + size_bytes > buffer.len) {
            return error.RelocationFail;
        }

        // Narrow the displacement to the field width (failing if it does not fit) and store it
        // in little-endian byte order.
        const target = buffer[field.offset..];
        switch (field.size_bits) {
            8 => {
                const disp = math.cast(i8, new_disp) orelse return error.RelocationOverflow;
                target[0] = @bitCast(disp);
            },
            16 => {
                const disp = math.cast(i16, new_disp) orelse return error.RelocationOverflow;
                mem.writeInt(u16, target[0..2], @bitCast(disp), .little);
            },
            32 => {
                const disp = math.cast(i32, new_disp) orelse return error.RelocationOverflow;
                mem.writeInt(u32, target[0..4], @bitCast(disp), .little);
            },
            64 => mem.writeInt(u64, target[0..8], @bitCast(new_disp), .little),
            else => unreachable,
        }
    }
}
|