From 633c313513daaf43a962c191075f407b8da96aeb Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Mon, 9 Mar 2026 11:04:15 +0100 Subject: [PATCH] constraint solving --- build.zig | 2 +- src/AddressAllocator.zig | 1358 +++++++++++++++++++------- src/PatchLocationIterator.zig | 447 --------- src/Patcher.zig | 1695 ++++++++++++++++----------------- src/Range.zig | 49 +- src/Statistics.zig | 46 + src/backend.zig | 49 + src/loader.zig | 94 ++ src/main.zig | 180 ++-- src/relocation.zig | 98 ++ src/syscalls.zig | 11 +- 11 files changed, 2228 insertions(+), 1801 deletions(-) delete mode 100644 src/PatchLocationIterator.zig create mode 100644 src/Statistics.zig create mode 100644 src/backend.zig create mode 100644 src/loader.zig create mode 100644 src/relocation.zig diff --git a/build.zig b/build.zig index 152ecdf..5331dc9 100644 --- a/build.zig +++ b/build.zig @@ -51,7 +51,7 @@ pub fn build(b: *std.Build) !void { try compileTestApplications(b, target, optimize, false, true); try compileTestApplications(b, target, optimize, true, true); - const exe_tests = b.addTest(.{ .root_module = mod }); + const exe_tests = b.addTest(.{ .root_module = mod, .use_llvm = true }); const run_exe_tests = b.addRunArtifact(exe_tests); const test_step = b.step("test", "Run tests"); test_step.dependOn(b.getInstallStep()); diff --git a/src/AddressAllocator.zig b/src/AddressAllocator.zig index e879788..326468c 100644 --- a/src/AddressAllocator.zig +++ b/src/AddressAllocator.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const math = std.math; const mem = std.mem; const sort = std.sort; const testing = std.testing; @@ -12,431 +13,1116 @@ const AddressAllocator = @This(); /// The **sorted** list of `Range`s that are blocked. 
ranges: std.ArrayListUnmanaged(Range) = .empty, +child_allocator: mem.Allocator, -pub const empty = AddressAllocator{}; +// TODO: we should likely create an init function that blocks the entire negative address space +pub fn init(child_allocator: mem.Allocator) !AddressAllocator { + var aa: AddressAllocator = .{ .child_allocator = child_allocator }; -pub fn deinit(address_allocator: *AddressAllocator, gpa: mem.Allocator) void { - address_allocator.ranges.deinit(gpa); + const ranges = try child_allocator.alloc(Range, std.heap.pageSize() / @sizeOf(Range)); + aa.ranges = .initBuffer(ranges); + + aa.block(.fromSlice(Range, ranges)) catch unreachable; + + return aa; +} + +pub fn deinit(self: *AddressAllocator) void { + self.ranges.deinit(self.child_allocator); +} + +pub fn allocator(self: *AddressAllocator) mem.Allocator { + return .{ + .ptr = self, + .vtable = &.{ + .alloc = alloc, + .resize = resize, + .remap = remap, + .free = free, + }, + }; +} + +fn alloc(ctx: *anyopaque, n: usize, alignment: std.mem.Alignment, ra: usize) ?[*]u8 { + const self: *AddressAllocator = @ptrCast(@alignCast(ctx)); + + const ptr = self.child_allocator.rawAlloc(n, alignment, ra) orelse return null; + self.block(.fromPtr(ptr, n)) catch @panic("OOM"); + return ptr; +} + +fn resize( + ctx: *anyopaque, + buf: []u8, + alignment: std.mem.Alignment, + new_len: usize, + ret_addr: usize, +) bool { + const self: *AddressAllocator = @ptrCast(@alignCast(ctx)); + + const success = self.child_allocator.rawResize(buf, alignment, new_len, ret_addr); + if (success) { + self.block(.fromPtr(buf.ptr, new_len)) catch @panic("OOM"); + } + return success; +} + +fn remap( + context: *anyopaque, + memory: []u8, + alignment: std.mem.Alignment, + new_len: usize, + return_address: usize, +) ?[*]u8 { + const self: *AddressAllocator = @ptrCast(@alignCast(context)); + + const ptr = self.child_allocator.rawRemap(memory, alignment, new_len, return_address) orelse + return null; + + if (ptr != memory.ptr) { // new memory 
location + self.unblock(.fromSlice(u8, memory)) catch @panic("OOM"); + } + self.block(.fromPtr(ptr, new_len)) catch @panic("OOM"); + return ptr; +} + +fn free( + ctx: *anyopaque, + buf: []u8, + alignment: std.mem.Alignment, + ret_addr: usize, +) void { + const self: *AddressAllocator = @ptrCast(@alignCast(ctx)); + + self.unblock(.fromSlice(u8, buf)) catch @panic("OOM"); + return self.child_allocator.rawFree(buf, alignment, ret_addr); } /// Block a range to not be used by the `allocate` function. This function will always succeed, if /// there is enough memory available. -pub fn block( - address_allocator: *AddressAllocator, - gpa: mem.Allocator, - range: Range, - alignment: u64, -) !void { - assert(address_allocator.isSorted()); - defer assert(address_allocator.isSorted()); - - const aligned_range = if (alignment != 0) range.alignTo(alignment) else range; - assert(aligned_range.contains(range)); - if (aligned_range.size() == 0) return; +pub fn block(self: *AddressAllocator, range: Range) !void { + if (range.size() == 0) return; // Find the correct sorted position to insert the new range. const insert_idx = sort.lowerBound( Range, - address_allocator.ranges.items, - aligned_range, - Range.compare, + self.ranges.items, + range, + Range.compareTouching, ); log.debug( - "block: range: {f}, alignment: {}, aligned_range: {f}, insert_idx: {}", - .{ range, alignment, aligned_range, insert_idx }, + "block: range: {f}, insert_idx: {}", + .{ range, insert_idx }, ); - // If the new range is the greatest one OR if the entry at `insert_idx` is greater than the - // new range, we can just insert. - if (insert_idx == address_allocator.ranges.items.len or - address_allocator.ranges.items[insert_idx].compare(aligned_range) == .gt) + // If we don't overlap any existing one, we just insert. 
+ if (insert_idx == self.ranges.items.len or + self.ranges.items[insert_idx].compareTouching(range) == .gt) { - log.debug("block: New range inserted", .{}); - return address_allocator.ranges.insert(gpa, insert_idx, aligned_range); + return self.ranges.insert(self.child_allocator, insert_idx, range); } errdefer comptime unreachable; - assert(address_allocator.ranges.items.len > 0); + assert(self.ranges.items.len > 0); - // Now `insert_idx` points to the first entry, that touches `aligned_range`. - assert(address_allocator.ranges.items[insert_idx].touches(aligned_range)); - if (insert_idx > 1 and address_allocator.ranges.items.len > 1) { - assert(!address_allocator.ranges.items[insert_idx - 1].touches(aligned_range)); + // Now `insert_idx` points to the first entry, that touches `range`. + const first = &self.ranges.items[insert_idx]; + assert(first.touches(range)); + if (insert_idx > 0 and self.ranges.items.len > 0) { + assert(!self.ranges.items[insert_idx - 1].touches(range)); } - log.debug("block: `aligned_range` touches at least one existing range.", .{}); + log.debug("block: `range` touches at least one existing range.", .{}); - // NOTE: We merge entries that touch eachother to speedup future traversals. - // There are a few cases how to handle the merging: - // 1. `aligned_range` is contained by the existing range. Then we have to do nothing and can - // return early. - // 2. `aligned_range` contains the existing range. Then we have to overwrite `start` and `end`. - // 3. The existing range is before `aligned_range`. Set `existing.end` to `aligned_range.end`. - // 4. The existing range is after `aligned_range`. Set `existing.start` to `aligned.start`. - // After we have done this to the first range that touches, we will loop over the other ones - // that touch and just have to apply rule 4 repeatedly. 
- const first = &address_allocator.ranges.items[insert_idx]; - if (first.contains(aligned_range)) { - log.debug("block: Existing range at index {} contains new range. No-op", .{insert_idx}); + first.start = @min(first.start, range.start); + first.end = @max(first.end, range.end); + + // Merge any following overlapping ranges into this one. + // NOTE: We "iterate" through the slice by removing unneeded items and moving all following ones + // back by one. That's why we always look at `insert_idx + 1`. + while (insert_idx + 1 < self.ranges.items.len and + self.ranges.items[insert_idx + 1].touches(range)) + { + const neighbor = self.ranges.items[insert_idx + 1]; + assert(range.end >= neighbor.start); + assert(range.start <= neighbor.start); + first.end = @max(first.end, neighbor.end); + _ = self.ranges.orderedRemove(insert_idx + 1); + } +} + +pub fn unblock( + self: *AddressAllocator, + range: Range, +) !void { + + // Find the correct sorted position to remove the range. + var remove_idx = sort.lowerBound( + Range, + self.ranges.items, + range, + Range.compareOverlapping, + ); + log.debug( + "unblock: range: {f}, remove_idx: {}", + .{ range, remove_idx }, + ); + // If we don't overlap any existing one, we just return. 
+ if (remove_idx == self.ranges.items.len or + self.ranges.items[remove_idx].compareOverlapping(range) == .gt) + { + log.debug("unblock: Range to unblock overlaps nothing", .{}); + for (self.ranges.items) |r| { + assert(!r.overlaps(range)); + } return; - } else if (aligned_range.contains(first.*)) { - log.debug( - "block: New range contains existing range at index {}: {f} -> {f}", - .{ insert_idx, first, aligned_range }, - ); - first.* = aligned_range; - } else if (aligned_range.start <= first.end and aligned_range.end >= first.end) { - assert(aligned_range.start > first.start); - log.debug( - "block: Adjusting range end at index {}: {} -> {}", - .{ insert_idx, first.end, aligned_range.end }, - ); - first.*.end = aligned_range.end; - } else if (aligned_range.end >= first.start and aligned_range.start <= first.start) { - assert(aligned_range.end < first.end); - log.debug( - "block: Adjusting range start at index {}: {} -> {}", - .{ insert_idx, first.start, aligned_range.start }, - ); - first.*.start = aligned_range.start; + } + assert(self.ranges.items.len > 0); + + // Now `remove_idx` points to the first entry, that touches `range`. + const first = &self.ranges.items[remove_idx]; + assert(first.touches(range)); + if (remove_idx > 0 and self.ranges.items.len > 0) { + assert(!self.ranges.items[remove_idx - 1].overlaps(range)); + } + log.debug("unblock: `range` touches at least one existing range.", .{}); + + // We have multiple cases for the first touching range: + // + // [ range to unblock ] + // 0 [ first ] -> split + // + // [ range to unblock ] + // 1 [ first ] + // 1 [ first ] -> change start + // + // [ range to unblock ] + // 2 [ first ] + // 2 [ first ] + // 2 [ first ] -> remove + // + // [ range to unblock ] + // 3 [ first ] + // 3 [ first ] -> change end + // + // If it's cases 0 or 1 the operation is finished because we can't overlap another one. 
For cases 2 + // and 3 we will have to remove the following ranges until we arrive at one of the following cases: + // 1. + // [ range to unblock ] + // [ last ] + // 2. + // [ range to unblock ] + // [ last ] + // + if (first.start < range.start and first.end > range.end) { + const old_end = first.end; + first.end = range.start; + try self.ranges.insert(self.child_allocator, remove_idx + 1, .{ + .start = range.end, + .end = old_end, + }); + return; + } else if (first.start >= range.start and first.start < range.end and first.end > range.end) { + first.start = range.end; + return; + } else if (first.start >= range.start and first.end <= range.end) { + _ = self.ranges.orderedRemove(remove_idx); + } else if (first.start < range.start and first.end > range.start and first.end <= range.end) { + first.end = range.start; + remove_idx += 1; } else { unreachable; } - // TODO: comment why we do this - if (insert_idx >= address_allocator.ranges.items.len - 1) return; + // NOTE: We "iterate" through the slice by removing unneeded items and moving all following ones + // back by one. That's why we always look at `insert_idx + 1`. + while (remove_idx < self.ranges.items.len) { + const next_range = &self.ranges.items[remove_idx]; + if (next_range.start >= range.end) break; - var neighbor = &address_allocator.ranges.items[insert_idx + 1]; - var i: u64 = 0; - while (neighbor.touches(aligned_range)) { - assert(aligned_range.end >= neighbor.start); - assert(aligned_range.start <= neighbor.start); - - if (neighbor.end > first.end) { - log.debug( - "block: Merging neighbor range at index {}: {} -> {}.", - .{ insert_idx + 1, first.end, neighbor.end }, - ); - first.end = neighbor.end; - } - const removed = address_allocator.ranges.orderedRemove(insert_idx + 1); - log.debug("block: Removed merged range: {f}", .{removed}); - i += 1; - } - log.debug("block: Removed {} ranges.", .{i}); -} - -/// Allocate and block a `Range` of size `size` which will lie inside the given `valid_range`. 
If no -/// allocation of the given size is possible, return `null`. -pub fn allocate( - address_allocator: *AddressAllocator, - gpa: mem.Allocator, - size: u64, - valid_range: Range, -) !?Range { - const range = address_allocator.findAllocation(size, valid_range) orelse return null; - try address_allocator.block(gpa, range, 0); - return range; -} - -/// Find a free `Range` of size `size` within `valid_range` without blocking it. -pub fn findAllocation( - address_allocator: *AddressAllocator, - size: u64, - valid_range: Range, -) ?Range { - log.debug("findAllocation: Allocating size {} in range {f}", .{ size, valid_range }); - if (valid_range.size() < size) return null; - if (size == 0) return null; - const size_i: i64 = @intCast(size); - - const start_idx = sort.lowerBound( - Range, - address_allocator.ranges.items, - valid_range, - Range.compare, - ); - // `candidate_start` tracks the beginning of the current free region being examined. - var candidate_start = valid_range.start; - // If the range before the start index overlaps with our search start, we have to adjust. - if (start_idx > 0) { - const prev = address_allocator.ranges.items[start_idx - 1]; - if (prev.end > candidate_start) { - candidate_start = prev.end; - } - } - for (address_allocator.ranges.items[start_idx..]) |reserved| { - if (candidate_start >= valid_range.end) { - log.debug("findAllocation: Searched past the valid range.", .{}); + if (next_range.end <= range.end) { + _ = self.ranges.orderedRemove(remove_idx); + } else { + next_range.start = range.end; break; } + } +} - // The potential allocation gap is before the current reserved block. - if (candidate_start < reserved.start) { - // Determine the actual available portion of the gap within our search `range`. 
- const gap_end = @min(reserved.start, valid_range.end); - if (gap_end >= candidate_start + size_i) { - const new_range = Range{ - .start = candidate_start, - .end = candidate_start + size_i, - }; - log.debug("findAllocation: Found free gap: {f}", .{new_range}); - return new_range; +test "fuzz against bitset" { + const iterations = 64 * 1024; + const size = 1024; + + var aa = AddressAllocator{ .child_allocator = testing.allocator }; + defer aa.deinit(); + + var bitset_ref = try std.bit_set.DynamicBitSetUnmanaged.initEmpty(testing.allocator, size); + defer bitset_ref.deinit(testing.allocator); + + var prng = std.Random.DefaultPrng.init(testing.random_seed); + const random = prng.random(); + + var expected_ranges = try std.ArrayListUnmanaged(Range).initCapacity(testing.allocator, size / 2); + defer expected_ranges.deinit(testing.allocator); + + var bitset_temp = try std.bit_set.DynamicBitSetUnmanaged.initEmpty(testing.allocator, size); + defer bitset_temp.deinit(testing.allocator); + + for (0..iterations) |_| { + const is_block = random.boolean(); + const start = random.intRangeLessThan(usize, 0, size); + const len = random.intRangeAtMost(usize, 1, size - start); + const end = start + len; + + const range = Range{ .start = @intCast(start), .end = @intCast(end) }; + + if (is_block) { + try aa.block(range); + bitset_ref.setRangeValue(.{ .start = start, .end = end }, true); + } else { + try aa.unblock(range); + bitset_ref.setRangeValue(.{ .start = start, .end = end }, false); + } + + bitset_temp.unsetAll(); + for (aa.ranges.items) |r| { + bitset_temp.setRangeValue(.{ .start = @intCast(r.start), .end = @intCast(r.end) }, true); + } + try testing.expect(bitset_ref.eql(bitset_temp)); + } +} + +/// An internal iterator that cleanly yields unblocked memory holes. 
+const HoleIterator = struct { + ranges: []const Range, + valid_range: Range, + size: i64, + candidate_start: i64, + idx: usize, + + fn init(aa: *const AddressAllocator, valid_range: Range, size: u64) HoleIterator { + const start_idx = sort.lowerBound( + Range, + aa.ranges.items, + valid_range, + Range.compareOverlapping, + ); + return .{ + .ranges = aa.ranges.items, + .valid_range = valid_range, + .size = @intCast(size), + .candidate_start = valid_range.start, + .idx = start_idx, + }; + } + + fn next(self: *HoleIterator) ?Range { + while (self.idx < self.ranges.len) { + const reserved = self.ranges[self.idx]; + if (self.candidate_start >= self.valid_range.end) return null; + + if (self.candidate_start < reserved.start) { + const hole_end = @min(reserved.start, self.valid_range.end); + const hole_start = self.candidate_start; + self.candidate_start = reserved.end; + + if (hole_end >= hole_start + self.size) { + return Range{ .start = hole_start, .end = hole_end }; + } + } else { + self.candidate_start = @max(self.candidate_start, reserved.end); + } + self.idx += 1; + } + + if (self.candidate_start < self.valid_range.end) { + const hole_start = self.candidate_start; + const hole_end = self.valid_range.end; + self.candidate_start = self.valid_range.end; // Mark done to prevent infinite loops + if (hole_end >= hole_start + self.size) { + return Range{ .start = hole_start, .end = hole_end }; } } - // The gap was not large enough. Move the candidate start past the current reserved block - // for the next iteration. - candidate_start = @max(candidate_start, reserved.end); + return null; } - // Check the remaining space at the end of the search range. 
- if (valid_range.end >= candidate_start + size_i) { - const new_range = Range{ - .start = candidate_start, - .end = candidate_start + size_i, - }; - log.debug("findAllocation: Found free gap at end: {f}", .{new_range}); - return new_range; + test { + var aa = AddressAllocator{ .child_allocator = testing.allocator }; + defer aa.deinit(); + + try aa.block(.{ .start = 100, .end = 200 }); + try aa.block(.{ .start = 300, .end = 400 }); + + var it = HoleIterator.init(&aa, .{ .start = 0, .end = 500 }, 10); + + try testing.expectEqual(Range{ .start = 0, .end = 100 }, it.next().?); + try testing.expectEqual(Range{ .start = 200, .end = 300 }, it.next().?); + try testing.expectEqual(Range{ .start = 400, .end = 500 }, it.next().?); + try testing.expectEqual(null, it.next()); + } +}; + +const Constraint = struct { + min_rel: i32, + max_rel: i32, + mask: u32, + pattern: u32, +}; + +/// Solves a single 32-bit relative jump constraint in O(1) time. +/// +/// Returns the smallest `rel32` such that +/// - `min_rel <= rel32 <= max_rel` and +/// - `(rel32 & mask) == pattern` +/// +/// Context: +/// During "Instruction Punning", we overwrite an instruction with a 5-byte jump (`E9 xx xx xx xx`). +/// If the original instruction is smaller than 5 bytes, our jump offset (`xx xx xx xx`) will spill +/// into the next instruction. To prevent crashing, the spilled bytes must form the successor +/// instruction. This restricts certain bits/bytes of our `rel32` offset to fixed values. +/// +/// The algorithm uses a bit-twiddling hack to isolate the "free" (unmasked) bits, increment them as +/// a single continuous integer, and map them back around the fixed "pattern" bits, completely +/// avoiding loops over the search space. 
+/// +/// Visualization of the bit-twiddling constraint logic: +/// ------------------------------------------------------------------------- +/// Mask: 1111 1111 0000 0000 1111 1111 0000 0000 (1 = Locked bits) +/// Pattern: 0000 0000 0000 0000 1110 1001 0000 0000 (The forced values) +/// Free: 0000 0000 1111 1111 0000 0000 1111 1111 (~Mask) +/// +/// Current Candidate: [ Fixed A ] [ Free 1 ] [ Fixed B ] [ Free 0 ] +/// +/// If `Current Candidate < min_rel`, we add 1 to the "Free" bits. +/// The hack `(((candidate & free) | mask) + 1) & free` allows the arithmetic carry to jump over the +/// fixed bits without corrupting them: +/// +/// Next Valid Val: [ Fixed A ][ Free 1 + carry ] [ Fixed B ] [ Free 0 + 1 ] +/// ------------------------------------------------------------------------- +fn solveRelativeConstraint(c: Constraint) ?i32 { + log.debug( + "solveRelative: min: {x}, max: {x}, mask: {x}, pattern: {x}", + .{ c.min_rel, c.max_rel, c.mask, c.pattern }, + ); + assert((c.pattern & ~c.mask) == 0); + if (c.min_rel > c.max_rel) return null; + + // Force the pattern onto the current minimum value + var candidate: u32 = (@as(u32, @bitCast(c.min_rel)) & ~c.mask) | c.pattern; + log.debug(" candidate (init): {x}", .{candidate}); + + // If forcing the pattern made the value smaller than min_rel, we must increment the "free" bits + // to find the next valid higher number. 
+ if (@as(i32, @bitCast(candidate)) < c.min_rel) { + if (~c.mask == 0) { + log.debug(" failed: fully constrained", .{}); + return null; + } + + const incremented_free = (((candidate & ~c.mask) | c.mask) +% 1) & ~c.mask; + assert(incremented_free & c.mask == 0); // All constrained bits are 0 + candidate = incremented_free | c.pattern; + log.debug(" candidate (incr): {x}", .{candidate}); } - log.debug("findAllocation: No suitable gap found.", .{}); + const result: i32 = @bitCast(candidate); + if (result >= c.min_rel and result <= c.max_rel) { + log.debug(" success: {x}", .{result}); + return result; + } + log.debug(" failed: result {x} out of bounds", .{result}); return null; } -fn isSorted(address_allocator: *const AddressAllocator) bool { - return sort.isSorted(Range, address_allocator.ranges.items, {}, isSortedInner); +test "solveRelativeConstraint basic" { + try testing.expectEqual(100, solveRelativeConstraint(.{ + .min_rel = 100, + .max_rel = 200, + .mask = 0, + .pattern = 0, + })); } -fn isSortedInner(_: void, lhs: Range, rhs: Range) bool { - return switch (lhs.compare(rhs)) { - .lt => true, - .gt => false, - .eq => unreachable, + +test "solveRelativeConstraint aligned" { + try testing.expectEqual(0x10E8, solveRelativeConstraint(.{ + .min_rel = 0x1000, + .max_rel = 0x2000, + .mask = 0xFF, + .pattern = 0xE8, + })); + try testing.expectEqual(0x10E8, solveRelativeConstraint(.{ + .min_rel = 0x10E8, + .max_rel = 0x2000, + .mask = 0xFF, + .pattern = 0xE8, + })); + try testing.expectEqual(0x11E8, solveRelativeConstraint(.{ + .min_rel = 0x10E9, + .max_rel = 0x2000, + .mask = 0xFF, + .pattern = 0xE8, + })); +} + +test "solveRelativeConstraint negative" { + try testing.expectEqual(@as(i32, @bitCast(@as(u32, 0xFFFFF0E8))), solveRelativeConstraint(.{ + .min_rel = -0x1000, + .max_rel = 0, + .mask = 0xFF, + .pattern = 0xE8, + })); +} + +test "solveRelativeConstraint impossible" { + try testing.expectEqual(null, solveRelativeConstraint(.{ + .min_rel = 0x1000, + .max_rel = 
0x10E7, + .mask = 0xFF, + .pattern = 0xE8, + })); + try testing.expectEqual(null, solveRelativeConstraint(.{ + .min_rel = 0x10000000, + .max_rel = 0x11000000, + .mask = 0xFFFFFFFF, + .pattern = 0x12345678, + })); +} + +test "solveRelativeConstraint overflow" { + try testing.expectEqual(0x12345678, solveRelativeConstraint(.{ + .min_rel = 0x10000000, + .max_rel = 0x20000000, + .mask = 0xFFFFFFFF, + .pattern = 0x12345678, + })); + + try testing.expectEqual(null, solveRelativeConstraint(.{ + .min_rel = 2147483640, + .max_rel = 2147483647, + .mask = 0xFF, + .pattern = 0x00, + })); +} + +pub const Request = struct { + source: u64, + size: u64, + valid_range: Range, + mask: u32 = 0, + pattern: u32 = 0, +}; + +/// Finds the first free range of `size` bytes within `valid_range` that also satisfies the relative +/// 32-bit jump constraints `mask` and `pattern` from `jump_source`. +/// Runs in `O(|H| + log(#R))` for +/// - `H` being the set of holes in the valid range and +/// - `#R` being the number of ranges in the AddressAllocator. 
+pub fn findAllocation( + self: *AddressAllocator, + r: Request, +) ?Range { + if (r.valid_range.size() < r.size) return null; + if (r.size == 0) return null; + + var it = HoleIterator.init(self, r.valid_range, r.size); + while (it.next()) |hole| { + log.debug("findAllocation: Hole: {f}", .{hole}); + const bounds = getRelativeBounds(hole, @intCast(r.size), r.source) orelse continue; + const rel32 = solveRelativeConstraint(.{ + .min_rel = bounds.min, + .max_rel = bounds.max, + .mask = r.mask, + .pattern = r.pattern, + }) orelse continue; + + const start = @as(i64, @intCast(r.source)) + rel32; + const end = start + @as(i64, @intCast(r.size)); + + assert(end - start == r.size); + assert(start >= r.valid_range.start); + assert(end <= r.valid_range.end); + return .{ .start = start, .end = end }; + } + + return null; +} + +fn getRelativeBounds(hole: Range, size: i64, source: u64) ?struct { min: i32, max: i32 } { + if (hole.end - hole.start < size) return null; + + const offset_to_min = hole.start - @as(i64, @intCast(source)); + const offset_to_max = (hole.end - size) - @as(i64, @intCast(source)); + + const min_rel = @max(offset_to_min, math.minInt(i32)); + const max_rel = @min(offset_to_max, math.maxInt(i32)); + if (min_rel > max_rel) return null; + + return .{ + .min = @intCast(min_rel), + .max = @intCast(max_rel), }; } -test "block basic" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); +test "findConstrainedAllocation" { + var aa = AddressAllocator{ .child_allocator = testing.allocator }; + defer aa.deinit(); - try aa.block(testing.allocator, .{ .start = 0, .end = 100 }, 0); - try testing.expectEqual(Range{ .start = 0, .end = 100 }, aa.ranges.items[0]); + try aa.block(.{ .start = 0x1000, .end = 0x2000 }); + try aa.block(.{ .start = 0x3000, .end = 0x4000 }); - try aa.block(testing.allocator, .{ .start = 200, .end = 300 }, 0); - try testing.expectEqual(Range{ .start = 0, .end = 100 }, aa.ranges.items[0]); - try testing.expectEqual(Range{ .start = 
200, .end = 300 }, aa.ranges.items[1]); - try testing.expectEqual(2, aa.ranges.items.len); + try testing.expectEqual( + Range{ .start = 0x00AA, .end = 0x00BA }, + aa.findAllocation(.{ + .size = 0x10, + .valid_range = .{ .start = 0x0000, .end = 0x4000 }, + .source = 0, + .mask = 0xFF, + .pattern = 0xAA, + }), + ); + + try testing.expectEqual( + Range{ .start = 0x20AA, .end = 0x20BA }, + aa.findAllocation(.{ + .size = 0x10, + .valid_range = .{ .start = 0x1000, .end = 0x4000 }, + .source = 0, + .mask = 0xFF, + .pattern = 0xAA, + }), + ); + + try testing.expectEqual( + null, + aa.findAllocation(.{ + .size = 0x10, + .valid_range = .{ .start = 0x2000, .end = 0x8000 }, + .source = 0, + .mask = 0xFFFF, + .pattern = 0xAAAA, + }), + ); + + try testing.expectEqual( + Range{ .start = 0x40AA, .end = 0x50AA }, + aa.findAllocation(.{ + .size = 0x1000, + .valid_range = .{ .start = 0x2000, .end = 0x8000 }, + .source = 0, + .mask = 0xFF, + .pattern = 0xAA, + }), + ); } -test "block in hole" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); +pub const CoupledResult = struct { + rel1: i32, + rel2: i32, +}; - try aa.block(testing.allocator, .{ .start = 0, .end = 100 }, 0); - try testing.expectEqual(Range{ .start = 0, .end = 100 }, aa.ranges.items[0]); +/// Attempts to find a joint bit-pattern that satisfies two overlapping jump constraints. +/// +/// Context: +/// In tactics like Successor Eviction, we overwrite two adjacent instructions with 5-byte jumps (J1 +/// and J2). If the distance between them is less than 5 bytes, their physical bytes overlap in +/// memory. +/// +/// `k` represents the physical distance (in bytes) between the start of J1 and J2 (1 <= k <= 4). +/// Because x86_64 uses Little-Endian representation, the Most Significant Bytes (MSB) of J1's +/// relative offset (`rel1`) physically overlap with the Least Significant Bytes (LSB) of J2's +/// relative offset (`rel2`). 
+/// +/// Furthermore, J2's opcode (`0xE9`) falls squarely inside the bytes of `rel1`. +/// +/// Memory Layout & Endianness Overlap (Example where K = 2): +/// ----------------------------------------------------------------------------------- +/// Memory Offset: +0 +1 +2 +3 +4 +5 +6 +/// J1 Bytes: [0xE9] [ X0 ] [ X1 ] [ X2 ] [ X3 ] +/// J2 Bytes: [0xE9] [ Y0 ] [ Y1 ] [ Y2 ] [ Y3 ] +/// +/// Consequences for `rel1` (X) and `rel2` (Y): +/// 1. Opcode Constraint: `X1` MUST exactly equal `0xE9`. +/// 2. Shared Bytes (S): `X2` MUST exactly equal `Y0`. +/// `X3` MUST exactly equal `Y1`. +/// ----------------------------------------------------------------------------------- +/// +/// Algorithm ("The Squeeze"): +/// Iterating possibly billions of combinations of X and Y is too slow. Instead, we use the +/// constraints of the memory layout: +/// +/// `rel1` is constrained to a physical memory hole `[min1, max1]`. Because memory holes are usually +/// small (e.g., 4KB), the Most Significant Bytes of `rel1` (which are exactly our Shared Bytes 'S') +/// are heavily restricted. +/// +/// There are usually only a few possible values for S: +/// 1. We extract the possible values for S from `min1..max1`. +/// 2. We apply S as a strict constraint on the lower bytes of `rel2`. +/// 3. We delegate the remaining independent bits (X0, Y2 and Y3) to the `solveRelativeConstraint`. +/// +/// Parameters: +/// `k`: The physical byte offset of J2 relative to J1 (1 <= k <= 4). +/// `min1`, `max1`: The valid rel32 hardware bounds for J1. +/// `min2`, `max2`: The valid rel32 hardware bounds for J2. +/// `mask1`, `pattern1`: The original byte constraints on J1. +/// `mask2`, `pattern2`: The original byte constraints on J2. 
+pub fn solveCoupledConstraint( + k: u8, + c1: Constraint, + c2: Constraint, +) ?CoupledResult { + log.debug("solveCoupled: k={}", .{k}); + log.debug(" C1: min={x} max={x} mask={x} pat={x}", .{ c1.min_rel, c1.max_rel, c1.mask, c1.pattern }); + log.debug(" C2: min={x} max={x} mask={x} pat={x}", .{ c2.min_rel, c2.max_rel, c2.mask, c2.pattern }); + assert(k >= 1); + assert(k <= 4); - try aa.block(testing.allocator, .{ .start = 400, .end = 500 }, 0); - try testing.expectEqual(2, aa.ranges.items.len); - try testing.expectEqual(Range{ .start = 0, .end = 100 }, aa.ranges.items[0]); - try testing.expectEqual(Range{ .start = 400, .end = 500 }, aa.ranges.items[1]); + // The opcode for J2 (0xE9) physically falls inside rel32 of J1 at byte index `k - 1` of rel1. + const e9_shift = @as(u5, @intCast(k - 1)) * 8; + const e9_mask = @as(u32, 0xFF) << e9_shift; - try aa.block(testing.allocator, .{ .start = 200, .end = 300 }, 0); - try testing.expectEqual(3, aa.ranges.items.len); - try testing.expectEqual(Range{ .start = 0, .end = 100 }, aa.ranges.items[0]); - try testing.expectEqual(Range{ .start = 200, .end = 300 }, aa.ranges.items[1]); - try testing.expectEqual(Range{ .start = 400, .end = 500 }, aa.ranges.items[2]); + if ((c1.mask & e9_mask) != 0 and (c1.pattern & e9_mask) != (@as(u32, 0xE9) << e9_shift)) { + log.debug(" failed: opcode 0xE9 conflict in C1", .{}); + return null; // Caller's pattern conflicts with the mandatory J2 opcode + } + const c_mask1 = c1.mask | e9_mask; + const c_pattern1 = (c1.pattern & ~e9_mask) | (@as(u32, 0xE9) << e9_shift); + + if (k == 4) { + // J1 is completely resolved just with the 0xE9 constraint applied above. 
+ log.debug(" Fast path K=4", .{}); + const rel1 = solveRelativeConstraint(.{ + .min_rel = c1.min_rel, + .max_rel = c1.max_rel, + .mask = c_mask1, + .pattern = c_pattern1, + }) orelse return null; + const rel2 = solveRelativeConstraint(.{ + .min_rel = c2.min_rel, + .max_rel = c2.max_rel, + .mask = c2.mask, + .pattern = c2.pattern, + }) orelse return null; + return .{ .rel1 = rel1, .rel2 = rel2 }; + } + + // Determine the bitwise shift and mask for the Shared Bytes (S) + const s_shift = @as(u5, @intCast(k)) * 8; + const num_shared = @as(u5, @intCast(4 - k)); + const s_mask = (@as(u32, 1) << (num_shared * 8)) - 1; + + log.debug(" Shared Bytes: shift={}, mask={x}", .{ s_shift, s_mask }); + + var current_min = c1.min_rel; + while (current_min <= c1.max_rel) { + const u_rel: u32 = @bitCast(current_min); + const S = u_rel >> s_shift; // Extract shared bytes from top of rel1 + + // Calculate the maximum u32 value that shares this S + const max_u_rel_for_S = (S << s_shift) | ((@as(u32, 1) << s_shift) - 1); + const max_i_rel_for_S: i32 = @bitCast(max_u_rel_for_S); + const local_max1 = @min(c1.max_rel, max_i_rel_for_S); + + // Does this S conflict with J2's requirements? + if ((c2.mask & s_mask) != 0) { + if ((c2.pattern & c2.mask & s_mask) != (S & c2.mask & s_mask)) { + // Advance to the next block of S. 
+ log.debug(" Conflict at S={x} (min={x})", .{ S, current_min });
+ if (max_i_rel_for_S == std.math.maxInt(i32)) break;
+ const next_min = max_i_rel_for_S + 1;
+ if (next_min > c1.max_rel) break;
+ current_min = next_min;
+ continue;
+ }
+ }
+
+ log.debug(" Trying S={x} range [{x}, {x}]", .{ S, current_min, local_max1 });
+
+ // Apply S as a strict constraint on the lowest bytes of J2
+ const c_mask2 = c2.mask | s_mask;
+ const c_pattern2 = (c2.pattern & ~s_mask) | S;
+
+ // O(1) solver execution for this specific S value
+ const opt_rel1 = solveRelativeConstraint(.{
+ .min_rel = current_min,
+ .max_rel = local_max1,
+ .mask = c_mask1,
+ .pattern = c_pattern1,
+ });
+ const opt_rel2 = solveRelativeConstraint(.{
+ .min_rel = c2.min_rel,
+ .max_rel = c2.max_rel,
+ .mask = c_mask2,
+ .pattern = c_pattern2,
+ });
+ if (opt_rel1 != null and opt_rel2 != null) {
+ log.debug(" Success: rel1={x} rel2={x}", .{ opt_rel1.?, opt_rel2.? });
+ return .{ .rel1 = opt_rel1.?, .rel2 = opt_rel2.? };
+ }
+
+ if (max_i_rel_for_S == std.math.maxInt(i32)) break;
+ const next_min = max_i_rel_for_S + 1;
+ if (next_min > c1.max_rel) break;
+ current_min = next_min;
+ }
+
+ log.debug(" failed: no coupled solution found", .{});
+ return null;
}

-test "block touch with previous" {
- var aa = AddressAllocator{};
- defer aa.deinit(testing.allocator);
+test "solveCoupledConstraint K=4 (Independent)" {
+ // If K=4, J1 and J2 don't share rel32 bytes, but byte 3 of rel1 MUST be 0xE9 (the J2 opcode).
+ // Let's force rel1 to be in [0x12000000, 0x120000FF].
+ // Since highest byte (byte 3) must be 0xE9, no value starting with 0x12 will work.
+ try testing.expectEqual(null, solveCoupledConstraint( + 4, + .{ + .min_rel = 0x12000000, + .max_rel = 0x120000FF, + .mask = 0, + .pattern = 0, + }, + .{ + .min_rel = 0, + .max_rel = 100, + .mask = 0, + .pattern = 0, + }, + )); - try aa.block(testing.allocator, .{ .start = 0, .end = 100 }, 0); - try aa.block(testing.allocator, .{ .start = 100, .end = 200 }, 0); - try testing.expectEqual(Range{ .start = 0, .end = 200 }, aa.ranges.items[0]); - try testing.expectEqual(1, aa.ranges.items.len); - - try aa.block(testing.allocator, .{ .start = 100, .end = 300 }, 0); - try testing.expectEqual(Range{ .start = 0, .end = 300 }, aa.ranges.items[0]); - try testing.expectEqual(1, aa.ranges.items.len); - - try aa.block(testing.allocator, .{ .start = 300, .end = 400 }, 0); - try testing.expectEqual(Range{ .start = 0, .end = 400 }, aa.ranges.items[0]); - try testing.expectEqual(1, aa.ranges.items.len); + const res = solveCoupledConstraint( + 4, + .{ + .min_rel = @bitCast(@as(u32, 0xE8000000)), + .max_rel = @bitCast(@as(u32, 0xEA000000)), + .mask = 0, + .pattern = 0, + }, + .{ + .min_rel = 0x1234, + .max_rel = 0x1234, + .mask = 0, + .pattern = 0, + }, + ); + try testing.expect(res != null); + try testing.expectEqual(@as(i32, @bitCast(@as(u32, 0xE9000000))), res.?.rel1); + try testing.expectEqual(0x1234, res.?.rel2); } -test "block touch with following" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); - - try aa.block(testing.allocator, .{ .start = 200, .end = 300 }, 0); - try aa.block(testing.allocator, .{ .start = 100, .end = 200 }, 0); - try testing.expectEqual(Range{ .start = 100, .end = 300 }, aa.ranges.items[0]); - try testing.expectEqual(1, aa.ranges.items.len); - - try aa.block(testing.allocator, .{ .start = 0, .end = 200 }, 0); - try testing.expectEqual(Range{ .start = 0, .end = 300 }, aa.ranges.items[0]); - try testing.expectEqual(1, aa.ranges.items.len); - - try aa.block(testing.allocator, .{ .start = -100, .end = 0 }, 0); - try 
testing.expectEqual(Range{ .start = -100, .end = 300 }, aa.ranges.items[0]); - try testing.expectEqual(1, aa.ranges.items.len); +test "solveCoupledConstraint K=2 (2 byte overlap)" { + // K=2 means the top 2 bytes of rel1 are the bottom 2 bytes of rel2. + // J2 opcode (0xE9) sits at byte 1 of rel1. + const res = solveCoupledConstraint( + 2, + .{ + .min_rel = 0x12340000, + .max_rel = 0x1234FFFF, + .mask = 0, + .pattern = 0, + }, + .{ + .min_rel = 0x00000000, + .max_rel = 0x0000FFFF, + .mask = 0, + .pattern = 0, + }, + ); + try testing.expect(res != null); + try testing.expectEqual(0x1234E900, res.?.rel1); + try testing.expectEqual(0x00001234, res.?.rel2); } -test "block overlap with previous and following" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); - - try aa.block(testing.allocator, .{ .start = 0, .end = 100 }, 0); - try aa.block(testing.allocator, .{ .start = 200, .end = 300 }, 0); - try testing.expectEqual(Range{ .start = 0, .end = 100 }, aa.ranges.items[0]); - try testing.expectEqual(Range{ .start = 200, .end = 300 }, aa.ranges.items[1]); - try testing.expectEqual(2, aa.ranges.items.len); - - try aa.block(testing.allocator, .{ .start = 50, .end = 250 }, 0); - try testing.expectEqual(Range{ .start = 0, .end = 300 }, aa.ranges.items[0]); - try testing.expectEqual(1, aa.ranges.items.len); +test "solveCoupledConstraint K=2 conflict" { + // Same as above, but J2 explicitly forbids lower bytes from being 0x1234. 
+ const res = solveCoupledConstraint(
+ 2,
+ .{
+ .min_rel = 0x12340000,
+ .max_rel = 0x1234FFFF,
+ .mask = 0,
+ .pattern = 0,
+ },
+ .{
+ .min_rel = 0x00000000,
+ .max_rel = 0x0000FFFF,
+ .mask = 0x0000FFFF,
+ .pattern = 0x00005678,
+ },
+ );
+ try testing.expectEqual(null, res);
}

-test "block contained by existing" {
- var aa = AddressAllocator{};
- defer aa.deinit(testing.allocator);
-
- try aa.block(testing.allocator, .{ .start = 100, .end = 300 }, 0);
- try aa.block(testing.allocator, .{ .start = 200, .end = 250 }, 0);
- try testing.expectEqual(Range{ .start = 100, .end = 300 }, aa.ranges.items[0]);
- try testing.expectEqual(1, aa.ranges.items.len);
+test "solveCoupledConstraint K=2 spans multiple S values" {
+ // We give J1 a wide range: [0x00000000, 0x00060000]. S can be 0 to 6.
+ // We force J2 to require lower bytes = 0x0004. This forces the solver to skip S=0 and similar
+ // and find S=4.
+ const res = solveCoupledConstraint(
+ 2,
+ .{
+ .min_rel = 0,
+ .max_rel = 0x00060000,
+ .mask = 0,
+ .pattern = 0,
+ },
+ .{
+ .min_rel = 0,
+ .max_rel = 0x0000FFFF,
+ .mask = 0x0000FFFF,
+ .pattern = 0x00000004,
+ },
+ );
+ try testing.expect(res != null);
+ try testing.expectEqual(0x0004E900, res.?.rel1);
+ try testing.expectEqual(0x00000004, res.?.rel2);
}

-test "block contains existing" {
- var aa = AddressAllocator{};
- defer aa.deinit(testing.allocator);
+/// Finds two allocations that simultaneously satisfy their individual offset constraints and the
+/// physical overlap constraints of their origin instructions.
+/// `r1` (for J1) and `r2` (for J2) separated by `k` bytes.
+///
+/// Runs in O(|H1| * |H2| + log(#R)) for
+/// - `H1` and `H2` being the set of holes in the valid ranges in `r1` and `r2`
+/// - `#R` being the number of ranges in the AddressAllocator.
+pub fn findCoupledAllocation( + self: *AddressAllocator, + k: u8, + r1: Request, + r2: Request, +) ?[2]Range { + if (r1.valid_range.size() < r1.size or r1.size == 0) return null; + if (r2.valid_range.size() < r2.size or r2.size == 0) return null; + assert(r2.source > r1.source); + assert(r2.source - r1.source == k); - try aa.block(testing.allocator, .{ .start = 50, .end = 100 }, 0); - try aa.block(testing.allocator, .{ .start = 0, .end = 200 }, 0); - try testing.expectEqual(Range{ .start = 0, .end = 200 }, aa.ranges.items[0]); - try testing.expectEqual(1, aa.ranges.items.len); + var it1 = HoleIterator.init(self, r1.valid_range, r1.size); + while (it1.next()) |hole1| { + log.debug("findCoupledAllocation: Hole1: {f}", .{hole1}); + const b1 = getRelativeBounds(hole1, @intCast(r1.size), r1.source) orelse continue; + + var it2 = HoleIterator.init(self, r2.valid_range, r2.size); + while (it2.next()) |hole2| { + log.debug(" Hole2: {f}", .{hole2}); + const b2 = getRelativeBounds(hole2, @intCast(r2.size), r2.source) orelse continue; + + const c1 = Constraint{ + .min_rel = b1.min, + .max_rel = b1.max, + .mask = r1.mask, + .pattern = r1.pattern, + }; + const c2 = Constraint{ + .min_rel = b2.min, + .max_rel = b2.max, + .mask = r2.mask, + .pattern = r2.pattern, + }; + + if (solveCoupledConstraint(k, c1, c2)) |result| { + const start1 = @as(i64, @intCast(r1.source)) + result.rel1; + const end1 = start1 + @as(i64, @intCast(r1.size)); + + const start2 = @as(i64, @intCast(r2.source)) + result.rel2; + const end2 = start2 + @as(i64, @intCast(r2.size)); + + assert(end1 - start1 == r1.size); + assert(end2 - start2 == r2.size); + + // If we used the same hole, we must ensure the actual allocations don't overlap. + const range1 = Range{ .start = start1, .end = end1 }; + const range2 = Range{ .start = start2, .end = end2 }; + // TODO: Support allocating both trampolines in the exact same memory hole. 
+ // This requires dynamically partitioning the hole so the trampolines don't overlap + // each other. For now, simply skip this case. + if (range1.overlaps(range2)) continue; + + return [2]Range{ + .{ .start = start1, .end = end1 }, + .{ .start = start2, .end = end2 }, + }; + } + } + } + + return null; } -test "block overlaps multiple" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); +/// A generic helper to mechanically verify that a coupled allocation satisfies all bitwise and +/// physical overlap constraints. +fn verifyCoupled(k: u8, r1: Request, r2: Request, j1_range: Range, j2_range: Range) !void { + const rel1: i32 = @intCast(j1_range.start - @as(i64, @intCast(r1.source))); + const rel2: i32 = @intCast(j2_range.start - @as(i64, @intCast(r2.source))); + const u_rel1: u32 = @bitCast(rel1); + const u_rel2: u32 = @bitCast(rel2); - try aa.block(testing.allocator, .{ .start = 0, .end = 100 }, 0); - try aa.block(testing.allocator, .{ .start = 150, .end = 200 }, 0); - try aa.block(testing.allocator, .{ .start = 250, .end = 300 }, 0); - try aa.block(testing.allocator, .{ .start = 350, .end = 400 }, 0); - try aa.block(testing.allocator, .{ .start = 450, .end = 500 }, 0); - try testing.expectEqual(5, aa.ranges.items.len); + // Opcode Constraint + const e9_shift = @as(u5, @intCast(k - 1)) * 8; + try testing.expectEqual(@as(u32, 0xE9), (u_rel1 >> e9_shift) & 0xFF); - try aa.block(testing.allocator, .{ .start = 50, .end = 475 }, 0); - try testing.expectEqual(Range{ .start = 0, .end = 500 }, aa.ranges.items[0]); - try testing.expectEqual(1, aa.ranges.items.len); + // Shared Bytes Constraint + if (k < 4) { + const shared_shift = @as(u5, @intCast(k)) * 8; + const shared_mask = (@as(u32, 1) << (@as(u5, @intCast(4 - k)) * 8)) - 1; + const shared1 = (u_rel1 >> shared_shift) & shared_mask; + const shared2 = u_rel2 & shared_mask; + try testing.expectEqual(shared1, shared2); + } + + // Original User Constraints + try testing.expectEqual(r1.pattern, u_rel1 & 
r1.mask); + try testing.expectEqual(r2.pattern, u_rel2 & r2.mask); } -test "allocate in empty allocator" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); +test "findCoupledAllocation" { + var aa = AddressAllocator{ .child_allocator = testing.allocator }; + defer aa.deinit(); - const search_range = Range{ .start = 0, .end = 1000 }; - const allocated = try aa.allocate(testing.allocator, 100, search_range); - try testing.expectEqual(1, aa.ranges.items.len); - try testing.expectEqual(Range{ .start = 0, .end = 100 }, aa.ranges.items[0]); - try testing.expectEqual(Range{ .start = 0, .end = 100 }, allocated); + // Block memory so we have distinct holes. + // We need a hole that allows `rel1` to have `0xE9` in its second byte. + // This means `rel1` needs to be around `0xE900`. + try aa.block(.{ .start = 0x2000, .end = 0xE000 }); + try aa.block(.{ .start = 0xF000, .end = 0x10000 }); + + const r1 = Request{ .source = 0, .size = 10, .valid_range = .{ .start = 0, .end = 0x20000 } }; + const r2 = Request{ .source = 2, .size = 10, .valid_range = .{ .start = 0, .end = 0x20000 } }; + const res = aa.findCoupledAllocation(2, r1, r2); + try testing.expect(res != null); + + const j1_range = res.?[0]; + const j2_range = res.?[1]; + try testing.expect(j1_range.start >= 0xE000 and j1_range.end <= 0xF000); + try testing.expect(j2_range.start >= 0x0000 and j2_range.end <= 0x2000); + + try verifyCoupled(2, r1, r2, j1_range, j2_range); } -test "allocate with no space" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); +test "findCoupledAllocation K=1 (3 shared bytes)" { + var aa = AddressAllocator{ .child_allocator = testing.allocator }; + defer aa.deinit(); - const range = Range{ .start = 0, .end = 1000 }; - try aa.block(testing.allocator, range, 0); - const allocated = try aa.allocate(testing.allocator, 100, range); - try testing.expect(allocated == null); + try aa.block(.{ .start = 0x2000, .end = 0x01000000 }); + + const r1 = Request{ .source = 
0, .size = 10, .valid_range = .{ .start = 0, .end = 0x10000000 } }; + const r2 = Request{ .source = 1, .size = 10, .valid_range = .{ .start = 0, .end = 0x10000000 } }; + const res = aa.findCoupledAllocation(1, r1, r2); + try testing.expect(res != null); + + // For K=1, rel1's lowest byte MUST be 0xE9. + // In Hole 1, the smallest valid rel1 is 0x000000E9. + // This makes the shared bytes (top 3 bytes) 0x000000. + try testing.expectEqual(0xE9, res.?[0].start); + try testing.expectEqual(0x01, res.?[1].start); + + try verifyCoupled(1, r1, r2, res.?[0], res.?[1]); } -test "allocate in a gap" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); +test "findCoupledAllocation K=3 (1 shared byte)" { + var aa = AddressAllocator{ .child_allocator = testing.allocator }; + defer aa.deinit(); - try aa.block(testing.allocator, .{ .start = 0, .end = 100 }, 0); - try aa.block(testing.allocator, .{ .start = 200, .end = 300 }, 0); + // K=3 means rel1 byte 2 MUST be 0xE9. rel1 looks like 0xXXE9XXXX. + // Smallest positive is ~0x00E90000. We need a hole there. 
+ try aa.block(.{ .start = 0x2000, .end = 0x00E90000 }); - const search_range = Range{ .start = 0, .end = 1000 }; - const allocated = try aa.allocate(testing.allocator, 50, search_range); - try testing.expectEqual(Range{ .start = 100, .end = 150 }, allocated); - try testing.expectEqual(2, aa.ranges.items.len); - try testing.expectEqual(Range{ .start = 0, .end = 150 }, aa.ranges.items[0]); - try testing.expectEqual(Range{ .start = 200, .end = 300 }, aa.ranges.items[1]); + const r1 = Request{ .source = 0, .size = 10, .valid_range = .{ .start = 0, .end = 0x10000000 } }; + const r2 = Request{ .source = 3, .size = 10, .valid_range = .{ .start = 0, .end = 0x10000000 } }; + const res = aa.findCoupledAllocation(3, r1, r2); + try testing.expect(res != null); + try verifyCoupled(3, r1, r2, res.?[0], res.?[1]); } -test "allocate at the end" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); +test "findCoupledAllocation K=4 (Independent)" { + var aa = AddressAllocator{ .child_allocator = testing.allocator }; + defer aa.deinit(); - try aa.block(testing.allocator, .{ .start = 0, .end = 100 }, 0); + try aa.block(.{ .start = 0x2000, .end = 0x01000000 }); - const search_range = Range{ .start = 0, .end = 1000 }; - const allocated = try aa.allocate(testing.allocator, 200, search_range); - try testing.expectEqual(Range{ .start = 100, .end = 300 }, allocated); - try testing.expectEqual(1, aa.ranges.items.len); - try testing.expectEqual(Range{ .start = 0, .end = 300 }, aa.ranges.items[0]); + const r1 = Request{ + .source = 0x50000000, + .size = 10, + .valid_range = .{ .start = 0, .end = 0x60000000 }, + }; + const r2 = Request{ + .source = 0x50000004, + .size = 10, + .valid_range = .{ .start = 0, .end = 0x60000000 }, + }; + + const res = aa.findCoupledAllocation(4, r1, r2); + try testing.expect(res != null); + try verifyCoupled(4, r1, r2, res.?[0], res.?[1]); } -test "allocate within specific search range" { - var aa = AddressAllocator{}; - defer 
aa.deinit(testing.allocator); +test "findCoupledAllocation Negative Jumps (Both Backwards)" { + var aa = AddressAllocator{ .child_allocator = testing.allocator }; + defer aa.deinit(); - try aa.block(testing.allocator, .{ .start = 0, .end = 100 }, 0); - try aa.block(testing.allocator, .{ .start = 400, .end = 500 }, 0); + // We block everything except two specific holes far behind the jump source. + try aa.block(.{ .start = 0, .end = 0x10000000 }); + try aa.block(.{ .start = 0x10010000, .end = 0x20000000 }); + try aa.block(.{ .start = 0x20010000, .end = 0x60000000 }); - // Search range starts after first block and has a gap - const search_range = Range{ .start = 200, .end = 400 }; - const allocated = try aa.allocate(testing.allocator, 100, search_range); - try testing.expectEqual(Range{ .start = 200, .end = 300 }, allocated); - try testing.expectEqual(3, aa.ranges.items.len); - try testing.expectEqual(Range{ .start = 0, .end = 100 }, aa.ranges.items[0]); - try testing.expectEqual(Range{ .start = 400, .end = 500 }, aa.ranges.items[2]); - try testing.expectEqual(Range{ .start = 200, .end = 300 }, aa.ranges.items[1]); + const r1 = Request{ + .source = 0x50000000, + .size = 10, + .valid_range = .{ .start = 0, .end = 0x60000000 }, + }; + const r2 = Request{ + .source = 0x50000002, + .size = 10, + .valid_range = .{ .start = 0, .end = 0x60000000 }, + }; + + // The math solver natively handles the two's complement wraparound. 
+ const res = aa.findCoupledAllocation(2, r1, r2); + try testing.expect(res != null); + try verifyCoupled(2, r1, r2, res.?[0], res.?[1]); } -test "allocate exact gap size" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); +test "findCoupledAllocation with Mask/Pattern Constraints" { + var aa = AddressAllocator{ .child_allocator = testing.allocator }; + defer aa.deinit(); - try aa.block(testing.allocator, .{ .start = 0, .end = 100 }, 0); - try aa.block(testing.allocator, .{ .start = 200, .end = 300 }, 0); + try aa.block(.{ .start = 0, .end = 0x10000 }); + try aa.block(.{ .start = 0x20000, .end = 0x44440000 }); + try aa.block(.{ .start = 0x44450000, .end = 0x80000000 }); - const search_range = Range{ .start = 0, .end = 1000 }; - const allocated = try aa.allocate(testing.allocator, 100, search_range); - try testing.expectEqual(Range{ .start = 100, .end = 200 }, allocated); - try testing.expectEqual(1, aa.ranges.items.len); - try testing.expectEqual(Range{ .start = 0, .end = 300 }, aa.ranges.items[0]); + // K=2. We force the shared bytes to be exactly 0x4444. 
+ const r1 = Request{ .source = 0, .size = 10, .valid_range = .{ .start = 0, .end = 0x80000000 } }; + const r2 = Request{ + .source = 2, + .size = 10, + .valid_range = .{ .start = 0, .end = 0x80000000 }, + .mask = 0x0000FFFF, + .pattern = 0x00004444, + }; + + const res = aa.findCoupledAllocation(2, r1, r2); + try testing.expect(res != null); + try verifyCoupled(2, r1, r2, res.?[0], res.?[1]); + + // Explicitly verify the constraint was propagated to J1 + const rel1: i32 = @intCast(res.?[0].start); + const u_rel1: u32 = @bitCast(rel1); + try testing.expectEqual(@as(u32, 0x4444), (u_rel1 >> 16) & 0xFFFF); } -test "allocate fails when too large" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); +test "findCoupledAllocation Fails on Math Impossibility" { + var aa = AddressAllocator{ .child_allocator = testing.allocator }; + defer aa.deinit(); - try aa.block(testing.allocator, .{ .start = 0, .end = 100 }, 0); - try aa.block(testing.allocator, .{ .start = 200, .end = 300 }, 0); + const r1 = Request{ + .source = 0, + .size = 10, + .valid_range = .{ .start = 0, .end = 0x80000000 }, + .mask = 0xFFFF0000, + .pattern = 0x11110000, + }; + const r2 = Request{ + .source = 2, + .size = 10, + .valid_range = .{ .start = 0, .end = 0x80000000 }, + .mask = 0x0000FFFF, + .pattern = 0x00002222, + }; - const search_range = Range{ .start = 0, .end = 400 }; - const allocated = try aa.allocate(testing.allocator, 101, search_range); - try std.testing.expect(allocated == null); -} - -test "allocate with zero size" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); - - const search_range = Range{ .start = 0, .end = 1000 }; - const allocated = try aa.allocate(testing.allocator, 0, search_range); - try std.testing.expect(allocated == null); -} - -test "allocate with size bigger than range" { - var aa = AddressAllocator{}; - defer aa.deinit(testing.allocator); - - const search_range = Range{ .start = 0, .end = 100 }; - const allocated = try 
aa.allocate(testing.allocator, 1000, search_range); - try std.testing.expect(allocated == null); + const res = aa.findCoupledAllocation(2, r1, r2); + try testing.expectEqual(null, res); } diff --git a/src/PatchLocationIterator.zig b/src/PatchLocationIterator.zig deleted file mode 100644 index 0dca4cb..0000000 --- a/src/PatchLocationIterator.zig +++ /dev/null @@ -1,447 +0,0 @@ -//! Iterates through all possible valid address ranges for a `jmp rel33` instruction based on a -//! 4-byte pattern of "free" and "used" bytes. -//! -//! This is the core utility for implementing E9Patch-style instruction punning (B2) and padded -//! jumps (T1). -const std = @import("std"); -const testing = std.testing; -const assert = std.debug.assert; - -const log = std.log.scoped(.patch_location_iterator); - -const Range = @import("Range.zig"); - -/// Represents a single byte in the 4-byte `rel32` offset pattern. -pub const PatchByte = union(enum) { - /// This byte can be any value (0x00-0xFF). - free: void, - /// This byte is constrained to a specific value. - used: u8, - - pub fn format(self: @This(), writer: *std.Io.Writer) std.Io.Writer.Error!void { - switch (self) { - .free => try writer.print("free", .{}), - .used => |val| try writer.print("used({x})", .{val}), - } - } -}; - -const patch_size = 4; -const PatchInt = std.meta.Int(.signed, patch_size * 8); -const PatchLocationIterator = @This(); -/// The base address (e.g., RIP of the *next* instruction) that the 32-bit relative offset is -/// calculated from. -offset: i64, -/// The 4-byte little-endian pattern of `used` and `free` bytes that constrain the `rel32` offset. -patch_bytes: [patch_size]PatchByte, -/// Internal state: the byte-level representation of the *start* of the current `rel32` offset being -/// iterated. -start: [patch_size]u8, -/// Internal state: the byte-level representation of the *end* of the current `rel32` offset being -/// iterated. 
-end: [patch_size]u8, -/// Internal state: flag to handle the first call to `next()` uniquely. -first: bool, -/// Internal state: optimization cache for the number of contiguous `.free` bytes at the *end* of -/// `patch_bytes`. -trailing_free_count: u8, - -/// Initializes the iterator. -/// - `patch_bytes`: The 4-byte pattern of the `rel32` offset, in little-endian order. -/// The base address (e.g., RIP of the *next* instruction) that the 32-bit relative offset is -/// calculated from. -pub fn init(patch_bytes: [patch_size]PatchByte, addr: u64) PatchLocationIterator { - log.debug("hi", .{}); - assert(patch_bytes.len == patch_size); - - // Find the number of contiguous free bytes at the end of the pattern. - var trailing_free: u8 = 0; - for (0..patch_bytes.len) |i| { - if (patch_bytes[i] == .free) { - trailing_free += 1; - } else { - break; - } - } - - var start = std.mem.zeroes([patch_size]u8); - var end = std.mem.zeroes([patch_size]u8); - for (patch_bytes, 0..) |byte, i| { - switch (byte) { - .free => { - start[i] = 0; - end[i] = if (i < trailing_free) 0xff else 0; - }, - .used => |val| { - start[i] = val; - end[i] = val; - }, - } - } - - const out = PatchLocationIterator{ - .offset = @intCast(addr), - .patch_bytes = patch_bytes, - .trailing_free_count = trailing_free, - .start = start, - .end = end, - .first = true, - }; - log.debug("init: {f}", .{out}); - return out; -} - -/// Returns the next valid `Range` of target addresses, or `null` if the iteration is complete. -pub fn next(self: *PatchLocationIterator) ?Range { - // If all bytes are free we can just return the maximum range. 
- if (self.trailing_free_count == patch_size) { - defer self.first = false; - if (self.first) { - var range = Range{ - .start = self.offset + std.math.minInt(i32), - .end = self.offset + std.math.maxInt(i32), - }; - // Clamp to valid positive address space - if (range.start < 0) range.start = 0; - if (range.end <= 0) { - log.info("next: All bytes free, but range entirely negative.", .{}); - return null; - } - - log.debug("next: All bytes free, returning full range: {f}", .{range}); - return range; - } else { - log.info("next: All bytes free, iteration finished.", .{}); - return null; - } - } - - while (true) { - var range: Range = undefined; - - if (self.first) { - self.first = false; - const start = std.mem.readInt(PatchInt, self.start[0..], .little); - const end = std.mem.readInt(PatchInt, self.end[0..], .little); - range = Range{ - .start = start + self.offset, - .end = end + self.offset, - }; - } else { - var overflow: u1 = 1; - for (self.patch_bytes, 0..) |byte, i| { - if (i < self.trailing_free_count or byte == .used) { - continue; - } - assert(byte == .free); - assert(self.start[i] == self.end[i]); - defer assert(self.start[i] == self.end[i]); - - if (overflow == 1) { - if (self.start[i] == std.math.maxInt(u8)) { - self.start[i] = 0; - self.end[i] = 0; - } else { - self.start[i] += 1; - self.end[i] += 1; - overflow = 0; - } - } - } - if (overflow == 1) { - log.info("next: Iteration finished, no more ranges.", .{}); - return null; - } - - const start = std.mem.readInt(PatchInt, self.start[0..], .little); - const end = std.mem.readInt(PatchInt, self.end[0..], .little); - assert(end >= start); - range = Range{ - .start = start + self.offset, - .end = end + self.offset, - }; - } - - // Filter out ranges that are entirely negative (invalid memory addresses). - if (range.end <= 0) continue; - // Clamp ranges that start negative but end positive. 
- if (range.start < 0) range.start = 0; - - log.debug("next: new range: {f}", .{range}); - return range; - } -} - -pub fn format(self: PatchLocationIterator, writer: *std.Io.Writer) std.Io.Writer.Error!void { - try writer.print(".{{ ", .{}); - try writer.print(".offset = {x}, ", .{self.offset}); - try writer.print( - ".patch_bytes = .{{ {f}, {f}, {f}, {f} }}, ", - .{ self.patch_bytes[0], self.patch_bytes[1], self.patch_bytes[2], self.patch_bytes[3] }, - ); - try writer.print( - ".start: 0x{x}, .end: 0x{x}, first: {}, trailing_free_count: {}", - .{ self.start, self.end, self.first, self.trailing_free_count }, - ); -} - -test "free bytes" { - const pattern = [_]PatchByte{ - .{ .free = {} }, - .{ .free = {} }, - .{ .free = {} }, - .{ .free = {} }, - }; - var it = PatchLocationIterator.init(pattern, 0); - - try testing.expectEqual( - Range{ .start = 0, .end = std.math.maxInt(i32) }, - it.next().?, - ); - try testing.expectEqual(null, it.next()); -} - -test "predetermined negative" { - const pattern = [_]PatchByte{ - .{ .free = {} }, - .{ .free = {} }, - .{ .free = {} }, - .{ .used = 0xe9 }, - }; - var it = PatchLocationIterator.init(pattern, 0); - try testing.expectEqual(null, it.next()); -} - -test "trailing free bytes" { - const pattern = [_]PatchByte{ - .{ .free = {} }, - .{ .free = {} }, - .{ .free = {} }, - .{ .used = 0x79 }, - }; - var it = PatchLocationIterator.init(pattern, 0); - - try testing.expectEqual( - Range{ .start = 0x79000000, .end = 0x79ffffff }, - it.next().?, - ); - try testing.expectEqual(null, it.next()); -} - -test "inner and trailing free bytes" { - const pattern = [_]PatchByte{ - .{ .free = {} }, - .{ .used = 0xe8 }, - .{ .free = {} }, - .{ .used = 0x79 }, - }; - var it = PatchLocationIterator.init(pattern, 0); - - try testing.expectEqual( - Range{ .start = 0x7900e800, .end = 0x7900e8ff }, - it.next().?, - ); - try testing.expectEqual( - Range{ .start = 0x7901e800, .end = 0x7901e8ff }, - it.next().?, - ); - - // Skip to the last range - var 
r_last: ?Range = null; - var count: u32 = 2; // We already consumed two - while (it.next()) |r| { - r_last = r; - count += 1; - } - try testing.expectEqual( - Range{ .start = 0x79ffe800, .end = 0x79ffe8ff }, - r_last, - ); - try testing.expectEqual(256, count); -} - -test "no free bytes" { - const pattern = [_]PatchByte{ - .{ .used = 0xe9 }, - .{ .used = 0x00 }, - .{ .used = 0x00 }, - .{ .used = 0x78 }, - }; - var it = PatchLocationIterator.init(pattern, 0); - - try testing.expectEqual( - Range{ .start = 0x780000e9, .end = 0x780000e9 }, - it.next().?, - ); - try testing.expectEqual(null, it.next()); -} - -test "inner and leading free bytes" { - const pattern = [_]PatchByte{ - .{ .used = 0xe9 }, - .{ .free = {} }, - .{ .used = 0xe8 }, - .{ .free = {} }, - }; - var it = PatchLocationIterator.init(pattern, 0); - - try testing.expectEqual( - Range{ .start = 0x00e800e9, .end = 0x00e800e9 }, - it.next().?, - ); - try testing.expectEqual( - Range{ .start = 0x00e801e9, .end = 0x00e801e9 }, - it.next().?, - ); - - // Skip to the last range - var r_last: ?Range = null; - var count: u32 = 2; // We already consumed two - while (it.next()) |r| { - r_last = r; - count += 1; - } - try testing.expectEqual( - Range{ .start = 0x7fe8ffe9, .end = 0x7fe8ffe9 }, - r_last, - ); - try testing.expectEqual(256 * 128, count); -} - -test "only inner" { - const pattern = [_]PatchByte{ - .{ .used = 0xe9 }, - .{ .free = {} }, - .{ .free = {} }, - .{ .used = 0x78 }, - }; - var it = PatchLocationIterator.init(pattern, 0); - - try testing.expectEqual( - Range{ .start = 0x780000e9, .end = 0x780000e9 }, - it.next().?, - ); - try testing.expectEqual( - Range{ .start = 0x780001e9, .end = 0x780001e9 }, - it.next().?, - ); - - // Skip to the last range - var r_last: ?Range = null; - var count: u32 = 2; // We already consumed two - while (it.next()) |r| { - r_last = r; - count += 1; - } - try testing.expectEqual( - Range{ .start = 0x78ffffe9, .end = 0x78ffffe9 }, - r_last, - ); - try 
testing.expectEqual(256 * 256, count); -} - -test "trailing free bytes offset" { - const pattern = [_]PatchByte{ - .{ .free = {} }, - .{ .free = {} }, - .{ .free = {} }, - .{ .used = 0x79 }, - }; - const offset = 0x12345678; - var it = PatchLocationIterator.init(pattern, offset); - - try testing.expectEqual( - Range{ .start = offset + 0x79000000, .end = offset + 0x79ffffff }, - it.next().?, - ); - try testing.expectEqual(null, it.next()); -} - -test "trailing and leading offset" { - const pattern = [_]PatchByte{ - .{ .free = {} }, - .{ .used = 0xe9 }, - .{ .used = 0xe8 }, - .{ .free = {} }, - }; - const offset = 0x12345678; - var it = PatchLocationIterator.init(pattern, offset); - - try testing.expectEqual( - Range{ .start = offset + 0x00e8e900, .end = offset + 0x00e8e9ff }, - it.next().?, - ); - try testing.expectEqual( - Range{ .start = offset + 0x01e8e900, .end = offset + 0x01e8e9ff }, - it.next().?, - ); - - // Skip to the last range - var r_last: ?Range = null; - var count: u32 = 2; // We already consumed two - while (it.next()) |r| { - r_last = r; - count += 1; - } - try testing.expectEqual( - Range{ - .start = offset + @as(i32, @bitCast(@as(u32, 0xffe8e900))), - .end = offset + @as(i32, @bitCast(@as(u32, 0xffe8e9ff))), - }, - r_last, - ); - try testing.expect(count > 128); -} - -test "trailing free bytes large offset" { - const pattern = [_]PatchByte{ - .{ .free = {} }, - .{ .free = {} }, - .{ .free = {} }, - .{ .used = 0x79 }, - }; - const offset = 0x12345678; - var it = PatchLocationIterator.init(pattern, offset); - - try testing.expectEqual( - Range{ .start = offset + 0x79000000, .end = offset + 0x79ffffff }, - it.next().?, - ); - try testing.expectEqual(null, it.next()); -} - -test "trailing and leading large offset" { - const pattern = [_]PatchByte{ - .{ .free = {} }, - .{ .used = 0xe9 }, - .{ .used = 0xe8 }, - .{ .free = {} }, - }; - const offset = 0x123456789a; - var it = PatchLocationIterator.init(pattern, offset); - - try testing.expectEqual( - 
Range{ .start = offset + 0x00e8e900, .end = offset + 0x00e8e9ff }, - it.next().?, - ); - try testing.expectEqual( - Range{ .start = offset + 0x01e8e900, .end = offset + 0x01e8e9ff }, - it.next().?, - ); - - // Skip to the last range - var r_last: ?Range = null; - var count: u32 = 2; // We already consumed two - while (it.next()) |r| { - r_last = r; - count += 1; - } - try testing.expectEqual( - Range{ - .start = offset + @as(i64, @intCast(@as(i32, @bitCast(@as(u32, 0xffe8e900))))), - .end = offset + @as(i64, @intCast(@as(i32, @bitCast(@as(u32, 0xffe8e9ff))))), - }, - r_last, - ); - try testing.expectEqual(256, count); -} diff --git a/src/Patcher.zig b/src/Patcher.zig index c95736d..dd7c469 100644 --- a/src/Patcher.zig +++ b/src/Patcher.zig @@ -1,28 +1,27 @@ const std = @import("std"); -const builtin = @import("builtin"); -const testing = std.testing; const math = std.math; const mem = std.mem; const posix = std.posix; -const zydis = @import("zydis").zydis; -const dis = @import("disassembler.zig"); -const syscalls = @import("syscalls.zig"); +const testing = std.testing; + +const dis = @import("disassembler.zig"); +const reloc = @import("relocation.zig"); +const syscalls = @import("syscalls.zig"); +const zydis = @import("zydis").zydis; -const log = std.log.scoped(.patcher); const AddressAllocator = @import("AddressAllocator.zig"); -const InstructionFormatter = dis.InstructionFormatter; -const InstructionIterator = dis.InstructionIterator; -const PatchLocationIterator = @import("PatchLocationIterator.zig"); -const PatchByte = PatchLocationIterator.PatchByte; +const backend = @import("backend.zig").backend; const Range = @import("Range.zig"); +const Statistics = @import("Statistics.zig"); const assert = std.debug.assert; - const page_size = std.heap.pageSize(); -const jump_rel32: u8 = 0xe9; -const jump_rel32_size = 5; -const jump_rel8: u8 = 0xeb; -const jump_rel8_size = 2; +const log = std.log.scoped(.patcher); + +const j_rel32: u8 = 0xe9; +const j_rel32_size = 5; 
+const j_rel8: u8 = 0xeb; +const j_rel8_size = 2; // TODO: Find an invalid instruction to use. // const invalid: u8 = 0xaa; @@ -48,68 +47,40 @@ var syscall_flicken_bytes = [_]u8{ 0x41, 0xff, 0xd3, // call r11 }; -pub var gpa: mem.Allocator = undefined; -pub var flicken_templates: std.StringArrayHashMapUnmanaged(Flicken) = .empty; -pub var address_allocator: AddressAllocator = .empty; -/// Tracks the base addresses of pages we have mmap'd for Flicken. -pub var allocated_pages: std.AutoHashMapUnmanaged(u64, void) = .empty; -pub var mutex: std.Thread.Mutex = .{}; +const Patcher = @This(); -pub var target_exec_path_buf: [std.fs.max_path_bytes]u8 = @splat(0); -pub var target_exec_path: []const u8 = undefined; +mutex: std.Thread.Mutex = .{}, +address_allocator: AddressAllocator, +flicken_templates: std.StringArrayHashMapUnmanaged(Flicken) = .empty, -/// Initialize the patcher. -/// NOTE: This should only be called **once**. -pub fn init() !void { - gpa = std.heap.page_allocator; +pub fn init(allocator: mem.Allocator) !Patcher { + var patcher: Patcher = .{ + .address_allocator = .{ .child_allocator = allocator }, + }; - try flicken_templates.ensureTotalCapacity( - std.heap.page_allocator, + try patcher.flicken_templates.ensureTotalCapacity( + patcher.address_allocator.allocator(), page_size / @sizeOf(Flicken), ); - flicken_templates.putAssumeCapacity("nop", .{ .name = "nop", .bytes = &.{} }); + patcher.flicken_templates.putAssumeCapacity("nop", .{ .name = "nop", .bytes = &.{} }); mem.writeInt( u64, syscall_flicken_bytes[2..][0..8], @intFromPtr(&syscalls.syscallEntry), .little, ); - flicken_templates.putAssumeCapacity("syscall", .{ .name = "syscall", .bytes = &syscall_flicken_bytes }); + patcher.flicken_templates.putAssumeCapacity( + "syscall", + .{ .name = "syscall", .bytes = &syscall_flicken_bytes }, + ); - { - // Read mmap_min_addr to block the low memory range. This prevents us from allocating - // trampolines in the forbidden low address range. 
- var min_addr: u64 = 0x10000; // Default safe fallback (64KB) - if (std.fs.openFileAbsolute("/proc/sys/vm/mmap_min_addr", .{})) |file| { - defer file.close(); - var buf: [32]u8 = undefined; - if (file.readAll(&buf)) |len| { - const trimmed = std.mem.trim(u8, buf[0..len], " \n\r\t"); - if (std.fmt.parseInt(u64, trimmed, 10)) |val| { - min_addr = val; - } else |_| {} - } else |_| {} - } else |_| {} - try address_allocator.block(gpa, .{ .start = 0, .end = @intCast(min_addr) }, 0); - } + return patcher; } -/// Flicken name and bytes have to be valid for the lifetime it's used. If a trampoline with the -/// name is already registered it gets overwritten. -/// NOTE: The name "nop" is reserved and always has the ID 0. -pub fn addFlicken(trampoline: Flicken) !FlickenId { - assert(!mem.eql(u8, "nop", trampoline.name)); - assert(!mem.eql(u8, "syscall", trampoline.name)); - try flicken_templates.ensureUnusedCapacity(gpa, 1); - errdefer comptime unreachable; - - const gop = flicken_templates.getOrPutAssumeCapacity(trampoline.name); - if (gop.found_existing) { - log.warn("addTrampoline: Overwriting existing trampoline: {s}", .{trampoline.name}); - } - gop.key_ptr.* = trampoline.name; - gop.value_ptr.* = trampoline; - return @enumFromInt(gop.index); +pub fn deinit(patcher: *Patcher) void { + const allocator = patcher.address_allocator.allocator(); + patcher.flicken_templates.deinit(allocator); + patcher.address_allocator.deinit(); } pub const Flicken = struct { @@ -117,11 +88,11 @@ pub const Flicken = struct { bytes: []const u8, pub fn size(flicken: *const Flicken) u64 { - return flicken.bytes.len + jump_rel32_size; + return flicken.bytes.len + j_rel32_size; } }; -pub const FlickenId = enum(u64) { +pub const FlickenId = enum(u32) { /// The nop flicken is special. It just does the patched instruction and immediately jumps back /// to the normal instruction stream. It **cannot** be changed. /// The bytes are always empty, meaning that `bytes.len == 0`. 
@@ -141,8 +112,7 @@ pub const PatchRequest = struct { offset: u64, /// Number of bytes of instruction. size: u8, - /// A byte slice from the start of the offset to the end of the region. This isn't necessary to - /// have but makes things more accessible. + /// The bytes of the original code, starting at this instruction. bytes: []u8, pub fn desc(_: void, lhs: PatchRequest, rhs: PatchRequest) bool { @@ -160,100 +130,41 @@ pub const PatchRequest = struct { } }; -pub const Statistics = struct { - /// Direct jumps - jump: u64, - /// Punning - index represents number of prefixes used - punning: [4]u64, - /// Successor Eviction - successor_eviction: u64, - /// Neighbor Eviction - neighbor_eviction: u64, - /// Failed to patch - failed: u64, - - pub const empty = mem.zeroes(Statistics); - - pub fn punningSum(stats: *const Statistics) u64 { - return stats.punning[0] + stats.punning[1] + - stats.punning[2] + stats.punning[3]; - } - - pub fn successful(stats: *const Statistics) u64 { - return stats.jump + stats.punningSum() + - stats.successor_eviction + stats.neighbor_eviction; - } - - pub fn total(stats: *const Statistics) u64 { - return stats.successful() + stats.failed; - } - - pub fn percentage(stats: *const Statistics) f64 { - if (stats.total() == 0) return 1; - const s: f64 = @floatFromInt(stats.successful()); - const t: f64 = @floatFromInt(stats.total()); - return s / t; - } - - pub fn add(self: *Statistics, other: *const Statistics) void { - self.jump += other.jump; - for (0..self.punning.len) |i| { - self.punning[i] += other.punning[i]; - } - self.successor_eviction += other.successor_eviction; - self.neighbor_eviction += other.neighbor_eviction; - self.failed += other.failed; - } -}; - -/// Scans a memory region for instructions that require patching and applies the patches -/// using a hierarchy of tactics (Direct/Punning -> Successor Eviction -> Neighbor Eviction). 
+/// Scans a memory region for instructions that require patching and applies the patches using a +/// hierarchy of tactics (Direct/Punning -> Successor Eviction -> Neighbor Eviction). /// -/// NOTE: This function leaves the region as R|W and the caller is responsible for changing it to -/// the desired protection -pub fn patchRegion(region: []align(page_size) u8) !void { +/// Assert that the region is already mapped as R|W. The caller is responsible for changing it to +/// the desired protection after patching is done. +pub fn patchRegion(patcher: *Patcher, region: []align(page_size) u8) !void { log.info( - "Patching region: 0x{x} - 0x{x}", + "patchRegion: 0x{x} - 0x{x}", .{ @intFromPtr(region.ptr), @intFromPtr(®ion[region.len - 1]) }, ); - // For now just do a coarse lock. - // TODO: should we make this more fine grained? - mutex.lock(); - defer mutex.unlock(); - { - // Block the region, such that we don't try to allocate there anymore. - const start: i64 = @intCast(@intFromPtr(region.ptr)); - try address_allocator.block( - gpa, - .{ .start = start, .end = start + @as(i64, @intCast(region.len)) }, - page_size, - ); - } + patcher.mutex.lock(); + defer patcher.mutex.unlock(); - var arena_impl = std.heap.ArenaAllocator.init(gpa); + // Make the application code writable so we can inject our jumps. + try backend.mprotect(region, posix.PROT.READ | posix.PROT.WRITE); + + try patcher.address_allocator.block(.fromPtr(region.ptr, region.len)); + + var arena_impl = std.heap.ArenaAllocator.init(patcher.address_allocator.allocator()); const arena = arena_impl.allocator(); defer arena_impl.deinit(); var patch_requests: std.ArrayListUnmanaged(PatchRequest) = .empty; - // We save the bytes where instructions start to be able to disassemble them on the fly. This is - // necessary for the neighbor eviction, since we can't just iterate forwards from a target - // instruction and disassemble happily. 
This is because some bytes may already be the patched - // ones which means that we might disassemble garbage or something different that wasn't there - // before. This means that we would need to stop disassembling on the first byte that is locked, - // which kind of defeats the purpose of neighbor eviction. - var instruction_starts = try std.DynamicBitSetUnmanaged.initEmpty(arena, region.len); + var instruction_starts: std.DynamicBitSetUnmanaged = try .initEmpty(arena, region.len); { - // Get where to patch. - var instruction_iterator = InstructionIterator.init(region); - while (instruction_iterator.next()) |instruction| { + log.info("patchRegion: Collecting patch requests", .{}); + var instruction_iter = dis.InstructionIterator.init(region); + while (instruction_iter.next()) |instruction| { const offset = instruction.address - @intFromPtr(region.ptr); instruction_starts.set(offset); const is_syscall = instruction.instruction.mnemonic == zydis.ZYDIS_MNEMONIC_SYSCALL; - const should_patch = is_syscall or - instruction.instruction.attributes & zydis.ZYDIS_ATTRIB_HAS_LOCK > 0; + const should_patch = is_syscall; if (should_patch) { const request: PatchRequest = .{ .flicken = if (is_syscall) .syscall else .nop, @@ -280,814 +191,842 @@ pub fn patchRegion(region: []align(page_size) u8) !void { "patchRegion: Found duplicate patch requests for instruction: {s}", .{fmt}, ); - log.err("patchRegion: request 1: {f}", .{patch_requests.items[i - 1]}); - log.err("patchRegion: request 2: {f}", .{patch_requests.items[i]}); + log.err(" request 1: {f}", .{patch_requests.items[i - 1]}); + log.err(" request 2: {f}", .{patch_requests.items[i]}); return error.DuplicatePatchRequest; } last_offset = request.offset; - if (@as(u64, @intFromEnum(request.flicken)) >= flicken_templates.count()) { + if (@as(u64, @intFromEnum(request.flicken)) >= patcher.flicken_templates.count()) { const fmt = dis.formatBytes(request.bytes[0..request.size]); log.err( "patchRegion: Usage of undefined flicken in 
request {f} for instruction: {s}", .{ request, fmt }, ); - return error.undefinedFlicken; + return error.UndefinedFlicken; } } } - { - // Apply patches. - try posix.mprotect(region, posix.PROT.READ | posix.PROT.WRITE); - - var stats = Statistics.empty; - // Used to track which bytes have been modified or used for constraints (punning), - // to prevent future patches (from neighbor/successor eviction) from corrupting them. - var locked_bytes = try std.DynamicBitSetUnmanaged.initEmpty(arena, region.len); - // PERF: A set of the pages for the patches/flicken we made writable. This way we don't - // repeatedly change call `mprotect` on the same page to switch it from R|W to R|X and back. - // At the end we `mprotect` all pages in this set back to being R|X. - var pages_made_writable: std.AutoHashMapUnmanaged(u64, void) = .empty; - - requests: for (patch_requests.items) |request| { - for (0..request.size) |i| { - if (locked_bytes.isSet(request.offset + i)) { - log.warn("patchRegion: Skipping request at offset 0x{x} because it is locked", .{request.offset}); - stats.failed += 1; - continue :requests; - } - } - - if (try attemptDirectOrPunning( - request, - arena, - &locked_bytes, - &pages_made_writable, - &stats, - )) { - continue :requests; - } - - if (try attemptSuccessorEviction( - request, - arena, - &locked_bytes, - &pages_made_writable, - &stats, - )) { - continue :requests; - } - - if (try attemptNeighborEviction( - request, - arena, - &locked_bytes, - &pages_made_writable, - &instruction_starts, - &stats, - )) { + // Used to track which bytes have been modified or used for constraints (punning), to + // prevent future patches (neighbor/successor eviction) from corrupting them. + var locked_bytes = try std.DynamicBitSetUnmanaged.initEmpty(arena, region.len); + // A set of the pages for the patches/flicken we made writable. This way we don't repeatedly + // change call `mprotect` on the same page to switch it from R|W to R|X and back. 
At the end + // we `mprotect` all pages in this set back to being R|X. + var pages_made_writable: std.AutoHashMapUnmanaged(u64, void) = .empty; + var stats: Statistics = .empty; + requests: for (patch_requests.items) |request| { + for (0..request.size) |i| { + if (locked_bytes.isSet(request.offset + i)) { + log.warn( + "patchRegion: Skipping request at offset 0x{x} because it is locked", + .{request.offset}, + ); continue :requests; } + } + const result = patcher.patchRequest(request, region, instruction_starts, locked_bytes) catch |err| { + log.err("patchRegion: Failed to patch request at offset 0x{x}: {}", .{ request.offset, err }); stats.failed += 1; + continue; + }; + + switch (result.tactic) { + .jump => stats.jump += 1, + .punning => |n| stats.punning[n] += 1, + .successor_eviction => stats.successor_eviction += 1, + .neighbor_eviction => stats.neighbor_eviction += 1, } - // Change pages back to R|X. - var iter = pages_made_writable.keyIterator(); - const protection = posix.PROT.READ | posix.PROT.EXEC; - while (iter.next()) |page_addr| { - const ptr: [*]align(page_size) u8 = @ptrFromInt(page_addr.*); - try posix.mprotect(ptr[0..page_size], protection); - } + // Now nothing should error anymore, so we "commit" the patches + for (result.patches) |p| { + if (p.kind == .empty) continue; - assert(stats.total() == patch_requests.items.len); - log.info("{}", .{stats}); - log.info("patched: {}/{}: {:2.2}%", .{ - stats.successful(), - stats.total(), - stats.percentage() * 100, - }); - log.info("patchRegion: Finished applying patches", .{}); + if (p.trampoline_addr != 0 and p.trampoline_len > 0) { + try patcher.address_allocator.block(.{ + .start = @intCast(p.trampoline_addr), + .end = @intCast(p.trampoline_addr + p.trampoline_len), + }); + + const start_page = mem.alignBackward(u64, p.trampoline_addr, page_size); + const end_page = mem.alignForward(u64, p.trampoline_addr + p.trampoline_len, page_size); + + { + var page = start_page; + const prot = posix.PROT.READ | 
posix.PROT.WRITE; + const flags: posix.MAP = .{ + .TYPE = .PRIVATE, + .ANONYMOUS = true, + .FIXED_NOREPLACE = true, + }; + while (page < end_page) : (page += page_size) { + const gop = try pages_made_writable.getOrPut(arena, page); + if (gop.found_existing) continue; + + const ptr: [*]align(page_size) u8 = @ptrFromInt(page); + _ = backend.mmap(ptr, page_size, prot, flags, -1, 0) catch |err| switch (err) { + error.MappingAlreadyExists => { + try backend.mprotect(ptr[0..page_size], prot); + }, + else => return err, + }; + } + } + + const dest: [*]u8 = @ptrFromInt(p.trampoline_addr); + @memcpy(dest[0..p.trampoline_len], p.trampoline_bytes[0..p.trampoline_len]); + } + + if (p.source_addr != 0 and p.source_len > 0) { + const dest: [*]u8 = @ptrFromInt(p.source_addr); + @memcpy(dest[0..p.source_len], p.source_bytes[0..p.source_len]); + } + + if (p.lock_len > 0) { + locked_bytes.setRangeValue( + .{ .start = p.lock_offset, .end = p.lock_offset + p.lock_len }, + true, + ); + } + } } + + var iter = pages_made_writable.keyIterator(); + const prot = posix.PROT.READ | posix.PROT.EXEC; + while (iter.next()) |page_addr| { + const ptr: [*]align(page_size) u8 = @ptrFromInt(page_addr.*); + try backend.mprotect(ptr[0..page_size], prot); + } + + log.info("{}", .{stats}); + log.info("patched: {}/{}: {d:.2}%", .{ + stats.successful(), + stats.total(), + stats.percentage() * 100.0, + }); +} + +pub const Tactic = union(enum) { + jump, + punning: u8, + successor_eviction, + neighbor_eviction, +}; + +pub const PatchResult = struct { + patches: [2]Patch, + tactic: Tactic, +}; + +/// Informations to "commit" a patch. 
+pub const Patch = struct { + kind: enum { empty, active } = .empty, + + /// Information for the jump overwrite + source_addr: u64 = 0, + source_bytes: [15]u8 = undefined, + source_len: u8 = 0, + + /// Information for the trampoline + trampoline_addr: u64 = 0, + trampoline_bytes: [128]u8 = undefined, + trampoline_len: u8 = 0, + + /// Offset inside the region to lock so future patches don't touch them. + lock_offset: u64 = 0, + lock_len: u64 = 0, +}; + +fn patchRequest( + patcher: *Patcher, + /// What to patch. + request: PatchRequest, + /// Where to patch it. + region: []align(page_size) u8, + /// Needed to get the size of instructions for the successor and neighbor eviction. + instruction_starts: std.DynamicBitSetUnmanaged, + /// Needed to not repeatedly patch the same instructions with successor and neighbor eviction. + locked_bytes: std.DynamicBitSetUnmanaged, +) !PatchResult { + if (try attemptDirectOrPunning(patcher, request, region, locked_bytes)) |result| { + return result; + } + if (try attemptSuccessorEviction(patcher, request, region, locked_bytes)) |result| { + return result; + } + if (try attemptNeighborEviction(patcher, request, region, instruction_starts, locked_bytes)) |result| { + return result; + } + return error.PatchFailed; } fn attemptDirectOrPunning( + patcher: *Patcher, request: PatchRequest, - arena: mem.Allocator, - locked_bytes: *std.DynamicBitSetUnmanaged, - pages_made_writable: *std.AutoHashMapUnmanaged(u64, void), - stats: *Statistics, -) !bool { + region: []align(page_size) u8, + locked_bytes: std.DynamicBitSetUnmanaged, +) !?PatchResult { const flicken: Flicken = if (request.flicken == .nop) .{ .name = "nop", .bytes = request.bytes[0..request.size] } else - flicken_templates.entries.get(@intFromEnum(request.flicken)).value; + patcher.flicken_templates.values()[@intFromEnum(request.flicken)]; - var pii = PatchInstructionIterator.init( - request.bytes, - request.size, - flicken.size(), - ); - // TODO: There is a "Ghost Page" edge case 
here. If `pii.next()` returns a range that - // spans multiple pages (Pages A and B), we might successfully mmap Page A but fail to - // mmap Page B. The loop will `continue` to the next candidate range, leaving Page A - // mapped. While harmless (it becomes an unused executable page), it is technically a - // memory leak. A future fix should track "current attempt" pages separately and unmap - // them on failure. - while (pii.next(.{ .count = 256 })) |allocated_range| { - try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(allocated_range)); - ensureRangeWritable( - allocated_range, - pages_made_writable, - ) catch |err| switch (err) { - error.MappingAlreadyExists => continue, - else => return err, - }; + const flicken_size = flicken.size(); // bytes.len + 5 + const source_addr = @intFromPtr(region.ptr) + request.offset; - applyPatch( - request, - flicken, - allocated_range, - pii.num_prefixes, - ) catch |err| switch (err) { - error.RelocationOverflow => continue, - else => return err, - }; + for (0..prefixes.len + 1) |num_prefixes_usize| { + const num_prefixes: u8 = @intCast(num_prefixes_usize); - try address_allocator.block(gpa, allocated_range, 0); - const lock_size = jump_rel32_size + pii.num_prefixes; - locked_bytes.setRangeValue( - .{ .start = request.offset, .end = request.offset + lock_size }, - true, - ); + // Tactics T1 pads with prefixes. 5 is the size of `jmp rel32`. 
+ const lock_size = j_rel32_size + num_prefixes; + if (request.offset + lock_size > region.len) continue; + if (num_prefixes + 1 > request.size) continue; - if (request.size >= 5) { - // assert(pii.num_prefixes == 0); - stats.jump += 1; - } else { - stats.punning[pii.num_prefixes] += 1; + for (0..lock_size) |i| { + if (locked_bytes.isSet(request.offset + i)) { + return null; + } } - return true; + + // Construct bitwise constraint if our jump spills over the instruction bounds + var mask: u32 = 0; + var pattern: u32 = 0; + for (0..4) |i| { + const byte_offset = num_prefixes + 1 + i; + if (byte_offset >= request.size) { + const existing_byte = request.bytes[byte_offset]; + mask |= @as(u32, 0xFF) << @intCast(i * 8); + pattern |= @as(u32, existing_byte) << @intCast(i * 8); + } + } + + const jump_source = source_addr + num_prefixes + j_rel32_size; + + const alloc_request = AddressAllocator.Request{ + .source = jump_source, + .size = flicken_size, + .valid_range = .{ + // TODO: calculate from flicken size + // TODO: use relocation information if needed + .start = @max(0, @as(i64, @intCast(source_addr)) - 0x7FFF0000), // ~2GB + .end = @as(i64, @intCast(source_addr)) + 0x7FFF0000, + }, + .mask = mask, + .pattern = pattern, + }; + + const tramp_range = patcher.address_allocator.findAllocation(alloc_request) orelse continue; + var patch = Patch{ .kind = .active }; + + // Populate Trampoline + patch.trampoline_addr = @intCast(tramp_range.start); + patch.trampoline_len = @intCast(flicken_size); + @memcpy(patch.trampoline_bytes[0..flicken.bytes.len], flicken.bytes); + + // Relocate if NOP + if (request.flicken == .nop) { + const instr = dis.disassembleInstruction(request.bytes[0..request.size]).?; + const reloc_info = reloc.RelocInfo{ + .instr = instr, + .old_addr = source_addr, + }; + reloc.relocateInstruction( + reloc_info.instr, + patch.trampoline_addr, + patch.trampoline_bytes[0..flicken.bytes.len], + ) catch |err| switch (err) { + // TODO: when we use relocation 
information to restrict the range for the request + // this shouldn't happen anymore. + error.RelocationOverflow => continue, // try next prefix/hole + else => return err, + }; + } + + // Jump back from trampoline to original stream + const ret_addr = source_addr + request.size; + const tramp_jump_source = patch.trampoline_addr + flicken.bytes.len + j_rel32_size; + const tramp_disp: i32 = @intCast(@as(i64, @intCast(ret_addr)) - @as(i64, @intCast(tramp_jump_source))); + + patch.trampoline_bytes[flicken.bytes.len] = j_rel32; + mem.writeInt(i32, patch.trampoline_bytes[flicken.bytes.len + 1 ..][0..4], tramp_disp, .little); + + // Populate Source Jump + patch.source_addr = source_addr; + patch.source_len = @intCast(@max(request.size, lock_size)); + @memset(patch.source_bytes[0..patch.source_len], int3); // Clean padding + + if (num_prefixes > 0) { + @memcpy(patch.source_bytes[0..num_prefixes], prefixes[0..num_prefixes]); + } + patch.source_bytes[num_prefixes] = j_rel32; + const source_disp: i32 = @intCast(tramp_range.start - @as(i64, @intCast(jump_source))); + mem.writeInt(i32, patch.source_bytes[num_prefixes + 1 ..][0..4], source_disp, .little); + + patch.lock_offset = request.offset; + patch.lock_len = lock_size; + + const tactic: Tactic = if (num_prefixes == 0 and request.size >= 5) + .jump + else + .{ .punning = num_prefixes }; + return .{ .patches = .{ patch, .{} }, .tactic = tactic }; } - return false; + return null; +} + +test "attemptDirectOrPunning - Direct Jump (>= 5 bytes)" { + var patcher = try Patcher.init(testing.allocator); + defer patcher.deinit(); + + // Simulate code memory at a known location + var region: [1024]u8 align(page_size) = undefined; + @memset(®ion, nop); + // Put a 5-byte instruction at offset 0: mov eax, 1 (B8 01 00 00 00) + const instr = "\xB8\x01\x00\x00\x00"; + @memcpy(region[0..instr.len], instr); + + const source_addr = @intFromPtr(®ion); + + // Block everything except a hole at offset 0x2000 + try patcher.address_allocator.block(.{ 
.start = 0, .end = @intCast(source_addr + 0x2000) }); + try patcher.address_allocator.block(.{ + .start = @intCast(source_addr + 0x3000), + .end = @intCast(source_addr + 0x10000000), + }); + + const request = PatchRequest{ + .flicken = .nop, + .offset = 0, + .size = instr.len, + .bytes = region[0..], + }; + + var locked_bytes = try std.DynamicBitSetUnmanaged.initEmpty(testing.allocator, region.len); + defer locked_bytes.deinit(testing.allocator); + + const patch_opt = try attemptDirectOrPunning(&patcher, request, ®ion, locked_bytes); + try testing.expect(patch_opt != null); + const patch = patch_opt.?.patches[0]; + + try testing.expectEqual(.active, patch.kind); + + try testing.expectEqual(source_addr, patch.source_addr); + try testing.expectEqual(5, patch.source_len); + try testing.expectEqual(0xE9, patch.source_bytes[0]); + + try testing.expectEqual(source_addr + 0x2000, patch.trampoline_addr); + + // Trampoline bytes should be [B8 01 00 00 00][E9 xx xx xx xx] + try testing.expectEqual(instr.len + 5, patch.trampoline_len); + try testing.expectEqualSlices(u8, instr, patch.trampoline_bytes[0..5]); + try testing.expectEqual(0xE9, patch.trampoline_bytes[5]); +} + +test "attemptDirectOrPunning - Punning (< 5 bytes)" { + var patcher = try Patcher.init(testing.allocator); + defer patcher.deinit(); + + var region: [1024]u8 align(page_size) = undefined; + @memset(®ion, nop); + // Put a 2-byte instruction at offset 0: xor eax, eax (31 C0) + // Followed by 3 bytes of a successor we MUST pun into: 0xAA 0xBB 0xCC + const instr = "\x31\xC0\x11\x22\x33"; + @memcpy(region[0..instr.len], instr); + const target_addr = @intFromPtr(®ion) + 5 + 0x33221100; + + try patcher.address_allocator.block(.{ .start = 0, .end = @intCast(target_addr) }); + try patcher.address_allocator.block(.{ + .start = @intCast(target_addr + 100), + .end = math.maxInt(i64), + }); + + const request = PatchRequest{ + .flicken = .nop, + .offset = 0, + .size = 2, + .bytes = region[0..], + }; + + var locked_bytes 
= try std.DynamicBitSetUnmanaged.initEmpty(testing.allocator, region.len); + defer locked_bytes.deinit(testing.allocator); + + const patch_opt = try attemptDirectOrPunning(&patcher, request, ®ion, locked_bytes); + try testing.expect(patch_opt != null); + + const p = patch_opt.?.patches[0]; + + try testing.expectEqual(5, p.source_len); // 5 bytes overwritten + try testing.expectEqual(0xE9, p.source_bytes[0]); + + // The jump offset MUST exactly match the 3 bytes we spilled into! + try testing.expectEqual(0x11, p.source_bytes[2]); + try testing.expectEqual(0x22, p.source_bytes[3]); + try testing.expectEqual(0x33, p.source_bytes[4]); + try testing.expectEqual(target_addr, p.trampoline_addr); } fn attemptSuccessorEviction( + patcher: *Patcher, request: PatchRequest, - arena: mem.Allocator, - locked_bytes: *std.DynamicBitSetUnmanaged, - pages_made_writable: *std.AutoHashMapUnmanaged(u64, void), - stats: *Statistics, -) !bool { - // Disassemble Successor and create request and flicken for it. 
- const succ_instr = dis.disassembleInstruction(request.bytes[request.size..]) orelse return false; - const succ_request = PatchRequest{ - .flicken = .nop, - .size = succ_instr.instruction.length, - .bytes = request.bytes[request.size..], - .offset = request.offset + request.size, - }; - const succ_flicken = Flicken{ - .name = "nop", - .bytes = succ_request.bytes[0..succ_request.size], - }; + region: []align(page_size) u8, + locked_bytes: std.DynamicBitSetUnmanaged, +) !?PatchResult { + const k = request.size; + assert(k < 5); + assert(k > 0); - for (0..succ_request.size) |i| { - if (locked_bytes.isSet(succ_request.offset + i)) return false; + const source_addr = @intFromPtr(region.ptr) + request.offset; + const succ_offset = request.offset + k; + if (succ_offset >= region.len) return null; + + // Disassemble the Successor Instruction + const succ_instr_bundle = dis.disassembleInstruction(region[succ_offset..]) orelse return null; + const succ_size = succ_instr_bundle.instruction.length; + + // The total physical bytes we will overwrite. + // k + 5 covers both jumps. We may need to pad up to the end of the successor. + const lock_size = @max(k + 5, k + succ_size); + if (request.offset + lock_size > region.len) return null; + + for (0..lock_size) |i| { + if (locked_bytes.isSet(request.offset + i)) { + return null; + } } - // Save original bytes for reverting the change. - var succ_orig_bytes: [15]u8 = undefined; - @memcpy( - succ_orig_bytes[0..succ_request.size], - succ_request.bytes[0..succ_request.size], - ); + const flicken: Flicken = if (request.flicken == .nop) + .{ .name = "nop", .bytes = request.bytes[0..request.size] } + else + patcher.flicken_templates.values()[@intFromEnum(request.flicken)]; + const flicken_size = flicken.size(); - var succ_pii = PatchInstructionIterator.init( - succ_request.bytes, - succ_request.size, - succ_flicken.size(), - ); - while (succ_pii.next(.{ .count = 16 })) |succ_range| { - // Ensure bytes match original before retry. 
- assert(mem.eql( - u8, - succ_request.bytes[0..succ_request.size], - succ_orig_bytes[0..succ_request.size], - )); + const succ_flicken = Flicken{ + .name = "nop", + .bytes = region[succ_offset .. succ_offset + succ_size], + }; + const succ_flicken_size = succ_flicken.size(); - try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(succ_range)); - ensureRangeWritable( - succ_range, - pages_made_writable, + const jump_source1 = source_addr + j_rel32_size; + const jump_source2 = source_addr + k + j_rel32_size; + + // If the successor jump (5 bytes) spills over the successor instruction bounds, we must + // constrain R2 to not corrupt the instruction after the successor. + var r2_mask: u32 = 0; + var r2_pattern: u32 = 0; + for (0..4) |i| { + if (1 + i >= succ_size) { + const existing_byte = region[succ_offset + 1 + i]; + r2_mask |= @as(u32, 0xFF) << @intCast(i * 8); + r2_pattern |= @as(u32, existing_byte) << @intCast(i * 8); + } + } + + // Both requests look in the ~2GB window. 
+ // TODO: Adjust window using RIP-relative relocation information + const window: i64 = 0x7FFF0000; + const valid_range1 = Range{ + .start = @max(0, @as(i64, @intCast(jump_source1)) - window), + .end = @as(i64, @intCast(jump_source1)) + window, + }; + const valid_range2 = Range{ + .start = @max(0, @as(i64, @intCast(jump_source2)) - window), + .end = @as(i64, @intCast(jump_source2)) + window, + }; + + const r1 = AddressAllocator.Request{ + .source = jump_source1, + .size = flicken_size, + .valid_range = valid_range1, + .mask = 0, + .pattern = 0, + }; + const r2 = AddressAllocator.Request{ + .source = jump_source2, + .size = succ_flicken_size, + .valid_range = valid_range2, + .mask = r2_mask, + .pattern = r2_pattern, + }; + + const coupled_alloc = patcher.address_allocator.findCoupledAllocation(k, r1, r2) orelse return null; + const tramp1_range = coupled_alloc[0]; + const tramp2_range = coupled_alloc[1]; + + var patch1 = Patch{ .kind = .active }; + var patch2 = Patch{ .kind = .active }; + + // Populate Successor Trampoline + patch2.trampoline_addr = @intCast(tramp2_range.start); + patch2.trampoline_len = @intCast(succ_flicken_size); + @memcpy(patch2.trampoline_bytes[0..succ_size], succ_flicken.bytes); + + const reloc_info2 = reloc.RelocInfo{ + .instr = succ_instr_bundle, + .old_addr = source_addr + k, + }; + reloc.relocateInstruction( + reloc_info2.instr, + patch2.trampoline_addr, + patch2.trampoline_bytes[0..succ_size], + ) catch |err| switch (err) { + error.RelocationOverflow => return null, + else => return err, + }; + + const tramp2_jump_source = patch2.trampoline_addr + succ_size + j_rel32_size; + const tramp2_disp: i32 = @intCast(@as(i64, @intCast(source_addr + k + succ_size)) - @as(i64, @intCast(tramp2_jump_source))); + patch2.trampoline_bytes[succ_size] = j_rel32; + mem.writeInt(i32, patch2.trampoline_bytes[succ_size + 1 ..][0..4], tramp2_disp, .little); + + // Populate Original Trampoline and Source Replacements + patch1.trampoline_addr = 
@intCast(tramp1_range.start); + patch1.trampoline_len = @intCast(flicken_size); + @memcpy(patch1.trampoline_bytes[0..flicken.bytes.len], flicken.bytes); + + if (request.flicken == .nop) { + const instr_bundle = dis.disassembleInstruction(request.bytes[0..k]).?; + const reloc_info1 = reloc.RelocInfo{ + .instr = instr_bundle, + .old_addr = source_addr, + }; + reloc.relocateInstruction( + reloc_info1.instr, + patch1.trampoline_addr, + patch1.trampoline_bytes[0..flicken.bytes.len], ) catch |err| switch (err) { - error.MappingAlreadyExists => continue, + error.RelocationOverflow => return null, else => return err, }; + } - applyPatch( - succ_request, - succ_flicken, - succ_range, - succ_pii.num_prefixes, - ) catch |err| switch (err) { - error.RelocationOverflow => continue, - else => return err, - }; + // T1 returns to the Successor's jump (which is at source_addr + k) + const tramp1_jump_source: i64 = @intCast(patch1.trampoline_addr + flicken.bytes.len + j_rel32_size); + const tramp1_disp: i32 = @intCast(@as(i64, @intCast(source_addr + k)) - + @as(i64, @intCast(tramp1_jump_source))); + patch1.trampoline_bytes[flicken.bytes.len] = j_rel32; + mem.writeInt(i32, patch1.trampoline_bytes[flicken.bytes.len + 1 ..][0..4], tramp1_disp, .little); - // Now that the successor is patched, we can patch the original request. - const flicken: Flicken = if (request.flicken == .nop) - .{ .name = "nop", .bytes = request.bytes[0..request.size] } - else - flicken_templates.entries.get(@intFromEnum(request.flicken)).value; + // Populate the overlapping jumps in the original code stream + // Because they physically overlap, Patch 1 handles both J1 and J2 writing. 
+    patch1.source_addr = source_addr;
+    patch1.source_len = @intCast(lock_size);
+    @memset(patch1.source_bytes[0..lock_size], int3);
-    var orig_pii = PatchInstructionIterator.init(
-        request.bytes,
-        request.size,
-        flicken.size(),
-    );
-    while (orig_pii.next(.{ .count = 16 })) |orig_range| {
-        if (succ_range.touches(orig_range)) continue;
-        try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(orig_range));
-        ensureRangeWritable(
-            orig_range,
-            pages_made_writable,
-        ) catch |err| switch (err) {
-            error.MappingAlreadyExists => continue,
-            else => return err,
+    // Write Successor Jump First
+    patch1.source_bytes[k] = j_rel32;
+    const rel2: i32 = @intCast(tramp2_range.start - @as(i64, @intCast(jump_source2)));
+    mem.writeInt(i32, patch1.source_bytes[k + 1 ..][0..4], rel2, .little);
+
+    // Write Original Jump Over The Top
+    patch1.source_bytes[0] = j_rel32;
+    const rel1: i32 = @intCast(tramp1_range.start - @as(i64, @intCast(jump_source1)));
+    mem.writeInt(i32, patch1.source_bytes[1..][0..4], rel1, .little);
+
+    patch1.lock_offset = request.offset;
+    patch1.lock_len = lock_size;
+
+    return .{ .patches = .{ patch1, patch2 }, .tactic = .successor_eviction };
+}
+
+test "attemptSuccessorEviction - K=2" {
+    var patcher = try Patcher.init(testing.allocator);
+    defer patcher.deinit();
+
+    var region: [1024]u8 align(page_size) = undefined;
+    @memset(&region, nop);
+
+    // Instruction 1 (J1): xor eax, eax (31 C0) -> 2 bytes
+    // Instruction 2 (J2): mov eax, 1 (B8 01 00 00 00) -> 5 bytes
+    const instr = "\x31\xC0\xB8\x01\x00\x00\x00";
+    @memcpy(region[0..instr.len], instr);
+
+    const request = PatchRequest{
+        .flicken = .nop,
+        .offset = 0,
+        .size = 2,
+        .bytes = region[0..],
+    };
+
+    const source_addr = @intFromPtr(&region);
+
+    // We block the immediate area to force the solver to search for a coupled solution.
+    try patcher.address_allocator.block(.{ .start = 0, .end = @intCast(source_addr + 0x2000) });
+
+    var locked_bytes = try std.DynamicBitSetUnmanaged.initEmpty(testing.allocator, region.len);
+    defer locked_bytes.deinit(testing.allocator);
+
+    var instruction_starts = try std.DynamicBitSetUnmanaged.initEmpty(testing.allocator, region.len);
+    defer instruction_starts.deinit(testing.allocator);
+    instruction_starts.set(0);
+    instruction_starts.set(2);
+
+    const patches_opt = try attemptSuccessorEviction(&patcher, request, &region, locked_bytes);
+    try testing.expect(patches_opt != null);
+    const patches = patches_opt.?.patches;
+
+    try testing.expectEqual(.active, patches[0].kind);
+    try testing.expectEqual(.active, patches[1].kind);
+
+    const p1 = patches[0];
+    try testing.expectEqual(source_addr, p1.source_addr);
+
+    // k=2, succ_size=5 -> lock_size = max(2+5, 2+5) = 7
+    try testing.expectEqual(7, p1.source_len);
+
+    // Verify mathematical overlap worked
+    try testing.expectEqual(0xE9, p1.source_bytes[0]); // J1 Opcode
+    try testing.expectEqual(0xE9, p1.source_bytes[2]); // J2 Opcode is perfectly preserved!
+
+    const rel1 = mem.readInt(i32, p1.source_bytes[1..5], .little);
+    const rel2 = mem.readInt(i32, p1.source_bytes[3..7], .little);
+
+    // The top 2 bytes of rel1 MUST exactly match the bottom 2 bytes of rel2
+    const u_rel1: u32 = @bitCast(rel1);
+    const u_rel2: u32 = @bitCast(rel2);
+    try testing.expectEqual((u_rel1 >> 16) & 0xFFFF, u_rel2 & 0xFFFF);
+}
+
+fn attemptNeighborEviction(
+    patcher: *Patcher,
+    request: PatchRequest,
+    region: []align(page_size) u8,
+    instruction_starts: std.DynamicBitSetUnmanaged,
+    locked_bytes: std.DynamicBitSetUnmanaged,
+) !?PatchResult {
+    // Neighbor Eviction requires at least 2 bytes for the short jump (0xEB rel8)
+    if (request.size < 2) return null;
+
+    const source_addr = @intFromPtr(region.ptr) + request.offset;
+    const start_offset = request.offset + 2;
+    // Valid short jump displacement is [-128, 127].
We only look forward to avoid evicting + // instructions we haven't patched yet. + const end_offset = @min(start_offset + 128, region.len); + + const flicken: Flicken = if (request.flicken == .nop) + .{ .name = "nop", .bytes = request.bytes[0..request.size] } + else + patcher.flicken_templates.values()[@intFromEnum(request.flicken)]; + const flicken_size = flicken.size(); + + neighbor: for (start_offset..end_offset) |neighbor_offset| { + if (!instruction_starts.isSet(neighbor_offset)) continue; + + const victim_bytes_all = region[neighbor_offset..]; + const victim_instr_bundle = dis.disassembleInstruction(victim_bytes_all) orelse continue; + const victim_size = victim_instr_bundle.instruction.length; + + for (0..victim_size) |i| { + if (locked_bytes.isSet(neighbor_offset + i)) continue :neighbor; + } + + const neighbor_addr = source_addr + (neighbor_offset - request.offset); + + // Try to split the victim instruction at offset `k` + var k: u8 = 1; + while (k < victim_size) : (k += 1) { + const victim_lock_size = @max(victim_size, k + j_rel32_size); + if (neighbor_offset + victim_lock_size > region.len) continue; + + // Calculate short jump displacement (from end of original instruction to J_P) + const target_offset: i64 = @intCast(neighbor_offset + k); + const source_end_offset: i64 = @intCast(request.offset + 2); + const disp = target_offset - source_end_offset; + if (disp > 127 or disp < -128) continue; + + // Ensure our J_P spill doesn't corrupt already locked bytes + for (victim_size..victim_lock_size) |i| { + if (locked_bytes.isSet(neighbor_offset + i)) continue; + } + + // Build constraint for J_P (the Patch jump) + var rp_mask: u32 = 0; + var rp_pattern: u32 = 0; + for (0..4) |i| { + const byte_offset = k + 1 + i; + if (byte_offset >= victim_size) { + const existing_byte = region[neighbor_offset + byte_offset]; + rp_mask |= @as(u32, 0xFF) << @intCast(i * 8); + rp_pattern |= @as(u32, existing_byte) << @intCast(i * 8); + } + } + + const jump_source_V = 
neighbor_addr + j_rel32_size; + const jump_source_P = neighbor_addr + k + j_rel32_size; + + // Look in the ~2GB window + const window: i64 = 0x7FFF0000; + const r_V = AddressAllocator.Request{ + .source = jump_source_V, + .size = victim_size + j_rel32_size, + .valid_range = .{ + .start = @max(0, @as(i64, @intCast(jump_source_V)) - window), + .end = @as(i64, @intCast(jump_source_V)) + window, + }, + .mask = 0, + .pattern = 0, + }; + const r_P = AddressAllocator.Request{ + .source = jump_source_P, + .size = flicken_size, + .valid_range = .{ + .start = @max(0, @as(i64, @intCast(jump_source_P)) - window), + .end = @as(i64, @intCast(jump_source_P)) + window, + }, + .mask = rp_mask, + .pattern = rp_pattern, }; - applyPatch( - request, - flicken, - orig_range, - orig_pii.num_prefixes, + const coupled_alloc = patcher.address_allocator.findCoupledAllocation(k, r_V, r_P) orelse continue; + const tramp_V_range = coupled_alloc[0]; + const tramp_P_range = coupled_alloc[1]; + + var patch1 = Patch{ .kind = .active }; + var patch2 = Patch{ .kind = .active }; + + // Patch 1: Original Short Jump + Flicken Trampoline + patch1.source_addr = source_addr; + patch1.source_len = request.size; + @memset(patch1.source_bytes[0..patch1.source_len], int3); + patch1.source_bytes[0] = j_rel8; + patch1.source_bytes[1] = @intCast(disp); + + patch1.trampoline_addr = @intCast(tramp_P_range.start); + patch1.trampoline_len = @intCast(flicken_size); + @memcpy(patch1.trampoline_bytes[0..flicken.bytes.len], flicken.bytes); + + if (request.flicken == .nop) { + const reloc_info_p = reloc.RelocInfo{ + .instr = dis.disassembleInstruction(request.bytes[0..request.size]).?, + .old_addr = source_addr, + }; + reloc.relocateInstruction( + reloc_info_p.instr, + patch1.trampoline_addr, + patch1.trampoline_bytes[0..flicken.bytes.len], + ) catch |err| switch (err) { + error.RelocationOverflow => continue, + else => return err, + }; + } + + const tramp_P_jump_source = patch1.trampoline_addr + flicken.bytes.len + 
j_rel32_size; + const tramp_P_disp: i32 = @intCast(@as(i64, @intCast(source_addr + request.size)) - @as(i64, @intCast(tramp_P_jump_source))); + patch1.trampoline_bytes[flicken.bytes.len] = j_rel32; + mem.writeInt(i32, patch1.trampoline_bytes[flicken.bytes.len + 1 ..][0..4], tramp_P_disp, .little); + + patch1.lock_offset = request.offset; + patch1.lock_len = request.size; + + // Patch 2: Victim Coupled Jump + Victim Trampoline + patch2.source_addr = neighbor_addr; + patch2.source_len = @intCast(victim_lock_size); + @memset(patch2.source_bytes[0..patch2.source_len], int3); + + // Write J_P (The jump targeted by our short jump) at offset k + patch2.source_bytes[k] = j_rel32; + const rel_P: i32 = @intCast(tramp_P_range.start - @as(i64, @intCast(jump_source_P))); + mem.writeInt(i32, patch2.source_bytes[k + 1 ..][0..4], rel_P, .little); + + // Write J_V (The victim's jump) at offset 0 + patch2.source_bytes[0] = j_rel32; + const rel_V: i32 = @intCast(tramp_V_range.start - @as(i64, @intCast(jump_source_V))); + mem.writeInt(i32, patch2.source_bytes[1..][0..4], rel_V, .little); + + patch2.trampoline_addr = @intCast(tramp_V_range.start); + patch2.trampoline_len = @intCast(victim_size + j_rel32_size); + @memcpy(patch2.trampoline_bytes[0..victim_size], victim_bytes_all[0..victim_size]); + + const reloc_info_v = reloc.RelocInfo{ + .instr = victim_instr_bundle, + .old_addr = neighbor_addr, + }; + reloc.relocateInstruction( + reloc_info_v.instr, + patch2.trampoline_addr, + patch2.trampoline_bytes[0..victim_size], ) catch |err| switch (err) { error.RelocationOverflow => continue, else => return err, }; - try address_allocator.block(gpa, succ_range, 0); - try address_allocator.block(gpa, orig_range, 0); - const lock_size = request.size + jump_rel32_size + succ_pii.num_prefixes; - locked_bytes.setRangeValue( - .{ .start = request.offset, .end = request.offset + lock_size }, - true, - ); - stats.successor_eviction += 1; - return true; - } + const tramp_V_jump_source = 
patch2.trampoline_addr + victim_size + j_rel32_size; + const tramp_V_disp: i32 = @intCast(@as(i64, @intCast(neighbor_addr + victim_size)) - @as(i64, @intCast(tramp_V_jump_source))); + patch2.trampoline_bytes[victim_size] = j_rel32; + mem.writeInt(i32, patch2.trampoline_bytes[victim_size + 1 ..][0..4], tramp_V_disp, .little); - // We couldn't patch with the bytes. So revert to original ones. - @memcpy( - succ_request.bytes[0..succ_request.size], - succ_orig_bytes[0..succ_request.size], - ); - } - return false; -} + patch2.lock_offset = neighbor_offset; + patch2.lock_len = victim_lock_size; -fn attemptNeighborEviction( - request: PatchRequest, - arena: mem.Allocator, - locked_bytes: *std.DynamicBitSetUnmanaged, - pages_made_writable: *std.AutoHashMapUnmanaged(u64, void), - instruction_starts: *const std.DynamicBitSetUnmanaged, - stats: *Statistics, -) !bool { - // Valid neighbors must be within [-128, 127] range for a short jump. - // Since we patch back-to-front, we only look at neighbors *after* the current instruction - // (higher address) to avoid evicting an instruction we haven't processed/patched yet. - const start_offset = request.offset + 2; - const end_offset = @min( - start_offset + 128, - request.bytes.len + request.offset, - ); - - neighbor: for (start_offset..end_offset) |neighbor_offset| { - if (!instruction_starts.isSet(neighbor_offset)) continue; - - const victim_bytes_all = request.bytes[neighbor_offset - request.offset ..]; - - // PERF: We could also search for the next set bit in instruction_starts - const victim_instr = dis.disassembleInstruction(victim_bytes_all) orelse continue; - const victim_size = victim_instr.instruction.length; - const victim_bytes = victim_bytes_all[0..victim_size]; - - for (0..victim_size) |i| { - if (locked_bytes.isSet(neighbor_offset + i)) { - continue :neighbor; - } - } - - // Save original bytes to revert if constraints cannot be solved. 
- var victim_orig_bytes: [15]u8 = undefined; - @memcpy(victim_orig_bytes[0..victim_size], victim_bytes); - - // OUTER LOOP: J_Patch - // Iterate possible offsets 'k' inside the victim for the patch jump. - var k: u8 = 1; - while (k < victim_size) : (k += 1) { - const target: i64 = @intCast(neighbor_offset + k); - const source: i64 = @intCast(request.offset + 2); - const disp = target - source; - if (disp > 127 or disp < -128) continue; - - const patch_flicken: Flicken = if (request.flicken == .nop) - .{ .name = "nop", .bytes = request.bytes[0..request.size] } - else - flicken_templates.entries.get(@intFromEnum(request.flicken)).value; - - // Constraints for J_Patch: - // Bytes [0 .. victim_size - k] are free (inside victim). - // Bytes [victim_size - k .. ] are used (outside victim, immutable). - var patch_pii = PatchInstructionIterator.init( - victim_bytes_all[k..], - @intCast(victim_size - k), - patch_flicken.size(), - ); - - while (patch_pii.next(.{ .count = 16 })) |patch_range| { - // J_Patch MUST NOT use prefixes, because it's punned inside J_Victim. - // Adding prefixes would shift J_Patch relative to J_Victim, making constraints harder. - if (patch_pii.num_prefixes > 0) break; - - try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(patch_range)); - ensureRangeWritable(patch_range, pages_made_writable) catch |err| switch (err) { - error.MappingAlreadyExists => continue, - else => return err, - }; - - // Tentatively write J_Patch to memory to set constraints for J_Victim. - // We only need to write the bytes of J_Patch that land inside the victim. - { - const jmp_target = patch_range.start; - const jmp_source: i64 = @intCast(@intFromPtr(&victim_bytes_all[k]) + 5); - const rel32: i32 = @intCast(jmp_target - jmp_source); - victim_bytes_all[k] = jump_rel32; - mem.writeInt(i32, victim_bytes_all[k + 1 ..][0..4], rel32, .little); - } - - // INNER LOOP: J_Victim - // Constraints: - // Bytes [0 .. k] are free (before J_Patch). - // Bytes [k .. 
] are used (overlap J_Patch). - const victim_flicken = Flicken{ - .name = "nop", - .bytes = victim_orig_bytes[0..victim_size], - }; - - var victim_pii = PatchInstructionIterator.init( - victim_bytes_all, - k, - victim_flicken.size(), - ); - - while (victim_pii.next(.{ .count = 16 })) |victim_range| { - if (patch_range.touches(victim_range)) continue; - - try pages_made_writable.ensureUnusedCapacity(arena, touchedPageCount(victim_range)); - ensureRangeWritable(victim_range, pages_made_writable) catch |err| switch (err) { - error.MappingAlreadyExists => continue, - else => return err, - }; - - // SUCCESS! Commit everything. - - // 1. Write Patch Trampoline (J_Patch target) - { - const trampoline: [*]u8 = @ptrFromInt(patch_range.getStart(u64)); - var reloc_info: ?RelocInfo = null; - if (request.flicken == .nop) { - reloc_info = .{ - .instr = dis.disassembleInstruction(patch_flicken.bytes).?, - .old_addr = @intFromPtr(request.bytes.ptr), - }; - } - commitTrampoline( - trampoline, - patch_flicken.bytes, - reloc_info, - @intFromPtr(request.bytes.ptr) + request.size, - ) catch |err| switch (err) { - error.RelocationOverflow => continue, - else => return err, - }; - } - - // 2. Write Victim Trampoline (J_Victim target) - { - const trampoline: [*]u8 = @ptrFromInt(victim_range.getStart(u64)); - commitTrampoline( - trampoline, - victim_orig_bytes[0..victim_size], - .{ - .instr = dis.disassembleInstruction(victim_orig_bytes[0..victim_size]).?, - .old_addr = @intFromPtr(victim_bytes_all.ptr), - }, - @intFromPtr(victim_bytes_all.ptr) + victim_size, - ) catch |err| switch (err) { - error.RelocationOverflow => continue, - else => return err, - }; - } - - // 3. Write J_Victim (overwrites head of J_Patch which is fine) - commitJump( - victim_bytes_all.ptr, - @intCast(victim_range.start), - victim_pii.num_prefixes, - k, // Total size for padding is limited to k to preserve J_Patch tail - ); - - // 4. 
Write J_Short at request - request.bytes[0] = jump_rel8; - request.bytes[1] = @intCast(disp); - if (request.size > 2) { - @memset(request.bytes[2..request.size], int3); - } - - // 5. Locking - try address_allocator.block(gpa, patch_range, 0); - try address_allocator.block(gpa, victim_range, 0); - - locked_bytes.setRangeValue( - .{ .start = request.offset, .end = request.offset + request.size }, - true, - ); - // Lock victim range + any extension of J_Patch - const j_patch_end = neighbor_offset + k + 5; - const lock_end = @max(neighbor_offset + victim_size, j_patch_end); - locked_bytes.setRangeValue( - .{ .start = neighbor_offset, .end = lock_end }, - true, - ); - - stats.neighbor_eviction += 1; - return true; - } - - // Revert J_Patch write for next iteration - @memcpy(victim_bytes, victim_orig_bytes[0..victim_size]); - } + return PatchResult{ .patches = .{ patch1, patch2 }, .tactic = .neighbor_eviction }; } } - - return false; + return null; } -/// Applies a standard patch (T1/B1/B2) where the instruction is replaced by a jump to a trampoline. -/// -/// This handles the logic of writing the trampoline content (including relocation) and -/// overwriting the original instruction with a `JMP` (plus prefixes/padding). 
-fn applyPatch(
-    request: PatchRequest,
-    flicken: Flicken,
-    allocated_range: Range,
-    num_prefixes: u8,
-) !void {
-    const flicken_addr: [*]u8 = @ptrFromInt(allocated_range.getStart(u64));
+test "attemptNeighborEviction - Valid Neighbor Found" {
+    var patcher = try Patcher.init(testing.allocator);
+    defer patcher.deinit();
-    // Commit Trampoline
-    var reloc_info: ?RelocInfo = null;
-    if (request.flicken == .nop) {
-        reloc_info = .{
-            .instr = dis.disassembleInstruction(request.bytes[0..request.size]).?,
-            .old_addr = @intFromPtr(request.bytes.ptr),
-        };
-    }
+    var region: [1024]u8 align(page_size) = undefined;
+    @memset(&region, 0);
-    const ret_addr = @intFromPtr(request.bytes.ptr) + request.size;
-    try commitTrampoline(flicken_addr, flicken.bytes, reloc_info, ret_addr);
+    // Target (I): xor eax, eax (31 C0) -> 2 bytes [Offset 0]
+    // Padding: NOP NOP (90 90) -> 2 bytes [Offset 2]
+    // Neighbor (N): mov eax, 1 (B8 01 00 00 00) -> 5 bytes [Offset 4]
+    const instr = "\x31\xC0\x90\x90\xB8\x01\x00\x00\x00";
+    @memcpy(region[0..instr.len], instr);
-    // Commit Jump (Patch)
-    commitJump(request.bytes.ptr, @intCast(allocated_range.start), num_prefixes, request.size);
-}
+    const source_addr = @intFromPtr(&region);
-const RelocInfo = struct {
-    instr: dis.BundledInstruction,
-    old_addr: u64,
-};
-
-/// Helper to write code into a trampoline.
-///
-/// It copies the original bytes (or flicken content), relocates any RIP-relative instructions
-/// to be valid at the new address, and appends a jump back to the instruction stream.
-fn commitTrampoline( - trampoline_ptr: [*]u8, - content: []const u8, - reloc_info: ?RelocInfo, - return_addr: u64, -) !void { - @memcpy(trampoline_ptr[0..content.len], content); - - if (reloc_info) |info| { - try relocateInstruction( - info.instr, - @intFromPtr(trampoline_ptr), - trampoline_ptr[0..content.len], - ); - } - - // Write jump back - trampoline_ptr[content.len] = jump_rel32; - const jump_src = @intFromPtr(trampoline_ptr) + content.len + jump_rel32_size; - const jump_disp: i32 = @intCast(@as(i64, @intCast(return_addr)) - @as(i64, @intCast(jump_src))); - mem.writeInt(i32, trampoline_ptr[content.len + 1 ..][0..4], jump_disp, .little); -} - -/// Helper to overwrite an instruction with a jump to a trampoline. -/// -/// It handles writing optional prefixes (padding), the `0xE9` opcode, the relative offset, -/// and fills any remaining bytes of the original instruction with `INT3` to prevent -/// execution of garbage bytes. -fn commitJump( - from_ptr: [*]u8, - to_addr: u64, - num_prefixes: u8, - total_size: usize, -) void { - const prefixes_slice = from_ptr[0..num_prefixes]; - @memcpy(prefixes_slice, prefixes[0..num_prefixes]); - - from_ptr[num_prefixes] = jump_rel32; - - const jump_src = @intFromPtr(from_ptr) + num_prefixes + jump_rel32_size; - const jump_disp: i32 = @intCast(@as(i64, @intCast(to_addr)) - @as(i64, @intCast(jump_src))); - mem.writeInt(i32, from_ptr[num_prefixes + 1 ..][0..4], jump_disp, .little); - - const patch_end_index = num_prefixes + jump_rel32_size; - if (patch_end_index < total_size) { - @memset(from_ptr[patch_end_index..total_size], int3); - } -} - -/// Only used for debugging. -fn printMaps() !void { - const path = "/proc/self/maps"; - var reader = try std.fs.cwd().openFile(path, .{}); - var buffer: [1024 * 1024]u8 = undefined; - const size = try reader.readAll(&buffer); - std.debug.print("\n{s}\n", .{buffer[0..size]}); -} - -/// Returns the number of pages that the given range touches. 
-fn touchedPageCount(range: Range) u32 { - const start_page = mem.alignBackward(u64, range.getStart(u64), page_size); - // alignBackward on (end - 1) handles the exclusive upper bound correctly - const end_page = mem.alignBackward(u64, range.getEnd(u64) - 1, page_size); - return @intCast((end_page - start_page) / page_size + 1); -} - -/// Ensure `range` is mapped R|W. Assumes `pages_made_writable` has enough free capacity. -fn ensureRangeWritable( - range: Range, - pages_made_writable: *std.AutoHashMapUnmanaged(u64, void), -) !void { - const start_page = mem.alignBackward(u64, range.getStart(u64), page_size); - const end_page = mem.alignBackward(u64, range.getEnd(u64) - 1, page_size); - const protection = posix.PROT.READ | posix.PROT.WRITE; - var page_addr = start_page; - while (page_addr <= end_page) : (page_addr += page_size) { - // If the page is already writable, skip it. - if (pages_made_writable.get(page_addr)) |_| continue; - // If we mapped it already we have to do mprotect, else mmap. - const gop = try allocated_pages.getOrPut(gpa, page_addr); - if (gop.found_existing) { - const ptr: [*]align(page_size) u8 = @ptrFromInt(page_addr); - try posix.mprotect(ptr[0..page_size], protection); - } else { - const addr = posix.mmap( - @ptrFromInt(page_addr), - page_size, - protection, - .{ .TYPE = .PRIVATE, .ANONYMOUS = true, .FIXED_NOREPLACE = true }, - -1, - 0, - ) catch |err| switch (err) { - error.MappingAlreadyExists => { - // If the mapping exists this means that the someone else - // (executable, OS, dynamic loader,...) allocated something there. - // We block this so we don't try this page again in the future, - // saving a bunch of syscalls. - try address_allocator.block( - gpa, - .{ .start = @intCast(page_addr), .end = @intCast(page_addr + page_size) }, - page_size, - ); - return err; - }, - else => return err, - }; - assert(@as(u64, @intFromPtr(addr.ptr)) == page_addr); - // `gop.value_ptr.* = {};` not needed because it's void. 
- } - pages_made_writable.putAssumeCapacityNoClobber(page_addr, {}); - } -} - -const PatchInstructionIterator = struct { - bytes: []const u8, // first byte is first byte of instruction to patch. - instruction_size: u8, - flicken_size: u64, - - // Internal state - num_prefixes: u8, - pli: PatchLocationIterator, - valid_range: Range, - allocated_count: u64, - - fn init( - bytes: []const u8, - instruction_size: u8, - flicken_size: u64, - ) PatchInstructionIterator { - const patch_bytes = getPatchBytes(bytes, instruction_size, 0); - var pli = PatchLocationIterator.init(patch_bytes, @intFromPtr(&bytes[5])); - const valid_range = pli.next() orelse Range{ .start = 0, .end = 0 }; - return .{ - .bytes = bytes, - .instruction_size = instruction_size, - .flicken_size = flicken_size, - .num_prefixes = 0, - .pli = pli, - .valid_range = valid_range, - .allocated_count = 0, - }; - } - - pub const Strategy = union(enum) { - /// Iterates through all possible ranges. - /// Useful for finding the optimal allocation (fewest prefixes). - exhaustive: void, - /// Limits the search to `count` allocation attempts per valid constraint range found by the - /// PatchLocationIterator. - /// - /// This acts as a heuristic to prevent worst-case performance (scanning every byte of a 2GB - /// gap) while still offering better density than a purely greedy approach. A count of 1 is - /// equivalent to a greedy strategy. 
- count: u64, + const request = PatchRequest{ + .flicken = .nop, + .offset = 0, + .size = 2, + .bytes = region[0..], }; - fn next( - pii: *PatchInstructionIterator, - strategy: Strategy, - ) ?Range { - const State = enum { - allocation, - range, - prefix, - }; - blk: switch (State.allocation) { - .allocation => { - if (address_allocator.findAllocation( - pii.flicken_size, - pii.valid_range, - )) |allocated_range| { - assert(allocated_range.size() == pii.flicken_size); - pii.allocated_count += 1; - // Advancing the valid range, such that the next call to `findAllocation` won't - // find the same range again. - switch (strategy) { - .exhaustive => pii.valid_range.start = allocated_range.start + 1, - .count => |c| { - if (pii.allocated_count >= c) { - pii.valid_range.start = pii.valid_range.end; - pii.allocated_count = 0; - } else { - pii.valid_range.start = allocated_range.start + 1; - } - }, - } - return allocated_range; - } else { - pii.allocated_count = 0; - continue :blk .range; - } - }, - .range => { - // Valid range is used up, so get a new one from the pli. - if (pii.pli.next()) |valid_range| { - pii.valid_range = valid_range; - continue :blk .allocation; - } else { - continue :blk .prefix; - } - }, - .prefix => { - if (pii.num_prefixes < @min(pii.instruction_size, prefixes.len)) { - pii.num_prefixes += 1; - const patch_bytes = getPatchBytes(pii.bytes, pii.instruction_size, pii.num_prefixes); - pii.pli = PatchLocationIterator.init( - patch_bytes, - @intFromPtr(&pii.bytes[pii.num_prefixes + 5]), - ); - continue :blk .range; - } else { - return null; - } - }, - } - comptime unreachable; - } + // Block immediate area to trigger the complex coupled solver logic. 
+ try patcher.address_allocator.block(.{ .start = 0, .end = @intCast(source_addr + 0x2000) }); - fn getPatchBytes(instruction_bytes: []const u8, instruction_size: u8, num_prefixes: u8) [4]PatchByte { - const offset_location = instruction_bytes[num_prefixes + 1 ..][0..4]; // +1 for e9 - var patch_bytes: [4]PatchByte = undefined; - for (&patch_bytes, offset_location, num_prefixes + 1..) |*patch_byte, offset_byte, i| { - if (i < instruction_size) { - patch_byte.* = .free; - } else { - patch_byte.* = .{ .used = offset_byte }; - } - } - return patch_bytes; - } -}; + var locked_bytes = try std.DynamicBitSetUnmanaged.initEmpty(testing.allocator, region.len); + defer locked_bytes.deinit(testing.allocator); -/// Fixes RIP-relative operands in an instruction that has been moved to a new address. -fn relocateInstruction( - instruction: dis.BundledInstruction, - address: u64, - buffer: []u8, -) !void { - const instr = instruction.instruction; - // Iterate all operands - for (0..instr.operand_count) |i| { - const operand = &instruction.operands[i]; + var instruction_starts = try std.DynamicBitSetUnmanaged.initEmpty(testing.allocator, region.len); + defer instruction_starts.deinit(testing.allocator); + instruction_starts.set(0); + instruction_starts.set(2); + instruction_starts.set(3); + instruction_starts.set(4); // Neighbor starts here - // Check for RIP-relative memory operand - const is_rip_rel = operand.type == zydis.ZYDIS_OPERAND_TYPE_MEMORY and - operand.unnamed_0.mem.base == zydis.ZYDIS_REGISTER_RIP; - // Check for relative immediate (e.g. 
JMP rel32)
-        const is_rel_imm = operand.type == zydis.ZYDIS_OPERAND_TYPE_IMMEDIATE and
-            operand.unnamed_0.imm.is_relative == zydis.ZYAN_TRUE;
-        if (!is_rip_rel and !is_rel_imm) continue;
+    const patches_opt = try attemptNeighborEviction(&patcher, request, &region, instruction_starts, locked_bytes);
+    try testing.expect(patches_opt != null);
+    const patches = patches_opt.?.patches;
-        // We have to apply a relocation
-        var result_address: u64 = 0;
-        const status = zydis.ZydisCalcAbsoluteAddress(
-            instr,
-            operand,
-            instruction.address,
-            &result_address,
-        );
-        assert(zydis.ZYAN_SUCCESS(status)); // TODO: maybe return an error instead
+    try testing.expectEqual(.active, patches[0].kind);
+    try testing.expectEqual(.active, patches[1].kind);
-        // Calculate new displacement relative to the new address
-        // The instruction length remains the same.
-        const next_rip: i64 = @intCast(address + instr.length);
-        const new_disp = @as(i64, @intCast(result_address)) - next_rip;
+    const p1 = patches[0];
+    const p2 = patches[1];
-        var offset: u16 = 0;
-        var size_bits: u8 = 0;
+    // Verify Patch 1 (The short jump)
+    try testing.expectEqual(source_addr, p1.source_addr);
+    try testing.expectEqual(2, p1.source_len);
+    try testing.expectEqual(0xEB, p1.source_bytes[0]);
-        if (is_rip_rel) {
-            offset = instr.raw.disp.offset;
-            size_bits = instr.raw.disp.size;
-        } else {
-            assert(is_rel_imm);
-            // For relative immediate, find the matching raw immediate.
-            var found = false;
-            for (&instr.raw.imm) |*imm| {
-                if (imm.is_relative == zydis.ZYAN_TRUE) {
-                    offset = imm.offset;
-                    size_bits = imm.size;
-                    found = true;
-                    break;
-                }
-            }
-            assert(found);
-        }
+    // Displacement should jump to the hole created at offset 4.
+    // Short jump origin is end of instruction (offset 2).
+    // Target is `neighbor_offset + k`. Assume it chose k=2 for the overlap: 4 + 2 = 6.
+    // disp = 6 - 2 = 4.
+ const expected_disp = p1.source_bytes[1]; + const target_offset = 2 + @as(i8, @bitCast(expected_disp)); + try testing.expect(target_offset > 4 and target_offset < 9); - assert(offset != 0); - assert(size_bits != 0); - const size_bytes = size_bits / 8; + // Verify Patch 2 (The overlapping jumps in the neighbor's location) + try testing.expectEqual(source_addr + 4, p2.source_addr); + try testing.expectEqual(0xE9, p2.source_bytes[0]); // J_V starts with 0xE9 - if (offset + size_bytes > buffer.len) { - return error.RelocationFail; - } - - const fits = switch (size_bits) { - 8 => new_disp >= math.minInt(i8) and new_disp <= math.maxInt(i8), - 16 => new_disp >= math.minInt(i16) and new_disp <= math.maxInt(i16), - 32 => new_disp >= math.minInt(i32) and new_disp <= math.maxInt(i32), - 64 => true, - else => unreachable, - }; - - if (!fits) { - return error.RelocationOverflow; - } - - const ptr = buffer[offset..]; - switch (size_bits) { - 8 => ptr[0] = @as(u8, @bitCast(@as(i8, @intCast(new_disp)))), - 16 => mem.writeInt(u16, ptr[0..2], @bitCast(@as(i16, @intCast(new_disp))), .little), - 32 => mem.writeInt(u32, ptr[0..4], @bitCast(@as(i32, @intCast(new_disp))), .little), - 64 => mem.writeInt(u64, ptr[0..8], @bitCast(@as(i64, @intCast(new_disp))), .little), - else => unreachable, - } - } + const k = target_offset - 4; + try testing.expectEqual(0xE9, p2.source_bytes[@intCast(k)]); // J_P starts with 0xE9 exactly where the short jump points! 
} diff --git a/src/Range.zig b/src/Range.zig index b04060f..5804fc0 100644 --- a/src/Range.zig +++ b/src/Range.zig @@ -17,16 +17,6 @@ pub fn size(range: Range) u64 { return @intCast(range.end - range.start); } -pub fn alignTo(range: Range, alignment: u64) Range { - assert(range.end >= range.start); - assert(std.math.isPowerOfTwo(alignment)); - assert(alignment <= std.math.maxInt(i64)); - const lower = std.mem.alignBackward(i64, range.start, @intCast(alignment)); - const upper = std.mem.alignForward(i64, range.end, @intCast(alignment)); - assert(upper >= lower); - return .{ .start = lower, .end = upper }; -} - pub fn overlaps(range: Range, other: Range) bool { assert(range.end >= range.start); assert(other.end >= other.start); @@ -52,18 +42,17 @@ pub fn touches(range: Range, other: Range) bool { } /// Ranges are considered equal if they touch. -pub fn compare(lhs: Range, rhs: Range) std.math.Order { +pub fn compareTouching(lhs: Range, rhs: Range) std.math.Order { assert(lhs.end >= lhs.start); assert(rhs.end >= rhs.start); return if (lhs.start > rhs.end) .gt else if (lhs.end < rhs.start) .lt else .eq; } -pub fn getStart(range: Range, T: type) T { - return @intCast(range.start); -} - -pub fn getEnd(range: Range, T: type) T { - return @intCast(range.end); +/// Ranges are considered equal if they overlap. 
+pub fn compareOverlapping(lhs: Range, rhs: Range) std.math.Order { + assert(lhs.end >= lhs.start); + assert(rhs.end >= rhs.start); + return if (lhs.start >= rhs.end) .gt else if (lhs.end <= rhs.start) .lt else .eq; } pub fn format( @@ -73,25 +62,23 @@ pub fn format( try writer.print(".{{ .start = 0x{x}, .end = 0x{x} }}", .{ self.start, self.end }); } +pub fn fromSlice(T: type, slice: []T) Range { + const start = @intFromPtr(slice.ptr); + return .{ + .start = @intCast(start), + .end = @intCast(start + slice.len * @sizeOf(T)), + }; +} + +pub fn fromPtr(ptr: [*]u8, len: usize) Range { + return .fromSlice(u8, ptr[0..len]); +} + test "AddressRange size" { const range = Range{ .start = 100, .end = 250 }; try std.testing.expectEqual(@as(u64, 150), range.size()); } -test "AddressRange alignTo unaligned" { - const range = Range{ .start = 101, .end = 199 }; - const aligned = range.alignTo(16); - try std.testing.expectEqual(@as(i64, 96), aligned.start); - try std.testing.expectEqual(@as(i64, 208), aligned.end); -} - -test "AddressRange alignTo already aligned" { - const range = Range{ .start = 64, .end = 128 }; - const aligned = range.alignTo(64); - try std.testing.expectEqual(@as(i64, 64), aligned.start); - try std.testing.expectEqual(@as(i64, 128), aligned.end); -} - test "AddressRange no overlap before" { const base = Range{ .start = 100, .end = 200 }; const other = Range{ .start = 0, .end = 100 }; diff --git a/src/Statistics.zig b/src/Statistics.zig new file mode 100644 index 0000000..9a73bc0 --- /dev/null +++ b/src/Statistics.zig @@ -0,0 +1,46 @@ +const std = @import("std"); +const mem = std.mem; + +const Statistics = @This(); + +/// Direct jumps +jump: u64, +/// Punning - index represents number of prefixes used +punning: [4]u64, +/// Successor Eviction +successor_eviction: u64, +/// Neighbor Eviction +neighbor_eviction: u64, +/// Failed to patch +failed: u64, + +pub const empty = mem.zeroes(Statistics); + +pub fn punningSum(stats: *const Statistics) u64 { + return 
stats.punning[0] + stats.punning[1] + stats.punning[2] + stats.punning[3]; +} + +pub fn successful(stats: *const Statistics) u64 { + return stats.jump + stats.punningSum() + stats.successor_eviction + stats.neighbor_eviction; +} + +pub fn total(stats: *const Statistics) u64 { + return stats.successful() + stats.failed; +} + +pub fn percentage(stats: *const Statistics) f64 { + if (stats.total() == 0) return 1; + const s: f64 = @floatFromInt(stats.successful()); + const t: f64 = @floatFromInt(stats.total()); + return s / t; +} + +pub fn add(self: *Statistics, other: *const Statistics) void { + self.jump += other.jump; + for (0..self.punning.len) |i| { + self.punning[i] += other.punning[i]; + } + self.successor_eviction += other.successor_eviction; + self.neighbor_eviction += other.neighbor_eviction; + self.failed += other.failed; +} diff --git a/src/backend.zig b/src/backend.zig new file mode 100644 index 0000000..3d2d5c1 --- /dev/null +++ b/src/backend.zig @@ -0,0 +1,49 @@ +const std = @import("std"); +const p = std.posix; + +const page_size_min = std.heap.page_size_min; + +pub const backend = switch (@import("builtin").is_test) { + true => testing, + false => posix, +}; + +// TODO: Maybe log? 
+pub const testing = struct { + pub fn mmap( + ptr: [*]align(page_size_min) u8, + length: usize, + prot: u32, + flags: p.MAP, + fd: p.fd_t, + offset: u64, + ) p.MMapError![]align(page_size_min) u8 { + _ = .{ ptr, length, prot, flags, fd, offset }; + return ptr[0..length]; + } + pub fn mprotect(memory: []align(page_size_min) u8, protection: u32) p.MProtectError!void { + _ = .{ memory, protection }; + } + pub fn munmap(memory: []align(page_size_min) const u8) void { + _ = memory; + } +}; + +pub const posix = struct { + pub fn mmap( + ptr: ?[*]align(page_size_min) u8, + length: usize, + prot: u32, + flags: p.MAP, + fd: p.fd_t, + offset: u64, + ) p.MMapError![]align(page_size_min) u8 { + return p.mmap(ptr, length, prot, flags, fd, offset); + } + pub fn mprotect(memory: []align(page_size_min) u8, protection: u32) p.MProtectError!void { + return p.mprotect(memory, protection); + } + pub fn munmap(memory: []align(page_size_min) const u8) void { + p.munmap(memory); + } +}; diff --git a/src/loader.zig b/src/loader.zig new file mode 100644 index 0000000..c3132a3 --- /dev/null +++ b/src/loader.zig @@ -0,0 +1,94 @@ +const std = @import("std"); +const elf = std.elf; +const mem = std.mem; +const posix = std.posix; + +const log = std.log.scoped(.loader); +const page_size = std.heap.pageSize(); + +pub const UnfinishedReadError = error{UnfinishedRead}; + +pub const LoadResult = struct { + base: usize, + size: usize, +}; + +/// Loads all `PT_LOAD` segments of an ELF file into memory. +/// +/// For `ET_EXEC` (non-PIE), segments are mapped at their fixed virtual addresses (`p_vaddr`). +/// For `ET_DYN` (PIE), segments are mapped at a random base address chosen by the kernel. +/// +/// It handles zero-initialized(e.g., .bss) sections by mapping anonymous memory and only reading +/// `p_filesz` bytes from the file, ensuring `p_memsz` bytes are allocated. 
+pub fn loadStaticElf(ehdr: elf.Header, file_reader: *std.fs.File.Reader) !LoadResult {
+    // NOTE: In theory we could also just look at the first and last loadable segment because the
+    // ELF spec mandates these to be in ascending order of `p_vaddr`, but better be safe than sorry.
+    // https://gabi.xinuos.com/elf/08-pheader.html#:~:text=ascending%20order
+    const minva, const maxva = bounds: {
+        var minva: u64 = std.math.maxInt(u64);
+        var maxva: u64 = 0;
+        var phdrs = ehdr.iterateProgramHeaders(file_reader);
+        while (try phdrs.next()) |phdr| {
+            if (phdr.p_type != elf.PT_LOAD) continue;
+            minva = @min(minva, phdr.p_vaddr);
+            maxva = @max(maxva, phdr.p_vaddr + phdr.p_memsz);
+        }
+        minva = mem.alignBackward(usize, minva, page_size);
+        maxva = mem.alignForward(usize, maxva, page_size);
+        log.debug("Calculated bounds: minva=0x{x}, maxva=0x{x}", .{ minva, maxva });
+        break :bounds .{ minva, maxva };
+    };
+
+    // Check that the needed memory region can be allocated as a whole.
+    const dynamic = ehdr.type == elf.ET.DYN;
+    log.debug("ELF type is {s}", .{if (dynamic) "DYN" else "EXEC (static)"});
+    const hint = if (dynamic) null else @as(?[*]align(page_size) u8, @ptrFromInt(minva));
+    log.debug("mmap pre-flight hint: {*}", .{hint});
+    const base = try posix.mmap(
+        hint,
+        maxva - minva,
+        posix.PROT.WRITE,
+        .{ .TYPE = .PRIVATE, .ANONYMOUS = true, .FIXED_NOREPLACE = !dynamic },
+        -1,
+        0,
+    );
+    log.debug("Pre-flight reservation at: {*}, size: 0x{x}", .{ base.ptr, base.len });
+
+    var phdrs = ehdr.iterateProgramHeaders(file_reader);
+    var phdr_idx: u32 = 0;
+    errdefer posix.munmap(base);
+    while (try phdrs.next()) |phdr| : (phdr_idx += 1) {
+        if (phdr.p_type != elf.PT_LOAD) continue;
+        if (phdr.p_memsz == 0) continue;
+
+        const offset = phdr.p_vaddr & (page_size - 1);
+        const size = mem.alignForward(usize, phdr.p_memsz + offset, page_size);
+        var start = mem.alignBackward(usize, phdr.p_vaddr, page_size);
+        const base_for_dyn = if (dynamic) @intFromPtr(base.ptr)
else 0; + start += base_for_dyn; + log.debug( + " - phdr[{}]: mapping 0x{x} - 0x{x} (vaddr=0x{x}, dyn_base=0x{x})", + .{ phdr_idx, start, start + size, phdr.p_vaddr, base_for_dyn }, + ); + const ptr: []align(page_size) u8 = @as([*]align(page_size) u8, @ptrFromInt(start))[0..size]; + // TODO: we should likely just use mmap instead because then not touched memory isn't loaded + // unnecessarily + try file_reader.seekTo(phdr.p_offset); + if (try file_reader.read(ptr[offset..][0..phdr.p_filesz]) != phdr.p_filesz) + return UnfinishedReadError.UnfinishedRead; + + const protections = elfToMmapProt(phdr.p_flags); + try posix.mprotect(ptr, protections); + } + log.debug("loadElf returning base: 0x{x}, size: 0x{x}", .{ @intFromPtr(base.ptr), base.len }); + return .{ .base = @intFromPtr(base.ptr), .size = base.len }; +} + +/// Converts ELF program header protection flags to mmap protection flags. +pub fn elfToMmapProt(elf_prot: u64) u32 { + var result: u32 = posix.PROT.NONE; + if ((elf_prot & elf.PF_R) != 0) result |= posix.PROT.READ; + if ((elf_prot & elf.PF_W) != 0) result |= posix.PROT.WRITE; + if ((elf_prot & elf.PF_X) != 0) result |= posix.PROT.EXEC; + return result; +} diff --git a/src/main.zig b/src/main.zig index 19fa67b..651dec7 100644 --- a/src/main.zig +++ b/src/main.zig @@ -8,6 +8,7 @@ const testing = std.testing; const log = std.log.scoped(.flicker); const Patcher = @import("Patcher.zig"); +const loader = @import("loader.zig"); const assert = std.debug.assert; @@ -16,8 +17,8 @@ pub const std_options: std.Options = .{ .log_scope_levels = &.{ .{ .scope = .disassembler, .level = .info }, .{ .scope = .patcher, .level = .debug }, - .{ .scope = .patch_location_iterator, .level = .warn }, .{ .scope = .flicker, .level = .info }, + .{ .scope = .loader, .level = .info }, }, }; const page_size = std.heap.pageSize(); @@ -32,6 +33,12 @@ const help = const UnfinishedReadError = error{UnfinishedRead}; +/// This needs to be a public global, such that it has a static memory 
location. This is needed +/// for the syscall interception, in particular for patching new maps of the `mmap` call. +pub var patcher: Patcher = undefined; +pub var target_exec_path_buf: [std.fs.max_path_bytes]u8 = @splat(0); +pub var target_exec_path: []const u8 = undefined; + pub fn main() !void { // Parse arguments var arg_index: u64 = 1; // Skip own name @@ -51,27 +58,29 @@ pub fn main() !void { const file = try lookupFile(mem.sliceTo(std.os.argv[arg_index], 0)); - { - // Initialize patcher - try Patcher.init(); - // Resolve the absolute path of the target executable. This is needed for the - // readlink("/proc/self/exe") interception. We use the file descriptor to get the - // authoritative path. - var self_buf: [128]u8 = undefined; - const fd_path = try std.fmt.bufPrint(&self_buf, "/proc/self/fd/{d}", .{file.handle}); - Patcher.target_exec_path = try std.fs.readLinkAbsolute(fd_path, &Patcher.target_exec_path_buf); - log.debug("Resolved target executable path: {s}", .{Patcher.target_exec_path}); - } + patcher = try .init(std.heap.page_allocator); + + // Resolve the absolute path of the target executable for /proc/self/exe spoofing + const fd_path = try std.fmt.bufPrint(&target_exec_path_buf, "/proc/self/fd/{d}", .{file.handle}); + target_exec_path = try std.fs.readLinkAbsolute(fd_path, &target_exec_path_buf); + log.debug("Resolved target executable path: {s}", .{target_exec_path}); + + try bootstrapMemoryMap(&patcher); + // TODO: + // block until `mmap_min_addr` + // block all entries in `proc/self/maps` // Map file into memory var file_buffer: [128]u8 = undefined; var file_reader = file.reader(&file_buffer); log.info("--- Loading executable: {s} ---", .{std.os.argv[arg_index]}); const ehdr = try elf.Header.read(&file_reader.interface); - const base = try loadStaticElf(ehdr, &file_reader); + const load_result = try loader.loadStaticElf(ehdr, &file_reader); + const base = load_result.base; const entry = ehdr.entry + if (ehdr.type == .DYN) base else 0; 
log.info("Executable loaded: base=0x{x}, entry=0x{x}", .{ base, entry }); - try patchLoadedElf(base); + try patcher.address_allocator.block(.fromPtr(@ptrFromInt(base), load_result.size)); + try patchLoadedElf(load_result.base); // Check for dynamic linker var maybe_interp_base: ?usize = null; @@ -96,13 +105,15 @@ pub fn main() !void { var interp_reader = interp.reader(&interp_buffer); const interp_ehdr = try elf.Header.read(&interp_reader.interface); assert(interp_ehdr.type == elf.ET.DYN); - const interp_base = try loadStaticElf(interp_ehdr, &interp_reader); + const interp_result = try loader.loadStaticElf(interp_ehdr, &interp_reader); + const interp_base = interp_result.base; maybe_interp_base = interp_base; maybe_interp_entry = interp_ehdr.entry + if (interp_ehdr.type == .DYN) interp_base else 0; log.info( "Interpreter loaded: base=0x{x}, entry=0x{x}", .{ interp_base, maybe_interp_entry.? }, ); + try patcher.address_allocator.block(.fromPtr(@ptrFromInt(interp_base), interp_result.size)); try patchLoadedElf(interp_base); interp.close(); } @@ -118,9 +129,12 @@ pub fn main() !void { elf.AT_ENTRY => entry, elf.AT_EXECFN => @intFromPtr(std.os.argv[arg_index]), elf.AT_SYSINFO_EHDR => blk: { - log.info("Found vDSO at 0x{x}", .{auxv[i].a_un.a_val}); - try patchLoadedElf(auxv[i].a_un.a_val); - break :blk auxv[i].a_un.a_val; + const vdso_base = auxv[i].a_un.a_val; + log.info("Found vDSO at 0x{x}", .{vdso_base}); + try patchLoadedElf(vdso_base); + break :blk vdso_base; + // NOTE: We do not need to block this, because it's already done by the initial + // `/proc/self/maps` pass. }, elf.AT_EXECFD => { @panic("Got AT_EXECFD auxv value"); @@ -163,77 +177,6 @@ pub fn main() !void { trampoline(final_entry, argc); } -/// Loads all `PT_LOAD` segments of an ELF file into memory. -/// -/// For `ET_EXEC` (non-PIE), segments are mapped at their fixed virtual addresses (`p_vaddr`). -/// For `ET_DYN` (PIE), segments are mapped at a random base address chosen by the kernel. 
-/// -/// It handles zero-initialized(e.g., .bss) sections by mapping anonymous memory and only reading -/// `p_filesz` bytes from the file, ensuring `p_memsz` bytes are allocated. -fn loadStaticElf(ehdr: elf.Header, file_reader: *std.fs.File.Reader) !usize { - // NOTE: In theory we could also just look at the first and last loadable segment because the - // ELF spec mandates these to be in ascending order of `p_vaddr`, but better be safe than sorry. - // https://gabi.xinuos.com/elf/08-pheader.html#:~:text=ascending%20order - const minva, const maxva = bounds: { - var minva: u64 = std.math.maxInt(u64); - var maxva: u64 = 0; - var phdrs = ehdr.iterateProgramHeaders(file_reader); - while (try phdrs.next()) |phdr| { - if (phdr.p_type != elf.PT_LOAD) continue; - minva = @min(minva, phdr.p_vaddr); - maxva = @max(maxva, phdr.p_vaddr + phdr.p_memsz); - } - minva = mem.alignBackward(usize, minva, page_size); - maxva = mem.alignForward(usize, maxva, page_size); - log.debug("Calculated bounds: minva=0x{x}, maxva=0x{x}", .{ minva, maxva }); - break :bounds .{ minva, maxva }; - }; - - // Check, that the needed memory region can be allocated as a whole. 
We do this - const dynamic = ehdr.type == elf.ET.DYN; - log.debug("ELF type is {s}", .{if (dynamic) "DYN" else "EXEC (static)"}); - const hint = if (dynamic) null else @as(?[*]align(page_size) u8, @ptrFromInt(minva)); - log.debug("mmap pre-flight hint: {*}", .{hint}); - const base = try posix.mmap( - hint, - maxva - minva, - posix.PROT.WRITE, - .{ .TYPE = .PRIVATE, .ANONYMOUS = true, .FIXED_NOREPLACE = !dynamic }, - -1, - 0, - ); - log.debug("Pre-flight reservation at: {*}, size: 0x{x}", .{ base.ptr, base.len }); - - var phdrs = ehdr.iterateProgramHeaders(file_reader); - var phdr_idx: u32 = 0; - errdefer posix.munmap(base); - while (try phdrs.next()) |phdr| : (phdr_idx += 1) { - if (phdr.p_type != elf.PT_LOAD) continue; - if (phdr.p_memsz == 0) continue; - - const offset = phdr.p_vaddr & (page_size - 1); - const size = mem.alignForward(usize, phdr.p_memsz + offset, page_size); - var start = mem.alignBackward(usize, phdr.p_vaddr, page_size); - const base_for_dyn = if (dynamic) @intFromPtr(base.ptr) else 0; - start += base_for_dyn; - log.debug( - " - phdr[{}]: mapping 0x{x} - 0x{x} (vaddr=0x{x}, dyn_base=0x{x})", - .{ phdr_idx, start, start + size, phdr.p_vaddr, base_for_dyn }, - ); - const ptr: []align(page_size) u8 = @as([*]align(page_size) u8, @ptrFromInt(start))[0..size]; - // TODO: we should likely just use mmap instead because then not touched memory isn't loaded - // unnecessarily - try file_reader.seekTo(phdr.p_offset); - if (try file_reader.read(ptr[offset..][0..phdr.p_filesz]) != phdr.p_filesz) - return UnfinishedReadError.UnfinishedRead; - - const protections = elfToMmapProt(phdr.p_flags); - try posix.mprotect(ptr, protections); - } - log.debug("loadElf returning base: 0x{x}", .{@intFromPtr(base.ptr)}); - return @intFromPtr(base.ptr); -} - fn patchLoadedElf(base: usize) !void { const ehdr = @as(*const elf.Ehdr, @ptrFromInt(base)); if (!mem.eql(u8, ehdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; @@ -263,20 +206,11 @@ fn patchLoadedElf(base: 
usize) !void { const region = @as([*]align(page_size) u8, @ptrFromInt(page_start))[0..size]; - try Patcher.patchRegion(region); - try posix.mprotect(region, elfToMmapProt(phdr.p_flags)); + try patcher.patchRegion(region); + try posix.mprotect(region, loader.elfToMmapProt(phdr.p_flags)); } } -/// Converts ELF program header protection flags to mmap protection flags. -fn elfToMmapProt(elf_prot: u64) u32 { - var result: u32 = posix.PROT.NONE; - if ((elf_prot & elf.PF_R) != 0) result |= posix.PROT.READ; - if ((elf_prot & elf.PF_W) != 0) result |= posix.PROT.WRITE; - if ((elf_prot & elf.PF_X) != 0) result |= posix.PROT.EXEC; - return result; -} - /// Opens the file by either opening via a (absolute or relative) path or searching through `PATH` /// for a file with the name. // TODO: support paths starting with ~ @@ -317,10 +251,50 @@ fn trampoline(entry: usize, sp: [*]usize) noreturn { unreachable; } +fn bootstrapMemoryMap(p: *Patcher) !void { + { + var min_addr: u64 = 0x10000; + if (std.fs.openFileAbsolute("/proc/sys/vm/mmap_min_addr", .{})) |file| { + defer file.close(); + var buf: [32]u8 = undefined; + if (file.readAll(&buf)) |len| { + const trimmed = std.mem.trim(u8, buf[0..len], " \n\r\t"); + if (std.fmt.parseInt(u64, trimmed, 10)) |val| { + min_addr = val; + } else |_| {} + } else |_| {} + } else |_| {} + try p.address_allocator.block(.{ .start = 0, .end = @intCast(min_addr) }); + } + + { + var maps_file = try std.fs.openFileAbsolute("/proc/self/maps", .{}); + defer maps_file.close(); + var buf: [512]u8 = undefined; + var reader = maps_file.reader(&buf); + while (true) { + const line = reader.interface.takeDelimiterInclusive('\n') catch |err| switch (err) { + error.EndOfStream => break, + error.ReadFailed => |e| return reader.err orelse e, + else => |e| return e, + }; + std.debug.print("{s}", .{line}); + const dash = mem.indexOfScalar(u8, line, '-') orelse continue; + const space = mem.indexOfScalar(u8, line, ' ') orelse continue; + assert(space > dash); + const 
start = std.fmt.parseInt(u64, line[0..dash], 16) catch unreachable; + const end = std.fmt.parseInt(u64, line[dash + 1 .. space], 16) catch unreachable; + // TODO: remove when Range is `u64` + try p.address_allocator.block(.{ + .start = @as(u63, @truncate(start)), + .end = @as(u63, @truncate(end)), + }); + } + } +} + test { - _ = @import("AddressAllocator.zig"); - _ = @import("Range.zig"); - _ = @import("PatchLocationIterator.zig"); + _ = @import("Patcher.zig"); } // TODO: make this be passed in from the build system diff --git a/src/relocation.zig b/src/relocation.zig new file mode 100644 index 0000000..d0f6555 --- /dev/null +++ b/src/relocation.zig @@ -0,0 +1,98 @@ +const dis = @import("disassembler.zig"); +const std = @import("std"); +const math = std.math; +const mem = std.mem; +const zydis = @import("zydis").zydis; + +const assert = std.debug.assert; + +pub const RelocInfo = struct { + instr: dis.BundledInstruction, + old_addr: u64, +}; + +/// Fixes RIP-relative operands in an instruction that has been moved to a new address. +pub fn relocateInstruction( + instruction: dis.BundledInstruction, + address: u64, + buffer: []u8, +) !void { + const instr = instruction.instruction; + // Iterate all operands + for (0..instr.operand_count) |i| { + const operand = &instruction.operands[i]; + + // Check for RIP-relative memory operand + const is_rip_rel = operand.type == zydis.ZYDIS_OPERAND_TYPE_MEMORY and + operand.unnamed_0.mem.base == zydis.ZYDIS_REGISTER_RIP; + // Check for relative immediate (e.g. 
JMP rel32) + const is_rel_imm = operand.type == zydis.ZYDIS_OPERAND_TYPE_IMMEDIATE and + operand.unnamed_0.imm.is_relative == zydis.ZYAN_TRUE; + if (!is_rip_rel and !is_rel_imm) continue; + + // We have to apply a relocation + var result_address: u64 = 0; + const status = zydis.ZydisCalcAbsoluteAddress( + instr, + operand, + instruction.address, + &result_address, + ); + assert(zydis.ZYAN_SUCCESS(status)); // TODO: maybe return an error instead + + // Calculate new displacement relative to the new address + // The instruction length remains the same. + const next_rip: i64 = @intCast(address + instr.length); + const new_disp = @as(i64, @intCast(result_address)) - next_rip; + + var offset: u16 = 0; + var size_bits: u8 = 0; + + if (is_rip_rel) { + offset = instr.raw.disp.offset; + size_bits = instr.raw.disp.size; + } else { + assert(is_rel_imm); + // For relative immediate, find the matching raw immediate. + var found = false; + for (&instr.raw.imm) |*imm| { + if (imm.is_relative == zydis.ZYAN_TRUE) { + offset = imm.offset; + size_bits = imm.size; + found = true; + break; + } + } + assert(found); + } + + assert(offset != 0); + assert(size_bits != 0); + const size_bytes = size_bits / 8; + + if (offset + size_bytes > buffer.len) { + return error.RelocationFail; + } + + const fits = switch (size_bits) { + 8 => new_disp >= math.minInt(i8) and new_disp <= math.maxInt(i8), + 16 => new_disp >= math.minInt(i16) and new_disp <= math.maxInt(i16), + 32 => new_disp >= math.minInt(i32) and new_disp <= math.maxInt(i32), + 64 => true, + else => unreachable, + }; + + if (!fits) { + return error.RelocationOverflow; + } + + const ptr = buffer[offset..]; + switch (size_bits) { + 8 => ptr[0] = @as(u8, @bitCast(@as(i8, @intCast(new_disp)))), + 16 => mem.writeInt(u16, ptr[0..2], @bitCast(@as(i16, @intCast(new_disp))), .little), + 32 => mem.writeInt(u32, ptr[0..4], @bitCast(@as(i32, @intCast(new_disp))), .little), + 64 => mem.writeInt(u64, ptr[0..8], @bitCast(@as(i64, @intCast(new_disp))), 
.little), + else => unreachable, + } + } +} diff --git a/src/syscalls.zig b/src/syscalls.zig index 7e05951..40dacd7 100644 --- a/src/syscalls.zig +++ b/src/syscalls.zig @@ -1,11 +1,12 @@ const std = @import("std"); const linux = std.os.linux; const posix = std.posix; -const Patcher = @import("Patcher.zig"); -const assert = std.debug.assert; +const assert = std.debug.assert; const page_size = std.heap.pageSize(); +const main = @import("main.zig"); + const log = std.log.scoped(.syscalls); /// Represents the stack layout pushed by `syscallEntry` before calling the handler. @@ -114,7 +115,7 @@ export fn syscall_handler(ctx: *SavedContext) callconv(.c) void { // mmap addresses are always page aligned const ptr = @as([*]align(page_size) u8, @ptrFromInt(addr)); // Check if we can patch it - Patcher.patchRegion(ptr[0..len]) catch |err| { + main.patcher.patchRegion(ptr[0..len]) catch |err| { std.log.warn("JIT Patching failed: {}", .{err}); }; @@ -132,7 +133,7 @@ export fn syscall_handler(ctx: *SavedContext) callconv(.c) void { // mprotect requires addr to be page aligned. if (len > 0 and std.mem.isAligned(addr, page_size)) { const ptr = @as([*]align(page_size) u8, @ptrFromInt(addr)); - Patcher.patchRegion(ptr[0..len]) catch |err| { + main.patcher.patchRegion(ptr[0..len]) catch |err| { std.log.warn("mprotect Patching failed: {}", .{err}); }; // patchRegion leaves it R|W. @@ -250,7 +251,7 @@ fn isProcSelfExe(path: [*:0]const u8) bool { } fn handleReadlink(buf_addr: u64, buf_size: u64, ctx: *SavedContext) void { - const target = Patcher.target_exec_path; + const target = main.target_exec_path; const len = @min(target.len, buf_size); const dest = @as([*]u8, @ptrFromInt(buf_addr)); @memcpy(dest[0..len], target[0..len]);