Files
flicker/src/AddressAllocator.zig
2026-03-09 11:04:15 +01:00

1129 lines
39 KiB
Zig

const std = @import("std");
const math = std.math;
const mem = std.mem;
const sort = std.sort;
const testing = std.testing;
const assert = std.debug.assert;
const Range = @import("Range.zig");
const log = std.log.scoped(.address_allocator);
const AddressAllocator = @This();
/// The **sorted** list of `Range`s that are blocked.
ranges: std.ArrayListUnmanaged(Range) = .empty,
child_allocator: mem.Allocator,
// TODO: we should likely create an init function that blocks the entire negative address space
pub fn init(child_allocator: mem.Allocator) !AddressAllocator {
var aa: AddressAllocator = .{ .child_allocator = child_allocator };
const ranges = try child_allocator.alloc(Range, std.heap.pageSize() / @sizeOf(Range));
aa.ranges = .initBuffer(ranges);
aa.block(.fromSlice(Range, ranges)) catch unreachable;
return aa;
}
pub fn deinit(self: *AddressAllocator) void {
self.ranges.deinit(self.child_allocator);
}
pub fn allocator(self: *AddressAllocator) mem.Allocator {
return .{
.ptr = self,
.vtable = &.{
.alloc = alloc,
.resize = resize,
.remap = remap,
.free = free,
},
};
}
fn alloc(ctx: *anyopaque, n: usize, alignment: std.mem.Alignment, ra: usize) ?[*]u8 {
const self: *AddressAllocator = @ptrCast(@alignCast(ctx));
const ptr = self.child_allocator.rawAlloc(n, alignment, ra) orelse return null;
self.block(.fromPtr(ptr, n)) catch @panic("OOM");
return ptr;
}
fn resize(
ctx: *anyopaque,
buf: []u8,
alignment: std.mem.Alignment,
new_len: usize,
ret_addr: usize,
) bool {
const self: *AddressAllocator = @ptrCast(@alignCast(ctx));
const success = self.child_allocator.rawResize(buf, alignment, new_len, ret_addr);
if (success) {
self.block(.fromPtr(buf.ptr, new_len)) catch @panic("OOM");
}
return success;
}
fn remap(
context: *anyopaque,
memory: []u8,
alignment: std.mem.Alignment,
new_len: usize,
return_address: usize,
) ?[*]u8 {
const self: *AddressAllocator = @ptrCast(@alignCast(context));
const ptr = self.child_allocator.rawRemap(memory, alignment, new_len, return_address) orelse
return null;
if (ptr != memory.ptr) { // new memory location
self.unblock(.fromSlice(u8, memory)) catch @panic("OOM");
}
self.block(.fromPtr(ptr, new_len)) catch @panic("OOM");
return ptr;
}
fn free(
ctx: *anyopaque,
buf: []u8,
alignment: std.mem.Alignment,
ret_addr: usize,
) void {
const self: *AddressAllocator = @ptrCast(@alignCast(ctx));
self.unblock(.fromSlice(u8, buf)) catch @panic("OOM");
return self.child_allocator.rawFree(buf, alignment, ret_addr);
}
/// Block a range to not be used by the `allocate` function. This function will always succeed, if
/// there is enough memory available.
pub fn block(self: *AddressAllocator, range: Range) !void {
if (range.size() == 0) return;
// Find the correct sorted position to insert the new range.
const insert_idx = sort.lowerBound(
Range,
self.ranges.items,
range,
Range.compareTouching,
);
log.debug(
"block: range: {f}, insert_idx: {}",
.{ range, insert_idx },
);
// If we don't overlap any existing one, we just insert.
if (insert_idx == self.ranges.items.len or
self.ranges.items[insert_idx].compareTouching(range) == .gt)
{
return self.ranges.insert(self.child_allocator, insert_idx, range);
}
errdefer comptime unreachable;
assert(self.ranges.items.len > 0);
// Now `insert_idx` points to the first entry, that touches `range`.
const first = &self.ranges.items[insert_idx];
assert(first.touches(range));
if (insert_idx > 0 and self.ranges.items.len > 0) {
assert(!self.ranges.items[insert_idx - 1].touches(range));
}
log.debug("block: `range` touches at least one existing range.", .{});
first.start = @min(first.start, range.start);
first.end = @max(first.end, range.end);
// Merge any following overlapping ranges into this one.
// NOTE: We "iterate" through the slice by removing unneeded items and moving all following ones
// back by one. That's why we always look at `insert_idx + 1`.
while (insert_idx + 1 < self.ranges.items.len and
self.ranges.items[insert_idx + 1].touches(range))
{
const neighbor = self.ranges.items[insert_idx + 1];
assert(range.end >= neighbor.start);
assert(range.start <= neighbor.start);
first.end = @max(first.end, neighbor.end);
_ = self.ranges.orderedRemove(insert_idx + 1);
}
}
pub fn unblock(
self: *AddressAllocator,
range: Range,
) !void {
// Find the correct sorted position to remove the range.
var remove_idx = sort.lowerBound(
Range,
self.ranges.items,
range,
Range.compareOverlapping,
);
log.debug(
"unblock: range: {f}, remove_idx: {}",
.{ range, remove_idx },
);
// If we don't overlap any existing one, we just return.
if (remove_idx == self.ranges.items.len or
self.ranges.items[remove_idx].compareOverlapping(range) == .gt)
{
log.debug("unblock: Range to unblock overlaps nothing", .{});
for (self.ranges.items) |r| {
assert(!r.overlaps(range));
}
return;
}
assert(self.ranges.items.len > 0);
// Now `remove_idx` points to the first entry, that touches `range`.
const first = &self.ranges.items[remove_idx];
assert(first.touches(range));
if (remove_idx > 0 and self.ranges.items.len > 0) {
assert(!self.ranges.items[remove_idx - 1].overlaps(range));
}
log.debug("unblock: `range` touches at least one existing range.", .{});
// We have multiple cases for the first touching range:
//
// [ range to unblock ]
// 0 [ first ] -> split
//
// [ range to unblock ]
// 1 [ first ]
// 1 [ first ] -> change start
//
// [ range to unblock ]
// 2 [ first ]
// 2 [ first ]
// 2 [ first ] -> remove
//
// [ range to unblock ]
// 3 [ first ]
// 3 [ first ] -> change end
//
// If it's cases 0 or 1 the operation is finished because we can't overlap another one. For cases 2
// and 3 we will have to remove the following ranges until we arrive at one of the following cases:
// 1.
// [ range to unblock ]
// [ last ]
// 2.
// [ range to unblock ]
// [ last ]
//
if (first.start < range.start and first.end > range.end) {
const old_end = first.end;
first.end = range.start;
try self.ranges.insert(self.child_allocator, remove_idx + 1, .{
.start = range.end,
.end = old_end,
});
return;
} else if (first.start >= range.start and first.start < range.end and first.end > range.end) {
first.start = range.end;
return;
} else if (first.start >= range.start and first.end <= range.end) {
_ = self.ranges.orderedRemove(remove_idx);
} else if (first.start < range.start and first.end > range.start and first.end <= range.end) {
first.end = range.start;
remove_idx += 1;
} else {
unreachable;
}
// NOTE: We "iterate" through the slice by removing unneeded items and moving all following ones
// back by one. That's why we always look at `insert_idx + 1`.
while (remove_idx < self.ranges.items.len) {
const next_range = &self.ranges.items[remove_idx];
if (next_range.start >= range.end) break;
if (next_range.end <= range.end) {
_ = self.ranges.orderedRemove(remove_idx);
} else {
next_range.start = range.end;
break;
}
}
}
test "fuzz against bitset" {
const iterations = 64 * 1024;
const size = 1024;
var aa = AddressAllocator{ .child_allocator = testing.allocator };
defer aa.deinit();
var bitset_ref = try std.bit_set.DynamicBitSetUnmanaged.initEmpty(testing.allocator, size);
defer bitset_ref.deinit(testing.allocator);
var prng = std.Random.DefaultPrng.init(testing.random_seed);
const random = prng.random();
var expected_ranges = try std.ArrayListUnmanaged(Range).initCapacity(testing.allocator, size / 2);
defer expected_ranges.deinit(testing.allocator);
var bitset_temp = try std.bit_set.DynamicBitSetUnmanaged.initEmpty(testing.allocator, size);
defer bitset_temp.deinit(testing.allocator);
for (0..iterations) |_| {
const is_block = random.boolean();
const start = random.intRangeLessThan(usize, 0, size);
const len = random.intRangeAtMost(usize, 1, size - start);
const end = start + len;
const range = Range{ .start = @intCast(start), .end = @intCast(end) };
if (is_block) {
try aa.block(range);
bitset_ref.setRangeValue(.{ .start = start, .end = end }, true);
} else {
try aa.unblock(range);
bitset_ref.setRangeValue(.{ .start = start, .end = end }, false);
}
bitset_temp.unsetAll();
for (aa.ranges.items) |r| {
bitset_temp.setRangeValue(.{ .start = @intCast(r.start), .end = @intCast(r.end) }, true);
}
try testing.expect(bitset_ref.eql(bitset_temp));
}
}
/// An internal iterator that cleanly yields unblocked memory holes.
const HoleIterator = struct {
ranges: []const Range,
valid_range: Range,
size: i64,
candidate_start: i64,
idx: usize,
fn init(aa: *const AddressAllocator, valid_range: Range, size: u64) HoleIterator {
const start_idx = sort.lowerBound(
Range,
aa.ranges.items,
valid_range,
Range.compareOverlapping,
);
return .{
.ranges = aa.ranges.items,
.valid_range = valid_range,
.size = @intCast(size),
.candidate_start = valid_range.start,
.idx = start_idx,
};
}
fn next(self: *HoleIterator) ?Range {
while (self.idx < self.ranges.len) {
const reserved = self.ranges[self.idx];
if (self.candidate_start >= self.valid_range.end) return null;
if (self.candidate_start < reserved.start) {
const hole_end = @min(reserved.start, self.valid_range.end);
const hole_start = self.candidate_start;
self.candidate_start = reserved.end;
if (hole_end >= hole_start + self.size) {
return Range{ .start = hole_start, .end = hole_end };
}
} else {
self.candidate_start = @max(self.candidate_start, reserved.end);
}
self.idx += 1;
}
if (self.candidate_start < self.valid_range.end) {
const hole_start = self.candidate_start;
const hole_end = self.valid_range.end;
self.candidate_start = self.valid_range.end; // Mark done to prevent infinite loops
if (hole_end >= hole_start + self.size) {
return Range{ .start = hole_start, .end = hole_end };
}
}
return null;
}
test {
var aa = AddressAllocator{ .child_allocator = testing.allocator };
defer aa.deinit();
try aa.block(.{ .start = 100, .end = 200 });
try aa.block(.{ .start = 300, .end = 400 });
var it = HoleIterator.init(&aa, .{ .start = 0, .end = 500 }, 10);
try testing.expectEqual(Range{ .start = 0, .end = 100 }, it.next().?);
try testing.expectEqual(Range{ .start = 200, .end = 300 }, it.next().?);
try testing.expectEqual(Range{ .start = 400, .end = 500 }, it.next().?);
try testing.expectEqual(null, it.next());
}
};
const Constraint = struct {
min_rel: i32,
max_rel: i32,
mask: u32,
pattern: u32,
};
/// Solves a single 32-bit relative jump constraint in O(1) time.
///
/// Returns the smallest `rel32` such that
/// - `min_rel <= rel32 <= max_rel` and
/// - `(rel32 & mask) == pattern`
///
/// Context:
/// During "Instruction Punning", we overwrite an instruction with a 5-byte jump (`E9 xx xx xx xx`).
/// If the original instruction is smaller than 5 bytes, our jump offset (`xx xx xx xx`) will spill
/// into the next instruction. To prevent crashing, the spilled bytes must form the successor
/// instruction. This restricts certain bits/bytes of our `rel32` offset to fixed values.
///
/// The algorithm uses a bit-twiddling hack to isolate the "free" (unmasked) bits, increment them as
/// a single continuous integer, and map them back around the fixed "pattern" bits, completely
/// avoiding loops over the search space.
///
/// Visualization of the bit-twiddling constraint logic:
/// -------------------------------------------------------------------------
/// Mask: 1111 1111 0000 0000 1111 1111 0000 0000 (1 = Locked bits)
/// Pattern: 0000 0000 0000 0000 1110 1001 0000 0000 (The forced values)
/// Free: 0000 0000 1111 1111 0000 0000 1111 1111 (~Mask)
///
/// Current Candidate: [ Fixed A ] [ Free 1 ] [ Fixed B ] [ Free 0 ]
///
/// If `Current Candidate < min_rel`, we add 1 to the "Free" bits.
/// The hack `(((candidate & free) | mask) + 1) & free` allows the arithmetic carry to jump over the
/// fixed bits without corrupting them:
///
/// Next Valid Val: [ Fixed A ][ Free 1 + carry ] [ Fixed B ] [ Free 0 + 1 ]
/// -------------------------------------------------------------------------
fn solveRelativeConstraint(c: Constraint) ?i32 {
log.debug(
"solveRelative: min: {x}, max: {x}, mask: {x}, pattern: {x}",
.{ c.min_rel, c.max_rel, c.mask, c.pattern },
);
assert((c.pattern & ~c.mask) == 0);
if (c.min_rel > c.max_rel) return null;
// Force the pattern onto the current minimum value
var candidate: u32 = (@as(u32, @bitCast(c.min_rel)) & ~c.mask) | c.pattern;
log.debug(" candidate (init): {x}", .{candidate});
// If forcing the pattern made the value smaller than min_rel, we must increment the "free" bits
// to find the next valid higher number.
if (@as(i32, @bitCast(candidate)) < c.min_rel) {
if (~c.mask == 0) {
log.debug(" failed: fully constrained", .{});
return null;
}
const incremented_free = (((candidate & ~c.mask) | c.mask) +% 1) & ~c.mask;
assert(incremented_free & c.mask == 0); // All constrained bits are 0
candidate = incremented_free | c.pattern;
log.debug(" candidate (incr): {x}", .{candidate});
}
const result: i32 = @bitCast(candidate);
if (result >= c.min_rel and result <= c.max_rel) {
log.debug(" success: {x}", .{result});
return result;
}
log.debug(" failed: result {x} out of bounds", .{result});
return null;
}
test "solveRelativeConstraint basic" {
try testing.expectEqual(100, solveRelativeConstraint(.{
.min_rel = 100,
.max_rel = 200,
.mask = 0,
.pattern = 0,
}));
}
test "solveRelativeConstraint aligned" {
try testing.expectEqual(0x10E8, solveRelativeConstraint(.{
.min_rel = 0x1000,
.max_rel = 0x2000,
.mask = 0xFF,
.pattern = 0xE8,
}));
try testing.expectEqual(0x10E8, solveRelativeConstraint(.{
.min_rel = 0x10E8,
.max_rel = 0x2000,
.mask = 0xFF,
.pattern = 0xE8,
}));
try testing.expectEqual(0x11E8, solveRelativeConstraint(.{
.min_rel = 0x10E9,
.max_rel = 0x2000,
.mask = 0xFF,
.pattern = 0xE8,
}));
}
test "solveRelativeConstraint negative" {
try testing.expectEqual(@as(i32, @bitCast(@as(u32, 0xFFFFF0E8))), solveRelativeConstraint(.{
.min_rel = -0x1000,
.max_rel = 0,
.mask = 0xFF,
.pattern = 0xE8,
}));
}
test "solveRelativeConstraint impossible" {
try testing.expectEqual(null, solveRelativeConstraint(.{
.min_rel = 0x1000,
.max_rel = 0x10E7,
.mask = 0xFF,
.pattern = 0xE8,
}));
try testing.expectEqual(null, solveRelativeConstraint(.{
.min_rel = 0x10000000,
.max_rel = 0x11000000,
.mask = 0xFFFFFFFF,
.pattern = 0x12345678,
}));
}
test "solveRelativeConstraint overflow" {
try testing.expectEqual(0x12345678, solveRelativeConstraint(.{
.min_rel = 0x10000000,
.max_rel = 0x20000000,
.mask = 0xFFFFFFFF,
.pattern = 0x12345678,
}));
try testing.expectEqual(null, solveRelativeConstraint(.{
.min_rel = 2147483640,
.max_rel = 2147483647,
.mask = 0xFF,
.pattern = 0x00,
}));
}
pub const Request = struct {
source: u64,
size: u64,
valid_range: Range,
mask: u32 = 0,
pattern: u32 = 0,
};
/// Finds the first free range of `size` bytes within `valid_range` that also satisfies the relative
/// 32-bit jump constraints `mask` and `pattern` from `jump_source`.
/// Runs in `O(|H| + log(#R))` for
/// - `H` being the set of holes in the valid range and
/// - `#R` being the number of ranges in the AddressAllocator.
pub fn findAllocation(
self: *AddressAllocator,
r: Request,
) ?Range {
if (r.valid_range.size() < r.size) return null;
if (r.size == 0) return null;
var it = HoleIterator.init(self, r.valid_range, r.size);
while (it.next()) |hole| {
log.debug("findAllocation: Hole: {f}", .{hole});
const bounds = getRelativeBounds(hole, @intCast(r.size), r.source) orelse continue;
const rel32 = solveRelativeConstraint(.{
.min_rel = bounds.min,
.max_rel = bounds.max,
.mask = r.mask,
.pattern = r.pattern,
}) orelse continue;
const start = @as(i64, @intCast(r.source)) + rel32;
const end = start + @as(i64, @intCast(r.size));
assert(end - start == r.size);
assert(start >= r.valid_range.start);
assert(end <= r.valid_range.end);
return .{ .start = start, .end = end };
}
return null;
}
fn getRelativeBounds(hole: Range, size: i64, source: u64) ?struct { min: i32, max: i32 } {
if (hole.end - hole.start < size) return null;
const offset_to_min = hole.start - @as(i64, @intCast(source));
const offset_to_max = (hole.end - size) - @as(i64, @intCast(source));
const min_rel = @max(offset_to_min, math.minInt(i32));
const max_rel = @min(offset_to_max, math.maxInt(i32));
if (min_rel > max_rel) return null;
return .{
.min = @intCast(min_rel),
.max = @intCast(max_rel),
};
}
test "findConstrainedAllocation" {
var aa = AddressAllocator{ .child_allocator = testing.allocator };
defer aa.deinit();
try aa.block(.{ .start = 0x1000, .end = 0x2000 });
try aa.block(.{ .start = 0x3000, .end = 0x4000 });
try testing.expectEqual(
Range{ .start = 0x00AA, .end = 0x00BA },
aa.findAllocation(.{
.size = 0x10,
.valid_range = .{ .start = 0x0000, .end = 0x4000 },
.source = 0,
.mask = 0xFF,
.pattern = 0xAA,
}),
);
try testing.expectEqual(
Range{ .start = 0x20AA, .end = 0x20BA },
aa.findAllocation(.{
.size = 0x10,
.valid_range = .{ .start = 0x1000, .end = 0x4000 },
.source = 0,
.mask = 0xFF,
.pattern = 0xAA,
}),
);
try testing.expectEqual(
null,
aa.findAllocation(.{
.size = 0x10,
.valid_range = .{ .start = 0x2000, .end = 0x8000 },
.source = 0,
.mask = 0xFFFF,
.pattern = 0xAAAA,
}),
);
try testing.expectEqual(
Range{ .start = 0x40AA, .end = 0x50AA },
aa.findAllocation(.{
.size = 0x1000,
.valid_range = .{ .start = 0x2000, .end = 0x8000 },
.source = 0,
.mask = 0xFF,
.pattern = 0xAA,
}),
);
}
pub const CoupledResult = struct {
rel1: i32,
rel2: i32,
};
/// Attempts to find a joint bit-pattern that satisfies two overlapping jump constraints.
///
/// Context:
/// In tactics like Successor Eviction, we overwrite two adjacent instructions with 5-byte jumps (J1
/// and J2). If the distance between them is less than 5 bytes, their physical bytes overlap in
/// memory.
///
/// `k` represents the physical distance (in bytes) between the start of J1 and J2 (1 <= k <= 4).
/// Because x86_64 uses Little-Endian representation, the Most Significant Bytes (MSB) of J1's
/// relative offset (`rel1`) physically overlap with the Least Significant Bytes (LSB) of J2's
/// relative offset (`rel2`).
///
/// Furthermore, J2's opcode (`0xE9`) falls squarely inside the bytes of `rel1`.
///
/// Memory Layout & Endianness Overlap (Example where K = 2):
/// -----------------------------------------------------------------------------------
/// Memory Offset: +0 +1 +2 +3 +4 +5 +6
/// J1 Bytes: [0xE9] [ X0 ] [ X1 ] [ X2 ] [ X3 ]
/// J2 Bytes: [0xE9] [ Y0 ] [ Y1 ] [ Y2 ] [ Y3 ]
///
/// Consequences for `rel1` (X) and `rel2` (Y):
/// 1. Opcode Constraint: `X1` MUST exactly equal `0xE9`.
/// 2. Shared Bytes (S): `X2` MUST exactly equal `Y0`.
/// `X3` MUST exactly equal `Y1`.
/// -----------------------------------------------------------------------------------
///
/// Algorithm ("The Squeeze"):
/// Iterating possibly billions of combinations of X and Y is too slow. Instead, we use the
/// constraints of the memory layout:
///
/// `rel1` is constrained to a physical memory hole `[min1, max1]`. Because memory holes are usually
/// small (e.g., 4KB), the Most Significant Bytes of `rel1` (which are exactly our Shared Bytes 'S')
/// are heavily restricted.
///
/// There are usually only a few possible values for S:
/// 1. We extract the possible values for S from `min1..max1`.
/// 2. We apply S as a strict constraint on the lower bytes of `rel2`.
/// 3. We delegate the remaining independent bits (X0, Y2 and Y3) to the `solveRelativeConstraint`.
///
/// Parameters:
/// `k`: The physical byte offset of J2 relative to J1 (1 <= k <= 4).
/// `min1`, `max1`: The valid rel32 hardware bounds for J1.
/// `min2`, `max2`: The valid rel32 hardware bounds for J2.
/// `mask1`, `pattern1`: The original byte constraints on J1.
/// `mask2`, `pattern2`: The original byte constraints on J2.
pub fn solveCoupledConstraint(
k: u8,
c1: Constraint,
c2: Constraint,
) ?CoupledResult {
log.debug("solveCoupled: k={}", .{k});
log.debug(" C1: min={x} max={x} mask={x} pat={x}", .{ c1.min_rel, c1.max_rel, c1.mask, c1.pattern });
log.debug(" C2: min={x} max={x} mask={x} pat={x}", .{ c2.min_rel, c2.max_rel, c2.mask, c2.pattern });
assert(k >= 1);
assert(k <= 4);
// The opcode for J2 (0xE9) physically falls inside rel32 of J1 at byte index `k - 1` of rel1.
const e9_shift = @as(u5, @intCast(k - 1)) * 8;
const e9_mask = @as(u32, 0xFF) << e9_shift;
if ((c1.mask & e9_mask) != 0 and (c1.pattern & e9_mask) != (@as(u32, 0xE9) << e9_shift)) {
log.debug(" failed: opcode 0xE9 conflict in C1", .{});
return null; // Caller's pattern conflicts with the mandatory J2 opcode
}
const c_mask1 = c1.mask | e9_mask;
const c_pattern1 = (c1.pattern & ~e9_mask) | (@as(u32, 0xE9) << e9_shift);
if (k == 4) {
// J1 is completely resolved just with the 0xE9 constraint applied above.
log.debug(" Fast path K=4", .{});
const rel1 = solveRelativeConstraint(.{
.min_rel = c1.min_rel,
.max_rel = c1.max_rel,
.mask = c_mask1,
.pattern = c_pattern1,
}) orelse return null;
const rel2 = solveRelativeConstraint(.{
.min_rel = c2.min_rel,
.max_rel = c2.max_rel,
.mask = c2.mask,
.pattern = c2.pattern,
}) orelse return null;
return .{ .rel1 = rel1, .rel2 = rel2 };
}
// Determine the bitwise shift and mask for the Shared Bytes (S)
const s_shift = @as(u5, @intCast(k)) * 8;
const num_shared = @as(u5, @intCast(4 - k));
const s_mask = (@as(u32, 1) << (num_shared * 8)) - 1;
log.debug(" Shared Bytes: shift={}, mask={x}", .{ s_shift, s_mask });
var current_min = c1.min_rel;
while (current_min <= c1.max_rel) {
const u_rel: u32 = @bitCast(current_min);
const S = u_rel >> s_shift; // Extract shared bytes from top of rel1
// Calculate the maximum u32 value that shares this S
const max_u_rel_for_S = (S << s_shift) | ((@as(u32, 1) << s_shift) - 1);
const max_i_rel_for_S: i32 = @bitCast(max_u_rel_for_S);
const local_max1 = @min(c1.max_rel, max_i_rel_for_S);
// Does this S conflict with J2's requirements?
if ((c2.mask & s_mask) != 0) {
if ((c2.pattern & c2.mask & s_mask) != (S & c2.mask & s_mask)) {
// Advance to the next block of S.
log.debug(" Conflict at S={x} (min={x})", .{ S, current_min });
if (max_i_rel_for_S == std.math.maxInt(i32)) break;
const next_min = max_i_rel_for_S + 1;
if (next_min > c1.max_rel) break;
current_min = next_min;
continue;
}
}
log.debug(" Trying S={x} range [{x}, {x}]", .{ S, current_min, local_max1 });
// Apply S as a strict constraint on the lowest bytes of J2
const c_mask2 = c2.mask | s_mask;
const c_pattern2 = (c2.pattern & ~s_mask) | S;
// O(1) solver execution for this specific S value
const opt_rel1 = solveRelativeConstraint(.{
.min_rel = current_min,
.max_rel = local_max1,
.mask = c_mask1,
.pattern = c_pattern1,
});
const opt_rel2 = solveRelativeConstraint(.{
.min_rel = c2.min_rel,
.max_rel = c2.max_rel,
.mask = c_mask2,
.pattern = c_pattern2,
});
if (opt_rel1 != null and opt_rel2 != null) {
log.debug(" Success: rel1={x} rel2={x}", .{ opt_rel1.?, opt_rel2.? });
return .{ .rel1 = opt_rel1.?, .rel2 = opt_rel2.? };
}
if (max_i_rel_for_S == std.math.maxInt(i32)) break;
const next_min = max_i_rel_for_S + 1;
if (next_min > c1.max_rel) break;
current_min = next_min;
}
log.debug(" failed: no coupled solution found", .{});
return null;
}
test "solveCoupledConstraint K=4 (Independent)" {
// If K=4, J1 and J2 don't share rel32 bytes, but byte 3 of rel1 MUST be 0xE9 (the J2 opcode).
// Let's force rel1 to be in[0x12000000, 0x120000FF].
// Since highest byte (byte 3) must be 0xE9, no value starting with 0x12 will work.
try testing.expectEqual(null, solveCoupledConstraint(
4,
.{
.min_rel = 0x12000000,
.max_rel = 0x120000FF,
.mask = 0,
.pattern = 0,
},
.{
.min_rel = 0,
.max_rel = 100,
.mask = 0,
.pattern = 0,
},
));
const res = solveCoupledConstraint(
4,
.{
.min_rel = @bitCast(@as(u32, 0xE8000000)),
.max_rel = @bitCast(@as(u32, 0xEA000000)),
.mask = 0,
.pattern = 0,
},
.{
.min_rel = 0x1234,
.max_rel = 0x1234,
.mask = 0,
.pattern = 0,
},
);
try testing.expect(res != null);
try testing.expectEqual(@as(i32, @bitCast(@as(u32, 0xE9000000))), res.?.rel1);
try testing.expectEqual(0x1234, res.?.rel2);
}
test "solveCoupledConstraint K=2 (2 byte overlap)" {
// K=2 means the top 2 bytes of rel1 are the bottom 2 bytes of rel2.
// J2 opcode (0xE9) sits at byte 1 of rel1.
const res = solveCoupledConstraint(
2,
.{
.min_rel = 0x12340000,
.max_rel = 0x1234FFFF,
.mask = 0,
.pattern = 0,
},
.{
.min_rel = 0x00000000,
.max_rel = 0x0000FFFF,
.mask = 0,
.pattern = 0,
},
);
try testing.expect(res != null);
try testing.expectEqual(0x1234E900, res.?.rel1);
try testing.expectEqual(0x00001234, res.?.rel2);
}
test "solveCoupledConstraint K=2 conflict" {
// Same as above, but J2 explicitly forbids lower bytes from being 0x1234.
const res = solveCoupledConstraint(
2,
.{
.min_rel = 0x12340000,
.max_rel = 0x1234FFFF,
.mask = 0,
.pattern = 0,
},
.{
.min_rel = 0x00000000,
.max_rel = 0x0000FFFF,
.mask = 0x0000FFFF,
.pattern = 0x00005678,
},
);
try testing.expectEqual(null, res);
}
test "solveCoupledConstraint K=2 spans multiple S values" {
// We give J1 a wide range:[0x00000000, 0x00060000]. S can be 0 to 6.
// We force J2 to require lower bytes = 0x0004. This forces the solver to skip S=0 and similar
// and find S=4.
const res = solveCoupledConstraint(
2,
.{
.min_rel = 0,
.max_rel = 0x00060000,
.mask = 0,
.pattern = 0,
},
.{
.min_rel = 0,
.max_rel = 0x0000FFFF,
.mask = 0x0000FFFF,
.pattern = 0x00000004,
},
);
try testing.expect(res != null);
try testing.expectEqual(0x0004E900, res.?.rel1);
try testing.expectEqual(0x00000004, res.?.rel2);
}
/// Finds two allocations that simultaneously satisfy their individual offset constraints and the
/// physical overlap constraints of their origin instructions.
/// `r1` (for J1) and `r2` (for J2) separated by `k` bytes.
///
/// Runs in O(|H1| * |H2| + log(#R)) for
/// - `H1` and `H2` being the set of holes in the valid ranges in `r1` and `r2`
/// - `#R` being the number of ranges in the AddressAllocator.
pub fn findCoupledAllocation(
self: *AddressAllocator,
k: u8,
r1: Request,
r2: Request,
) ?[2]Range {
if (r1.valid_range.size() < r1.size or r1.size == 0) return null;
if (r2.valid_range.size() < r2.size or r2.size == 0) return null;
assert(r2.source > r1.source);
assert(r2.source - r1.source == k);
var it1 = HoleIterator.init(self, r1.valid_range, r1.size);
while (it1.next()) |hole1| {
log.debug("findCoupledAllocation: Hole1: {f}", .{hole1});
const b1 = getRelativeBounds(hole1, @intCast(r1.size), r1.source) orelse continue;
var it2 = HoleIterator.init(self, r2.valid_range, r2.size);
while (it2.next()) |hole2| {
log.debug(" Hole2: {f}", .{hole2});
const b2 = getRelativeBounds(hole2, @intCast(r2.size), r2.source) orelse continue;
const c1 = Constraint{
.min_rel = b1.min,
.max_rel = b1.max,
.mask = r1.mask,
.pattern = r1.pattern,
};
const c2 = Constraint{
.min_rel = b2.min,
.max_rel = b2.max,
.mask = r2.mask,
.pattern = r2.pattern,
};
if (solveCoupledConstraint(k, c1, c2)) |result| {
const start1 = @as(i64, @intCast(r1.source)) + result.rel1;
const end1 = start1 + @as(i64, @intCast(r1.size));
const start2 = @as(i64, @intCast(r2.source)) + result.rel2;
const end2 = start2 + @as(i64, @intCast(r2.size));
assert(end1 - start1 == r1.size);
assert(end2 - start2 == r2.size);
// If we used the same hole, we must ensure the actual allocations don't overlap.
const range1 = Range{ .start = start1, .end = end1 };
const range2 = Range{ .start = start2, .end = end2 };
// TODO: Support allocating both trampolines in the exact same memory hole.
// This requires dynamically partitioning the hole so the trampolines don't overlap
// each other. For now, simply skip this case.
if (range1.overlaps(range2)) continue;
return [2]Range{
.{ .start = start1, .end = end1 },
.{ .start = start2, .end = end2 },
};
}
}
}
return null;
}
/// A generic helper to mechanically verify that a coupled allocation satisfies all bitwise and
/// physical overlap constraints.
fn verifyCoupled(k: u8, r1: Request, r2: Request, j1_range: Range, j2_range: Range) !void {
const rel1: i32 = @intCast(j1_range.start - @as(i64, @intCast(r1.source)));
const rel2: i32 = @intCast(j2_range.start - @as(i64, @intCast(r2.source)));
const u_rel1: u32 = @bitCast(rel1);
const u_rel2: u32 = @bitCast(rel2);
// Opcode Constraint
const e9_shift = @as(u5, @intCast(k - 1)) * 8;
try testing.expectEqual(@as(u32, 0xE9), (u_rel1 >> e9_shift) & 0xFF);
// Shared Bytes Constraint
if (k < 4) {
const shared_shift = @as(u5, @intCast(k)) * 8;
const shared_mask = (@as(u32, 1) << (@as(u5, @intCast(4 - k)) * 8)) - 1;
const shared1 = (u_rel1 >> shared_shift) & shared_mask;
const shared2 = u_rel2 & shared_mask;
try testing.expectEqual(shared1, shared2);
}
// Original User Constraints
try testing.expectEqual(r1.pattern, u_rel1 & r1.mask);
try testing.expectEqual(r2.pattern, u_rel2 & r2.mask);
}
test "findCoupledAllocation" {
var aa = AddressAllocator{ .child_allocator = testing.allocator };
defer aa.deinit();
// Block memory so we have distinct holes.
// We need a hole that allows `rel1` to have `0xE9` in its second byte.
// This means `rel1` needs to be around `0xE900`.
try aa.block(.{ .start = 0x2000, .end = 0xE000 });
try aa.block(.{ .start = 0xF000, .end = 0x10000 });
const r1 = Request{ .source = 0, .size = 10, .valid_range = .{ .start = 0, .end = 0x20000 } };
const r2 = Request{ .source = 2, .size = 10, .valid_range = .{ .start = 0, .end = 0x20000 } };
const res = aa.findCoupledAllocation(2, r1, r2);
try testing.expect(res != null);
const j1_range = res.?[0];
const j2_range = res.?[1];
try testing.expect(j1_range.start >= 0xE000 and j1_range.end <= 0xF000);
try testing.expect(j2_range.start >= 0x0000 and j2_range.end <= 0x2000);
try verifyCoupled(2, r1, r2, j1_range, j2_range);
}
test "findCoupledAllocation K=1 (3 shared bytes)" {
var aa = AddressAllocator{ .child_allocator = testing.allocator };
defer aa.deinit();
try aa.block(.{ .start = 0x2000, .end = 0x01000000 });
const r1 = Request{ .source = 0, .size = 10, .valid_range = .{ .start = 0, .end = 0x10000000 } };
const r2 = Request{ .source = 1, .size = 10, .valid_range = .{ .start = 0, .end = 0x10000000 } };
const res = aa.findCoupledAllocation(1, r1, r2);
try testing.expect(res != null);
// For K=1, rel1's lowest byte MUST be 0xE9.
// In Hole 1, the smallest valid rel1 is 0x000000E9.
// This makes the shared bytes (top 3 bytes) 0x000000.
try testing.expectEqual(0xE9, res.?[0].start);
try testing.expectEqual(0x01, res.?[1].start);
try verifyCoupled(1, r1, r2, res.?[0], res.?[1]);
}
test "findCoupledAllocation K=3 (1 shared byte)" {
var aa = AddressAllocator{ .child_allocator = testing.allocator };
defer aa.deinit();
// K=3 means rel1 byte 2 MUST be 0xE9. rel1 looks like 0xXXE9XXXX.
// Smallest positive is ~0x00E90000. We need a hole there.
try aa.block(.{ .start = 0x2000, .end = 0x00E90000 });
const r1 = Request{ .source = 0, .size = 10, .valid_range = .{ .start = 0, .end = 0x10000000 } };
const r2 = Request{ .source = 3, .size = 10, .valid_range = .{ .start = 0, .end = 0x10000000 } };
const res = aa.findCoupledAllocation(3, r1, r2);
try testing.expect(res != null);
try verifyCoupled(3, r1, r2, res.?[0], res.?[1]);
}
test "findCoupledAllocation K=4 (Independent)" {
var aa = AddressAllocator{ .child_allocator = testing.allocator };
defer aa.deinit();
try aa.block(.{ .start = 0x2000, .end = 0x01000000 });
const r1 = Request{
.source = 0x50000000,
.size = 10,
.valid_range = .{ .start = 0, .end = 0x60000000 },
};
const r2 = Request{
.source = 0x50000004,
.size = 10,
.valid_range = .{ .start = 0, .end = 0x60000000 },
};
const res = aa.findCoupledAllocation(4, r1, r2);
try testing.expect(res != null);
try verifyCoupled(4, r1, r2, res.?[0], res.?[1]);
}
test "findCoupledAllocation Negative Jumps (Both Backwards)" {
var aa = AddressAllocator{ .child_allocator = testing.allocator };
defer aa.deinit();
// We block everything except two specific holes far behind the jump source.
try aa.block(.{ .start = 0, .end = 0x10000000 });
try aa.block(.{ .start = 0x10010000, .end = 0x20000000 });
try aa.block(.{ .start = 0x20010000, .end = 0x60000000 });
const r1 = Request{
.source = 0x50000000,
.size = 10,
.valid_range = .{ .start = 0, .end = 0x60000000 },
};
const r2 = Request{
.source = 0x50000002,
.size = 10,
.valid_range = .{ .start = 0, .end = 0x60000000 },
};
// The math solver natively handles the two's complement wraparound.
const res = aa.findCoupledAllocation(2, r1, r2);
try testing.expect(res != null);
try verifyCoupled(2, r1, r2, res.?[0], res.?[1]);
}
test "findCoupledAllocation with Mask/Pattern Constraints" {
var aa = AddressAllocator{ .child_allocator = testing.allocator };
defer aa.deinit();
try aa.block(.{ .start = 0, .end = 0x10000 });
try aa.block(.{ .start = 0x20000, .end = 0x44440000 });
try aa.block(.{ .start = 0x44450000, .end = 0x80000000 });
// K=2. We force the shared bytes to be exactly 0x4444.
const r1 = Request{ .source = 0, .size = 10, .valid_range = .{ .start = 0, .end = 0x80000000 } };
const r2 = Request{
.source = 2,
.size = 10,
.valid_range = .{ .start = 0, .end = 0x80000000 },
.mask = 0x0000FFFF,
.pattern = 0x00004444,
};
const res = aa.findCoupledAllocation(2, r1, r2);
try testing.expect(res != null);
try verifyCoupled(2, r1, r2, res.?[0], res.?[1]);
// Explicitly verify the constraint was propagated to J1
const rel1: i32 = @intCast(res.?[0].start);
const u_rel1: u32 = @bitCast(rel1);
try testing.expectEqual(@as(u32, 0x4444), (u_rel1 >> 16) & 0xFFFF);
}
test "findCoupledAllocation Fails on Math Impossibility" {
var aa = AddressAllocator{ .child_allocator = testing.allocator };
defer aa.deinit();
const r1 = Request{
.source = 0,
.size = 10,
.valid_range = .{ .start = 0, .end = 0x80000000 },
.mask = 0xFFFF0000,
.pattern = 0x11110000,
};
const r2 = Request{
.source = 2,
.size = 10,
.valid_range = .{ .start = 0, .end = 0x80000000 },
.mask = 0x0000FFFF,
.pattern = 0x00002222,
};
const res = aa.findCoupledAllocation(2, r1, r2);
try testing.expectEqual(null, res);
}