Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ pub fn build(b: *std.Build) void {
// ── mcp-zig dependency ──
const mcp_dep = b.dependency("mcp_zig", .{});
exe.root_module.addImport("mcp", mcp_dep.module("mcp"));

// Windows default stack is 1MB; match the 8MB Linux default for headroom.
if (target.result.os.tag == .windows) {
exe.stack_size = 8 * 1024 * 1024;
}

b.installArtifact(exe);

// ── macOS codesign (ad-hoc by default; configurable for release builds) ──
Expand Down
5 changes: 5 additions & 0 deletions src/compat.zig
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ const posix = std.posix;
const linux = std.os.linux;
const fs = std.fs;

/// On Windows, std.fs.max_path_bytes is 32767 (\\?\-prefixed paths).
/// That's too large for stack buffers (e.g. EventQueue's 4096-element array
/// becomes ~128MB). Use a practical limit instead.
pub const path_buf_size: usize = if (builtin.os.tag == .windows) 1024 else std.fs.max_path_bytes;

/// Cached result of the runtime statx probe.
var statx_supported: enum(u8) { unknown = 0, yes = 1, no = 2 } = .unknown;

Expand Down
47 changes: 34 additions & 13 deletions src/index.zig
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const std = @import("std");
const builtin = @import("builtin");
const compat = @import("compat.zig");

// ── Inverted word index ─────────────────────────────────────
Expand Down Expand Up @@ -964,6 +965,7 @@ pub const MmapTrigramIndex = struct {
allocator: std.mem.Allocator,

/// Attempt to load an index from `dir_path`.
/// Yields null on Windows (decided at compile time) and whenever
/// `initFromDiskInner` reports an error; the error itself is discarded.
pub fn initFromDisk(dir_path: []const u8, allocator: std.mem.Allocator) ?MmapTrigramIndex {
    return if (comptime builtin.os.tag == .windows)
        null
    else
        initFromDiskInner(dir_path, allocator) catch null;
}

Expand Down Expand Up @@ -1082,8 +1084,10 @@ pub const MmapTrigramIndex = struct {
for (self.file_table) |p| self.allocator.free(p);
self.allocator.free(self.file_table);
self.file_set.deinit();
std.posix.munmap(self.postings_data);
std.posix.munmap(self.lookup_data);
if (comptime builtin.os.tag != .windows) {
std.posix.munmap(self.postings_data);
std.posix.munmap(self.lookup_data);
}
}

pub fn fileCount(self: *const MmapTrigramIndex) u32 {
Expand Down Expand Up @@ -1760,33 +1764,50 @@ pub fn resetFrequencyTable() void {
/// Build a per-project frequency table by counting byte-pair occurrences in
/// `content`, then inverting counts to weights (common → low, rare → high).
pub fn buildFrequencyTable(content: []const u8) [256][256]u16 {
var counts: [256][256]u64 = .{.{0} ** 256} ** 256;
// Heap-allocate counts (~512KB) and output (~128KB) to avoid stack overflow
// on Windows where the default stack is 1MB.
const counts = std.heap.page_allocator.create([256][256]u64) catch return default_pair_freq;
defer std.heap.page_allocator.destroy(counts);
const out = std.heap.page_allocator.create([256][256]u16) catch return default_pair_freq;
defer std.heap.page_allocator.destroy(out);
counts.* = .{.{0} ** 256} ** 256;
if (content.len >= 2) {
for (0..content.len - 1) |i| {
counts[content[i]][content[i + 1]] += 1;
}
}
return finishFrequencyTable(&counts);
finishFrequencyTable(counts, out);
return out.*;
}

/// Build a frequency table by streaming over multiple content slices.
/// Zero extra memory — counts pairs within each slice, skipping cross-slice
/// boundaries (negligible loss for large corpora).
/// Heap-allocates working buffers to avoid stack overflow on Windows.
/// Counts pairs within each slice, skipping cross-slice boundaries
/// (negligible loss for large corpora).
pub fn buildFrequencyTableFromSlices(slices: []const []const u8) [256][256]u16 {
var counts: [256][256]u64 = .{.{0} ** 256} ** 256;
const counts = std.heap.page_allocator.create([256][256]u64) catch return default_pair_freq;
defer std.heap.page_allocator.destroy(counts);
const out = std.heap.page_allocator.create([256][256]u16) catch return default_pair_freq;
defer std.heap.page_allocator.destroy(out);
counts.* = .{.{0} ** 256} ** 256;
for (slices) |content| {
if (content.len < 2) continue;
for (0..content.len - 1) |i| {
counts[content[i]][content[i + 1]] += 1;
}
}
return finishFrequencyTable(&counts);
finishFrequencyTable(counts, out);
return out.*;
}

/// Build a frequency table by streaming over a StringHashMap of content.
/// Iterates file-by-file with no concatenation; heap-allocates its fixed-size
/// counting buffers to avoid stack overflow on Windows.
pub fn buildFrequencyTableFromMap(contents: *const std.StringHashMap([]const u8)) [256][256]u16 {
var counts: [256][256]u64 = .{.{0} ** 256} ** 256;
const counts = std.heap.page_allocator.create([256][256]u64) catch return default_pair_freq;
defer std.heap.page_allocator.destroy(counts);
const out = std.heap.page_allocator.create([256][256]u16) catch return default_pair_freq;
defer std.heap.page_allocator.destroy(out);
counts.* = .{.{0} ** 256} ** 256;
var iter = contents.valueIterator();
while (iter.next()) |content_ptr| {
const content = content_ptr.*;
Expand All @@ -1795,18 +1816,19 @@ pub fn buildFrequencyTableFromMap(contents: *const std.StringHashMap([]const u8)
counts[content[i]][content[i + 1]] += 1;
}
}
return finishFrequencyTable(&counts);
finishFrequencyTable(counts, out);
return out.*;
}

fn finishFrequencyTable(counts: *const [256][256]u64) [256][256]u16 {
fn finishFrequencyTable(counts: *const [256][256]u64, table: *[256][256]u16) void {
var max_count: u64 = 1;
for (counts) |row| {
for (row) |c| {
if (c > max_count) max_count = c;
}
}
// Invert: count 0 → 0xFE00 (rare, high); max_count → 0x1000 (common, low).
var table: [256][256]u16 = .{.{0xFE00} ** 256} ** 256;
table.* = .{.{0xFE00} ** 256} ** 256;
for (0..256) |a| {
for (0..256) |b| {
const c = counts[a][b];
Expand All @@ -1816,7 +1838,6 @@ fn finishFrequencyTable(counts: *const [256][256]u64) [256][256]u16 {
table[a][b] = @intCast(@min(w, 0xFE00));
}
}
return table;
}

/// Persist a frequency table as a raw binary blob to `<dir_path>/pair_freq.bin`.
Expand Down
100 changes: 51 additions & 49 deletions src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const Store = @import("store.zig").Store;
const AgentRegistry = @import("agent.zig").AgentRegistry;
const Explorer = @import("explore.zig").Explorer;
const watcher = @import("watcher.zig");
const is_windows = @import("builtin").os.tag == .windows;
const server = @import("server.zig");
const mcp_server = @import("mcp.zig");
const sty = @import("style.zig");
Expand All @@ -28,22 +29,14 @@ const Out = struct {
}
};

/// The real entry point. Zig may merge all command-branch stack frames into
/// one, producing a ~33 MB frame that overflows the default 16 MB OS stack.
/// We trampoline through a thread with an explicit 64 MB stack.
/// Entry point. Delegates to `mainImpl`, which is marked `noinline`: otherwise
/// LLVM merges the stack frames of all command branches (tree, serve, mcp)
/// into main()'s frame, creating a ~128MB frame on Windows that exceeds the
/// guard page stride → STATUS_STACK_OVERFLOW.
pub fn main() !void {
const thread = try std.Thread.spawn(.{ .stack_size = 64 * 1024 * 1024 }, mainInner, .{});
thread.join();
return mainImpl();
}

fn mainInner() void {
mainImpl() catch |err| {
std.debug.print("fatal: {s}\n", .{@errorName(err)});
std.process.exit(1);
};
}

fn mainImpl() !void {
noinline fn mainImpl() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
const allocator = gpa.allocator();
Expand Down Expand Up @@ -231,7 +224,7 @@ fn mainImpl() !void {
root = ".";
}

var root_buf: [std.fs.max_path_bytes]u8 = undefined;
var root_buf: [compat.path_buf_size]u8 = undefined;
const abs_root = resolveRoot(root, &root_buf) catch {
out.p("{s}\xe2\x9c\x97{s} cannot resolve root: {s}{s}{s}\n", .{
s.red, s.reset, s.bold, root, s.reset,
Expand Down Expand Up @@ -578,26 +571,32 @@ fn mainImpl() !void {
s.reset,
});
} else if (std.mem.eql(u8, cmd, "serve")) {
const port: u16 = 7719;
var agents = AgentRegistry.init(allocator);
defer agents.deinit();
_ = try agents.register("__filesystem__");
if (comptime is_windows) {
out.p("{s}\xe2\x9c\x97{s} HTTP serve not supported on Windows. Use {s}mcp{s} mode.\n", .{
s.red, s.reset, s.cyan, s.reset,
});
std.process.exit(1);
} else {
const port: u16 = 7719;
var agents = AgentRegistry.init(allocator);
defer agents.deinit();
_ = try agents.register("__filesystem__");

var shutdown = std.atomic.Value(bool).init(false);
defer shutdown.store(true, .release);
var scan_already_done = std.atomic.Value(bool).init(true);
var shutdown = std.atomic.Value(bool).init(false);
defer shutdown.store(true, .release);
var scan_already_done = std.atomic.Value(bool).init(true);

const queue = try allocator.create(watcher.EventQueue);
defer allocator.destroy(queue);
queue.* = watcher.EventQueue{};
const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, queue, root, &shutdown, &scan_already_done });
defer watch_thread.join();
var queue = try watcher.EventQueue.init();
defer queue.deinit();
const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, &queue, root, &shutdown, &scan_already_done });
defer watch_thread.join();

const reap_thread = try std.Thread.spawn(.{}, reapLoop, .{ &agents, &shutdown });
defer reap_thread.join();
const reap_thread = try std.Thread.spawn(.{}, reapLoop, .{ &agents, &shutdown });
defer reap_thread.join();

std.log.info("codedb: {d} files indexed, listening on :{d}", .{ store.currentSeq(), port });
try server.serve(allocator, &store, &agents, &explorer, queue, port);
std.log.info("codedb: {d} files indexed, listening on :{d}", .{ store.currentSeq(), port });
try server.serve(allocator, &store, &agents, &explorer, &queue, port);
}
} else if (std.mem.eql(u8, cmd, "mcp")) {
var agents = AgentRegistry.init(allocator);
defer agents.deinit();
Expand Down Expand Up @@ -630,9 +629,8 @@ fn mainImpl() !void {
var shutdown = std.atomic.Value(bool).init(false);
var scan_done = std.atomic.Value(bool).init(snapshot_loaded);

const queue = try allocator.create(watcher.EventQueue);
defer allocator.destroy(queue);
queue.* = watcher.EventQueue{};
var queue = try watcher.EventQueue.init();
defer queue.deinit();
var scan_thread: ?std.Thread = null;
const startup_t0 = std.time.milliTimestamp();
if (!snapshot_loaded) {
Expand All @@ -642,7 +640,7 @@ fn mainImpl() !void {
telem.recordCodebaseStats(&explorer, startup_time_ms);
}

const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, queue, root, &shutdown, &scan_done });
const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, &queue, root, &shutdown, &scan_done });
const idle_thread = try std.Thread.spawn(.{}, idleWatchdog, .{&shutdown});

std.log.info("codedb mcp: root={s} files={d} data={s}", .{ abs_root, store.currentSeq(), data_dir });
Expand All @@ -668,7 +666,7 @@ fn isCommand(arg: []const u8) bool {
return false;
}

fn resolveRoot(root: []const u8, buf: *[std.fs.max_path_bytes]u8) ![]const u8 {
fn resolveRoot(root: []const u8, buf: *[compat.path_buf_size]u8) ![]const u8 {
if (std.mem.eql(u8, root, ".")) {
return std.fs.cwd().realpath(".", buf) catch return error.ResolveFailed;
}
Expand All @@ -677,7 +675,8 @@ fn resolveRoot(root: []const u8, buf: *[std.fs.max_path_bytes]u8) ![]const u8 {

fn getDataDir(allocator: std.mem.Allocator, abs_root: []const u8) ![]u8 {
const hash = std.hash.Wyhash.hash(0, abs_root);
const home = std.process.getEnvVarOwned(allocator, "HOME") catch {
const home = std.process.getEnvVarOwned(allocator, "HOME") catch
std.process.getEnvVarOwned(allocator, "USERPROFILE") catch {
return std.fmt.allocPrint(allocator, "{s}/.codedb", .{abs_root});
};
defer allocator.free(home);
Expand Down Expand Up @@ -873,22 +872,25 @@ fn idleWatchdog(shutdown: *std.atomic.Value(bool)) void {
std.Thread.sleep(std.time.ns_per_s);
}

// Quick liveness check: poll stdin for POLLHUP (client disconnected)
// Quick liveness check: poll stdin for HUP (client disconnected).
// Windows stdin is not a socket, so poll() is POSIX-only.
const stdin = std.fs.File.stdin();
var poll_fds = [_]std.posix.pollfd{.{
.fd = stdin.handle,
.events = std.posix.POLL.IN | std.posix.POLL.HUP,
.revents = 0,
}};
const poll_result = std.posix.poll(&poll_fds, 0) catch 0;
if (poll_result > 0 and (poll_fds[0].revents & std.posix.POLL.HUP) != 0) {
std.log.info("stdin closed (client disconnected), exiting", .{});
stdin.close();
shutdown.store(true, .release);
return;
if (comptime @import("builtin").os.tag != .windows) {
var poll_fds = [_]std.posix.pollfd{.{
.fd = stdin.handle,
.events = std.posix.POLL.IN | std.posix.POLL.HUP,
.revents = 0,
}};
const poll_result = std.posix.poll(&poll_fds, 0) catch 0;
if (poll_result > 0 and (poll_fds[0].revents & std.posix.POLL.HUP) != 0) {
std.log.info("stdin closed (client disconnected), exiting", .{});
stdin.close();
shutdown.store(true, .release);
return;
}
}

// Fallback: idle timeout
// Idle timeout (primary exit mechanism on Windows)
const last = mcp.last_activity.load(.acquire);
if (last == 0) continue;
const now = std.time.milliTimestamp();
Expand Down
Loading