From 20f534911f20a49d8c16a7ace307e4ab68c62760 Mon Sep 17 00:00:00 2001 From: Jeff Noel Date: Mon, 6 Apr 2026 10:16:47 -0400 Subject: [PATCH 01/11] =?UTF-8?q?feat(windows):=20foundation=20=E2=80=94?= =?UTF-8?q?=20build=20system,=20stack=20safety,=20heap=20allocation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Windows platform support foundations that all subsequent Windows PRs depend on. No behavioral change on Linux/macOS. - compat.zig: add path_buf_size (1024 on Windows vs max_path_bytes) - build.zig: set 8MB stack for Windows (matches Linux default) - main.zig: replace thread trampoline with noinline mainImpl to avoid LLVM frame merging (~128MB) that exceeds Windows guard page stride - main.zig: fix double scan_thread.join() (UB on all platforms) - watcher.zig: heap-allocate EventQueue (avoids ~4MB stack array) - watcher.zig: use compat.path_buf_size in FsEvent - index.zig: heap-allocate frequency table buffers (~640KB each) - tests.zig: update EventQueue construction to use init()/deinit() --- build.zig | 6 ++++++ src/compat.zig | 5 +++++ src/index.zig | 34 +++++++++++++++++++++++++--------- src/main.zig | 38 ++++++++++++++------------------------ src/tests.zig | 9 ++++++--- src/watcher.zig | 18 +++++++++++++++--- 6 files changed, 71 insertions(+), 39 deletions(-) diff --git a/build.zig b/build.zig index bd3b3b0..f146d21 100644 --- a/build.zig +++ b/build.zig @@ -30,6 +30,12 @@ pub fn build(b: *std.Build) void { // ── mcp-zig dependency ── const mcp_dep = b.dependency("mcp_zig", .{}); exe.root_module.addImport("mcp", mcp_dep.module("mcp")); + + // Windows default stack is 1MB; match the 8MB Linux default for headroom. 
+ if (target.result.os.tag == .windows) { + exe.stack_size = 8 * 1024 * 1024; + } + b.installArtifact(exe); // ── macOS codesign (ad-hoc by default; configurable for release builds) ── diff --git a/src/compat.zig b/src/compat.zig index 634e1ed..187afdf 100644 --- a/src/compat.zig +++ b/src/compat.zig @@ -13,6 +13,11 @@ const posix = std.posix; const linux = std.os.linux; const fs = std.fs; +/// On Windows, std.fs.max_path_bytes is 32767 (\\?\-prefixed paths). +/// That's too large for stack buffers (e.g. EventQueue's 4096-element array +/// becomes ~128MB). Use a practical limit instead. +pub const path_buf_size: usize = if (builtin.os.tag == .windows) 1024 else std.fs.max_path_bytes; + /// Cached result of the runtime statx probe. var statx_supported: enum(u8) { unknown = 0, yes = 1, no = 2 } = .unknown; diff --git a/src/index.zig b/src/index.zig index 4a333b0..a0470d1 100644 --- a/src/index.zig +++ b/src/index.zig @@ -1760,33 +1760,49 @@ pub fn resetFrequencyTable() void { /// Build a per-project frequency table by counting byte-pair occurrences in /// `content`, then inverting counts to weights (common → low, rare → high). pub fn buildFrequencyTable(content: []const u8) [256][256]u16 { - var counts: [256][256]u64 = .{.{0} ** 256} ** 256; + // Heap-allocate counts (~512KB) and output (~128KB) to avoid stack overflow + // on Windows where the default stack is 1MB. + const counts = std.heap.page_allocator.create([256][256]u64) catch return default_pair_freq; + defer std.heap.page_allocator.destroy(counts); + const out = std.heap.page_allocator.create([256][256]u16) catch return default_pair_freq; + defer std.heap.page_allocator.destroy(out); + counts.* = .{.{0} ** 256} ** 256; if (content.len >= 2) { for (0..content.len - 1) |i| { counts[content[i]][content[i + 1]] += 1; } } - return finishFrequencyTable(&counts); + finishFrequencyTable(counts, out); + return out.*; } /// Build a frequency table by streaming over multiple content slices. 
/// Zero extra memory — counts pairs within each slice, skipping cross-slice /// boundaries (negligible loss for large corpora). pub fn buildFrequencyTableFromSlices(slices: []const []const u8) [256][256]u16 { - var counts: [256][256]u64 = .{.{0} ** 256} ** 256; + const counts = std.heap.page_allocator.create([256][256]u64) catch return default_pair_freq; + defer std.heap.page_allocator.destroy(counts); + const out = std.heap.page_allocator.create([256][256]u16) catch return default_pair_freq; + defer std.heap.page_allocator.destroy(out); + counts.* = .{.{0} ** 256} ** 256; for (slices) |content| { if (content.len < 2) continue; for (0..content.len - 1) |i| { counts[content[i]][content[i + 1]] += 1; } } - return finishFrequencyTable(&counts); + finishFrequencyTable(counts, out); + return out.*; } /// Build a frequency table by streaming over a StringHashMap of content. /// Iterates file-by-file — no concatenation, zero extra memory. pub fn buildFrequencyTableFromMap(contents: *const std.StringHashMap([]const u8)) [256][256]u16 { - var counts: [256][256]u64 = .{.{0} ** 256} ** 256; + const counts = std.heap.page_allocator.create([256][256]u64) catch return default_pair_freq; + defer std.heap.page_allocator.destroy(counts); + const out = std.heap.page_allocator.create([256][256]u16) catch return default_pair_freq; + defer std.heap.page_allocator.destroy(out); + counts.* = .{.{0} ** 256} ** 256; var iter = contents.valueIterator(); while (iter.next()) |content_ptr| { const content = content_ptr.*; @@ -1795,10 +1811,11 @@ pub fn buildFrequencyTableFromMap(contents: *const std.StringHashMap([]const u8) counts[content[i]][content[i + 1]] += 1; } } - return finishFrequencyTable(&counts); + finishFrequencyTable(counts, out); + return out.*; } -fn finishFrequencyTable(counts: *const [256][256]u64) [256][256]u16 { +fn finishFrequencyTable(counts: *const [256][256]u64, table: *[256][256]u16) void { var max_count: u64 = 1; for (counts) |row| { for (row) |c| { @@ -1806,7 
+1823,7 @@ fn finishFrequencyTable(counts: *const [256][256]u64) [256][256]u16 { } } // Invert: count 0 → 0xFE00 (rare, high); max_count → 0x1000 (common, low). - var table: [256][256]u16 = .{.{0xFE00} ** 256} ** 256; + table.* = .{.{0xFE00} ** 256} ** 256; for (0..256) |a| { for (0..256) |b| { const c = counts[a][b]; @@ -1816,7 +1833,6 @@ fn finishFrequencyTable(counts: *const [256][256]u64) [256][256]u16 { table[a][b] = @intCast(@min(w, 0xFE00)); } } - return table; } /// Persist a frequency table as a raw binary blob to `/pair_freq.bin`. diff --git a/src/main.zig b/src/main.zig index aa99261..066aa38 100644 --- a/src/main.zig +++ b/src/main.zig @@ -28,22 +28,14 @@ const Out = struct { } }; -/// The real entry point. Zig may merge all command-branch stack frames into -/// one, producing a ~33 MB frame that overflows the default 16 MB OS stack. -/// We trampoline through a thread with an explicit 64 MB stack. +/// Trampoline: LLVM merges all branch stack frames (tree, serve, mcp) +/// into main()'s frame, creating a ~128MB frame on Windows that exceeds +/// guard page stride → STATUS_STACK_OVERFLOW. noinline prevents this. 
pub fn main() !void { - const thread = try std.Thread.spawn(.{ .stack_size = 64 * 1024 * 1024 }, mainInner, .{}); - thread.join(); + return mainImpl(); } -fn mainInner() void { - mainImpl() catch |err| { - std.debug.print("fatal: {s}\n", .{@errorName(err)}); - std.process.exit(1); - }; -} - -fn mainImpl() !void { +noinline fn mainImpl() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; defer _ = gpa.deinit(); const allocator = gpa.allocator(); @@ -231,7 +223,7 @@ fn mainImpl() !void { root = "."; } - var root_buf: [std.fs.max_path_bytes]u8 = undefined; + var root_buf: [compat.path_buf_size]u8 = undefined; const abs_root = resolveRoot(root, &root_buf) catch { out.p("{s}\xe2\x9c\x97{s} cannot resolve root: {s}{s}{s}\n", .{ s.red, s.reset, s.bold, root, s.reset, @@ -587,17 +579,16 @@ fn mainImpl() !void { defer shutdown.store(true, .release); var scan_already_done = std.atomic.Value(bool).init(true); - const queue = try allocator.create(watcher.EventQueue); - defer allocator.destroy(queue); - queue.* = watcher.EventQueue{}; - const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, queue, root, &shutdown, &scan_already_done }); + var queue = try watcher.EventQueue.init(); + defer queue.deinit(); + const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, &queue, root, &shutdown, &scan_already_done }); defer watch_thread.join(); const reap_thread = try std.Thread.spawn(.{}, reapLoop, .{ &agents, &shutdown }); defer reap_thread.join(); std.log.info("codedb: {d} files indexed, listening on :{d}", .{ store.currentSeq(), port }); - try server.serve(allocator, &store, &agents, &explorer, queue, port); + try server.serve(allocator, &store, &agents, &explorer, &queue, port); } else if (std.mem.eql(u8, cmd, "mcp")) { var agents = AgentRegistry.init(allocator); defer agents.deinit(); @@ -630,9 +621,8 @@ fn mainImpl() !void { var shutdown = std.atomic.Value(bool).init(false); var scan_done = 
std.atomic.Value(bool).init(snapshot_loaded); - const queue = try allocator.create(watcher.EventQueue); - defer allocator.destroy(queue); - queue.* = watcher.EventQueue{}; + var queue = try watcher.EventQueue.init(); + defer queue.deinit(); var scan_thread: ?std.Thread = null; const startup_t0 = std.time.milliTimestamp(); if (!snapshot_loaded) { @@ -642,7 +632,7 @@ fn mainImpl() !void { telem.recordCodebaseStats(&explorer, startup_time_ms); } - const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, queue, root, &shutdown, &scan_done }); + const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, &queue, root, &shutdown, &scan_done }); const idle_thread = try std.Thread.spawn(.{}, idleWatchdog, .{&shutdown}); std.log.info("codedb mcp: root={s} files={d} data={s}", .{ abs_root, store.currentSeq(), data_dir }); @@ -668,7 +658,7 @@ fn isCommand(arg: []const u8) bool { return false; } -fn resolveRoot(root: []const u8, buf: *[std.fs.max_path_bytes]u8) ![]const u8 { +fn resolveRoot(root: []const u8, buf: *[compat.path_buf_size]u8) ![]const u8 { if (std.mem.eql(u8, root, ".")) { return std.fs.cwd().realpath(".", buf) catch return error.ResolveFailed; } diff --git a/src/tests.zig b/src/tests.zig index 19647c5..58851dc 100644 --- a/src/tests.zig +++ b/src/tests.zig @@ -1049,7 +1049,8 @@ test "explorer: removeFile frees owned map key" { try testing.expect(explorer.dep_graph.count() == 0); } test "watcher: queue overflow is explicit" { - var queue = watcher.EventQueue{}; + var queue = try watcher.EventQueue.init(); + defer queue.deinit(); var pushed: usize = 0; while (true) : (pushed += 1) { @@ -1068,7 +1069,8 @@ test "watcher: queue overflow is explicit" { } test "watcher: queue event copies path bytes" { - var queue = watcher.EventQueue{}; + var queue = try watcher.EventQueue.init(); + defer queue.deinit(); const original = try testing.allocator.dupe(u8, "tmp/deleted.zig"); try 
testing.expect(queue.push(watcher.FsEvent.init(original, .deleted, 99) orelse unreachable)); testing.allocator.free(original); @@ -1452,7 +1454,8 @@ test "regression: searchContent frees empty trigram candidate slice" { } test "regression: queue push stays non-blocking when full" { - var queue = watcher.EventQueue{}; + var queue = try watcher.EventQueue.init(); + defer queue.deinit(); var pushed: usize = 0; while (true) : (pushed += 1) { diff --git a/src/watcher.zig b/src/watcher.zig index 7492085..4b75ce1 100644 --- a/src/watcher.zig +++ b/src/watcher.zig @@ -10,14 +10,14 @@ pub const EventKind = enum(u8) { }; pub const FsEvent = struct { - path_buf: [std.fs.max_path_bytes]u8 = undefined, + path_buf: [compat.path_buf_size]u8 = undefined, path_len: usize, kind: EventKind, seq: u64, pub fn init(src_path: []const u8, kind: EventKind, seq: u64) ?FsEvent { // Gracefully skip paths exceeding the max instead of panicking. - if (src_path.len > std.fs.max_path_bytes) return null; + if (src_path.len > compat.path_buf_size) return null; var event = FsEvent{ .path_len = src_path.len, .kind = kind, @@ -35,11 +35,23 @@ pub const FsEvent = struct { pub const EventQueue = struct { const CAPACITY = 4096; - events: [CAPACITY]?FsEvent = [_]?FsEvent{null} ** CAPACITY, + // Heap-allocated: each ?FsEvent is ~1KB on Windows (compat.path_buf_size), + // so an inline [4096]?FsEvent would be ~4MB — risky on constrained stacks. 
+ events: *[CAPACITY]?FsEvent, head: std.atomic.Value(usize) = std.atomic.Value(usize).init(0), tail: std.atomic.Value(usize) = std.atomic.Value(usize).init(0), mu: std.Thread.Mutex = .{}, + pub fn init() !EventQueue { + const events = try std.heap.page_allocator.create([CAPACITY]?FsEvent); + events.* = [_]?FsEvent{null} ** CAPACITY; + return .{ .events = events }; + } + + pub fn deinit(self: *EventQueue) void { + std.heap.page_allocator.destroy(self.events); + } + pub fn push(self: *EventQueue, event: FsEvent) bool { self.mu.lock(); defer self.mu.unlock(); From 4ee1e93a600a216560e035dcb6fc715971b9c7a6 Mon Sep 17 00:00:00 2001 From: Jeff Noel Date: Mon, 6 Apr 2026 10:52:39 -0400 Subject: [PATCH 02/11] fix(windows): guard POSIX-only poll/pipe in idleWatchdog and tests Upstream issue-148 added stdin HUP detection via std.posix.poll() and pipe tests. These use POSIX APIs unavailable on Windows. Guard the poll block with comptime os check (Windows uses idle timeout only) and skip pipe tests on Windows. --- src/main.zig | 29 ++++++++++++++++------------- src/tests.zig | 17 +++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/main.zig b/src/main.zig index 066aa38..becb0de 100644 --- a/src/main.zig +++ b/src/main.zig @@ -863,22 +863,25 @@ fn idleWatchdog(shutdown: *std.atomic.Value(bool)) void { std.Thread.sleep(std.time.ns_per_s); } - // Quick liveness check: poll stdin for POLLHUP (client disconnected) + // Quick liveness check: poll stdin for HUP (client disconnected). + // Windows stdin is not a socket, so poll() is POSIX-only. 
const stdin = std.fs.File.stdin(); - var poll_fds = [_]std.posix.pollfd{.{ - .fd = stdin.handle, - .events = std.posix.POLL.IN | std.posix.POLL.HUP, - .revents = 0, - }}; - const poll_result = std.posix.poll(&poll_fds, 0) catch 0; - if (poll_result > 0 and (poll_fds[0].revents & std.posix.POLL.HUP) != 0) { - std.log.info("stdin closed (client disconnected), exiting", .{}); - stdin.close(); - shutdown.store(true, .release); - return; + if (comptime @import("builtin").os.tag != .windows) { + var poll_fds = [_]std.posix.pollfd{.{ + .fd = stdin.handle, + .events = std.posix.POLL.IN | std.posix.POLL.HUP, + .revents = 0, + }}; + const poll_result = std.posix.poll(&poll_fds, 0) catch 0; + if (poll_result > 0 and (poll_fds[0].revents & std.posix.POLL.HUP) != 0) { + std.log.info("stdin closed (client disconnected), exiting", .{}); + stdin.close(); + shutdown.store(true, .release); + return; + } } - // Fallback: idle timeout + // Idle timeout (primary exit mechanism on Windows) const last = mcp.last_activity.load(.acquire); if (last == 0) continue; const now = std.time.milliTimestamp(); diff --git a/src/tests.zig b/src/tests.zig index 58851dc..db3b7db 100644 --- a/src/tests.zig +++ b/src/tests.zig @@ -4580,23 +4580,20 @@ test "issue-148: idle timeout is 10 minutes" { } test "issue-148: POLLHUP detects closed pipe" { - // Verify the polling infrastructure works for pipe-based transports + if (comptime @import("builtin").os.tag == .windows) return error.SkipZigTest; const pipe = try std.posix.pipe(); - defer std.posix.close(pipe[0]); - - // Close write end — simulates client disconnect std.posix.close(pipe[1]); - // Poll should detect POLLHUP on the read end - var fds = [_]std.posix.pollfd{.{ + var poll_fds = [_]std.posix.pollfd{.{ .fd = pipe[0], - .events = std.posix.POLL.IN, + .events = std.posix.POLL.IN | std.posix.POLL.HUP, .revents = 0, }}; - const n = try std.posix.poll(&fds, 100); // 100ms timeout - try testing.expect(n > 0); - try testing.expect((fds[0].revents & 
std.posix.POLL.HUP) != 0); + const result = try std.posix.poll(&poll_fds, 0); + try testing.expect(result > 0); + try testing.expect((poll_fds[0].revents & std.posix.POLL.HUP) != 0); + std.posix.close(pipe[0]); } test "issue-148: idle watchdog exits on shutdown signal" { From 98f61eb7e6a6ae1aaa78de4ccaf50644ae0e5449 Mon Sep 17 00:00:00 2001 From: Jeff Noel Date: Mon, 6 Apr 2026 10:58:31 -0400 Subject: [PATCH 03/11] fix(index): update stale docstring for heap-allocated frequency tables --- src/index.zig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/index.zig b/src/index.zig index a0470d1..58d57eb 100644 --- a/src/index.zig +++ b/src/index.zig @@ -1777,8 +1777,9 @@ pub fn buildFrequencyTable(content: []const u8) [256][256]u16 { } /// Build a frequency table by streaming over multiple content slices. -/// Zero extra memory — counts pairs within each slice, skipping cross-slice -/// boundaries (negligible loss for large corpora). +/// Heap-allocates working buffers to avoid stack overflow on Windows. +/// Counts pairs within each slice, skipping cross-slice boundaries +/// (negligible loss for large corpora). pub fn buildFrequencyTableFromSlices(slices: []const []const u8) [256][256]u16 { const counts = std.heap.page_allocator.create([256][256]u64) catch return default_pair_freq; defer std.heap.page_allocator.destroy(counts); From e8697075911e042ffc7d17411955d83eb288ffb4 Mon Sep 17 00:00:00 2001 From: Jeff Noel Date: Mon, 6 Apr 2026 19:42:02 -0400 Subject: [PATCH 04/11] fix(windows): guard POSIX-only mmap in MmapTrigramIndex and tests Upstream's mmap trigram index (#167) uses std.posix.mmap/munmap which are unavailable on Windows. Guard initFromDisk to return null on Windows (falling back to heap-based index) and skip mmap-dependent tests. 
--- src/index.zig | 8 ++++++-- src/tests.zig | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/index.zig b/src/index.zig index 58d57eb..5007980 100644 --- a/src/index.zig +++ b/src/index.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const builtin = @import("builtin"); const compat = @import("compat.zig"); // ── Inverted word index ───────────────────────────────────── @@ -964,6 +965,7 @@ pub const MmapTrigramIndex = struct { allocator: std.mem.Allocator, pub fn initFromDisk(dir_path: []const u8, allocator: std.mem.Allocator) ?MmapTrigramIndex { + if (comptime builtin.os.tag == .windows) return null; return initFromDiskInner(dir_path, allocator) catch null; } @@ -1082,8 +1084,10 @@ pub const MmapTrigramIndex = struct { for (self.file_table) |p| self.allocator.free(p); self.allocator.free(self.file_table); self.file_set.deinit(); - std.posix.munmap(self.postings_data); - std.posix.munmap(self.lookup_data); + if (comptime builtin.os.tag != .windows) { + std.posix.munmap(self.postings_data); + std.posix.munmap(self.lookup_data); + } } pub fn fileCount(self: *const MmapTrigramIndex) u32 { diff --git a/src/tests.zig b/src/tests.zig index db3b7db..ec01373 100644 --- a/src/tests.zig +++ b/src/tests.zig @@ -4670,6 +4670,7 @@ const MmapTrigramIndex = @import("index.zig").MmapTrigramIndex; const AnyTrigramIndex = @import("index.zig").AnyTrigramIndex; test "issue-164: mmap trigram index returns same candidates as heap index" { + if (comptime @import("builtin").os.tag == .windows) return error.SkipZigTest; var arena = std.heap.ArenaAllocator.init(testing.allocator); defer arena.deinit(); const allocator = arena.allocator(); @@ -4710,6 +4711,7 @@ test "issue-164: mmap trigram index returns same candidates as heap index" { } test "issue-164: mmap binary search on sorted lookup table" { + if (comptime @import("builtin").os.tag == .windows) return error.SkipZigTest; var arena = std.heap.ArenaAllocator.init(testing.allocator); defer arena.deinit(); const 
allocator = arena.allocator(); @@ -4750,6 +4752,7 @@ test "issue-164: mmap handles missing files gracefully" { } test "issue-164: AnyTrigramIndex dispatches to mmap variant" { + if (comptime @import("builtin").os.tag == .windows) return error.SkipZigTest; var arena = std.heap.ArenaAllocator.init(testing.allocator); defer arena.deinit(); const allocator = arena.allocator(); From f97ebdf33fa511e2b49ecd74018dd7d2d10d192f Mon Sep 17 00:00:00 2001 From: Jeff Noel Date: Mon, 6 Apr 2026 10:18:29 -0400 Subject: [PATCH 05/11] =?UTF-8?q?feat(windows):=20core=20I/O=20=E2=80=94?= =?UTF-8?q?=20file=20locking=20and=20path=20separators?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Platform-specific I/O changes for Windows file system operations. - store.zig: use NtLockFile/NtUnlockFile on Windows (POSIX flock on others) - watcher.zig: add isSep() to handle both / and \ path separators - watcher.zig: backslash-aware shouldSkip() for Windows paths - watcher.zig: use %TEMP%/%TMP% for codedb-notify path on Windows - watcher.zig: use setEndPos() instead of ftruncate (cross-platform) - watcher.zig: trim both / and \ when making paths relative to root --- src/store.zig | 20 +++++++++++++++----- src/watcher.zig | 29 ++++++++++++++++++++--------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/src/store.zig b/src/store.zig index 75cc6bf..12dc9a9 100644 --- a/src/store.zig +++ b/src/store.zig @@ -80,11 +80,21 @@ pub const Store = struct { if (diff) |d| { if (self.data_log) |log| { // Advisory lock for cross-process safety - const locked = blk: { - log.lock(.exclusive) catch break :blk false; - break :blk true; - }; - defer if (locked) log.unlock(); + if (comptime @import("builtin").os.tag == .windows) { + const ntdll = std.os.windows.ntdll; + const max_off: i64 = std.math.maxInt(i64); + const zero_off: i64 = 0; + var iosb: std.os.windows.IO_STATUS_BLOCK = undefined; + _ = ntdll.NtLockFile(log.handle, null, null, null, &iosb, 
&zero_off, &max_off, null, 0, 1); + defer { + var iosb2: std.os.windows.IO_STATUS_BLOCK = undefined; + _ = ntdll.NtUnlockFile(log.handle, &iosb2, &zero_off, &max_off, null); + } + } else { + const fd = log.handle; + _ = std.posix.flock(fd, std.posix.LOCK.EX) catch {}; + defer _ = std.posix.flock(fd, std.posix.LOCK.UN) catch {}; + } // Re-stat to get current end position (another process may have appended) const stat = compat.fileStat(log) catch return error.Unexpected; diff --git a/src/watcher.zig b/src/watcher.zig index 4b75ce1..8901331 100644 --- a/src/watcher.zig +++ b/src/watcher.zig @@ -130,6 +130,10 @@ const skip_dirs = [_][]const u8{ ".bundle", }; +fn isSep(c: u8) bool { + return c == '/' or (comptime @import("builtin").os.tag == .windows and c == '\\'); +} + fn shouldSkip(path: []const u8) bool { // Check each path component against skip list var rest = path; @@ -137,13 +141,14 @@ fn shouldSkip(path: []const u8) bool { for (skip_dirs) |skip| { if (rest.len >= skip.len and std.mem.eql(u8, rest[0..skip.len], skip) and - (rest.len == skip.len or rest[skip.len] == '/')) + (rest.len == skip.len or isSep(rest[skip.len]))) return true; } - // Advance to next component - if (std.mem.indexOfScalar(u8, rest, '/')) |sep| { - rest = rest[sep + 1 ..]; + // Advance to next component (handle both / and \ separators) + const sep = for (rest, 0..) 
|c, i| { + if (isSep(c)) break i; } else break; + rest = rest[sep + 1 ..]; } return false; } @@ -665,7 +670,13 @@ fn indexFileContent(explorer: *Explorer, dir: std.fs.Dir, path: []const u8, allo fn drainNotifyFile(store: *Store, explorer: *Explorer, queue: *EventQueue, known: *FileMap, root: []const u8, alloc: std.mem.Allocator) void { // Atomically read + truncate - const notify_path = "/tmp/codedb-notify"; + const notify_path = if (comptime @import("builtin").os.tag == .windows) blk: { + const tmp = std.process.getEnvVarOwned(alloc, "TEMP") catch + std.process.getEnvVarOwned(alloc, "TMP") catch return; + defer alloc.free(tmp); + break :blk std.fmt.allocPrint(alloc, "{s}\\codedb-notify", .{tmp}) catch return; + } else "/tmp/codedb-notify"; + defer if (comptime @import("builtin").os.tag == .windows) alloc.free(notify_path); const file = std.fs.cwd().openFile(notify_path, .{ .mode = .read_write }) catch return; defer file.close(); @@ -673,9 +684,9 @@ fn drainNotifyFile(store: *Store, explorer: *Explorer, queue: *EventQueue, known defer alloc.free(data); if (data.len == 0) return; - // Truncate after reading + // Truncate after reading (setEndPos is cross-platform) file.seekTo(0) catch return; - std.posix.ftruncate(file.handle, 0) catch return; + file.setEndPos(0) catch return; // Re-index each notified path var dir = std.fs.cwd().openDir(root, .{}) catch return; @@ -686,9 +697,9 @@ fn drainNotifyFile(store: *Store, explorer: *Explorer, queue: *EventQueue, known const path = std.mem.trim(u8, line, " \t\r"); if (path.len == 0) continue; - // Make path relative to root if it's absolute + // Make path relative to root if it's absolute (handle both / and \ separators) const rel = if (std.mem.startsWith(u8, path, root)) - std.mem.trimLeft(u8, path[root.len..], "/") + std.mem.trimLeft(u8, path[root.len..], "/\\") else path; From 57a84be819b97aedd407a3652a944258214e6b4f Mon Sep 17 00:00:00 2001 From: Jeff Noel Date: Mon, 6 Apr 2026 11:00:04 -0400 Subject: [PATCH 06/11] 
fix(windows): check NtLockFile status, normalize notify paths - store.zig: check NtLockFile return status; only unlock if lock succeeded; skip write-under-lock if lock fails (matches upstream's advisory lock pattern) - watcher.zig: normalize backslash paths to forward slashes in drainNotifyFile to avoid duplicate entries in the explorer (walker uses / convention, notify file may contain \ on Windows) --- src/store.zig | 23 +++++++++++++++-------- src/watcher.zig | 15 ++++++++++++++- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/store.zig b/src/store.zig index 12dc9a9..708e6fb 100644 --- a/src/store.zig +++ b/src/store.zig @@ -80,21 +80,28 @@ pub const Store = struct { if (diff) |d| { if (self.data_log) |log| { // Advisory lock for cross-process safety - if (comptime @import("builtin").os.tag == .windows) { + const locked = if (comptime @import("builtin").os.tag == .windows) blk: { const ntdll = std.os.windows.ntdll; const max_off: i64 = std.math.maxInt(i64); const zero_off: i64 = 0; var iosb: std.os.windows.IO_STATUS_BLOCK = undefined; - _ = ntdll.NtLockFile(log.handle, null, null, null, &iosb, &zero_off, &max_off, null, 0, 1); - defer { + const status = ntdll.NtLockFile(log.handle, null, null, null, &iosb, &zero_off, &max_off, null, 0, 1); + break :blk (status == .SUCCESS); + } else blk: { + log.lock(.exclusive) catch break :blk false; + break :blk true; + }; + defer if (locked) { + if (comptime @import("builtin").os.tag == .windows) { + const ntdll = std.os.windows.ntdll; + const max_off: i64 = std.math.maxInt(i64); + const zero_off: i64 = 0; var iosb2: std.os.windows.IO_STATUS_BLOCK = undefined; _ = ntdll.NtUnlockFile(log.handle, &iosb2, &zero_off, &max_off, null); + } else { + log.unlock(); } - } else { - const fd = log.handle; - _ = std.posix.flock(fd, std.posix.LOCK.EX) catch {}; - defer _ = std.posix.flock(fd, std.posix.LOCK.UN) catch {}; - } + }; // Re-stat to get current end position (another process may have appended) const stat = 
compat.fileStat(log) catch return error.Unexpected; diff --git a/src/watcher.zig b/src/watcher.zig index 8901331..51687bd 100644 --- a/src/watcher.zig +++ b/src/watcher.zig @@ -698,11 +698,24 @@ fn drainNotifyFile(store: *Store, explorer: *Explorer, queue: *EventQueue, known if (path.len == 0) continue; // Make path relative to root if it's absolute (handle both / and \ separators) - const rel = if (std.mem.startsWith(u8, path, root)) + const raw_rel = if (std.mem.startsWith(u8, path, root)) std.mem.trimLeft(u8, path[root.len..], "/\\") else path; + // Normalize backslashes to forward slashes so the path matches the + // walker's convention and avoids duplicate entries in the explorer. + var norm_buf: [compat.path_buf_size]u8 = undefined; + const rel = if (comptime @import("builtin").os.tag == .windows) blk: { + if (raw_rel.len > norm_buf.len) continue; + @memcpy(norm_buf[0..raw_rel.len], raw_rel); + const s = norm_buf[0..raw_rel.len]; + for (s) |*c| { + if (c.* == '\\') c.* = '/'; + } + break :blk s; + } else raw_rel; + indexFileContent(explorer, dir, rel, alloc, false) catch continue; // Update known-file state so incrementalDiff doesn't double-process From 8cdcc66e05f559c166efc16936137d600d957c3b Mon Sep 17 00:00:00 2001 From: Jeff Noel Date: Mon, 6 Apr 2026 10:23:32 -0400 Subject: [PATCH 07/11] feat(windows): enable MCP mode and telemetry on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove barriers preventing MCP and telemetry from working on Windows. 
- main.zig: add is_windows const; block HTTP serve with error message directing users to mcp mode; HOME→USERPROFILE fallback in getDataDir - mcp.zig: import compat; noinline on run/dispatch to prevent LLVM frame merging; never_inline for heavy handlers (handleRemote, handleIndex, handleProjects); HOME→USERPROFILE fallback; path_buf_size; pass cwd to Child.run in handleIndex - snapshot.zig: HOME→USERPROFILE fallback in writeSnapshotDual - telemetry.zig: use cmd.exe /C for cloud sync on Windows; use compat.path_buf_size for buffer declarations --- src/main.zig | 41 +++++++++++++++++++++++++---------------- src/mcp.zig | 42 +++++++++++++++++++++++------------------- src/snapshot.zig | 3 ++- src/telemetry.zig | 37 +++++++++++++++++++------------------ 4 files changed, 69 insertions(+), 54 deletions(-) diff --git a/src/main.zig b/src/main.zig index becb0de..974bc5e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4,6 +4,7 @@ const Store = @import("store.zig").Store; const AgentRegistry = @import("agent.zig").AgentRegistry; const Explorer = @import("explore.zig").Explorer; const watcher = @import("watcher.zig"); +const is_windows = @import("builtin").os.tag == .windows; const server = @import("server.zig"); const mcp_server = @import("mcp.zig"); const sty = @import("style.zig"); @@ -570,25 +571,32 @@ noinline fn mainImpl() !void { s.reset, }); } else if (std.mem.eql(u8, cmd, "serve")) { - const port: u16 = 7719; - var agents = AgentRegistry.init(allocator); - defer agents.deinit(); - _ = try agents.register("__filesystem__"); + if (comptime is_windows) { + out.p("{s}\xe2\x9c\x97{s} HTTP serve not supported on Windows. 
Use {s}mcp{s} mode.\n", .{ + s.red, s.reset, s.cyan, s.reset, + }); + std.process.exit(1); + } else { + const port: u16 = 7719; + var agents = AgentRegistry.init(allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); - var shutdown = std.atomic.Value(bool).init(false); - defer shutdown.store(true, .release); - var scan_already_done = std.atomic.Value(bool).init(true); + var shutdown = std.atomic.Value(bool).init(false); + defer shutdown.store(true, .release); + var scan_already_done = std.atomic.Value(bool).init(true); - var queue = try watcher.EventQueue.init(); - defer queue.deinit(); - const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, &queue, root, &shutdown, &scan_already_done }); - defer watch_thread.join(); + var queue = try watcher.EventQueue.init(); + defer queue.deinit(); + const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, &queue, root, &shutdown, &scan_already_done }); + defer watch_thread.join(); - const reap_thread = try std.Thread.spawn(.{}, reapLoop, .{ &agents, &shutdown }); - defer reap_thread.join(); + const reap_thread = try std.Thread.spawn(.{}, reapLoop, .{ &agents, &shutdown }); + defer reap_thread.join(); - std.log.info("codedb: {d} files indexed, listening on :{d}", .{ store.currentSeq(), port }); - try server.serve(allocator, &store, &agents, &explorer, &queue, port); + std.log.info("codedb: {d} files indexed, listening on :{d}", .{ store.currentSeq(), port }); + try server.serve(allocator, &store, &agents, &explorer, &queue, port); + } } else if (std.mem.eql(u8, cmd, "mcp")) { var agents = AgentRegistry.init(allocator); defer agents.deinit(); @@ -667,7 +675,8 @@ fn resolveRoot(root: []const u8, buf: *[compat.path_buf_size]u8) ![]const u8 { fn getDataDir(allocator: std.mem.Allocator, abs_root: []const u8) ![]u8 { const hash = std.hash.Wyhash.hash(0, abs_root); - const home = std.process.getEnvVarOwned(allocator, "HOME") catch { 
+ const home = std.process.getEnvVarOwned(allocator, "HOME") catch + std.process.getEnvVarOwned(allocator, "USERPROFILE") catch { return std.fmt.allocPrint(allocator, "{s}/.codedb", .{abs_root}); }; defer allocator.free(home); diff --git a/src/mcp.zig b/src/mcp.zig index 4c009a4..3294510 100644 --- a/src/mcp.zig +++ b/src/mcp.zig @@ -4,6 +4,7 @@ // Uses mcp-zig for protocol utilities; adds roots support for workspace awareness. const std = @import("std"); +const compat = @import("compat.zig"); const mcp_lib = @import("mcp"); const mcpj = mcp_lib.json; const Root = mcp_lib.mcp.Root; @@ -96,7 +97,7 @@ const ProjectCache = struct { new_entry.store = Store.init(self.alloc); new_entry.last_used = now; - var snap_buf: [std.fs.max_path_bytes]u8 = undefined; + var snap_buf: [compat.path_buf_size]u8 = undefined; const snap_path = std.fmt.bufPrint(&snap_buf, "{s}/codedb.snapshot", .{p}) catch { new_entry.store.deinit(); new_entry.explorer.deinit(); @@ -108,9 +109,10 @@ const ProjectCache = struct { if (!snapshot_mod.loadSnapshot(snap_path, &new_entry.explorer, &new_entry.store, self.alloc)) { // Fallback: try central store at ~/.codedb/projects/{hash}/codedb.snapshot const hash = std.hash.Wyhash.hash(0, p); - var central_buf: [std.fs.max_path_bytes]u8 = undefined; + var central_buf: [compat.path_buf_size]u8 = undefined; const loaded_central = blk: { - const home = std.process.getEnvVarOwned(self.alloc, "HOME") catch break :blk false; + const home = std.process.getEnvVarOwned(self.alloc, "HOME") catch + std.process.getEnvVarOwned(self.alloc, "USERPROFILE") catch break :blk false; defer self.alloc.free(home); const central = std.fmt.bufPrint(&central_buf, "{s}/.codedb/projects/{x}/codedb.snapshot", .{ home, hash }) catch break :blk false; break :blk snapshot_mod.loadSnapshot(central, &new_entry.explorer, &new_entry.store, self.alloc); @@ -318,7 +320,7 @@ const Session = struct { } }; -pub fn run( +pub noinline fn run( alloc: std.mem.Allocator, store: *Store, explorer: *Explorer, 
@@ -385,7 +387,7 @@ pub fn run( } else if (mcpj.eql(method, "tools/list")) { if (!is_notification) writeResult(alloc, stdout, id, tools_list); } else if (mcpj.eql(method, "tools/call")) { - handleCall(alloc, root, stdout, id, store, explorer, agents, &cache, telem); + @call(.never_inline, handleCall, .{ alloc, root, stdout, id, store, explorer, agents, &cache, telem }); } else if (mcpj.eql(method, "ping")) { if (!is_notification) writeResult(alloc, stdout, id, "{}"); } else { @@ -569,7 +571,7 @@ fn handleCall( writeResult(alloc, stdout, id, result.items); } -fn dispatch( +noinline fn dispatch( alloc: std.mem.Allocator, tool: Tool, args: *const std.json.ObjectMap, @@ -594,16 +596,16 @@ fn dispatch( .codedb_word => handleWord(alloc, args, out, ctx.explorer), .codedb_hot => handleHot(alloc, args, out, ctx.store, ctx.explorer), .codedb_deps => handleDeps(alloc, args, out, ctx.explorer), - .codedb_read => handleRead(alloc, args, out, ctx.explorer), - .codedb_edit => handleEdit(alloc, args, out, default_store, default_explorer, agents), + .codedb_read => @call(.never_inline, handleRead, .{ alloc, args, out, ctx.explorer }), + .codedb_edit => @call(.never_inline, handleEdit, .{ alloc, args, out, default_store, default_explorer, agents }), .codedb_changes => handleChanges(alloc, args, out, default_store), .codedb_status => handleStatus(alloc, out, ctx.store, ctx.explorer), - .codedb_snapshot => handleSnapshot(alloc, out, ctx.explorer, ctx.store), - .codedb_bundle => handleBundle(alloc, args, out, ctx.store, ctx.explorer, agents, cache), - .codedb_remote => handleRemote(alloc, args, out), - .codedb_projects => handleProjects(alloc, out), - .codedb_index => handleIndex(alloc, args, out), - .codedb_find => handleFind(alloc, args, out, ctx.explorer), + .codedb_snapshot => @call(.never_inline, handleSnapshot, .{ alloc, out, ctx.explorer, ctx.store }), + .codedb_bundle => @call(.never_inline, handleBundle, .{ alloc, args, out, ctx.store, ctx.explorer, agents, cache }), + 
.codedb_remote => @call(.never_inline, handleRemote, .{ alloc, args, out }), + .codedb_projects => @call(.never_inline, handleProjects, .{ alloc, out }), + .codedb_index => @call(.never_inline, handleIndex, .{ alloc, args, out }), + .codedb_find => @call(.never_inline, handleFind, .{ alloc, args, out, ctx.explorer }), } } @@ -1165,8 +1167,9 @@ fn handleRemote(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: // ── Local project tools ───────────────────────────────────────────────────── fn handleProjects(alloc: std.mem.Allocator, out: *std.ArrayList(u8)) void { - const home = std.process.getEnvVarOwned(alloc, "HOME") catch { - out.appendSlice(alloc, "error: cannot read HOME") catch {}; + const home = std.process.getEnvVarOwned(alloc, "HOME") catch + std.process.getEnvVarOwned(alloc, "USERPROFILE") catch { + out.appendSlice(alloc, "error: cannot read HOME/USERPROFILE") catch {}; return; }; defer alloc.free(home); @@ -1189,7 +1192,7 @@ fn handleProjects(alloc: std.mem.Allocator, out: *std.ArrayList(u8)) void { if (entry.kind != .directory) continue; // Read project.txt to get the project path - var path_buf: [std.fs.max_path_bytes]u8 = undefined; + var path_buf: [compat.path_buf_size]u8 = undefined; const sub_path = std.fmt.bufPrint(&path_buf, "{s}/project.txt", .{entry.name}) catch continue; const project_file = dir.openFile(sub_path, .{}) catch continue; defer project_file.close(); @@ -1200,7 +1203,7 @@ fn handleProjects(alloc: std.mem.Allocator, out: *std.ArrayList(u8)) void { // Check if snapshot exists in the project directory var snap_exists = false; - var snap_path_buf: [std.fs.max_path_bytes]u8 = undefined; + var snap_path_buf: [compat.path_buf_size]u8 = undefined; const snap_path = std.fmt.bufPrint(&snap_path_buf, "{s}/codedb.snapshot", .{project_path}) catch project_path; if (std.fs.cwd().access(snap_path, .{})) |_| { snap_exists = true; @@ -1226,7 +1229,7 @@ fn handleIndex(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: * }; 
// Resolve to absolute path - var abs_buf: [std.fs.max_path_bytes]u8 = undefined; + var abs_buf: [compat.path_buf_size]u8 = undefined; const abs_path = std.fs.cwd().realpath(path, &abs_buf) catch { out.appendSlice(alloc, "error: cannot resolve path: ") catch {}; out.appendSlice(alloc, path) catch {}; @@ -1257,6 +1260,7 @@ fn handleIndex(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: * const result = std.process.Child.run(.{ .allocator = alloc, .argv = &.{ exe_path, abs_path, "snapshot" }, + .cwd = abs_path, .max_output_bytes = 64 * 1024, }) catch { out.appendSlice(alloc, "error: failed to run indexer") catch {}; diff --git a/src/snapshot.zig b/src/snapshot.zig index 8dc5e63..9733391 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -610,7 +610,8 @@ pub fn writeSnapshotDual( try writeSnapshot(explorer, root_path, output_path, allocator); const hash = std.hash.Wyhash.hash(0, root_path); - const home = std.process.getEnvVarOwned(allocator, "HOME") catch return; + const home = std.process.getEnvVarOwned(allocator, "HOME") catch + std.process.getEnvVarOwned(allocator, "USERPROFILE") catch return; defer allocator.free(home); const secondary = std.fmt.allocPrint(allocator, "{s}/.codedb/projects/{x}/codedb.snapshot", .{ home, hash }) catch return; defer allocator.free(secondary); diff --git a/src/telemetry.zig b/src/telemetry.zig index eaaa17b..0b5c466 100644 --- a/src/telemetry.zig +++ b/src/telemetry.zig @@ -38,7 +38,7 @@ pub const Telemetry = struct { file: ?std.fs.File = null, enabled: bool = true, buf: [4096]u8 = undefined, - path_buf: [std.fs.max_path_bytes]u8 = undefined, + path_buf: [compat.path_buf_size]u8 = undefined, path_len: usize = 0, call_count: u32 = 0, write_lock: std.Thread.Mutex = .{}, @@ -162,23 +162,24 @@ pub const Telemetry = struct { const stat = compat.dirStatFile(std.fs.cwd(), path) catch return; if (stat.size == 0) return; - // Use argv-based exec (no shell interpolation) to avoid injection - var data_arg_buf: 
[std.fs.max_path_bytes + 1]u8 = undefined; - const data_arg = std.fmt.bufPrint(&data_arg_buf, "@{s}", .{path}) catch return; - - var child = std.process.Child.init( - &.{ "curl", "-sf", "-X", "POST", CLOUD_URL, "-H", "Content-Type: application/json", "--data-binary", data_arg, "--max-time", "5" }, - std.heap.page_allocator, - ); - child.stdin_behavior = .Ignore; - child.stdout_behavior = .Ignore; - child.stderr_behavior = .Ignore; - _ = child.spawnAndWait() catch return; - - // Truncate the file after successful sync - if (std.fs.cwd().createFile(path, .{ .truncate = true })) |f| { - f.close(); - } else |_| {} + if (comptime @import("builtin").os.tag == .windows) { + // On Windows, use cmd.exe to run curl and truncate the file + var cmd_buf: [2048]u8 = undefined; + const cmd = std.fmt.bufPrint(&cmd_buf, "curl -sf -X POST {s} -H \"Content-Type: application/json\" --data-binary @{s} >NUL 2>&1 && type nul > {s}", .{ CLOUD_URL, path, path }) catch return; + var child = std.process.Child.init(&.{ "cmd.exe", "/C", cmd }, std.heap.page_allocator); + child.stdin_behavior = .Ignore; + child.stdout_behavior = .Ignore; + child.stderr_behavior = .Ignore; + _ = child.spawn() catch return; + } else { + var cmd_buf: [2048]u8 = undefined; + const cmd = std.fmt.bufPrint(&cmd_buf, "curl -sf -X POST {s} -H 'Content-Type: application/json' --data-binary @{s} >/dev/null 2>&1 && : > {s}", .{ CLOUD_URL, path, path }) catch return; + var child = std.process.Child.init(&.{ "/bin/sh", "-c", cmd }, std.heap.page_allocator); + child.stdin_behavior = .Ignore; + child.stdout_behavior = .Ignore; + child.stderr_behavior = .Ignore; + _ = child.spawn() catch return; + } } fn formatEvent(self: *Telemetry, ev: *const Event) !usize { From bde3bf71d34bbf0591378b3996bcfab081cd6b97 Mon Sep 17 00:00:00 2001 From: Jeff Noel Date: Mon, 6 Apr 2026 11:01:42 -0400 Subject: [PATCH 08/11] fix(telemetry): use argv-based curl exec, avoid shell injection Revert from shell command string interpolation (cmd.exe /C, 
/bin/sh -c) to argv-based curl execution. This: - Eliminates command injection risk from path interpolation - Avoids zombie processes (spawnAndWait instead of spawn) - Works on both Windows (curl ships with Win10+) and POSIX - Truncates telemetry file after successful sync via createFile --- src/telemetry.zig | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/telemetry.zig b/src/telemetry.zig index 0b5c466..771a368 100644 --- a/src/telemetry.zig +++ b/src/telemetry.zig @@ -162,24 +162,24 @@ pub const Telemetry = struct { const stat = compat.dirStatFile(std.fs.cwd(), path) catch return; if (stat.size == 0) return; - if (comptime @import("builtin").os.tag == .windows) { - // On Windows, use cmd.exe to run curl and truncate the file - var cmd_buf: [2048]u8 = undefined; - const cmd = std.fmt.bufPrint(&cmd_buf, "curl -sf -X POST {s} -H \"Content-Type: application/json\" --data-binary @{s} >NUL 2>&1 && type nul > {s}", .{ CLOUD_URL, path, path }) catch return; - var child = std.process.Child.init(&.{ "cmd.exe", "/C", cmd }, std.heap.page_allocator); - child.stdin_behavior = .Ignore; - child.stdout_behavior = .Ignore; - child.stderr_behavior = .Ignore; - _ = child.spawn() catch return; - } else { - var cmd_buf: [2048]u8 = undefined; - const cmd = std.fmt.bufPrint(&cmd_buf, "curl -sf -X POST {s} -H 'Content-Type: application/json' --data-binary @{s} >/dev/null 2>&1 && : > {s}", .{ CLOUD_URL, path, path }) catch return; - var child = std.process.Child.init(&.{ "/bin/sh", "-c", cmd }, std.heap.page_allocator); - child.stdin_behavior = .Ignore; - child.stdout_behavior = .Ignore; - child.stderr_behavior = .Ignore; - _ = child.spawn() catch return; - } + // Use argv-based exec (no shell interpolation) to avoid injection. + // curl is available on modern Windows 10+ and all POSIX systems. 
+ var data_arg_buf: [compat.path_buf_size + 1]u8 = undefined; + const data_arg = std.fmt.bufPrint(&data_arg_buf, "@{s}", .{path}) catch return; + + var child = std.process.Child.init( + &.{ "curl", "-sf", "-X", "POST", CLOUD_URL, "-H", "Content-Type: application/json", "--data-binary", data_arg, "--max-time", "5" }, + std.heap.page_allocator, + ); + child.stdin_behavior = .Ignore; + child.stdout_behavior = .Ignore; + child.stderr_behavior = .Ignore; + _ = child.spawnAndWait() catch return; + + // Truncate the file after successful sync + if (std.fs.cwd().createFile(path, .{ .truncate = true })) |f| { + f.close(); + } else |_| {} } fn formatEvent(self: *Telemetry, ev: *const Event) !usize { From 5d482a866761ba49155a29c43394df3527d50816 Mon Sep 17 00:00:00 2001 From: Jeff Noel Date: Mon, 6 Apr 2026 10:26:02 -0400 Subject: [PATCH 09/11] feat(windows): security hardening and cross-platform tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Windows-aware path safety checks and test updates. 
- mcp.zig: isPathSafe now rejects backslash prefix, drive letters (C:\), and ..\ traversal — splits on both / and \ separators - server.zig: delegate isPathSafe to mcp.zig (single canonical impl) - root_policy.zig: add case-insensitive matching for Windows paths; reject C:\Windows\Temp and AppData\Local\Temp as indexable roots - tests.zig: add isPathSafe Windows path tests; make issue-77 test cross-platform (%TEMP% on Windows, /private/tmp on macOS) --- src/mcp.zig | 18 +++++++++++------- src/root_policy.zig | 35 ++++++++++++++++++++++++++++++++++- src/server.zig | 10 +++------- src/tests.zig | 20 +++++++++++++++++++- 4 files changed, 67 insertions(+), 16 deletions(-) diff --git a/src/mcp.zig b/src/mcp.zig index 3294510..f30162f 100644 --- a/src/mcp.zig +++ b/src/mcp.zig @@ -1333,14 +1333,18 @@ fn handleFind(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: *s pub fn isPathSafe(path: []const u8) bool { if (path.len == 0) return false; - if (path[0] == '/') return false; - // Block null bytes (path truncation attack) - if (std.mem.indexOfScalar(u8, path, 0) != null) return false; - // Block backslash separators - if (std.mem.indexOfScalar(u8, path, '\\') != null) return false; - var it = std.mem.splitScalar(u8, path, '/'); - while (it.next()) |component| { + if (path[0] == '/' or path[0] == '\\') return false; + // Reject Windows drive letters (e.g. "C:\...") + if (path.len >= 2 and path[1] == ':') return false; + // Check for ".." traversal with both separator types + var rest: []const u8 = path; + while (rest.len > 0) { + const sep = for (rest, 0..) |c, i| { + if (c == '/' or c == '\\') break i; + } else rest.len; + const component = rest[0..sep]; if (std.mem.eql(u8, component, "..")) return false; + rest = if (sep < rest.len) rest[sep + 1 ..] 
else &.{}; } return true; } diff --git a/src/root_policy.zig b/src/root_policy.zig index ce150f4..0c5c7c3 100644 --- a/src/root_policy.zig +++ b/src/root_policy.zig @@ -1,13 +1,27 @@ const std = @import("std"); +const builtin = @import("builtin"); fn isExactOrChild(path: []const u8, prefix: []const u8) bool { if (!std.mem.startsWith(u8, path, prefix)) return false; - return path.len == prefix.len or path[prefix.len] == '/'; + if (path.len == prefix.len) return true; + const sep = path[prefix.len]; + return sep == '/' or sep == '\\'; +} + +fn isExactOrChildCaseInsensitive(path: []const u8, prefix: []const u8) bool { + if (path.len < prefix.len) return false; + for (path[0..prefix.len], prefix) |a, b| { + if (std.ascii.toLower(a) != std.ascii.toLower(b)) return false; + } + if (path.len == prefix.len) return true; + const sep = path[prefix.len]; + return sep == '/' or sep == '\\'; } pub fn isIndexableRoot(path: []const u8) bool { if (path.len == 0) return false; if (std.mem.eql(u8, path, "/")) return false; + // POSIX temp directories if (isExactOrChild(path, "/private/tmp")) return false; if (isExactOrChild(path, "/tmp")) return false; if (isExactOrChild(path, "/var/tmp")) return false; @@ -24,6 +38,15 @@ pub fn isIndexableRoot(path: []const u8) bool { if (std.mem.indexOfScalar(u8, rest, '/') == null and rest.len > 0) return false; } + // Windows temp directories (case-insensitive) + if (builtin.os.tag == .windows) { + if (isExactOrChildCaseInsensitive(path, "C:\\Windows\\Temp")) return false; + if (std.ascii.indexOfIgnoreCase(path, "\\AppData\\Local\\Temp")) |pos| { + const end = pos + "\\AppData\\Local\\Temp".len; + if (end == path.len or path[end] == '\\' or path[end] == '/') return false; + } + } + return true; } @@ -53,3 +76,13 @@ test "issue-80: /tmp is denied" { try testing.expect(!isIndexableRoot("/tmp/foo")); } +test "issue-80: Windows temp paths are denied" { + if (comptime builtin.os.tag != .windows) return error.SkipZigTest; + try 
testing.expect(!isIndexableRoot("C:\\Windows\\Temp")); + try testing.expect(!isIndexableRoot("C:\\Windows\\Temp\\codedb-test")); + try testing.expect(!isIndexableRoot("C:\\Users\\dev\\AppData\\Local\\Temp")); + try testing.expect(!isIndexableRoot("C:\\Users\\dev\\AppData\\Local\\Temp\\project")); + try testing.expect(isIndexableRoot("C:\\Users\\dev\\AppData\\Local\\TempProject")); + try testing.expect(isIndexableRoot("C:\\Users\\dev\\Projects\\myapp")); + try testing.expect(isIndexableRoot("D:\\GitHub\\codedb")); +} diff --git a/src/server.zig b/src/server.zig index 9336b9e..61f5574 100644 --- a/src/server.zig +++ b/src/server.zig @@ -588,13 +588,9 @@ fn handleConnection( // ── Response helpers ──────────────────────────────────────── fn isPathSafe(path: []const u8) bool { - if (path.len == 0) return false; - if (path[0] == '/') return false; - var it = std.mem.splitScalar(u8, path, '/'); - while (it.next()) |component| { - if (std.mem.eql(u8, component, "..")) return false; - } - return true; + // Delegate to the canonical implementation in mcp.zig + const mcp = @import("mcp.zig"); + return mcp.isPathSafe(path); } fn respondJson(conn: std.net.Server.Connection, status: []const u8, body: []const u8) void { diff --git a/src/tests.zig b/src/tests.zig index ec01373..0042993 100644 --- a/src/tests.zig +++ b/src/tests.zig @@ -1500,6 +1500,19 @@ test "isPathSafe: accepts valid relative paths" { try testing.expect(mcp.isPathSafe("a/b/c/d.txt")); } +test "isPathSafe: rejects Windows-style paths" { + const mcp = @import("mcp.zig"); + // Backslash prefix + try testing.expect(!mcp.isPathSafe("\\Windows\\System32")); + // Drive letters + try testing.expect(!mcp.isPathSafe("C:\\Windows\\System32")); + try testing.expect(!mcp.isPathSafe("D:\\secret.txt")); + // Backslash traversal + try testing.expect(!mcp.isPathSafe("foo\\..\\..\\etc\\passwd")); + // Mixed separators with traversal + try testing.expect(!mcp.isPathSafe("foo/..\\..\\secret")); +} + test "snapshot_json: snapshot 
builds and is valid JSON" { // Explorer uses arena for internal data var arena = std.heap.ArenaAllocator.init(testing.allocator); @@ -3564,7 +3577,12 @@ test "issue-60: telemetry disabled path is a no-op" { test "issue-77: mcp index accepts temporary-directory roots that cause pathological cache growth" { var tmp_name_buf: [128]u8 = undefined; const tmp_name = try std.fmt.bufPrint(&tmp_name_buf, "codedb-issue-77-{d}", .{std.time.microTimestamp()}); - const tmp_root = try std.fs.path.join(testing.allocator, &.{ "/private/tmp", tmp_name }); + const tmp_base = if (comptime @import("builtin").os.tag == .windows) + std.process.getEnvVarOwned(testing.allocator, "TEMP") catch return + else + try testing.allocator.dupe(u8, "/private/tmp"); + defer testing.allocator.free(tmp_base); + const tmp_root = try std.fs.path.join(testing.allocator, &.{ tmp_base, tmp_name }); defer testing.allocator.free(tmp_root); std.fs.cwd().makePath(tmp_root) catch |err| switch (err) { From a54145a445e35da941408a7d02167fb053c7625f Mon Sep 17 00:00:00 2001 From: Jeff Noel Date: Mon, 6 Apr 2026 11:05:49 -0400 Subject: [PATCH 10/11] fix(security): restore NUL byte check, fix temp path boundary - mcp.zig: restore NUL byte rejection in isPathSafe (prevents path truncation attacks) - root_policy.zig: add boundary check after AppData\Local\Temp match to avoid false positives like TempProject - tests.zig: use error.SkipZigTest instead of silent return when TEMP env var is missing on Windows --- src/mcp.zig | 2 ++ src/tests.zig | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mcp.zig b/src/mcp.zig index f30162f..6bbc8e8 100644 --- a/src/mcp.zig +++ b/src/mcp.zig @@ -1334,6 +1334,8 @@ fn handleFind(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: *s pub fn isPathSafe(path: []const u8) bool { if (path.len == 0) return false; if (path[0] == '/' or path[0] == '\\') return false; + // Block null bytes (path truncation attack) + if (std.mem.indexOfScalar(u8, path, 0) != 
null) return false; // Reject Windows drive letters (e.g. "C:\...") if (path.len >= 2 and path[1] == ':') return false; // Check for ".." traversal with both separator types diff --git a/src/tests.zig b/src/tests.zig index 0042993..46f5aaa 100644 --- a/src/tests.zig +++ b/src/tests.zig @@ -3578,7 +3578,8 @@ test "issue-77: mcp index accepts temporary-directory roots that cause pathologi var tmp_name_buf: [128]u8 = undefined; const tmp_name = try std.fmt.bufPrint(&tmp_name_buf, "codedb-issue-77-{d}", .{std.time.microTimestamp()}); const tmp_base = if (comptime @import("builtin").os.tag == .windows) - std.process.getEnvVarOwned(testing.allocator, "TEMP") catch return + std.process.getEnvVarOwned(testing.allocator, "TEMP") catch + return error.SkipZigTest else try testing.allocator.dupe(u8, "/private/tmp"); defer testing.allocator.free(tmp_base); From ae08a38eea1c9589cd272efe28297b5d517f2483 Mon Sep 17 00:00:00 2001 From: Jeff Noel Date: Mon, 6 Apr 2026 20:46:14 -0400 Subject: [PATCH 11/11] fix(windows): guard std.posix.getenv in root_policy for Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream's home directory blocking (#174) uses std.posix.getenv("HOME") which is unavailable on Windows (env strings are WTF-16). Guard with comptime check — Windows home dir blocking still works via the pattern matching below (/home/user, /Users/user, /root). 
--- src/root_policy.zig | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/root_policy.zig b/src/root_policy.zig index 0c5c7c3..a2659a8 100644 --- a/src/root_policy.zig +++ b/src/root_policy.zig @@ -27,8 +27,11 @@ pub fn isIndexableRoot(path: []const u8) bool { if (isExactOrChild(path, "/var/tmp")) return false; // Block home directory itself (not subdirectories) — prevents 17GB RAM spike (#174) - if (std.posix.getenv("HOME")) |home| { - if (home.len > 0 and std.mem.eql(u8, path, home)) return false; + // std.posix.getenv is unavailable on Windows, so this HOME check is skipped there. + if (comptime builtin.os.tag != .windows) { + if (std.posix.getenv("HOME")) |home| { + if (home.len > 0 and std.mem.eql(u8, path, home)) return false; + } } // Also block common home patterns directly if (std.mem.eql(u8, path, "/root")) return false;