diff --git a/build.zig b/build.zig index bd3b3b0..f146d21 100644 --- a/build.zig +++ b/build.zig @@ -30,6 +30,12 @@ pub fn build(b: *std.Build) void { // ── mcp-zig dependency ── const mcp_dep = b.dependency("mcp_zig", .{}); exe.root_module.addImport("mcp", mcp_dep.module("mcp")); + + // Windows default stack is 1MB; match the 8MB Linux default for headroom. + if (target.result.os.tag == .windows) { + exe.stack_size = 8 * 1024 * 1024; + } + b.installArtifact(exe); // ── macOS codesign (ad-hoc by default; configurable for release builds) ── diff --git a/src/compat.zig b/src/compat.zig index 634e1ed..187afdf 100644 --- a/src/compat.zig +++ b/src/compat.zig @@ -13,6 +13,11 @@ const posix = std.posix; const linux = std.os.linux; const fs = std.fs; +/// On Windows, std.fs.max_path_bytes is 32767 (\\?\-prefixed paths). +/// That's too large for stack buffers (e.g. EventQueue's 4096-element array +/// becomes ~128MB). Use a practical limit instead. +pub const path_buf_size: usize = if (builtin.os.tag == .windows) 1024 else std.fs.max_path_bytes; + /// Cached result of the runtime statx probe. var statx_supported: enum(u8) { unknown = 0, yes = 1, no = 2 } = .unknown; diff --git a/src/index.zig b/src/index.zig index 4a333b0..5007980 100644 --- a/src/index.zig +++ b/src/index.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const builtin = @import("builtin"); const compat = @import("compat.zig"); // ── Inverted word index ───────────────────────────────────── @@ -964,6 +965,7 @@ pub const MmapTrigramIndex = struct { allocator: std.mem.Allocator, pub fn initFromDisk(dir_path: []const u8, allocator: std.mem.Allocator) ?MmapTrigramIndex { + if (comptime builtin.os.tag == .windows) return null; return initFromDiskInner(dir_path, allocator) catch null; } @@ -1082,8 +1084,10 @@ pub const MmapTrigramIndex = struct { for (self.file_table) |p| self.allocator.free(p); self.allocator.free(self.file_table); self.file_set.deinit(); - std.posix.munmap(self.postings_data); - std.posix.munmap(self.lookup_data); + if (comptime builtin.os.tag != .windows) { + std.posix.munmap(self.postings_data); + std.posix.munmap(self.lookup_data); + } } pub fn fileCount(self: *const MmapTrigramIndex) u32 { @@ -1760,33 +1764,50 @@ pub fn resetFrequencyTable() void { /// Build a per-project frequency table by counting byte-pair occurrences in /// `content`, then inverting counts to weights (common → low, rare → high). pub fn buildFrequencyTable(content: []const u8) [256][256]u16 { - var counts: [256][256]u64 = .{.{0} ** 256} ** 256; + // Heap-allocate counts (~512KB) and output (~128KB) to avoid stack overflow + // on Windows where the default stack is 1MB. + const counts = std.heap.page_allocator.create([256][256]u64) catch return default_pair_freq; + defer std.heap.page_allocator.destroy(counts); + const out = std.heap.page_allocator.create([256][256]u16) catch return default_pair_freq; + defer std.heap.page_allocator.destroy(out); + counts.* = .{.{0} ** 256} ** 256; if (content.len >= 2) { for (0..content.len - 1) |i| { counts[content[i]][content[i + 1]] += 1; } } - return finishFrequencyTable(&counts); + finishFrequencyTable(counts, out); + return out.*; } /// Build a frequency table by streaming over multiple content slices. -/// Zero extra memory — counts pairs within each slice, skipping cross-slice -/// boundaries (negligible loss for large corpora). +/// Heap-allocates working buffers to avoid stack overflow on Windows. +/// Counts pairs within each slice, skipping cross-slice boundaries +/// (negligible loss for large corpora). pub fn buildFrequencyTableFromSlices(slices: []const []const u8) [256][256]u16 { - var counts: [256][256]u64 = .{.{0} ** 256} ** 256; + const counts = std.heap.page_allocator.create([256][256]u64) catch return default_pair_freq; + defer std.heap.page_allocator.destroy(counts); + const out = std.heap.page_allocator.create([256][256]u16) catch return default_pair_freq; + defer std.heap.page_allocator.destroy(out); + counts.* = .{.{0} ** 256} ** 256; for (slices) |content| { if (content.len < 2) continue; for (0..content.len - 1) |i| { counts[content[i]][content[i + 1]] += 1; } } - return finishFrequencyTable(&counts); + finishFrequencyTable(counts, out); + return out.*; } /// Build a frequency table by streaming over a StringHashMap of content. /// Iterates file-by-file — no concatenation, zero extra memory. pub fn buildFrequencyTableFromMap(contents: *const std.StringHashMap([]const u8)) [256][256]u16 { - var counts: [256][256]u64 = .{.{0} ** 256} ** 256; + const counts = std.heap.page_allocator.create([256][256]u64) catch return default_pair_freq; + defer std.heap.page_allocator.destroy(counts); + const out = std.heap.page_allocator.create([256][256]u16) catch return default_pair_freq; + defer std.heap.page_allocator.destroy(out); + counts.* = .{.{0} ** 256} ** 256; var iter = contents.valueIterator(); while (iter.next()) |content_ptr| { const content = content_ptr.*; @@ -1795,10 +1816,11 @@ pub fn buildFrequencyTableFromMap(contents: *const std.StringHashMap([]const u8) counts[content[i]][content[i + 1]] += 1; } } - return finishFrequencyTable(&counts); + finishFrequencyTable(counts, out); + return out.*; } -fn finishFrequencyTable(counts: *const [256][256]u64) [256][256]u16 { +fn finishFrequencyTable(counts: *const [256][256]u64, table: *[256][256]u16) void { var max_count: u64 = 1; for (counts) |row| { for (row) |c| { @@ -1806,7 +1828,7 @@ fn finishFrequencyTable(counts: *const [256][256]u64) [256][256]u16 { } } // Invert: count 0 → 0xFE00 (rare, high); max_count → 0x1000 (common, low). - var table: [256][256]u16 = .{.{0xFE00} ** 256} ** 256; + table.* = .{.{0xFE00} ** 256} ** 256; for (0..256) |a| { for (0..256) |b| { const c = counts[a][b]; @@ -1816,7 +1838,6 @@ fn finishFrequencyTable(counts: *const [256][256]u64) [256][256]u16 { table[a][b] = @intCast(@min(w, 0xFE00)); } } - return table; } /// Persist a frequency table as a raw binary blob to `/pair_freq.bin`. diff --git a/src/main.zig b/src/main.zig index aa99261..974bc5e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4,6 +4,7 @@ const Store = @import("store.zig").Store; const AgentRegistry = @import("agent.zig").AgentRegistry; const Explorer = @import("explore.zig").Explorer; const watcher = @import("watcher.zig"); +const is_windows = @import("builtin").os.tag == .windows; const server = @import("server.zig"); const mcp_server = @import("mcp.zig"); const sty = @import("style.zig"); @@ -28,22 +29,14 @@ const Out = struct { } }; -/// The real entry point. Zig may merge all command-branch stack frames into -/// one, producing a ~33 MB frame that overflows the default 16 MB OS stack. -/// We trampoline through a thread with an explicit 64 MB stack. +/// Trampoline: LLVM merges all branch stack frames (tree, serve, mcp) +/// into main()'s frame, creating a ~128MB frame on Windows that exceeds +/// guard page stride → STATUS_STACK_OVERFLOW. noinline prevents this. pub fn main() !void { - const thread = try std.Thread.spawn(.{ .stack_size = 64 * 1024 * 1024 }, mainInner, .{}); - thread.join(); + return mainImpl(); } -fn mainInner() void { - mainImpl() catch |err| { - std.debug.print("fatal: {s}\n", .{@errorName(err)}); - std.process.exit(1); - }; -} - -fn mainImpl() !void { +noinline fn mainImpl() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; defer _ = gpa.deinit(); const allocator = gpa.allocator(); @@ -231,7 +224,7 @@ fn mainImpl() !void { root = "."; } - var root_buf: [std.fs.max_path_bytes]u8 = undefined; + var root_buf: [compat.path_buf_size]u8 = undefined; const abs_root = resolveRoot(root, &root_buf) catch { out.p("{s}\xe2\x9c\x97{s} cannot resolve root: {s}{s}{s}\n", .{ s.red, s.reset, s.bold, root, s.reset, @@ -578,26 +571,32 @@ fn mainImpl() !void { s.reset, }); } else if (std.mem.eql(u8, cmd, "serve")) { - const port: u16 = 7719; - var agents = AgentRegistry.init(allocator); - defer agents.deinit(); - _ = try agents.register("__filesystem__"); + if (comptime is_windows) { + out.p("{s}\xe2\x9c\x97{s} HTTP serve not supported on Windows. Use {s}mcp{s} mode.\n", .{ + s.red, s.reset, s.cyan, s.reset, + }); + std.process.exit(1); + } else { + const port: u16 = 7719; + var agents = AgentRegistry.init(allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); - var shutdown = std.atomic.Value(bool).init(false); - defer shutdown.store(true, .release); - var scan_already_done = std.atomic.Value(bool).init(true); + var shutdown = std.atomic.Value(bool).init(false); + defer shutdown.store(true, .release); + var scan_already_done = std.atomic.Value(bool).init(true); - const queue = try allocator.create(watcher.EventQueue); - defer allocator.destroy(queue); - queue.* = watcher.EventQueue{}; - const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, queue, root, &shutdown, &scan_already_done }); - defer watch_thread.join(); + var queue = try watcher.EventQueue.init(); + defer queue.deinit(); + const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, &queue, root, &shutdown, &scan_already_done }); + defer watch_thread.join(); - const reap_thread = try std.Thread.spawn(.{}, reapLoop, .{ &agents, &shutdown }); - defer reap_thread.join(); + const reap_thread = try std.Thread.spawn(.{}, reapLoop, .{ &agents, &shutdown }); + defer reap_thread.join(); - std.log.info("codedb: {d} files indexed, listening on :{d}", .{ store.currentSeq(), port }); - try server.serve(allocator, &store, &agents, &explorer, queue, port); + std.log.info("codedb: {d} files indexed, listening on :{d}", .{ store.currentSeq(), port }); + try server.serve(allocator, &store, &agents, &explorer, &queue, port); + } } else if (std.mem.eql(u8, cmd, "mcp")) { var agents = AgentRegistry.init(allocator); defer agents.deinit(); @@ -630,9 +629,8 @@ fn mainImpl() !void { var shutdown = std.atomic.Value(bool).init(false); var scan_done = std.atomic.Value(bool).init(snapshot_loaded); - const queue = try allocator.create(watcher.EventQueue); - defer allocator.destroy(queue); - queue.* = watcher.EventQueue{}; + var queue = try watcher.EventQueue.init(); + defer queue.deinit(); var scan_thread: ?std.Thread = null; const startup_t0 = std.time.milliTimestamp(); if (!snapshot_loaded) { @@ -642,7 +640,7 @@ fn mainImpl() !void { telem.recordCodebaseStats(&explorer, startup_time_ms); } - const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, queue, root, &shutdown, &scan_done }); + const watch_thread = try std.Thread.spawn(.{}, watcher.incrementalLoop, .{ &store, &explorer, &queue, root, &shutdown, &scan_done }); const idle_thread = try std.Thread.spawn(.{}, idleWatchdog, .{&shutdown}); std.log.info("codedb mcp: root={s} files={d} data={s}", .{ abs_root, store.currentSeq(), data_dir }); @@ -668,7 +666,7 @@ fn isCommand(arg: []const u8) bool { return false; } -fn resolveRoot(root: []const u8, buf: *[std.fs.max_path_bytes]u8) ![]const u8 { +fn resolveRoot(root: []const u8, buf: *[compat.path_buf_size]u8) ![]const u8 { if (std.mem.eql(u8, root, ".")) { return std.fs.cwd().realpath(".", buf) catch return error.ResolveFailed; } @@ -677,7 +675,8 @@ fn resolveRoot(root: []const u8, buf: *[std.fs.max_path_bytes]u8) ![]const u8 { fn getDataDir(allocator: std.mem.Allocator, abs_root: []const u8) ![]u8 { const hash = std.hash.Wyhash.hash(0, abs_root); - const home = std.process.getEnvVarOwned(allocator, "HOME") catch { + const home = std.process.getEnvVarOwned(allocator, "HOME") catch + std.process.getEnvVarOwned(allocator, "USERPROFILE") catch { return std.fmt.allocPrint(allocator, "{s}/.codedb", .{abs_root}); }; defer allocator.free(home); @@ -873,22 +872,25 @@ fn idleWatchdog(shutdown: *std.atomic.Value(bool)) void { std.Thread.sleep(std.time.ns_per_s); } - // Quick liveness check: poll stdin for POLLHUP (client disconnected) + // Quick liveness check: poll stdin for HUP (client disconnected). + // Windows stdin is not a socket, so poll() is POSIX-only. const stdin = std.fs.File.stdin(); - var poll_fds = [_]std.posix.pollfd{.{ - .fd = stdin.handle, - .events = std.posix.POLL.IN | std.posix.POLL.HUP, - .revents = 0, - }}; - const poll_result = std.posix.poll(&poll_fds, 0) catch 0; - if (poll_result > 0 and (poll_fds[0].revents & std.posix.POLL.HUP) != 0) { - std.log.info("stdin closed (client disconnected), exiting", .{}); - stdin.close(); - shutdown.store(true, .release); - return; + if (comptime @import("builtin").os.tag != .windows) { + var poll_fds = [_]std.posix.pollfd{.{ + .fd = stdin.handle, + .events = std.posix.POLL.IN | std.posix.POLL.HUP, + .revents = 0, + }}; + const poll_result = std.posix.poll(&poll_fds, 0) catch 0; + if (poll_result > 0 and (poll_fds[0].revents & std.posix.POLL.HUP) != 0) { + std.log.info("stdin closed (client disconnected), exiting", .{}); + stdin.close(); + shutdown.store(true, .release); + return; + } } - // Fallback: idle timeout + // Idle timeout (primary exit mechanism on Windows) const last = mcp.last_activity.load(.acquire); if (last == 0) continue; const now = std.time.milliTimestamp(); diff --git a/src/mcp.zig b/src/mcp.zig index 4c009a4..6bbc8e8 100644 --- a/src/mcp.zig +++ b/src/mcp.zig @@ -4,6 +4,7 @@ // Uses mcp-zig for protocol utilities; adds roots support for workspace awareness. const std = @import("std"); +const compat = @import("compat.zig"); const mcp_lib = @import("mcp"); const mcpj = mcp_lib.json; const Root = mcp_lib.mcp.Root; @@ -96,7 +97,7 @@ const ProjectCache = struct { new_entry.store = Store.init(self.alloc); new_entry.last_used = now; - var snap_buf: [std.fs.max_path_bytes]u8 = undefined; + var snap_buf: [compat.path_buf_size]u8 = undefined; const snap_path = std.fmt.bufPrint(&snap_buf, "{s}/codedb.snapshot", .{p}) catch { new_entry.store.deinit(); new_entry.explorer.deinit(); @@ -108,9 +109,10 @@ const ProjectCache = struct { if (!snapshot_mod.loadSnapshot(snap_path, &new_entry.explorer, &new_entry.store, self.alloc)) { // Fallback: try central store at ~/.codedb/projects/{hash}/codedb.snapshot const hash = std.hash.Wyhash.hash(0, p); - var central_buf: [std.fs.max_path_bytes]u8 = undefined; + var central_buf: [compat.path_buf_size]u8 = undefined; const loaded_central = blk: { - const home = std.process.getEnvVarOwned(self.alloc, "HOME") catch break :blk false; + const home = std.process.getEnvVarOwned(self.alloc, "HOME") catch + std.process.getEnvVarOwned(self.alloc, "USERPROFILE") catch break :blk false; defer self.alloc.free(home); const central = std.fmt.bufPrint(¢ral_buf, "{s}/.codedb/projects/{x}/codedb.snapshot", .{ home, hash }) catch break :blk false; break :blk snapshot_mod.loadSnapshot(central, &new_entry.explorer, &new_entry.store, self.alloc); @@ -318,7 +320,7 @@ const Session = struct { } }; -pub fn run( +pub noinline fn run( alloc: std.mem.Allocator, store: *Store, explorer: *Explorer, @@ -385,7 +387,7 @@ pub fn run( } else if (mcpj.eql(method, "tools/list")) { if (!is_notification) writeResult(alloc, stdout, id, tools_list); } else if (mcpj.eql(method, "tools/call")) { - handleCall(alloc, root, stdout, id, store, explorer, agents, &cache, telem); + @call(.never_inline, handleCall, .{ alloc, root, stdout, id, store, explorer, agents, &cache, telem }); } else if (mcpj.eql(method, "ping")) { if (!is_notification) writeResult(alloc, stdout, id, "{}"); } else { @@ -569,7 +571,7 @@ fn handleCall( writeResult(alloc, stdout, id, result.items); } -fn dispatch( +noinline fn dispatch( alloc: std.mem.Allocator, tool: Tool, args: *const std.json.ObjectMap, @@ -594,16 +596,16 @@ fn dispatch( .codedb_word => handleWord(alloc, args, out, ctx.explorer), .codedb_hot => handleHot(alloc, args, out, ctx.store, ctx.explorer), .codedb_deps => handleDeps(alloc, args, out, ctx.explorer), - .codedb_read => handleRead(alloc, args, out, ctx.explorer), - .codedb_edit => handleEdit(alloc, args, out, default_store, default_explorer, agents), + .codedb_read => @call(.never_inline, handleRead, .{ alloc, args, out, ctx.explorer }), + .codedb_edit => @call(.never_inline, handleEdit, .{ alloc, args, out, default_store, default_explorer, agents }), .codedb_changes => handleChanges(alloc, args, out, default_store), .codedb_status => handleStatus(alloc, out, ctx.store, ctx.explorer), - .codedb_snapshot => handleSnapshot(alloc, out, ctx.explorer, ctx.store), - .codedb_bundle => handleBundle(alloc, args, out, ctx.store, ctx.explorer, agents, cache), - .codedb_remote => handleRemote(alloc, args, out), - .codedb_projects => handleProjects(alloc, out), - .codedb_index => handleIndex(alloc, args, out), - .codedb_find => handleFind(alloc, args, out, ctx.explorer), + .codedb_snapshot => @call(.never_inline, handleSnapshot, .{ alloc, out, ctx.explorer, ctx.store }), + .codedb_bundle => @call(.never_inline, handleBundle, .{ alloc, args, out, ctx.store, ctx.explorer, agents, cache }), + .codedb_remote => @call(.never_inline, handleRemote, .{ alloc, args, out }), + .codedb_projects => @call(.never_inline, handleProjects, .{ alloc, out }), + .codedb_index => @call(.never_inline, handleIndex, .{ alloc, args, out }), + .codedb_find => @call(.never_inline, handleFind, .{ alloc, args, out, ctx.explorer }), } } @@ -1165,8 +1167,9 @@ fn handleRemote(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: // ── Local project tools ───────────────────────────────────────────────────── fn handleProjects(alloc: std.mem.Allocator, out: *std.ArrayList(u8)) void { - const home = std.process.getEnvVarOwned(alloc, "HOME") catch { - out.appendSlice(alloc, "error: cannot read HOME") catch {}; + const home = std.process.getEnvVarOwned(alloc, "HOME") catch + std.process.getEnvVarOwned(alloc, "USERPROFILE") catch { + out.appendSlice(alloc, "error: cannot read HOME/USERPROFILE") catch {}; return; }; defer alloc.free(home); @@ -1189,7 +1192,7 @@ fn handleProjects(alloc: std.mem.Allocator, out: *std.ArrayList(u8)) void { if (entry.kind != .directory) continue; // Read project.txt to get the project path - var path_buf: [std.fs.max_path_bytes]u8 = undefined; + var path_buf: [compat.path_buf_size]u8 = undefined; const sub_path = std.fmt.bufPrint(&path_buf, "{s}/project.txt", .{entry.name}) catch continue; const project_file = dir.openFile(sub_path, .{}) catch continue; defer project_file.close(); @@ -1200,7 +1203,7 @@ fn handleProjects(alloc: std.mem.Allocator, out: *std.ArrayList(u8)) void { // Check if snapshot exists in the project directory var snap_exists = false; - var snap_path_buf: [std.fs.max_path_bytes]u8 = undefined; + var snap_path_buf: [compat.path_buf_size]u8 = undefined; const snap_path = std.fmt.bufPrint(&snap_path_buf, "{s}/codedb.snapshot", .{project_path}) catch project_path; if (std.fs.cwd().access(snap_path, .{})) |_| { snap_exists = true; @@ -1226,7 +1229,7 @@ fn handleIndex(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: * }; // Resolve to absolute path - var abs_buf: [std.fs.max_path_bytes]u8 = undefined; + var abs_buf: [compat.path_buf_size]u8 = undefined; const abs_path = std.fs.cwd().realpath(path, &abs_buf) catch { out.appendSlice(alloc, "error: cannot resolve path: ") catch {}; out.appendSlice(alloc, path) catch {}; @@ -1257,6 +1260,7 @@ fn handleIndex(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: * const result = std.process.Child.run(.{ .allocator = alloc, .argv = &.{ exe_path, abs_path, "snapshot" }, + .cwd = abs_path, .max_output_bytes = 64 * 1024, }) catch { out.appendSlice(alloc, "error: failed to run indexer") catch {}; @@ -1329,14 +1333,20 @@ fn handleFind(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: *s pub fn isPathSafe(path: []const u8) bool { if (path.len == 0) return false; - if (path[0] == '/') return false; + if (path[0] == '/' or path[0] == '\\') return false; // Block null bytes (path truncation attack) if (std.mem.indexOfScalar(u8, path, 0) != null) return false; - // Block backslash separators - if (std.mem.indexOfScalar(u8, path, '\\') != null) return false; - var it = std.mem.splitScalar(u8, path, '/'); - while (it.next()) |component| { + // Reject Windows drive letters (e.g. "C:\...") + if (path.len >= 2 and path[1] == ':') return false; + // Check for ".." traversal with both separator types + var rest: []const u8 = path; + while (rest.len > 0) { + const sep = for (rest, 0..) |c, i| { + if (c == '/' or c == '\\') break i; + } else rest.len; + const component = rest[0..sep]; if (std.mem.eql(u8, component, "..")) return false; + rest = if (sep < rest.len) rest[sep + 1 ..] else &.{}; } return true; } diff --git a/src/root_policy.zig b/src/root_policy.zig index ce150f4..a2659a8 100644 --- a/src/root_policy.zig +++ b/src/root_policy.zig @@ -1,20 +1,37 @@ const std = @import("std"); +const builtin = @import("builtin"); fn isExactOrChild(path: []const u8, prefix: []const u8) bool { if (!std.mem.startsWith(u8, path, prefix)) return false; - return path.len == prefix.len or path[prefix.len] == '/'; + if (path.len == prefix.len) return true; + const sep = path[prefix.len]; + return sep == '/' or sep == '\\'; +} + +fn isExactOrChildCaseInsensitive(path: []const u8, prefix: []const u8) bool { + if (path.len < prefix.len) return false; + for (path[0..prefix.len], prefix) |a, b| { + if (std.ascii.toLower(a) != std.ascii.toLower(b)) return false; + } + if (path.len == prefix.len) return true; + const sep = path[prefix.len]; + return sep == '/' or sep == '\\'; } pub fn isIndexableRoot(path: []const u8) bool { if (path.len == 0) return false; if (std.mem.eql(u8, path, "/")) return false; + // POSIX temp directories if (isExactOrChild(path, "/private/tmp")) return false; if (isExactOrChild(path, "/tmp")) return false; if (isExactOrChild(path, "/var/tmp")) return false; // Block home directory itself (not subdirectories) — prevents 17GB RAM spike (#174) - if (std.posix.getenv("HOME")) |home| { - if (home.len > 0 and std.mem.eql(u8, path, home)) return false; + // std.posix.getenv is unavailable on Windows; use USERPROFILE there. + if (comptime builtin.os.tag != .windows) { + if (std.posix.getenv("HOME")) |home| { + if (home.len > 0 and std.mem.eql(u8, path, home)) return false; + } } // Also block common home patterns directly if (std.mem.eql(u8, path, "/root")) return false; @@ -24,6 +41,15 @@ pub fn isIndexableRoot(path: []const u8) bool { if (std.mem.indexOfScalar(u8, rest, '/') == null and rest.len > 0) return false; } + // Windows temp directories (case-insensitive) + if (builtin.os.tag == .windows) { + if (isExactOrChildCaseInsensitive(path, "C:\\Windows\\Temp")) return false; + if (std.ascii.indexOfIgnoreCase(path, "\\AppData\\Local\\Temp")) |pos| { + const end = pos + "\\AppData\\Local\\Temp".len; + if (end == path.len or path[end] == '\\' or path[end] == '/') return false; + } + } + return true; } @@ -53,3 +79,13 @@ test "issue-80: /tmp is denied" { try testing.expect(!isIndexableRoot("/tmp/foo")); } +test "issue-80: Windows temp paths are denied" { + if (comptime builtin.os.tag != .windows) return error.SkipZigTest; + try testing.expect(!isIndexableRoot("C:\\Windows\\Temp")); + try testing.expect(!isIndexableRoot("C:\\Windows\\Temp\\codedb-test")); + try testing.expect(!isIndexableRoot("C:\\Users\\dev\\AppData\\Local\\Temp")); + try testing.expect(!isIndexableRoot("C:\\Users\\dev\\AppData\\Local\\Temp\\project")); + try testing.expect(isIndexableRoot("C:\\Users\\dev\\AppData\\Local\\TempProject")); + try testing.expect(isIndexableRoot("C:\\Users\\dev\\Projects\\myapp")); + try testing.expect(isIndexableRoot("D:\\GitHub\\codedb")); +} diff --git a/src/server.zig b/src/server.zig index 9336b9e..61f5574 100644 --- a/src/server.zig +++ b/src/server.zig @@ -588,13 +588,9 @@ fn handleConnection( // ── Response helpers ──────────────────────────────────────── fn isPathSafe(path: []const u8) bool { - if (path.len == 0) return false; - if (path[0] == '/') return false; - var it = std.mem.splitScalar(u8, path, '/'); - while (it.next()) |component| { - if (std.mem.eql(u8, component, "..")) return false; - } - return true; + // Delegate to the canonical implementation in mcp.zig + const mcp = @import("mcp.zig"); + return mcp.isPathSafe(path); } fn respondJson(conn: std.net.Server.Connection, status: []const u8, body: []const u8) void { diff --git a/src/snapshot.zig b/src/snapshot.zig index 8dc5e63..9733391 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -610,7 +610,8 @@ pub fn writeSnapshotDual( try writeSnapshot(explorer, root_path, output_path, allocator); const hash = std.hash.Wyhash.hash(0, root_path); - const home = std.process.getEnvVarOwned(allocator, "HOME") catch return; + const home = std.process.getEnvVarOwned(allocator, "HOME") catch + std.process.getEnvVarOwned(allocator, "USERPROFILE") catch return; defer allocator.free(home); const secondary = std.fmt.allocPrint(allocator, "{s}/.codedb/projects/{x}/codedb.snapshot", .{ home, hash }) catch return; defer allocator.free(secondary); diff --git a/src/store.zig b/src/store.zig index 75cc6bf..708e6fb 100644 --- a/src/store.zig +++ b/src/store.zig @@ -80,11 +80,28 @@ pub const Store = struct { if (diff) |d| { if (self.data_log) |log| { // Advisory lock for cross-process safety - const locked = blk: { + const locked = if (comptime @import("builtin").os.tag == .windows) blk: { + const ntdll = std.os.windows.ntdll; + const max_off: i64 = std.math.maxInt(i64); + const zero_off: i64 = 0; + var iosb: std.os.windows.IO_STATUS_BLOCK = undefined; + const status = ntdll.NtLockFile(log.handle, null, null, null, &iosb, &zero_off, &max_off, null, 0, 1); + break :blk (status == .SUCCESS); + } else blk: { log.lock(.exclusive) catch break :blk false; break :blk true; }; - defer if (locked) log.unlock(); + defer if (locked) { + if (comptime @import("builtin").os.tag == .windows) { + const ntdll = std.os.windows.ntdll; + const max_off: i64 = std.math.maxInt(i64); + const zero_off: i64 = 0; + var iosb2: std.os.windows.IO_STATUS_BLOCK = undefined; + _ = ntdll.NtUnlockFile(log.handle, &iosb2, &zero_off, &max_off, null); + } else { + log.unlock(); + } + }; // Re-stat to get current end position (another process may have appended) const stat = compat.fileStat(log) catch return error.Unexpected; diff --git a/src/telemetry.zig b/src/telemetry.zig index eaaa17b..771a368 100644 --- a/src/telemetry.zig +++ b/src/telemetry.zig @@ -38,7 +38,7 @@ pub const Telemetry = struct { file: ?std.fs.File = null, enabled: bool = true, buf: [4096]u8 = undefined, - path_buf: [std.fs.max_path_bytes]u8 = undefined, + path_buf: [compat.path_buf_size]u8 = undefined, path_len: usize = 0, call_count: u32 = 0, write_lock: std.Thread.Mutex = .{}, @@ -162,8 +162,9 @@ pub const Telemetry = struct { const stat = compat.dirStatFile(std.fs.cwd(), path) catch return; if (stat.size == 0) return; - // Use argv-based exec (no shell interpolation) to avoid injection - var data_arg_buf: [std.fs.max_path_bytes + 1]u8 = undefined; + // Use argv-based exec (no shell interpolation) to avoid injection. + // curl is available on modern Windows 10+ and all POSIX systems. + var data_arg_buf: [compat.path_buf_size + 1]u8 = undefined; const data_arg = std.fmt.bufPrint(&data_arg_buf, "@{s}", .{path}) catch return; var child = std.process.Child.init( diff --git a/src/tests.zig b/src/tests.zig index 19647c5..46f5aaa 100644 --- a/src/tests.zig +++ b/src/tests.zig @@ -1049,7 +1049,8 @@ test "explorer: removeFile frees owned map key" { try testing.expect(explorer.dep_graph.count() == 0); } test "watcher: queue overflow is explicit" { - var queue = watcher.EventQueue{}; + var queue = try watcher.EventQueue.init(); + defer queue.deinit(); var pushed: usize = 0; while (true) : (pushed += 1) { @@ -1068,7 +1069,8 @@ test "watcher: queue overflow is explicit" { } test "watcher: queue event copies path bytes" { - var queue = watcher.EventQueue{}; + var queue = try watcher.EventQueue.init(); + defer queue.deinit(); const original = try testing.allocator.dupe(u8, "tmp/deleted.zig"); try testing.expect(queue.push(watcher.FsEvent.init(original, .deleted, 99) orelse unreachable)); testing.allocator.free(original); @@ -1452,7 +1454,8 @@ test "regression: searchContent frees empty trigram candidate slice" { } test "regression: queue push stays non-blocking when full" { - var queue = watcher.EventQueue{}; + var queue = try watcher.EventQueue.init(); + defer queue.deinit(); var pushed: usize = 0; while (true) : (pushed += 1) { @@ -1497,6 +1500,19 @@ test "isPathSafe: accepts valid relative paths" { try testing.expect(mcp.isPathSafe("a/b/c/d.txt")); } +test "isPathSafe: rejects Windows-style paths" { + const mcp = @import("mcp.zig"); + // Backslash prefix + try testing.expect(!mcp.isPathSafe("\\Windows\\System32")); + // Drive letters + try testing.expect(!mcp.isPathSafe("C:\\Windows\\System32")); + try testing.expect(!mcp.isPathSafe("D:\\secret.txt")); + // Backslash traversal + try testing.expect(!mcp.isPathSafe("foo\\..\\..\\etc\\passwd")); + // Mixed separators with traversal + try testing.expect(!mcp.isPathSafe("foo/..\\..\\secret")); +} + test "snapshot_json: snapshot builds and is valid JSON" { // Explorer uses arena for internal data var arena = std.heap.ArenaAllocator.init(testing.allocator); @@ -3561,7 +3577,13 @@ test "issue-60: telemetry disabled path is a no-op" { test "issue-77: mcp index accepts temporary-directory roots that cause pathological cache growth" { var tmp_name_buf: [128]u8 = undefined; const tmp_name = try std.fmt.bufPrint(&tmp_name_buf, "codedb-issue-77-{d}", .{std.time.microTimestamp()}); - const tmp_root = try std.fs.path.join(testing.allocator, &.{ "/private/tmp", tmp_name }); + const tmp_base = if (comptime @import("builtin").os.tag == .windows) + std.process.getEnvVarOwned(testing.allocator, "TEMP") catch + return error.SkipZigTest + else + try testing.allocator.dupe(u8, "/private/tmp"); + defer testing.allocator.free(tmp_base); + const tmp_root = try std.fs.path.join(testing.allocator, &.{ tmp_base, tmp_name }); defer testing.allocator.free(tmp_root); std.fs.cwd().makePath(tmp_root) catch |err| switch (err) { @@ -4577,23 +4599,20 @@ test "issue-148: idle timeout is 10 minutes" { } test "issue-148: POLLHUP detects closed pipe" { - // Verify the polling infrastructure works for pipe-based transports + if (comptime @import("builtin").os.tag == .windows) return error.SkipZigTest; const pipe = try std.posix.pipe(); - defer std.posix.close(pipe[0]); - - // Close write end — simulates client disconnect std.posix.close(pipe[1]); - // Poll should detect POLLHUP on the read end - var fds = [_]std.posix.pollfd{.{ + var poll_fds = [_]std.posix.pollfd{.{ .fd = pipe[0], - .events = std.posix.POLL.IN, + .events = std.posix.POLL.IN | std.posix.POLL.HUP, .revents = 0, }}; - const n = try std.posix.poll(&fds, 100); // 100ms timeout - try testing.expect(n > 0); - try testing.expect((fds[0].revents & std.posix.POLL.HUP) != 0); + const result = try std.posix.poll(&poll_fds, 0); + try testing.expect(result > 0); + try testing.expect((poll_fds[0].revents & std.posix.POLL.HUP) != 0); + std.posix.close(pipe[0]); } test "issue-148: idle watchdog exits on shutdown signal" { @@ -4670,6 +4689,7 @@ const MmapTrigramIndex = @import("index.zig").MmapTrigramIndex; const AnyTrigramIndex = @import("index.zig").AnyTrigramIndex; test "issue-164: mmap trigram index returns same candidates as heap index" { + if (comptime @import("builtin").os.tag == .windows) return error.SkipZigTest; var arena = std.heap.ArenaAllocator.init(testing.allocator); defer arena.deinit(); const allocator = arena.allocator(); @@ -4710,6 +4730,7 @@ test "issue-164: mmap trigram index returns same candidates as heap index" { } test "issue-164: mmap binary search on sorted lookup table" { + if (comptime @import("builtin").os.tag == .windows) return error.SkipZigTest; var arena = std.heap.ArenaAllocator.init(testing.allocator); defer arena.deinit(); const allocator = arena.allocator(); @@ -4750,6 +4771,7 @@ test "issue-164: mmap handles missing files gracefully" { } test "issue-164: AnyTrigramIndex dispatches to mmap variant" { + if (comptime @import("builtin").os.tag == .windows) return error.SkipZigTest; var arena = std.heap.ArenaAllocator.init(testing.allocator); defer arena.deinit(); const allocator = arena.allocator(); diff --git a/src/watcher.zig b/src/watcher.zig index 7492085..51687bd 100644 --- a/src/watcher.zig +++ b/src/watcher.zig @@ -10,14 +10,14 @@ pub const EventKind = enum(u8) { }; pub const FsEvent = struct { - path_buf: [std.fs.max_path_bytes]u8 = undefined, + path_buf: [compat.path_buf_size]u8 = undefined, path_len: usize, kind: EventKind, seq: u64, pub fn init(src_path: []const u8, kind: EventKind, seq: u64) ?FsEvent { // Gracefully skip paths exceeding the max instead of panicking. - if (src_path.len > std.fs.max_path_bytes) return null; + if (src_path.len > compat.path_buf_size) return null; var event = FsEvent{ .path_len = src_path.len, .kind = kind, @@ -35,11 +35,23 @@ pub const FsEvent = struct { pub const EventQueue = struct { const CAPACITY = 4096; - events: [CAPACITY]?FsEvent = [_]?FsEvent{null} ** CAPACITY, + // Heap-allocated: each ?FsEvent is ~1KB on Windows (compat.path_buf_size), + // so an inline [4096]?FsEvent would be ~4MB — risky on constrained stacks. + events: *[CAPACITY]?FsEvent, head: std.atomic.Value(usize) = std.atomic.Value(usize).init(0), tail: std.atomic.Value(usize) = std.atomic.Value(usize).init(0), mu: std.Thread.Mutex = .{}, + pub fn init() !EventQueue { + const events = try std.heap.page_allocator.create([CAPACITY]?FsEvent); + events.* = [_]?FsEvent{null} ** CAPACITY; + return .{ .events = events }; + } + + pub fn deinit(self: *EventQueue) void { + std.heap.page_allocator.destroy(self.events); + } + pub fn push(self: *EventQueue, event: FsEvent) bool { self.mu.lock(); defer self.mu.unlock(); @@ -118,6 +130,10 @@ const skip_dirs = [_][]const u8{ ".bundle", }; +fn isSep(c: u8) bool { + return c == '/' or (comptime @import("builtin").os.tag == .windows and c == '\\'); +} + fn shouldSkip(path: []const u8) bool { // Check each path component against skip list var rest = path; @@ -125,13 +141,14 @@ fn shouldSkip(path: []const u8) bool { for (skip_dirs) |skip| { if (rest.len >= skip.len and std.mem.eql(u8, rest[0..skip.len], skip) and - (rest.len == skip.len or rest[skip.len] == '/')) + (rest.len == skip.len or isSep(rest[skip.len]))) return true; } - // Advance to next component - if (std.mem.indexOfScalar(u8, rest, '/')) |sep| { - rest = rest[sep + 1 ..]; + // Advance to next component (handle both / and \ separators) + const sep = for (rest, 0..) |c, i| { + if (isSep(c)) break i; } else break; + rest = rest[sep + 1 ..]; } return false; } @@ -653,7 +670,13 @@ fn indexFileContent(explorer: *Explorer, dir: std.fs.Dir, path: []const u8, allo fn drainNotifyFile(store: *Store, explorer: *Explorer, queue: *EventQueue, known: *FileMap, root: []const u8, alloc: std.mem.Allocator) void { // Atomically read + truncate - const notify_path = "/tmp/codedb-notify"; + const notify_path = if (comptime @import("builtin").os.tag == .windows) blk: { + const tmp = std.process.getEnvVarOwned(alloc, "TEMP") catch + std.process.getEnvVarOwned(alloc, "TMP") catch return; + defer alloc.free(tmp); + break :blk std.fmt.allocPrint(alloc, "{s}\\codedb-notify", .{tmp}) catch return; + } else "/tmp/codedb-notify"; + defer if (comptime @import("builtin").os.tag == .windows) alloc.free(notify_path); const file = std.fs.cwd().openFile(notify_path, .{ .mode = .read_write }) catch return; defer file.close(); @@ -661,9 +684,9 @@ fn drainNotifyFile(store: *Store, explorer: *Explorer, queue: *EventQueue, known defer alloc.free(data); if (data.len == 0) return; - // Truncate after reading + // Truncate after reading (setEndPos is cross-platform) file.seekTo(0) catch return; - std.posix.ftruncate(file.handle, 0) catch return; + file.setEndPos(0) catch return; // Re-index each notified path var dir = std.fs.cwd().openDir(root, .{}) catch return; @@ -674,12 +697,25 @@ fn drainNotifyFile(store: *Store, explorer: *Explorer, queue: *EventQueue, known const path = std.mem.trim(u8, line, " \t\r"); if (path.len == 0) continue; - // Make path relative to root if it's absolute - const rel = if (std.mem.startsWith(u8, path, root)) - std.mem.trimLeft(u8, path[root.len..], "/") + // Make path relative to root if it's absolute (handle both / and \ separators) + const raw_rel = if (std.mem.startsWith(u8, path, root)) + std.mem.trimLeft(u8, path[root.len..], "/\\") else path; + // Normalize backslashes to forward slashes so the path matches the + // walker's convention and avoids duplicate entries in the explorer. + var norm_buf: [compat.path_buf_size]u8 = undefined; + const rel = if (comptime @import("builtin").os.tag == .windows) blk: { + if (raw_rel.len > norm_buf.len) continue; + @memcpy(norm_buf[0..raw_rel.len], raw_rel); + const s = norm_buf[0..raw_rel.len]; + for (s) |*c| { + if (c.* == '\\') c.* = '/'; + } + break :blk s; + } else raw_rel; + indexFileContent(explorer, dir, rel, alloc, false) catch continue; // Update known-file state so incrementalDiff doesn't double-process