Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 213 additions & 7 deletions src/explore.zig
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,196 @@ pub const Explorer = struct {
if (prior_outline) |*old_outline| old_outline.deinit();
}

fn computeSymbolEnds(content: []const u8, outline: *FileOutline) void {
    if (outline.symbols.items.len == 0) return;

    // Build a table of byte offsets where each line begins (line N starts at
    // line_offsets[N-1]) so the per-language end scanners get O(1) line lookups.
    var line_offsets: std.ArrayList(usize) = .{};
    defer line_offsets.deinit(outline.allocator);
    line_offsets.append(outline.allocator, 0) catch return; // line 1 starts at offset 0
    for (content, 0..) |c, i| {
        // The previous guard `i + 1 <= content.len` was always true here
        // (the loop guarantees i < content.len), so it is removed as dead
        // code. Behavior is unchanged: a trailing '\n' still records one
        // final phantom line start at content.len, which the scanners only
        // use as a clamp/terminator.
        if (c == '\n') {
            // Best-effort on OOM: bail and leave remaining line_end defaults.
            line_offsets.append(outline.allocator, i + 1) catch return;
        }
    }
    const total_lines: u32 = @intCast(line_offsets.items.len);

    // Brace-delimited languages share one scanner; Python and Ruby use
    // indentation-based and `end`-keyword heuristics respectively.
    const is_brace_lang = outline.language == .zig or outline.language == .c or
        outline.language == .cpp or outline.language == .typescript or
        outline.language == .javascript or outline.language == .rust or
        outline.language == .go_lang or outline.language == .php;

    for (outline.symbols.items) |*sym| {
        // Single-line symbol kinds never get a computed end line.
        switch (sym.kind) {
            .import, .variable, .constant, .comment_block, .type_alias, .macro_def => continue,
            else => {},
        }

        // Defensive: skip symbols whose recorded start line is out of range.
        if (sym.line_start == 0 or sym.line_start > total_lines) continue;

        if (is_brace_lang) {
            sym.line_end = findBraceEnd(content, line_offsets.items, sym.line_start, total_lines);
        } else if (outline.language == .python) {
            sym.line_end = findPythonEnd(content, line_offsets.items, sym.line_start, total_lines);
        } else if (outline.language == .ruby) {
            sym.line_end = findRubyEnd(content, line_offsets.items, sym.line_start, total_lines);
        }
    }
}

/// Scan forward from `line_start` and return the line holding the brace that
/// closes the symbol's body. Strings, line comments, and block comments are
/// skipped so braces inside them are not counted. If no opening brace shows up
/// within 10 lines (or at all), `line_start` is returned; an opening brace
/// that is never closed yields `total_lines`.
fn findBraceEnd(content: []const u8, line_offsets: []const usize, line_start: u32, total_lines: u32) u32 {
    const State = enum { code, line_comment, block_comment, string };

    var state: State = .code;
    var quote: u8 = 0; // active string delimiter while state == .string
    var nesting: i32 = 0;
    var saw_open = false;
    var line_no = line_start;

    var pos = line_offsets[line_start - 1];
    while (pos < content.len) : (pos += 1) {
        const ch = content[pos];

        if (ch == '\n') {
            line_no += 1;
            if (state == .line_comment) state = .code;
            // Give up if no opening brace appeared within 10 lines.
            if (!saw_open and line_no > line_start + 10) return line_start;
            continue;
        }

        switch (state) {
            .line_comment => {},
            .block_comment => {
                if (ch == '*' and pos + 1 < content.len and content[pos + 1] == '/') {
                    state = .code;
                    pos += 1;
                }
            },
            .string => {
                if (ch == '\\') {
                    pos += 1; // skip the escaped character
                } else if (ch == quote) {
                    state = .code;
                }
            },
            .code => {
                if (ch == '/' and pos + 1 < content.len and content[pos + 1] == '/') {
                    state = .line_comment;
                } else if (ch == '/' and pos + 1 < content.len and content[pos + 1] == '*') {
                    state = .block_comment;
                    pos += 1;
                } else if (ch == '"' or ch == '\'') {
                    state = .string;
                    quote = ch;
                } else if (ch == '{') {
                    nesting += 1;
                    saw_open = true;
                } else if (ch == '}') {
                    nesting -= 1;
                    if (saw_open and nesting == 0) return @min(line_no, total_lines);
                }
            },
        }
    }

    return if (saw_open) total_lines else line_start;
}

/// Return the last line of a Python symbol's body using indentation.
/// The body is every following line indented deeper than the signature;
/// blank and comment-only lines never terminate it. Returns `line_start`
/// when no body is found.
fn findPythonEnd(content: []const u8, line_offsets: []const usize, line_start: u32, total_lines: u32) u32 {
    if (line_start >= total_lines) return line_start;

    // Indentation of the `def`/`class` line; the body must be indented deeper.
    const sig_offset = line_offsets[line_start - 1];
    const sig_indent = countIndent(content, sig_offset);

    // Locate the colon that terminates the signature header. Annotation
    // colons (e.g. `def f(a: int,` in a multi-line signature) sit inside
    // brackets, so only a ':' at bracket depth 0 ends the header. The old
    // code accepted the first ':' on any line, ending the header too early
    // for annotated multi-line signatures.
    var body_start = line_start + 1;
    {
        var depth: i32 = 0;
        var hln = line_start;
        scan: while (hln <= total_lines) : (hln += 1) {
            const lo = line_offsets[hln - 1];
            const le = if (hln < total_lines) line_offsets[hln] else content.len;
            for (content[lo..le]) |c| {
                switch (c) {
                    '(', '[', '{' => depth += 1,
                    ')', ']', '}' => depth -= 1,
                    ':' => if (depth <= 0) {
                        body_start = hln + 1;
                        break :scan;
                    },
                    '#' => break, // rest of the line is a comment
                    else => {},
                }
            }
        }
    }

    // Walk the body: it ends at the first non-blank, non-comment line whose
    // indentation drops back to (or below) the signature's indentation.
    var last_body_line = line_start;
    var ln = body_start;
    while (ln <= total_lines) : (ln += 1) {
        const lo = line_offsets[ln - 1];
        const le = if (ln < total_lines) line_offsets[ln] else content.len;
        const trimmed = std.mem.trim(u8, content[lo..le], " \t\r\n");

        // Blank lines and comment-only lines do not terminate the body.
        if (trimmed.len == 0 or std.mem.startsWith(u8, trimmed, "#")) continue;

        if (countIndent(content, lo) <= sig_indent) break;
        last_body_line = ln;
    }

    return if (last_body_line > line_start) last_body_line else line_start;
}

/// Return the line of the `end` keyword that closes a Ruby definition:
/// the first line consisting solely of `end` whose indentation is at or
/// below the definition line's indentation. Returns `line_start` when no
/// such line exists.
fn findRubyEnd(content: []const u8, line_offsets: []const usize, line_start: u32, total_lines: u32) u32 {
    if (line_start >= total_lines) return line_start;

    const def_offset = line_offsets[line_start - 1];
    const def_indent = countIndent(content, def_offset);

    var line_no: u32 = line_start + 1;
    while (line_no <= total_lines) : (line_no += 1) {
        const begin = line_offsets[line_no - 1];
        const stop = if (line_no < total_lines) line_offsets[line_no] else content.len;
        const stripped = std.mem.trim(u8, content[begin..stop], " \t\r\n");

        // Only a bare `end` at the definition's indent level (or shallower)
        // closes the body; deeper `end`s belong to nested blocks.
        if (!std.mem.eql(u8, stripped, "end")) continue;
        if (countIndent(content, begin) <= def_indent) return line_no;
    }

    return line_start;
}

/// Measure the leading whitespace width starting at `offset`: each space
/// counts as 1 column and each tab as 4. Stops at the first non-indent byte.
fn countIndent(content: []const u8, offset: usize) usize {
    var width: usize = 0;
    for (content[offset..]) |c| {
        switch (c) {
            ' ' => width += 1,
            '\t' => width += 4,
            else => break,
        }
    }
    return width;
}

fn parseOutlineWithParser(parser: *Explorer, path: []const u8, content: []const u8) !FileOutline {
var outline = FileOutline.init(parser.allocator, path);
errdefer outline.deinit();
Expand Down Expand Up @@ -422,6 +612,7 @@ fn parseOutlineWithParser(parser: *Explorer, path: []const u8, content: []const
prev_line_trimmed = trimmed;
}
outline.line_count = line_num;
computeSymbolEnds(content, &outline);
return outline;
}

Expand Down Expand Up @@ -809,14 +1000,29 @@ pub fn parseContentForIndexing(allocator: std.mem.Allocator, path: []const u8, c
if (result_list.items.len >= max_results) break;
}
} else {
var iter = self.outlines.keyIterator();
while (iter.next()) |key_ptr| {
const ref = self.readContentForSearch(key_ptr.*, allocator) orelse continue;
defer ref.deinit();
try searchInContent(key_ptr.*, ref.data, query, allocator, max_results, &result_list);
if (result_list.items.len >= max_results) break;
// No trigram/sparse candidates — use word_index to narrow (#250)
const word_hits = self.word_index.search(query);
if (word_hits.len > 0) {
var word_paths = std.StringHashMap(void).init(allocator);
Comment on lines +1004 to +1006
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Preserve full-scan fallback when word index yields partial candidates

When trigram/sparse candidates are empty, this path searches only word_index.search(query) hits if any exist. That lookup is exact-token based (WordIndex.search), but searchInContent matches case-insensitive substrings, so files containing valid substring matches (for example foo inside foobar, especially for short queries where trigram is disabled) are skipped whenever at least one exact-token hit exists. This changes searchContent from exhaustive to lossy and can silently miss real results.

Useful? React with 👍 / 👎.

defer word_paths.deinit();
for (word_hits) |hit| word_paths.put(hit.path, {}) catch {};
var wp_iter = word_paths.keyIterator();
while (wp_iter.next()) |key_ptr| {
const ref = self.readContentForSearch(key_ptr.*, allocator) orelse continue;
defer ref.deinit();
try searched.put(key_ptr.*, {});
try searchInContent(key_ptr.*, ref.data, query, allocator, max_results, &result_list);
if (result_list.items.len >= max_results) break;
}
} else {
var iter = self.outlines.keyIterator();
while (iter.next()) |key_ptr| {
const ref = self.readContentForSearch(key_ptr.*, allocator) orelse continue;
defer ref.deinit();
try searchInContent(key_ptr.*, ref.data, query, allocator, max_results, &result_list);
if (result_list.items.len >= max_results) break;
}
}
return result_list.toOwnedSlice(allocator);
}
}

Expand Down
8 changes: 8 additions & 0 deletions src/mcp.zig
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,14 @@ const ProjectCache = struct {

loadProjectTrigramFromDiskIfPresent(&new_entry.explorer, p, self.alloc);

// Release raw file contents retained by the snapshot load — outlines,
// trigram index, and word index are sufficient for all query tools.
const fc = new_entry.explorer.outlines.count();
if (fc > 1000) {
new_entry.explorer.releaseContents();
new_entry.explorer.releaseSecondaryIndexes();
}

// Find free slot or evict LRU
var target_slot: usize = 0;
var found_free = false;
Expand Down
42 changes: 16 additions & 26 deletions src/snapshot.zig
Original file line number Diff line number Diff line change
Expand Up @@ -413,32 +413,27 @@ pub fn loadSnapshotValidated(
const file = std.fs.cwd().openFile(snapshot_path, .{}) catch return false;
defer file.close();

// Validate magic
var magic_buf: [4]u8 = undefined;
const lmn = file.readAll(&magic_buf) catch return false;
if (lmn != 4) return false;
if (!std.mem.eql(u8, &magic_buf, &MAGIC)) return false;

// Read section table
const sections_opt = readSections(snapshot_path, allocator) catch return false;
var sections = sections_opt orelse return false;
// Read section table (validates magic internally) — reuse already-open file (#253)
file.seekTo(0) catch return false;
var sections = (readSectionsFromFile(file, allocator) catch return false) orelse return false;
defer sections.deinit();

// Parse META section to get expected file_count and root_hash
var expected_file_count: ?u32 = null;
var meta_root_hash: ?u64 = null;
if (sections.get(@intFromEnum(SectionId.meta))) |meta_entry| {
const meta_bytes = readSectionBytes(snapshot_path, .meta, allocator) catch null;
if (meta_bytes) |mb| {
if (meta_entry.length <= 256 * 1024 * 1024) blk: {
file.seekTo(meta_entry.offset) catch break :blk;
const mb = allocator.alloc(u8, @intCast(meta_entry.length)) catch break :blk;
defer allocator.free(mb);
// Simple integer extraction from JSON: "file_count":NNN
const nr = file.readAll(mb) catch break :blk;
if (nr != mb.len) break :blk;
if (parseJsonU32(mb, "file_count")) |fc| {
expected_file_count = fc;
}
if (parseJsonU64(mb, "root_hash")) |rh| {
meta_root_hash = rh;
}
_ = meta_entry;
}
}

Expand All @@ -460,23 +455,20 @@ pub fn loadSnapshotValidated(
// Load CONTENT section — this is the core data
const content_entry = sections.get(@intFromEnum(SectionId.content)) orelse return false;

const content_file = std.fs.cwd().openFile(snapshot_path, .{}) catch return false;
defer content_file.close();

// Validate content section fits within actual file size (issue-40: truncation detection)
const file_stat = compat.fileStat(content_file) catch return false;
const file_stat = compat.fileStat(file) catch return false;
const file_size = file_stat.size;
if (content_entry.offset + content_entry.length > file_size) return false;

content_file.seekTo(content_entry.offset) catch return false;
file.seekTo(content_entry.offset) catch return false;

const snap_mtime: i128 = file_stat.mtime;
var bytes_read: u64 = 0;
var file_count: u32 = 0;
while (bytes_read < content_entry.length) {
// Read path_len(u16)
var pl_buf: [2]u8 = undefined;
const pln = content_file.readAll(&pl_buf) catch return false;
const pln = file.readAll(&pl_buf) catch return false;
if (pln != 2) break;
const path_len = std.mem.readInt(u16, &pl_buf, .little);
if (path_len == 0 or path_len > 4096) break; // sanity cap
Expand All @@ -485,13 +477,13 @@ pub fn loadSnapshotValidated(
// Read path
const path_buf = allocator.alloc(u8, path_len) catch return false;
defer allocator.free(path_buf);
const prn = content_file.readAll(path_buf) catch return false;
const prn = file.readAll(path_buf) catch return false;
if (prn != path_len) break;
bytes_read += path_len;

// Read content_len(u32)
var cl_buf: [4]u8 = undefined;
const cln = content_file.readAll(&cl_buf) catch return false;
const cln = file.readAll(&cl_buf) catch return false;
if (cln != 4) break;
const content_len = std.mem.readInt(u32, &cl_buf, .little);
if (content_len > 64 * 1024 * 1024) break; // sanity cap: 64MB per file
Expand All @@ -500,7 +492,7 @@ pub fn loadSnapshotValidated(
// Read content
const content = allocator.alloc(u8, content_len) catch return false;
defer allocator.free(content);
const crn = content_file.readAll(content) catch return false;
const crn = file.readAll(content) catch return false;
if (crn != content_len) break;
bytes_read += content_len;

Expand Down Expand Up @@ -539,15 +531,13 @@ pub fn loadSnapshotValidated(
if (freq_entry.length == 256 * 256 * 2) {
const index_mod = @import("index.zig");
const ft = allocator.create([256][256]u16) catch return file_count > 0;
const freq_file = std.fs.cwd().openFile(snapshot_path, .{}) catch return file_count > 0;
defer freq_file.close();
freq_file.seekTo(freq_entry.offset) catch {
file.seekTo(freq_entry.offset) catch {
allocator.destroy(ft);
return file_count > 0;
};
var row_buf: [256 * 2]u8 = undefined;
for (0..256) |a| {
if (freq_file.readAll(&row_buf) catch {
if (file.readAll(&row_buf) catch {
allocator.destroy(ft);
return file_count > 0;
} != 512) {
Expand Down
Loading
Loading