sudo-tee · jensenojs · Apr 20, 2026 · Apr 20, 2026
diff --git a/.gitignore b/.gitignore
@@ -22,3 +22,7 @@ deps/
 
 # Local Claude settings (keep out of repo)
 .claude/
+
+# Dependency topology tool artifacts
+scripts/dependency-topology/__pycache__/
+*.html
diff --git a/AGENTS.md b/AGENTS.md
@@ -11,3 +11,13 @@
 - **Comments:** Avoid obvious comments that merely restate what the code does. Only add comments when necessary to explain _why_ something is done, not _what_ is being done. Prefer self-explanatory code.
 - **Config:** Centralize in `config.lua`. Use deep merge for user overrides.
 - **Types:** Use Lua annotations (`---@class`, `---@field`, etc.) for public APIs/config.
+
+## Dependency Topology Tool
+
+Use `scripts/dependency-topology/scan_topology.py` to inspect and track architectural layering.
+
+- Use `python3 scripts/dependency-topology/scan_topology.py scan` to inspect the gap between the current state and the target policy
+- Use the `diff` subcommand to inspect the direction of change (improved/regressed/neutral) between two snapshots
+- Pass `--snapshot <git-ref>` for historical snapshots
+- Pass `--json` when feeding outputs into scripts or agents
+- Keep architecture cleanup discussions anchored on scanner output instead of ad-hoc grep chains
diff --git a/scripts/dependency-topology/AGENTS.md b/scripts/dependency-topology/AGENTS.md
@@ -0,0 +1,71 @@
+# Dependency Topology Scanner
+
+Static analysis tool for Lua codebase dependency architecture.
+
+## File Structure
+
+```
+scripts/dependency-topology/
+├── scan_topology.py   # CLI entry: scan / diff subcommands
+├── scan_analysis.py   # Core analysis: groups, edge rules, payload builders
+├── graph_utils.py     # Snapshot loading (require parsing) + graph algorithms (Tarjan SCC, back edges, degree)
+├── html_renderer.py   # Interactive dagre-d3 + d3v5 HTML visualization
+└── topology.jsonc     # Group definitions + review comments (strategy file)
+```
+
+## Quick Start
+
+```bash
+# Scan current HEAD → generate interactive HTML
+python3 scripts/dependency-topology/scan_topology.py scan
+
+# Output to specific path
+python3 scripts/dependency-topology/scan_topology.py scan -o /tmp/deps.html
+
+# JSON output (for scripts/agents)
+python3 scripts/dependency-topology/scan_topology.py scan --json
+
+# Diff — smart default:
+#   worktree has uncommitted Lua changes → HEAD vs worktree
+#   worktree is clean                   → HEAD~1 vs HEAD (last commit)
+python3 scripts/dependency-topology/scan_topology.py diff
+
+# Compare specific refs (branch names, commit SHAs, remote refs)
+python3 scripts/dependency-topology/scan_topology.py diff --from upstream/main --to clean-code-remove-core
+python3 scripts/dependency-topology/scan_topology.py diff --from HEAD~5 --to HEAD
+```
+
+## Snapshot References
+
+- `worktree` — current working tree (uncommitted changes)
+- `HEAD` — latest commit
+- Any git ref — branch name (e.g. `upstream/main`), tag, short or full commit SHA
+- Relative refs — `HEAD~1`, `HEAD^`
+
+**diff defaults (no args):**
+- Worktree has uncommitted Lua changes → `HEAD` vs `worktree`
+- Worktree is clean → `HEAD~1` vs `HEAD`
+
+Note: ambiguous short names (e.g. `upstream` when both a local branch and a remote of that name exist)
+make git emit a warning. Prefer fully-qualified refs: `upstream/main`, `refs/heads/mybranch`.
+
+## Output
+
+**scan:** One-line summary + HTML file path
+```
+4 cycles, 20 violations, violations=20 → /path/to/dependency-graph.html
+```
+
+**diff:** Change direction summary
+```
+HEAD → worktree: +2/-1 edges, improved=1, regressed=0
+```
+
+## JSON Output Signals
+
+When using `--json`:
+
+- `health` — one-glance status for cycles / violations / ungrouped coverage
+- `cycles` — SCC details with severity, members_by_layer, example_cycle, back_edges_in_scc
+- `violations` — policy violations grouped by rule with full edge lists
+- `group_coverage` — module counts per layer (including ungrouped)
diff --git a/scripts/dependency-topology/graph_utils.py b/scripts/dependency-topology/graph_utils.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+"""Repository-local static Lua dependency graph helpers.
+
+Mechanism only:
+- Parse `require('opencode.*')` edges from `lua/opencode/**/*.lua`
+- Build snapshot graph from worktree or git ref
+- Provide SCC / back-edge utilities
+"""
+
+from __future__ import annotations
+
+from collections import Counter, defaultdict
+from dataclasses import dataclass
+from pathlib import Path
+import re
+import subprocess
+from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple
+
+
+# Regexes that pull the module name out of Lua `require` calls targeting the
+# `opencode` namespace (`opencode` itself or any `opencode.<...>` submodule).
+# Capture group 1 is the module string without its quotes. Matching is purely
+# textual: the patterns have no notion of Lua comments or string literals.
+REQUIRE_PATTERNS = [
+    # Parenthesized form: require('opencode.x') / require("opencode.x")
+    re.compile(r"require\s*\(\s*['\"](opencode(?:\.[^'\"]+)?)['\"]\s*\)"),
+    # Paren-less form: require 'opencode.x' / require "opencode.x"
+    re.compile(r"require\s+['\"](opencode(?:\.[^'\"]+)?)['\"]"),
+]
+
+
+# Module dependency graph extracted from one snapshot of the repository.
+@dataclass
+class SnapshotGraph:
+    snapshot: str  # snapshot identifier as given by the caller ("worktree" or a git ref)
+    files: int  # number of .lua files read for this snapshot
+    nodes: Dict[str, str]  # module -> relative file path
+    edges: Set[Tuple[str, str]]  # (requiring module, required module) pairs
+
+
+# Convert a repo-relative file path into its dotted Lua module name.
+#   "lua/opencode/util.lua"    -> "opencode.util"
+#   "lua/opencode/ui/init.lua" -> "opencode.ui"
+#   "lua/opencode/init.lua"    -> "opencode"
+# Returns None when the path is not under "lua/opencode/" or does not end in
+# ".lua". The prefix check includes the trailing slash, so "lua/opencode.lua"
+# also yields None.
+def module_from_relpath(relpath: str) -> Optional[str]:
+    if not relpath.startswith("lua/opencode/") or not relpath.endswith(".lua"):
+        return None
+    # Strip the leading "lua/" and the trailing ".lua".
+    mod = relpath[len("lua/") : -len(".lua")]
+    # "<dir>/init.lua" is addressed by the name of "<dir>".
+    if mod.endswith("/init"):
+        mod = mod[: -len("/init")]
+    return mod.replace("/", ".")
+
+
+# Read every "*.lua" file found recursively under "<repo>/lua/opencode" on disk.
+# Returns (repo-relative POSIX path, file text) pairs in rglob order.
+# The directory is walked directly; git is not consulted here.
+def _worktree_files(repo: Path) -> List[Tuple[str, str]]:
+    out: List[Tuple[str, str]] = []
+    base = repo / "lua" / "opencode"
+    for fp in base.rglob("*.lua"):
+        # as_posix() gives forward slashes regardless of platform, matching
+        # the "lua/opencode/" prefix that module_from_relpath checks for.
+        rel = fp.relative_to(repo).as_posix()
+        # errors="ignore" drops undecodable bytes instead of raising.
+        text = fp.read_text(encoding="utf-8", errors="ignore")
+        out.append((rel, text))
+    return out
+
+
+# Read every ".lua" file under "lua/opencode" as stored at git ref `ref`.
+#
+# Returns (repo-relative path, file text) pairs in `git ls-tree` output order.
+# Raises ValueError when `git ls-tree` exits non-zero for `ref`; git's stderr
+# text is included in the message.
+def _git_files(repo: Path, ref: str) -> List[Tuple[str, str]]:
+    cmd = ["git", "ls-tree", "-r", "--name-only", ref, "lua/opencode"]
+    try:
+        ls = subprocess.check_output(cmd, cwd=repo, text=True, stderr=subprocess.PIPE)
+    except subprocess.CalledProcessError as e:
+        stderr = e.stderr.strip() if e.stderr else ""
+        # `from None` suppresses the chained CalledProcessError traceback.
+        raise ValueError(
+            f"Invalid snapshot ref '{ref}'. Valid values: HEAD, worktree, branch name, commit SHA.\n"
+            f"git error: {stderr}"
+        ) from None
+
+    out: List[Tuple[str, str]] = []
+    for rel in ls.splitlines():
+        if not rel.endswith(".lua"):
+            continue
+        show_cmd = ["git", "show", f"{ref}:{rel}"]
+        # NOTE(review): text=True decodes with the default encoding in strict
+        # mode, whereas _worktree_files reads utf-8 with errors="ignore"; a
+        # blob with undecodable bytes would raise UnicodeDecodeError here.
+        # Confirm that difference is intended.
+        try:
+            text = subprocess.check_output(show_cmd, cwd=repo, text=True, stderr=subprocess.DEVNULL)
+        except subprocess.CalledProcessError:
+            # Best effort: a path that `git show` cannot print is skipped silently.
+            continue
+        out.append((rel, text))
+    return out
+
+
+# Build the module dependency graph for one snapshot.
+#
+# `snapshot` == "worktree" reads files from disk; any other value is handed to
+# git as a ref. An edge (src, dep) is recorded only when `dep` is itself a
+# module found in the snapshot, so requires of modules without a matching file
+# are dropped.
+def load_snapshot_graph(repo: Path, snapshot: str) -> SnapshotGraph:
+    files = _worktree_files(repo) if snapshot == "worktree" else _git_files(repo, snapshot)
+
+    # Pass 1: collect every module first so pass 2 can validate edge targets.
+    # If two files map to the same module name, the later one wins.
+    nodes: Dict[str, str] = {}
+    for rel, _ in files:
+        module = module_from_relpath(rel)
+        if module:
+            nodes[module] = rel
+
+    # Pass 2: scan each file's text for require() targets.
+    edges: Set[Tuple[str, str]] = set()
+    for rel, content in files:
+        src = module_from_relpath(rel)
+        if not src:
+            continue
+
+        # A set, because the same module may be required several times or
+        # matched by more than one pattern.
+        deps: Set[str] = set()
+        for pat in REQUIRE_PATTERNS:
+            deps.update(m.group(1) for m in pat.finditer(content))
+
+        for dep in deps:
+            if dep in nodes:
+                edges.add((src, dep))
+
+    # `files` counts every file read, including any that did not map to a module.
+    return SnapshotGraph(snapshot=snapshot, files=len(files), nodes=nodes, edges=edges)
+
+
+# Compute strongly connected components with Tarjan's algorithm.
+#
+# Returns one list per component, single-node components included.
+# DFS roots are taken from `nodes` in sorted order. A node that appears only
+# in `edges` is visited only if it is reachable from some node in `nodes`.
+#
+# NOTE(review): strongconnect() recurses once per level of DFS depth, so a
+# very long dependency chain could hit Python's recursion limit.
+def tarjan_scc(nodes: Iterable[str], edges: Iterable[Tuple[str, str]]) -> List[List[str]]:
+    # Adjacency lists keep the iteration order (and any duplicates) of `edges`.
+    graph: Dict[str, List[str]] = defaultdict(list)
+    for a, b in edges:
+        graph[a].append(b)
+
+    index = 0  # next discovery number to hand out
+    stack: List[str] = []  # nodes discovered but not yet assigned to a component
+    on_stack: Set[str] = set()  # O(1) membership test for `stack`
+    indices: Dict[str, int] = {}  # node -> discovery number
+    lowlink: Dict[str, int] = {}  # node -> lowest discovery number known reachable via the stack
+    result: List[List[str]] = []
+
+    def strongconnect(v: str) -> None:
+        nonlocal index
+        indices[v] = index
+        lowlink[v] = index
+        index += 1
+        stack.append(v)
+        on_stack.add(v)
+
+        for w in graph[v]:
+            if w not in indices:
+                # Unvisited successor: recurse, then inherit its lowlink.
+                strongconnect(w)
+                lowlink[v] = min(lowlink[v], lowlink[w])
+            elif w in on_stack:
+                # w is still on the stack, so it belongs to the component being built.
+                lowlink[v] = min(lowlink[v], indices[w])
+
+        # v is the root of a component: pop the stack down to and including v.
+        if lowlink[v] == indices[v]:
+            comp: List[str] = []
+            while True:
+                w = stack.pop()
+                on_stack.remove(w)
+                comp.append(w)
+                if w == v:
+                    break
+            result.append(comp)
+
+    for n in sorted(set(nodes)):
+        if n not in indices:
+            strongconnect(n)
+
+    return result
+
+
+# Return the edges that a depth-first traversal classifies as back edges:
+# (v, w) where w is still on the active DFS path when v -> w is examined.
+# A self-loop (v, v) counts as a back edge.
+#
+# Which edges end up classified as "back" depends on traversal order, so
+# successors are de-duplicated and sorted and roots are taken in sorted order.
+def back_edges(nodes: Iterable[str], edges: Iterable[Tuple[str, str]]) -> Set[Tuple[str, str]]:
+    graph: Dict[str, List[str]] = defaultdict(list)
+    for a, b in edges:
+        graph[a].append(b)
+    for n in graph:
+        graph[n] = sorted(set(graph[n]))
+
+    # white = unvisited, gray = on the current DFS path, black = fully explored
+    white, gray, black = 0, 1, 2
+    color: Dict[str, int] = {n: white for n in set(nodes)}
+    backs: Set[Tuple[str, str]] = set()
+
+    def dfs(v: str) -> None:
+        color[v] = gray
+        for w in graph[v]:
+            # Nodes that appear only in `edges` are absent from `color`; treat them as unvisited.
+            c = color.get(w, white)
+            if c == white:
+                dfs(w)
+            elif c == gray:
+                backs.add((v, w))
+        color[v] = black
+
+    # Only nodes from `nodes` serve as DFS roots.
+    for n in sorted(color.keys()):
+        if color[n] == white:
+            dfs(n)
+
+    return backs
+
+
+# Count incoming and outgoing edges per node.
+# Returns (indeg, outdeg). Each occurrence of an edge in `edges` is counted,
+# so duplicates in the iterable are not collapsed. Looking up a node that
+# never appeared yields 0, as usual for Counter.
+def degree(edges: Iterable[Tuple[str, str]]) -> Tuple[Counter, Counter]:
+    indeg: Counter = Counter()
+    outdeg: Counter = Counter()
+    for src, dst in edges:
+        outdeg[src] += 1
+        indeg[dst] += 1
+    return indeg, outdeg
+
+
+# Notes on the implementation below:
+# - Only edges with both endpoints inside `members` are followed.
+# - The DFS starts at the lexicographically smallest member, not members[0].
+# - The returned path starts and ends on the same node, which is not
+#   necessarily the start node.
+# - An empty `members` list raises IndexError at `sorted(members)[0]`.
+def find_cycle_in_scc(members: List[str], edges: Iterable[Tuple[str, str]]) -> List[str]:
+    """Return one concrete cycle path within an SCC, e.g. [a, b, c, a].
+
+    Uses DFS from the first member; backtracks until a back-edge is found.
+    Returns [] if no cycle is found (shouldn't happen for a real SCC > 1).
+    """
+    member_set = set(members)
+    graph: Dict[str, List[str]] = defaultdict(list)
+    for a, b in edges:
+        if a in member_set and b in member_set:
+            graph[a].append(b)
+    # De-duplicate and sort successors so the traversal order is fixed.
+    for n in graph:
+        graph[n] = sorted(set(graph[n]))
+
+    path: List[str] = []  # current DFS path from the start node
+    on_path: Dict[str, int] = {}  # node -> index in path
+    visited: Set[str] = set()  # nodes already entered; never explored twice
+
+    def dfs(v: str) -> List[str]:
+        path.append(v)
+        on_path[v] = len(path) - 1
+        for w in graph[v]:
+            if w in on_path:
+                # Found cycle: extract from w's position to end, close it
+                return path[on_path[w]:] + [w]
+            if w not in visited:
+                visited.add(w)
+                result = dfs(w)
+                if result:
+                    return result
+        # Dead end: remove v from the active path before backtracking.
+        path.pop()
+        del on_path[v]
+        return []
+
+    start = sorted(members)[0]
+    visited.add(start)
+    return dfs(start)
+
+
+# Return the size of the largest component that has more than one member,
+# or 0 when every component is a single node (or `comps` is empty).
+def largest_scc_size(comps: Sequence[Sequence[str]]) -> int:
+    # Single-node components are ignored here.
+    nontrivial = [c for c in comps if len(c) > 1]
+    return max((len(c) for c in nontrivial), default=0)