Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,7 @@ deps/

# Local Claude settings (keep out of repo)
.claude/

# Dependency topology tool artifacts
scripts/dependency-topology/__pycache__/
*.html
10 changes: 10 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,13 @@
- **Comments:** Avoid obvious comments that merely restate what the code does. Only add comments when necessary to explain _why_ something is done, not _what_ is being done. Prefer self-explanatory code.
- **Config:** Centralize in `config.lua`. Use deep merge for user overrides.
- **Types:** Use Lua annotations (`---@class`, `---@field`, etc.) for public APIs/config.

## Dependency Topology Tool

Use `scripts/dependency-topology/scan_topology.py` to inspect and track architectural layering.

- Use `python3 scripts/dependency-topology/scan_topology.py scan` to inspect the gap between the current state and the target policy
- Use `diff` to inspect change direction (improved/regressed/neutral) between snapshots
- Pass `--snapshot <git-ref>` for historical snapshots
- Pass `--json` when feeding outputs into scripts or agents
- Keep architecture cleanup discussions anchored on scanner output instead of ad-hoc grep chains
71 changes: 71 additions & 0 deletions scripts/dependency-topology/AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Dependency Topology Scanner

Static analysis tool for Lua codebase dependency architecture.

## File Structure

```
scripts/dependency-topology/
├── scan_topology.py # CLI entry: scan / diff subcommands
├── scan_analysis.py # Core analysis: groups, edge rules, payload builders
├── graph_utils.py # Pure graph algorithms (Tarjan SCC, back edges, degree)
├── html_renderer.py # Interactive dagre-d3 + d3v5 HTML visualization
└── topology.jsonc # Group definitions + review comments (strategy file)
```

## Quick Start

```bash
# Scan current HEAD → generate interactive HTML
python3 scripts/dependency-topology/scan_topology.py scan

# Output to specific path
python3 scripts/dependency-topology/scan_topology.py scan -o /tmp/deps.html

# JSON output (for scripts/agents)
python3 scripts/dependency-topology/scan_topology.py scan --json

# Diff — smart default:
# worktree has uncommitted Lua changes → HEAD vs worktree
# worktree is clean → HEAD~1 vs HEAD (last commit)
python3 scripts/dependency-topology/scan_topology.py diff

# Compare specific refs (branch names, commit SHAs, remote refs)
python3 scripts/dependency-topology/scan_topology.py diff --from upstream/main --to clean-code-remove-core
python3 scripts/dependency-topology/scan_topology.py diff --from HEAD~5 --to HEAD
```

## Snapshot References

- `worktree` — current working tree (uncommitted changes)
- `HEAD` — latest commit
- Any git ref — branch name (e.g. `upstream/main`), tag, short or full commit SHA
- Relative refs — `HEAD~1`, `HEAD^`

**diff defaults (no args):**
- Worktree has uncommitted Lua changes → `HEAD` vs `worktree`
- Worktree is clean → `HEAD~1` vs `HEAD`

Note: ambiguous short names (e.g. `upstream` when both a local branch and remote exist)
produce a git warning. Prefer fully-qualified refs: `upstream/main`, `refs/heads/mybranch`.

## Output

**scan:** One-line summary + HTML file path
```
4 cycles, 20 violations, violations=20 → /path/to/dependency-graph.html
```

**diff:** Change direction summary
```
HEAD → worktree: +2/-1 edges, improved=1, regressed=0
```

## JSON Output Signals

When using `--json`:

- `health` — one-glance status for cycles / violations / ungrouped coverage
- `cycles` — SCC details with severity, members_by_layer, example_cycle, back_edges_in_scc
- `violations` — policy violations grouped by rule with full edge lists
- `group_coverage` — module counts per layer (including ungrouped)
225 changes: 225 additions & 0 deletions scripts/dependency-topology/graph_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
#!/usr/bin/env python3
"""Repository-local static Lua dependency graph helpers.

Mechanism only:
- Parse `require('opencode.*')` edges from `lua/opencode/**/*.lua`
- Build snapshot graph from worktree or git ref
- Provide SCC / back-edge utilities
"""

from __future__ import annotations

from collections import Counter, defaultdict
from dataclasses import dataclass
from pathlib import Path
import re
import subprocess
from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple


# Regexes that pull `opencode` / `opencode.<sub.module>` targets out of Lua
# source text; group 1 is the required module name.
#
# Lua allows two call spellings for a string-literal argument:
#   require('opencode.x')   -- parenthesised call
#   require 'opencode.x'    -- paren-less call; the whitespace is optional,
#   require'opencode.x'     -- so this spelling is valid as well
# The second pattern therefore uses `\s*` (not `\s+`) so the no-space form is
# not silently dropped from the dependency graph. The two patterns cannot
# match the same call: one requires `(` after `require`, the other a quote.
REQUIRE_PATTERNS = [
    re.compile(r"require\s*\(\s*['\"](opencode(?:\.[^'\"]+)?)['\"]\s*\)"),
    re.compile(r"require\s*['\"](opencode(?:\.[^'\"]+)?)['\"]"),
]


@dataclass
class SnapshotGraph:
    """Module dependency graph built from one snapshot of the Lua sources.

    Attributes:
        snapshot: Name of the snapshot the graph was built from.
        files: Number of source files read for the snapshot.
        nodes: Module name mapped to that module's relative file path.
        edges: Directed ``(source module, required module)`` pairs.
    """

    snapshot: str
    files: int
    nodes: Dict[str, str]
    edges: Set[Tuple[str, str]]


def module_from_relpath(relpath: str) -> Optional[str]:
    """Translate a repo-relative Lua file path into its dotted module name.

    Only ``.lua`` paths under ``lua/opencode/`` are recognised; any other path
    yields ``None``. A trailing ``init`` segment is dropped, so
    ``lua/opencode/ui/init.lua`` names the module ``opencode.ui``.
    """
    inside_package = relpath.startswith("lua/opencode/")
    if not (inside_package and relpath.endswith(".lua")):
        return None

    # Strip the leading "lua/" and the ".lua" suffix, then work per segment.
    segments = relpath[len("lua/") : -len(".lua")].split("/")
    if segments[-1] == "init":
        # The path always contains "opencode/", so at least one segment remains.
        segments = segments[:-1]
    return ".".join(segments)


def _worktree_files(repo: Path) -> List[Tuple[str, str]]:
    """Read every Lua source under ``<repo>/lua/opencode`` from the working tree.

    Args:
        repo: Repository root directory.

    Returns:
        ``(relative posix path, file text)`` pairs sorted by path. Text is
        decoded as UTF-8 with undecodable bytes dropped. An absent
        ``lua/opencode`` directory yields an empty list.
    """
    base = repo / "lua" / "opencode"
    out: List[Tuple[str, str]] = []
    for fp in base.rglob("*.lua"):
        # rglob matches names only; a directory called "x.lua" would make
        # read_text() raise, so keep regular files only.
        if not fp.is_file():
            continue
        rel = fp.relative_to(repo).as_posix()
        text = fp.read_text(encoding="utf-8", errors="ignore")
        out.append((rel, text))
    # rglob order is filesystem-dependent; sort so the result is reproducible.
    out.sort(key=lambda item: item[0])
    return out


def _git_files(repo: Path, ref: str) -> List[Tuple[str, str]]:
    """Read every Lua source under ``lua/opencode`` as recorded at a git ref.

    Args:
        repo: Directory in which the git commands are run.
        ref: Any revision git can resolve (branch, tag, commit SHA, ``HEAD~1``).

    Returns:
        ``(path, file text)`` pairs in the order ``git ls-tree`` lists them.
        Text is decoded as UTF-8 with undecodable bytes dropped, matching how
        the working-tree reader decodes files.

    Raises:
        ValueError: If ``git ls-tree`` rejects ``ref``.
    """
    # -z gives NUL-terminated, unquoted paths. Without it git C-quotes paths
    # containing unusual characters, and the quoted form cannot be handed
    # back to `git show`.
    cmd = ["git", "ls-tree", "-r", "--name-only", "-z", ref, "lua/opencode"]
    try:
        ls = subprocess.check_output(
            cmd,
            cwd=repo,
            encoding="utf-8",
            errors="ignore",
            stderr=subprocess.PIPE,
        )
    except subprocess.CalledProcessError as e:
        stderr = e.stderr.strip() if e.stderr else ""
        raise ValueError(
            f"Invalid snapshot ref '{ref}'. Valid values: HEAD, worktree, branch name, commit SHA.\n"
            f"git error: {stderr}"
        ) from None

    out: List[Tuple[str, str]] = []
    for rel in ls.split("\0"):
        # Also discards the empty string after the final NUL terminator.
        if not rel.endswith(".lua"):
            continue
        show_cmd = ["git", "show", f"{ref}:{rel}"]
        try:
            # Explicit UTF-8 with errors="ignore": a blob with stray bytes must
            # not abort the scan with UnicodeDecodeError under a locale codec.
            text = subprocess.check_output(
                show_cmd,
                cwd=repo,
                encoding="utf-8",
                errors="ignore",
                stderr=subprocess.DEVNULL,
            )
        except subprocess.CalledProcessError:
            # Best effort: an entry git cannot print is left out of the snapshot.
            continue
        out.append((rel, text))
    return out
Comment thread
jensenojs marked this conversation as resolved.


def load_snapshot_graph(repo: Path, snapshot: str) -> SnapshotGraph:
    """Build the module dependency graph for one snapshot.

    Args:
        repo: Repository root.
        snapshot: ``"worktree"`` to read files from disk; any other value is
            passed on as a git ref.

    Returns:
        A ``SnapshotGraph`` whose nodes are the modules found in the snapshot
        and whose edges are the ``require`` targets that resolve to one of
        those modules. Requires of modules not in the snapshot are dropped.
    """
    if snapshot == "worktree":
        sources = _worktree_files(repo)
    else:
        sources = _git_files(repo, snapshot)

    # Resolve each path to its module name once; reused for nodes and edges.
    resolved = [(module_from_relpath(path), path, text) for path, text in sources]

    module_paths: Dict[str, str] = {}
    for module, path, _ in resolved:
        if module:
            module_paths[module] = path

    dependency_edges: Set[Tuple[str, str]] = set()
    for module, _, text in resolved:
        if not module:
            continue
        required = {
            match.group(1)
            for pattern in REQUIRE_PATTERNS
            for match in pattern.finditer(text)
        }
        dependency_edges.update(
            (module, target) for target in required if target in module_paths
        )

    return SnapshotGraph(
        snapshot=snapshot,
        files=len(sources),
        nodes=module_paths,
        edges=dependency_edges,
    )


def tarjan_scc(nodes: Iterable[str], edges: Iterable[Tuple[str, str]]) -> List[List[str]]:
    """Compute strongly connected components with Tarjan's algorithm.

    The search uses an explicit work stack instead of recursion, so long
    dependency chains cannot hit Python's recursion limit. Roots and
    successors are visited in sorted order, which makes the result
    independent of the iteration order of ``edges`` (e.g. when it is a set).

    Args:
        nodes: Nodes to start searches from. Nodes that occur only in
            ``edges`` are still visited when reachable from one of these.
        edges: Directed ``(source, target)`` pairs.

    Returns:
        One list per component, including single-node components. A component
        is appended once its root finishes, so it comes after every component
        reachable from it.
    """
    successors: Dict[str, Set[str]] = defaultdict(set)
    for a, b in edges:
        successors[a].add(b)
    adjacency: Dict[str, List[str]] = {
        src: sorted(dsts) for src, dsts in successors.items()
    }

    stack: List[str] = []
    on_stack: Set[str] = set()
    indices: Dict[str, int] = {}
    lowlink: Dict[str, int] = {}
    result: List[List[str]] = []

    def open_node(v: str):
        # Every node is opened exactly once, so len(indices) is the next index.
        indices[v] = lowlink[v] = len(indices)
        stack.append(v)
        on_stack.add(v)
        return v, iter(adjacency.get(v, ()))

    for root in sorted(set(nodes)):
        if root in indices:
            continue
        # Each frame is (node, iterator over its not-yet-examined successors).
        work = [open_node(root)]
        while work:
            v, pending = work[-1]
            for w in pending:
                if w not in indices:
                    # Descend; v's iterator resumes once w is finished.
                    work.append(open_node(w))
                    break
                if w in on_stack:
                    lowlink[v] = min(lowlink[v], indices[w])
            else:
                # All successors of v examined: v is finished.
                work.pop()
                if work:
                    parent = work[-1][0]
                    lowlink[parent] = min(lowlink[parent], lowlink[v])
                if lowlink[v] == indices[v]:
                    # v is the root of a component: unwind the stack down to v.
                    comp: List[str] = []
                    while True:
                        w = stack.pop()
                        on_stack.remove(w)
                        comp.append(w)
                        if w == v:
                            break
                    result.append(comp)

    return result


def back_edges(nodes: Iterable[str], edges: Iterable[Tuple[str, str]]) -> Set[Tuple[str, str]]:
    """Return the edges that close a cycle during a depth-first traversal.

    An edge ``(v, w)`` is reported when ``w`` is still on the current DFS path
    at the moment the edge is examined; a self-loop ``(v, v)`` qualifies.
    Roots and successors are visited in sorted order, so the result is
    deterministic for a given graph. The traversal uses an explicit stack, so
    deep graphs cannot hit Python's recursion limit.

    Args:
        nodes: Nodes to start traversals from. Nodes that occur only in
            ``edges`` are still visited when reachable from one of these.
        edges: Directed ``(source, target)`` pairs.

    Returns:
        The set of back edges found.
    """
    successors: Dict[str, Set[str]] = defaultdict(set)
    for a, b in edges:
        successors[a].add(b)
    adjacency: Dict[str, List[str]] = {
        src: sorted(dsts) for src, dsts in successors.items()
    }

    # white = unvisited, gray = on the current DFS path, black = finished.
    white, gray, black = 0, 1, 2
    color: Dict[str, int] = {n: white for n in set(nodes)}
    backs: Set[Tuple[str, str]] = set()

    # sorted() snapshots the roots, so colouring extra nodes below is safe.
    for root in sorted(color):
        if color[root] != white:
            continue
        color[root] = gray
        # Each frame is (node, iterator over its not-yet-examined successors).
        work = [(root, iter(adjacency.get(root, ())))]
        while work:
            v, pending = work[-1]
            for w in pending:
                state = color.get(w, white)
                if state == white:
                    color[w] = gray
                    work.append((w, iter(adjacency.get(w, ()))))
                    break
                if state == gray:
                    backs.add((v, w))
            else:
                # All successors of v examined: v leaves the current path.
                color[v] = black
                work.pop()

    return backs


def degree(edges: Iterable[Tuple[str, str]]) -> Tuple[Counter, Counter]:
    """Count incoming and outgoing edges per node.

    Every pair in ``edges`` is counted, so repeated pairs add up.

    Returns:
        ``(indegree, outdegree)`` counters keyed by node name.
    """
    # Materialise once: `edges` may be a one-shot iterator and is read twice.
    pairs = [(tail, head) for tail, head in edges]
    incoming = Counter(head for _, head in pairs)
    outgoing = Counter(tail for tail, _ in pairs)
    return incoming, outgoing


def find_cycle_in_scc(members: List[str], edges: Iterable[Tuple[str, str]]) -> List[str]:
    """Return one concrete cycle path within an SCC, e.g. [a, b, c, a].

    Runs a depth-first search from the smallest member, following only edges
    whose endpoints are both in ``members``, with successors visited in sorted
    order. The first edge that leads back onto the current path closes the
    cycle. The search uses an explicit stack, so a large SCC cannot hit
    Python's recursion limit.

    Args:
        members: Nodes of the component to search.
        edges: Directed ``(source, target)`` pairs; edges leaving the
            component are ignored.

    Returns:
        The cycle as a node list whose first and last entries are equal, or
        ``[]`` if ``members`` is empty or no cycle is reachable from the start.
    """
    if not members:
        # Nothing to start from; honour the documented "no cycle" result.
        return []

    member_set = set(members)
    successors: Dict[str, Set[str]] = defaultdict(set)
    for a, b in edges:
        if a in member_set and b in member_set:
            successors[a].add(b)
    adjacency: Dict[str, List[str]] = {
        src: sorted(dsts) for src, dsts in successors.items()
    }

    start = min(member_set)
    path: List[str] = [start]
    on_path: Dict[str, int] = {start: 0}  # node -> index in path
    visited: Set[str] = {start}
    # work[i] iterates the not-yet-examined successors of path[i].
    work = [iter(adjacency.get(start, ()))]

    while work:
        for w in work[-1]:
            if w in on_path:
                # Found cycle: extract from w's position to end, close it.
                return path[on_path[w]:] + [w]
            if w not in visited:
                visited.add(w)
                on_path[w] = len(path)
                path.append(w)
                work.append(iter(adjacency.get(w, ())))
                break
        else:
            # Dead end: backtrack. The node stays in `visited`, so it is
            # never explored a second time.
            work.pop()
            del on_path[path.pop()]

    return []


def largest_scc_size(comps: Sequence[Sequence[str]]) -> int:
    """Return the size of the biggest component that has more than one member.

    Single-node components are ignored; the result is 0 when no component has
    at least two members.
    """
    sizes = (len(component) for component in comps)
    return max((size for size in sizes if size > 1), default=0)
Loading
Loading