Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 73 additions & 23 deletions .claude/hooks/workflow-context-injector.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,19 +106,30 @@ def get_branch_name() -> str:
return "default"


def load_step_state(branch: str) -> dict | None:
"""Load step state from .map/<branch>/step_state.json."""
def read_step_state(branch: str) -> tuple[dict | None, str | None]:
"""Load step state and return a non-throwing degradation reason on failure."""
project_dir = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd()))
state_file = project_dir / ".map" / branch / "step_state.json"

if not state_file.exists():
return None
return (None, "missing step_state.json")

try:
with open(state_file, encoding="utf-8") as f:
return json.load(f)
except (json.JSONDecodeError, IOError):
return None
state = json.load(f)
if isinstance(state, dict):
Comment thread
azalio marked this conversation as resolved.
return (state, None)
return (None, "step_state.json is not an object")
except json.JSONDecodeError:
return (None, "invalid step_state.json")
except (OSError, UnicodeDecodeError):
return (None, "unreadable step_state.json")


def load_step_state(branch: str) -> dict | None:
"""Load step state from .map/<branch>/step_state.json."""
state, _reason = read_step_state(branch)
return state


def step_state_path(branch: str) -> Path:
Expand Down Expand Up @@ -161,6 +172,13 @@ def record_hook_injection_status(
pass


def record_skip_if_state_available(branch: str, reason: str, tool_name: str) -> None:
"""Persist a skipped hook outcome only when existing state is safe to update."""
state, _state_error = read_step_state(branch)
if state is not None:
record_hook_injection_status(branch, state, "skipped", reason, tool_name)


def should_inject_for_bash(command: str) -> bool:
"""Determine if Bash command needs workflow reminder."""
if not command:
Expand All @@ -186,6 +204,14 @@ def should_inject_for_bash(command: str) -> bool:
return False


def state_string(state: dict, key: str, default: str = "") -> str:
"""Return a stripped state string without trusting persisted JSON field types."""
value = state.get(key)
if isinstance(value, str):
return value.strip()
return default


def required_action_for_step(step_id: str, step_phase: str, state: dict) -> str | None:
"""Return a short required-next-action hint for common steps."""
if step_id == "1.55":
Expand Down Expand Up @@ -263,28 +289,30 @@ def format_reminder(state: dict, branch: str) -> str | None:
if not state:
return None

step_id = (state.get("current_step_id") or "").strip()
step_phase = (state.get("current_step_phase") or "").strip()
subtask_id = (state.get("current_subtask_id") or "-").strip() or "-"
step_id = state_string(state, "current_step_id")
step_phase = state_string(state, "current_step_phase")
subtask_id = state_string(state, "current_subtask_id", "-") or "-"

seq = state.get("subtask_sequence") or []
seq_value = state.get("subtask_sequence")
seq = seq_value if isinstance(seq_value, list) else []
idx = state.get("subtask_index")
progress = "-"
if isinstance(idx, int) and seq:
progress = f"{min(idx + 1, len(seq))}/{len(seq)}"

plan_ok = "y" if state.get("plan_approved") else "n"
mode = (state.get("execution_mode") or "").strip() or "batch"
mode = state_string(state, "execution_mode") or "batch"

# Wave progress display
waves = state.get("execution_waves") or []
waves_value = state.get("execution_waves")
waves = waves_value if isinstance(waves_value, list) else []
wave_idx = state.get("current_wave_index", 0)
wave_hint = ""
if waves:
if waves and isinstance(wave_idx, int):
wave_hint = f" | WAVE {wave_idx + 1}/{len(waves)}"
current_wave = waves[wave_idx] if wave_idx < len(waves) else []
if len(current_wave) > 1:
wave_hint += f" ({', '.join(current_wave)})"
if isinstance(current_wave, list) and len(current_wave) > 1:
wave_hint += f" ({', '.join(str(item) for item in current_wave)})"
mode = "batch:parallel"

required = required_action_for_step(step_id, step_phase, state)
Expand All @@ -301,12 +329,15 @@ def format_reminder(state: dict, branch: str) -> str | None:

# Show recently changed files for context freshness
files_hint = ""
files_changed = state.get("subtask_files_changed", {})
files_changed_value = state.get("subtask_files_changed", {})
files_changed = files_changed_value if isinstance(files_changed_value, dict) else {}
if files_changed and subtask_id != "-":
current_files = files_changed.get(subtask_id, [])
if current_files:
if isinstance(current_files, list) and current_files:
shown = current_files[:5]
files_hint = " | Files: " + ", ".join(Path(f).name for f in shown)
files_hint = " | Files: " + ", ".join(
Path(f).name for f in shown if isinstance(f, str)
)
if len(current_files) > 5:
files_hint += f" +{len(current_files) - 5}"

Expand Down Expand Up @@ -341,33 +372,52 @@ def format_reminder(state: dict, branch: str) -> str | None:


def main() -> None:
branch = get_branch_name()
try:
input_data = json.load(sys.stdin)
except json.JSONDecodeError:
record_skip_if_state_available(branch, "invalid hook input JSON", "unknown")
print("{}")
sys.exit(0)

tool_name = input_data.get("tool_name", "")
if not isinstance(input_data, dict):
record_skip_if_state_available(branch, "hook input is not an object", "unknown")
print("{}")
sys.exit(0)

tool_name_value = input_data.get("tool_name", "")
tool_name = tool_name_value if isinstance(tool_name_value, str) else ""
tool_input = input_data.get("tool_input", {})
if not isinstance(tool_input, dict):
tool_input = {}

# Determine if we should inject
should_inject = False
skip_reason = ""

if tool_name in ("Edit", "Write", "MultiEdit"):
should_inject = True
elif tool_name == "Bash":
command = tool_input.get("command", "")
should_inject = should_inject_for_bash(command)
if not isinstance(command, str):
skip_reason = "bash command is not a string"
else:
should_inject = should_inject_for_bash(command)

if not should_inject:
reason = skip_reason or "tool not configured for workflow injection"
if tool_name == "Bash":
reason = skip_reason or "bash command not significant"
elif not tool_name:
reason = "missing tool_name"
record_skip_if_state_available(branch, reason, tool_name or "unknown")
print("{}")
sys.exit(0)

# Load and format workflow step state
branch = get_branch_name()
state = load_step_state(branch)
state, _state_error = read_step_state(branch)

if not state:
if state is None:
print("{}")
sys.exit(0)

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ MAP review is useful, but it is not a replacement for engineering judgment. Seri
- **Reviewable diffs** - planning and task contracts keep changes small enough to inspect.
- **Useful quality gates** - `/map-check` and `/map-review` validate against the plan instead of just asking whether code "looks fine".
- **Review bundle** - `/map-review` auto-generates `.map/<branch>/review-bundle.json` and `.map/<branch>/review-bundle.md` before launching reviewer agents, bundling spec, plan, tests, verification, and latest code review into a single durable input contract. Reviewers read the bundle first; raw diff is secondary. Use `/map-review --detached` to open an isolated read-only git worktree at `.map/<branch>/detached-review/` for clean-room inspection without touching the source branch. Bundle generation records the `review` stage in `.map/<branch>/artifact_manifest.json`. Section-order flags reduce anchoring bias: `--reverse-sections` (invert order), `--shuffle-sections [--seed N]` (seeded random order), `--compare-orderings` (run default + reverse, aggregate via strict-wins, surface drift).
- **Run health report** - `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review` write `.map/<branch>/run_health_report.json` during closeout via `.map/scripts/map_step_runner.py write_run_health_report`. The report is a machine-readable snapshot of terminal status, step progress, retry counters, artifact presence, and latest hook-injection status, giving `/map-resume` and reviewers one place to inspect why a workflow is complete, pending, or blocked.
- **Run health report** - `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review` write `.map/<branch>/run_health_report.json` during closeout via `.map/scripts/map_step_runner.py write_run_health_report`. The report is a machine-readable snapshot of terminal status, step progress, retry counters, artifact presence, and latest hook-injection status, including explicit skipped reasons for malformed hook input or insignificant Bash commands when branch state can be safely updated.
- **Project memory** - `/map-learn` turns hard-won fixes and gotchas into reusable context for the next session.

## Core Commands
Expand Down
2 changes: 1 addition & 1 deletion docs/ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ Targeted TDD flows additionally persist `test_contract_<subtask>.md` and `test_h

Missing artifacts are recorded with `present: false` rather than omitted, so bundle generation succeeds at any workflow stage. An optional detached worktree at `.map/<branch>/detached-review/` is created by `prepare_detached_review()` when `/map-review --detached` is invoked.

**Run health artifact** (`run_health` stage in manifest): `write_run_health_report()` in `.map/scripts/map_step_runner.py` writes `.map/<branch>/run_health_report.json` as the machine-readable diagnosis snapshot for the current workflow. `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review` call it during closeout with explicit terminal-status mappings. It records `terminal_status`, current step/subtask, completed and pending step counts, artifact presence, retry counters, latest hook-injection status, Predictor skip/call flags when present, and whether final verification evidence exists. The report is a compact index over existing branch artifacts, not a second workflow source of truth.
**Run health artifact** (`run_health` stage in manifest): `write_run_health_report()` in `.map/scripts/map_step_runner.py` writes `.map/<branch>/run_health_report.json` as the machine-readable diagnosis snapshot for the current workflow. `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review` call it during closeout with explicit terminal-status mappings. It records `terminal_status`, current step/subtask, completed and pending step counts, artifact presence, retry counters, latest hook-injection status, explicit skipped hook reasons for malformed input or insignificant Bash commands when branch state can be updated safely, Predictor skip/call flags when present, and whether final verification evidence exists. The report is a compact index over existing branch artifacts, not a second workflow source of truth.

#### 1. State Artifact (`state_<branch>.json`)

Expand Down
2 changes: 1 addition & 1 deletion docs/USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ In targeted TDD, `/map-tdd ST-001` now stops after the red phase once it has wri

Philosophically, MAP still ends with `LEARN`. Runtime keeps that step soft and token-aware by auto-writing `.map/<branch>/learning-handoff.md` and `.json` after `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review`, so `/map-learn` can auto-load the workflow context with no manual reconstruction. The same handoff write also updates `learning-metrics.json` with repeated learned-rule violation signals when current findings overlap existing rules, so teams can tell whether saved lessons are actually reducing repeat mistakes.

For workflow diagnosis, `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review` now call `python3 .map/scripts/map_step_runner.py write_run_health_report <workflow> [terminal_status]` during closeout. This writes `.map/<branch>/run_health_report.json` and records the `run_health` stage in `artifact_manifest.json`. The report captures terminal status, current step/subtask, completed and pending step counts, artifact presence, retry counters, latest hook-injection status, Predictor skip/call flags when present, and final-verifier evidence when a verification summary exists.
For workflow diagnosis, `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review` now call `python3 .map/scripts/map_step_runner.py write_run_health_report <workflow> [terminal_status]` during closeout. This writes `.map/<branch>/run_health_report.json` and records the `run_health` stage in `artifact_manifest.json`. The report captures terminal status, current step/subtask, completed and pending step counts, artifact presence, retry counters, latest hook-injection status, skipped hook reasons for malformed input or insignificant Bash commands when state can be updated safely, Predictor skip/call flags when present, and final-verifier evidence when a verification summary exists.

Implementation note: `/map-learn` is now maintained skill-first. The canonical slash surface lives in `.claude/skills/map-learn/SKILL.md`; MAP no longer ships a duplicate `.claude/commands/map-learn.md`, so there is only one place to update the learning workflow. The slash surface now advertises an optional `[workflow-summary]` argument, but zero-argument mode still auto-loads `.map/<branch>/learning-handoff.md` when present.

Expand Down
8 changes: 8 additions & 0 deletions docs/improvement-done.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@
- Synced the shipped skill templates, updated README/usage/architecture docs, and added prompt-contract tests that reject hard-coded `complete` snippets and assert `map-efficient`/`map-debug` sequencing.
- Verified with focused skill/template tests, `make lint`, `pytest -m "not slow"`, a repo-built `uv run --no-sync mapify init <temp-path> --no-git --mcp none` generated-project smoke that inspected `run_health_report.json`, and a read-only review pass. Full unfiltered `pytest` and the slow Claude SDK suite were attempted, but live SDK tests exceeded tool timeouts after making progress; deterministic and no-LLM artifact checks passed.

## Expand hook degradation status coverage [2604.017-3]

- Date: 2026-05-16
- Added explicit skipped hook status recording for malformed hook input, non-object hook payloads, non-injected tools, and insignificant Bash commands when an existing branch `step_state.json` can be safely parsed and updated.
- Preserved the non-blocking hook contract for missing, invalid, non-object, or unreadable `step_state.json` by returning `{}` without creating or clobbering state.
- Synced the shipped hook template, updated README/usage/architecture/roadmap docs, and added regression tests for skipped Bash commands, malformed hook input, non-string Bash command payloads, missing `step_state.json`, and invalid `step_state.json` preservation.
- Verified with focused hook/template tests, run-health schema/writer tests, `make lint`, `pytest -m "not slow"`, and repo-built `uv run --no-sync mapify init <temp-path> --no-git --mcp none` generated-project smokes that executed the shipped hook and inspected the persisted skipped reason.

## Action-first tool use in lightweight workflows [2604.028]

- Date: 2026-05-15
Expand Down
16 changes: 16 additions & 0 deletions docs/improvement-loop-log.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
## 2026-05-16 - Expand hook degradation status coverage [2604.017-3]

- Decision: `implemented`
- Branch: `2604.017-3-hook-degradation-status`
- Baseline: The PreToolUse hook wrote `hook_injection` for emitted reminders and no-reminder formatting skips, but malformed hook input and insignificant Bash commands were silent when safe branch state existed.
- Forward Change: Added safe state reads with explicit degradation reasons, persisted skipped outcomes for malformed hook payloads and insignificant Bash commands when `step_state.json` is parseable, preserved non-blocking/no-clobber behavior for missing or invalid state, synced the shipped hook template, and documented the new diagnostic signal.
- Decisive Validation: `pytest tests/test_workflow_context_injector.py tests/test_template_sync.py -v`, `pytest tests/test_map_step_runner.py::test_write_run_health_report_creates_report_and_manifest tests/test_artifact_schemas.py::test_validate_run_health_report_schema -v`, `make lint`, `pytest -m "not slow"`, and generated-project `uv run --no-sync mapify init <new-dir> --no-git --mcp none` hook smokes passed.
- Review Result: Diff-scoped review found a malformed payload gap where non-string Bash commands could still raise; fixed by normalizing `tool_name` and `command` before classification and added a regression test.
- Next Trigger: Reuse this learning whenever hook code accepts JSON from Claude/tooling before deciding whether to mutate branch state.
- Reusable Learnings:
- command: `pytest tests/test_workflow_context_injector.py tests/test_template_sync.py -v`
- command: `uv run --no-sync mapify init <new-dir> --no-git --mcp none`
- invariant: `Hook inputs are untrusted even after JSON parsing; normalize field types before calling string-specific helpers.`
- invariant: `Hook degradation status may update only parseable existing branch state; missing or invalid state must not be created or clobbered by a diagnostic write.`
- review-check: `When adding a skipped/degraded hook path, test both the persisted reason and the non-blocking/no-state-mutation failure path.`

## 2026-05-16 - Auto-write run health reports from workflow closeout paths [2604.017-2]

- Decision: `implemented`
Expand Down
5 changes: 0 additions & 5 deletions docs/improvement-plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,6 @@
- Add CI assertions that the resiliency artifacts are always produced: for workflows using AI agents (/map-efficient, /map-debug, /map-review, /map-learn), validate the health report JSON exists and includes terminal_status values (pending/complete/blocked/won't_do/superseded) exactly as specified in the state artifact section.
- Create a small set of resiliency regression tests: (1) simulate compaction/no checkpoint and confirm hook injection continues without blocking session start (architecture explicitly says session start must always succeed), (2) simulate oversized checkpoint file >256KB and confirm injection is skipped but workflow proceeds, (3) simulate invalid UTF-8 and confirm injection is rejected but session continues, using the existing security validation rules and stated performance characteristics (e.g., <0.5s total hook time).

## Expand hook degradation status coverage [2604.017-3]

**Benefit Hypothesis**: Recording explicit hook outcomes for invalid JSON, missing state, oversized or unreadable context inputs, and skipped-significance decisions will make hook resilience auditable instead of only best-effort.
**Scope**: Keep hook exit behavior non-blocking, but persist structured `hook_injection` reasons whenever a branch state file can be safely updated. Add regression tests for malformed input, missing `step_state.json`, and skipped Bash commands.

## Health report analytics and CI assertions [2604.017-4]

**Benefit Hypothesis**: A deterministic validation command over `run_health_report.json` will catch workflows that claim completion without verification artifacts or with unexplained retry/hook degradation, improving CI and post-mortem signal quality.
Expand Down
Loading
Loading