diff --git a/.claude/hooks/workflow-context-injector.py b/.claude/hooks/workflow-context-injector.py index 3800e311..4e3b1e03 100755 --- a/.claude/hooks/workflow-context-injector.py +++ b/.claude/hooks/workflow-context-injector.py @@ -106,19 +106,30 @@ def get_branch_name() -> str: return "default" -def load_step_state(branch: str) -> dict | None: - """Load step state from .map//step_state.json.""" +def read_step_state(branch: str) -> tuple[dict | None, str | None]: + """Load step state and return a non-throwing degradation reason on failure.""" project_dir = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())) state_file = project_dir / ".map" / branch / "step_state.json" if not state_file.exists(): - return None + return (None, "missing step_state.json") try: with open(state_file, encoding="utf-8") as f: - return json.load(f) - except (json.JSONDecodeError, IOError): - return None + state = json.load(f) + if isinstance(state, dict): + return (state, None) + return (None, "step_state.json is not an object") + except json.JSONDecodeError: + return (None, "invalid step_state.json") + except (OSError, UnicodeDecodeError): + return (None, "unreadable step_state.json") + + +def load_step_state(branch: str) -> dict | None: + """Load step state from .map//step_state.json.""" + state, _reason = read_step_state(branch) + return state def step_state_path(branch: str) -> Path: @@ -161,6 +172,13 @@ def record_hook_injection_status( pass +def record_skip_if_state_available(branch: str, reason: str, tool_name: str) -> None: + """Persist a skipped hook outcome only when existing state is safe to update.""" + state, _state_error = read_step_state(branch) + if state is not None: + record_hook_injection_status(branch, state, "skipped", reason, tool_name) + + def should_inject_for_bash(command: str) -> bool: """Determine if Bash command needs workflow reminder.""" if not command: @@ -186,6 +204,14 @@ def should_inject_for_bash(command: str) -> bool: return False +def state_string(state: dict, key: str, default: str = "") -> str: + """Return a stripped state string without trusting persisted JSON field types.""" + value = state.get(key) + if isinstance(value, str): + return value.strip() + return default + + def required_action_for_step(step_id: str, step_phase: str, state: dict) -> str | None: """Return a short required-next-action hint for common steps.""" if step_id == "1.55": @@ -263,28 +289,30 @@ def format_reminder(state: dict, branch: str) -> str | None: if not state: return None - step_id = (state.get("current_step_id") or "").strip() - step_phase = (state.get("current_step_phase") or "").strip() - subtask_id = (state.get("current_subtask_id") or "-").strip() or "-" + step_id = state_string(state, "current_step_id") + step_phase = state_string(state, "current_step_phase") + subtask_id = state_string(state, "current_subtask_id", "-") or "-" - seq = state.get("subtask_sequence") or [] + seq_value = state.get("subtask_sequence") + seq = seq_value if isinstance(seq_value, list) else [] idx = state.get("subtask_index") progress = "-" if isinstance(idx, int) and seq: progress = f"{min(idx + 1, len(seq))}/{len(seq)}" plan_ok = "y" if state.get("plan_approved") else "n" - mode = (state.get("execution_mode") or "").strip() or "batch" + mode = state_string(state, "execution_mode") or "batch" # Wave progress display - waves = state.get("execution_waves") or [] + waves_value = state.get("execution_waves") + waves = waves_value if isinstance(waves_value, list) else [] wave_idx = state.get("current_wave_index", 0) wave_hint = "" - if waves: + if waves and isinstance(wave_idx, int): wave_hint = f" | WAVE {wave_idx + 1}/{len(waves)}" current_wave = waves[wave_idx] if wave_idx < len(waves) else [] - if len(current_wave) > 1: - wave_hint += f" ({', '.join(current_wave)})" + if isinstance(current_wave, list) and len(current_wave) > 1: + wave_hint += f" ({', '.join(str(item) for item in current_wave)})" mode = "batch:parallel" required = required_action_for_step(step_id, step_phase, state) @@ -301,12 +329,15 @@ def format_reminder(state: dict, branch: str) -> str | None: # Show recently changed files for context freshness files_hint = "" - files_changed = state.get("subtask_files_changed", {}) + files_changed_value = state.get("subtask_files_changed", {}) + files_changed = files_changed_value if isinstance(files_changed_value, dict) else {} if files_changed and subtask_id != "-": current_files = files_changed.get(subtask_id, []) - if current_files: + if isinstance(current_files, list) and current_files: shown = current_files[:5] - files_hint = " | Files: " + ", ".join(Path(f).name for f in shown) + files_hint = " | Files: " + ", ".join( + Path(f).name for f in shown if isinstance(f, str) + ) if len(current_files) > 5: files_hint += f" +{len(current_files) - 5}" @@ -341,33 +372,52 @@ def format_reminder(state: dict, branch: str) -> str | None: def main() -> None: + branch = get_branch_name() try: input_data = json.load(sys.stdin) except json.JSONDecodeError: + record_skip_if_state_available(branch, "invalid hook input JSON", "unknown") print("{}") sys.exit(0) - tool_name = input_data.get("tool_name", "") + if not isinstance(input_data, dict): + record_skip_if_state_available(branch, "hook input is not an object", "unknown") + print("{}") + sys.exit(0) + + tool_name_value = input_data.get("tool_name", "") + tool_name = tool_name_value if isinstance(tool_name_value, str) else "" tool_input = input_data.get("tool_input", {}) + if not isinstance(tool_input, dict): + tool_input = {} # Determine if we should inject should_inject = False + skip_reason = "" if tool_name in ("Edit", "Write", "MultiEdit"): should_inject = True elif tool_name == "Bash": command = tool_input.get("command", "") - should_inject = should_inject_for_bash(command) + if not isinstance(command, str): + skip_reason = "bash command is not a string" + else: + should_inject = should_inject_for_bash(command) if not should_inject: + reason = skip_reason or "tool not configured for workflow injection" + if tool_name == "Bash": + reason = skip_reason or "bash command not significant" + elif not tool_name: + reason = "missing tool_name" + record_skip_if_state_available(branch, reason, tool_name or "unknown") print("{}") sys.exit(0) # Load and format workflow step state - branch = get_branch_name() - state = load_step_state(branch) + state, _state_error = read_step_state(branch) - if not state: + if state is None: print("{}") sys.exit(0) diff --git a/README.md b/README.md index 2e9b7c21..d6314fcc 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ MAP review is useful, but it is not a replacement for engineering judgment. Seri - **Reviewable diffs** - planning and task contracts keep changes small enough to inspect. - **Useful quality gates** - `/map-check` and `/map-review` validate against the plan instead of just asking whether code "looks fine". - **Review bundle** - `/map-review` auto-generates `.map//review-bundle.json` and `.map//review-bundle.md` before launching reviewer agents, bundling spec, plan, tests, verification, and latest code review into a single durable input contract. Reviewers read the bundle first; raw diff is secondary. Use `/map-review --detached` to open an isolated read-only git worktree at `.map//detached-review/` for clean-room inspection without touching the source branch. Bundle generation records the `review` stage in `.map//artifact_manifest.json`. Section-order flags reduce anchoring bias: `--reverse-sections` (invert order), `--shuffle-sections [--seed N]` (seeded random order), `--compare-orderings` (run default + reverse, aggregate via strict-wins, surface drift). -- **Run health report** - `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review` write `.map//run_health_report.json` during closeout via `.map/scripts/map_step_runner.py write_run_health_report`. The report is a machine-readable snapshot of terminal status, step progress, retry counters, artifact presence, and latest hook-injection status, giving `/map-resume` and reviewers one place to inspect why a workflow is complete, pending, or blocked. +- **Run health report** - `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review` write `.map//run_health_report.json` during closeout via `.map/scripts/map_step_runner.py write_run_health_report`. The report is a machine-readable snapshot of terminal status, step progress, retry counters, artifact presence, and latest hook-injection status, including explicit skipped reasons for malformed hook input or insignificant Bash commands when branch state can be safely updated. - **Project memory** - `/map-learn` turns hard-won fixes and gotchas into reusable context for the next session. ## Core Commands diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 7850d2a3..06ad333d 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -303,7 +303,7 @@ Targeted TDD flows additionally persist `test_contract_.md` and `test_h Missing artifacts are recorded with `present: false` rather than omitted, so bundle generation succeeds at any workflow stage. An optional detached worktree at `.map//detached-review/` is created by `prepare_detached_review()` when `/map-review --detached` is invoked. -**Run health artifact** (`run_health` stage in manifest): `write_run_health_report()` in `.map/scripts/map_step_runner.py` writes `.map//run_health_report.json` as the machine-readable diagnosis snapshot for the current workflow. `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review` call it during closeout with explicit terminal-status mappings. It records `terminal_status`, current step/subtask, completed and pending step counts, artifact presence, retry counters, latest hook-injection status, Predictor skip/call flags when present, and whether final verification evidence exists. The report is a compact index over existing branch artifacts, not a second workflow source of truth. +**Run health artifact** (`run_health` stage in manifest): `write_run_health_report()` in `.map/scripts/map_step_runner.py` writes `.map//run_health_report.json` as the machine-readable diagnosis snapshot for the current workflow. `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review` call it during closeout with explicit terminal-status mappings. It records `terminal_status`, current step/subtask, completed and pending step counts, artifact presence, retry counters, latest hook-injection status, explicit skipped hook reasons for malformed input or insignificant Bash commands when branch state can be updated safely, Predictor skip/call flags when present, and whether final verification evidence exists. The report is a compact index over existing branch artifacts, not a second workflow source of truth. #### 1. State Artifact (`state_.json`) diff --git a/docs/USAGE.md b/docs/USAGE.md index 00715eb0..d7a4d12d 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -47,7 +47,7 @@ In targeted TDD, `/map-tdd ST-001` now stops after the red phase once it has wri Philosophically, MAP still ends with `LEARN`. Runtime keeps that step soft and token-aware by auto-writing `.map//learning-handoff.md` and `.json` after `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review`, so `/map-learn` can auto-load the workflow context with no manual reconstruction. The same handoff write also updates `learning-metrics.json` with repeated learned-rule violation signals when current findings overlap existing rules, so teams can tell whether saved lessons are actually reducing repeat mistakes. -For workflow diagnosis, `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review` now call `python3 .map/scripts/map_step_runner.py write_run_health_report [terminal_status]` during closeout. This writes `.map//run_health_report.json` and records the `run_health` stage in `artifact_manifest.json`. The report captures terminal status, current step/subtask, completed and pending step counts, artifact presence, retry counters, latest hook-injection status, Predictor skip/call flags when present, and final-verifier evidence when a verification summary exists. +For workflow diagnosis, `/map-efficient`, `/map-debug`, `/map-check`, and `/map-review` now call `python3 .map/scripts/map_step_runner.py write_run_health_report [terminal_status]` during closeout. This writes `.map//run_health_report.json` and records the `run_health` stage in `artifact_manifest.json`. The report captures terminal status, current step/subtask, completed and pending step counts, artifact presence, retry counters, latest hook-injection status, skipped hook reasons for malformed input or insignificant Bash commands when state can be updated safely, Predictor skip/call flags when present, and final-verifier evidence when a verification summary exists. Implementation note: `/map-learn` is now maintained skill-first. The canonical slash surface lives in `.claude/skills/map-learn/SKILL.md`; MAP no longer ships a duplicate `.claude/commands/map-learn.md`, so there is only one place to update the learning workflow. The slash surface now advertises an optional `[workflow-summary]` argument, but zero-argument mode still auto-loads `.map//learning-handoff.md` when present. diff --git a/docs/improvement-done.md b/docs/improvement-done.md index e5e371d3..f9b21759 100644 --- a/docs/improvement-done.md +++ b/docs/improvement-done.md @@ -17,6 +17,14 @@ - Synced the shipped skill templates, updated README/usage/architecture docs, and added prompt-contract tests that reject hard-coded `complete` snippets and assert `map-efficient`/`map-debug` sequencing. - Verified with focused skill/template tests, `make lint`, `pytest -m "not slow"`, a repo-built `uv run --no-sync mapify init --no-git --mcp none` generated-project smoke that inspected `run_health_report.json`, and a read-only review pass. Full unfiltered `pytest` and the slow Claude SDK suite were attempted, but live SDK tests exceeded tool timeouts after making progress; deterministic and no-LLM artifact checks passed. +## Expand hook degradation status coverage [2604.017-3] + +- Date: 2026-05-16 +- Added explicit skipped hook status recording for malformed hook input, non-object hook payloads, non-injected tools, and insignificant Bash commands when an existing branch `step_state.json` can be safely parsed and updated. +- Preserved the non-blocking hook contract for missing, invalid, non-object, or unreadable `step_state.json` by returning `{}` without creating or clobbering state. +- Synced the shipped hook template, updated README/usage/architecture/roadmap docs, and added regression tests for skipped Bash commands, malformed hook input, non-string Bash command payloads, missing `step_state.json`, and invalid `step_state.json` preservation. +- Verified with focused hook/template tests, run-health schema/writer tests, `make lint`, `pytest -m "not slow"`, and repo-built `uv run --no-sync mapify init --no-git --mcp none` generated-project smokes that executed the shipped hook and inspected the persisted skipped reason. + ## Action-first tool use in lightweight workflows [2604.028] - Date: 2026-05-15 diff --git a/docs/improvement-loop-log.md b/docs/improvement-loop-log.md index da0a073b..f6ca245e 100644 --- a/docs/improvement-loop-log.md +++ b/docs/improvement-loop-log.md @@ -1,3 +1,19 @@ +## 2026-05-16 - Expand hook degradation status coverage [2604.017-3] + +- Decision: `implemented` +- Branch: `2604.017-3-hook-degradation-status` +- Baseline: The PreToolUse hook wrote `hook_injection` for emitted reminders and no-reminder formatting skips, but malformed hook input and insignificant Bash commands were silent when safe branch state existed. +- Forward Change: Added safe state reads with explicit degradation reasons, persisted skipped outcomes for malformed hook payloads and insignificant Bash commands when `step_state.json` is parseable, preserved non-blocking/no-clobber behavior for missing or invalid state, synced the shipped hook template, and documented the new diagnostic signal. +- Decisive Validation: `pytest tests/test_workflow_context_injector.py tests/test_template_sync.py -v`, `pytest tests/test_map_step_runner.py::test_write_run_health_report_creates_report_and_manifest tests/test_artifact_schemas.py::test_validate_run_health_report_schema -v`, `make lint`, `pytest -m "not slow"`, and generated-project `uv run --no-sync mapify init --no-git --mcp none` hook smokes passed. +- Review Result: Diff-scoped review found a malformed payload gap where non-string Bash commands could still raise; fixed by normalizing `tool_name` and `command` before classification and added a regression test. +- Next Trigger: Reuse this learning whenever hook code accepts JSON from Claude/tooling before deciding whether to mutate branch state. +- Reusable Learnings: + - command: `pytest tests/test_workflow_context_injector.py tests/test_template_sync.py -v` + - command: `uv run --no-sync mapify init --no-git --mcp none` + - invariant: `Hook inputs are untrusted even after JSON parsing; normalize field types before calling string-specific helpers.` + - invariant: `Hook degradation status may update only parseable existing branch state; missing or invalid state must not be created or clobbered by a diagnostic write.` + - review-check: `When adding a skipped/degraded hook path, test both the persisted reason and the non-blocking/no-state-mutation failure path.` + ## 2026-05-16 - Auto-write run health reports from workflow closeout paths [2604.017-2] - Decision: `implemented` diff --git a/docs/improvement-plan.md b/docs/improvement-plan.md index f7320a37..c9f5d9ce 100644 --- a/docs/improvement-plan.md +++ b/docs/improvement-plan.md @@ -134,11 +134,6 @@ - Add CI assertions that the resiliency artifacts are always produced: for workflows using AI agents (/map-efficient, /map-debug, /map-review, /map-learn), validate the health report JSON exists and includes terminal_status values (pending/complete/blocked/won't_do/superseded) exactly as specified in the state artifact section. - Create a small set of resiliency regression tests: (1) simulate compaction/no checkpoint and confirm hook injection continues without blocking session start (architecture explicitly says session start must always succeed), (2) simulate oversized checkpoint file >256KB and confirm injection is skipped but workflow proceeds, (3) simulate invalid UTF-8 and confirm injection is rejected but session continues, using the existing security validation rules and stated performance characteristics (e.g., <0.5s total hook time). -## Expand hook degradation status coverage [2604.017-3] - -**Benefit Hypothesis**: Recording explicit hook outcomes for invalid JSON, missing state, oversized or unreadable context inputs, and skipped-significance decisions will make hook resilience auditable instead of only best-effort. -**Scope**: Keep hook exit behavior non-blocking, but persist structured `hook_injection` reasons whenever a branch state file can be safely updated. Add regression tests for malformed input, missing `step_state.json`, and skipped Bash commands. - ## Health report analytics and CI assertions [2604.017-4] **Benefit Hypothesis**: A deterministic validation command over `run_health_report.json` will catch workflows that claim completion without verification artifacts or with unexplained retry/hook degradation, improving CI and post-mortem signal quality. diff --git a/docs/learned/testing-strategies.md b/docs/learned/testing-strategies.md index 25d7ff02..2cb0fd58 100644 --- a/docs/learned/testing-strategies.md +++ b/docs/learned/testing-strategies.md @@ -17,3 +17,4 @@ paths: - **Validate every new branch artifact in all ledgers** (2026-05-15): When adding a `.map//` artifact, update the writer, `artifact_manifest.json` stages, schema tests, review/learning bundle inventories, docs, and generated `mapify init` smoke; otherwise the artifact can exist locally but disappear from resume/review handoffs. [workflow: improvement-plan-loop] - **Make live Claude E2E prompts defeat workflow-fit off-ramps** (2026-05-16): When a slow test asserts `/map-plan` artifacts via `claude -p`, use a shared prompt that explicitly forces the `map-plan` workflow-fit outcome and includes non-trivial invariants; trivial “add function” prompts can correctly off-ramp to `direct-edit` and make the test measure routing variance instead of the artifact contract. [workflow: improvement-plan-loop] - **Test prompt snippets for decision timing and non-happy-path status** (2026-05-16): When adding closeout commands to skill prompts, regression tests should reject both direct hard-coded happy-path arguments and variable defaults like `RUN_HEALTH_STATUS="complete"`; also assert the snippet appears after the verdict/verification section that determines the terminal status. [workflow: improvement-plan-loop] +- **Treat hook JSON fields as untrusted types** (2026-05-16): When a hook accepts JSON from Claude/tooling, test malformed JSON, wrong top-level types, wrong field types, missing state, and invalid state; the hook must keep exit 0, persist skipped reasons only when existing state is parseable, and never create or clobber state just to record diagnostics. [workflow: improvement-plan-loop] diff --git a/docs/roadmap.md b/docs/roadmap.md index 18ef5e71..cddf3c0d 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -52,7 +52,7 @@ The old "Iteration 1" is mostly complete: - `/map-task ST-00N` can resume implementation from the persisted TDD contract. - `learning-handoff.md` / `.json` preserves context for deferred `/map-learn`. - Learning metrics and repeated learned-rule violation tracking exist. -- `run_health_report.json` records terminal status, retry counters, artifact presence, and latest hook-injection status for diagnosis and resume handoff. +- `run_health_report.json` records terminal status, retry counters, artifact presence, latest hook-injection status, and auditable skipped hook reasons for diagnosis and resume handoff. Useful follow-up from that iteration: diff --git a/src/mapify_cli/templates/hooks/workflow-context-injector.py b/src/mapify_cli/templates/hooks/workflow-context-injector.py index 3800e311..4e3b1e03 100755 --- a/src/mapify_cli/templates/hooks/workflow-context-injector.py +++ b/src/mapify_cli/templates/hooks/workflow-context-injector.py @@ -106,19 +106,30 @@ def get_branch_name() -> str: return "default" -def load_step_state(branch: str) -> dict | None: - """Load step state from .map//step_state.json.""" +def read_step_state(branch: str) -> tuple[dict | None, str | None]: + """Load step state and return a non-throwing degradation reason on failure.""" project_dir = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())) state_file = project_dir / ".map" / branch / "step_state.json" if not state_file.exists(): - return None + return (None, "missing step_state.json") try: with open(state_file, encoding="utf-8") as f: - return json.load(f) - except (json.JSONDecodeError, IOError): - return None + state = json.load(f) + if isinstance(state, dict): + return (state, None) + return (None, "step_state.json is not an object") + except json.JSONDecodeError: + return (None, "invalid step_state.json") + except (OSError, UnicodeDecodeError): + return (None, "unreadable step_state.json") + + +def load_step_state(branch: str) -> dict | None: + """Load step state from .map//step_state.json.""" + state, _reason = read_step_state(branch) + return state def step_state_path(branch: str) -> Path: @@ -161,6 +172,13 @@ def record_hook_injection_status( pass +def record_skip_if_state_available(branch: str, reason: str, tool_name: str) -> None: + """Persist a skipped hook outcome only when existing state is safe to update.""" + state, _state_error = read_step_state(branch) + if state is not None: + record_hook_injection_status(branch, state, "skipped", reason, tool_name) + + def should_inject_for_bash(command: str) -> bool: """Determine if Bash command needs workflow reminder.""" if not command: @@ -186,6 +204,14 @@ def should_inject_for_bash(command: str) -> bool: return False +def state_string(state: dict, key: str, default: str = "") -> str: + """Return a stripped state string without trusting persisted JSON field types.""" + value = state.get(key) + if isinstance(value, str): + return value.strip() + return default + + def required_action_for_step(step_id: str, step_phase: str, state: dict) -> str | None: """Return a short required-next-action hint for common steps.""" if step_id == "1.55": @@ -263,28 +289,30 @@ def format_reminder(state: dict, branch: str) -> str | None: if not state: return None - step_id = (state.get("current_step_id") or "").strip() - step_phase = (state.get("current_step_phase") or "").strip() - subtask_id = (state.get("current_subtask_id") or "-").strip() or "-" + step_id = state_string(state, "current_step_id") + step_phase = state_string(state, "current_step_phase") + subtask_id = state_string(state, "current_subtask_id", "-") or "-" - seq = state.get("subtask_sequence") or [] + seq_value = state.get("subtask_sequence") + seq = seq_value if isinstance(seq_value, list) else [] idx = state.get("subtask_index") progress = "-" if isinstance(idx, int) and seq: progress = f"{min(idx + 1, len(seq))}/{len(seq)}" plan_ok = "y" if state.get("plan_approved") else "n" - mode = (state.get("execution_mode") or "").strip() or "batch" + mode = state_string(state, "execution_mode") or "batch" # Wave progress display - waves = state.get("execution_waves") or [] + waves_value = state.get("execution_waves") + waves = waves_value if isinstance(waves_value, list) else [] wave_idx = state.get("current_wave_index", 0) wave_hint = "" - if waves: + if waves and isinstance(wave_idx, int): wave_hint = f" | WAVE {wave_idx + 1}/{len(waves)}" current_wave = waves[wave_idx] if wave_idx < len(waves) else [] - if len(current_wave) > 1: - wave_hint += f" ({', '.join(current_wave)})" + if isinstance(current_wave, list) and len(current_wave) > 1: + wave_hint += f" ({', '.join(str(item) for item in current_wave)})" mode = "batch:parallel" required = required_action_for_step(step_id, step_phase, state) @@ -301,12 +329,15 @@ def format_reminder(state: dict, branch: str) -> str | None: # Show recently changed files for context freshness files_hint = "" - files_changed = state.get("subtask_files_changed", {}) + files_changed_value = state.get("subtask_files_changed", {}) + files_changed = files_changed_value if isinstance(files_changed_value, dict) else {} if files_changed and subtask_id != "-": current_files = files_changed.get(subtask_id, []) - if current_files: + if isinstance(current_files, list) and current_files: shown = current_files[:5] - files_hint = " | Files: " + ", ".join(Path(f).name for f in shown) + files_hint = " | Files: " + ", ".join( + Path(f).name for f in shown if isinstance(f, str) + ) if len(current_files) > 5: files_hint += f" +{len(current_files) - 5}" @@ -341,33 +372,52 @@ def format_reminder(state: dict, branch: str) -> str | None: def main() -> None: + branch = get_branch_name() try: input_data = json.load(sys.stdin) except json.JSONDecodeError: + record_skip_if_state_available(branch, "invalid hook input JSON", "unknown") print("{}") sys.exit(0) - tool_name = input_data.get("tool_name", "") + if not isinstance(input_data, dict): + record_skip_if_state_available(branch, "hook input is not an object", "unknown") + print("{}") + sys.exit(0) + + tool_name_value = input_data.get("tool_name", "") + tool_name = tool_name_value if isinstance(tool_name_value, str) else "" tool_input = input_data.get("tool_input", {}) + if not isinstance(tool_input, dict): + tool_input = {} # Determine if we should inject should_inject = False + skip_reason = "" if tool_name in ("Edit", "Write", "MultiEdit"): should_inject = True elif tool_name == "Bash": command = tool_input.get("command", "") - should_inject = should_inject_for_bash(command) + if not isinstance(command, str): + skip_reason = "bash command is not a string" + else: + should_inject = should_inject_for_bash(command) if not should_inject: + reason = skip_reason or "tool not configured for workflow injection" + if tool_name == "Bash": + reason = skip_reason or "bash command not significant" + elif not tool_name: + reason = "missing tool_name" + record_skip_if_state_available(branch, reason, tool_name or "unknown") print("{}") sys.exit(0) # Load and format workflow step state - branch = get_branch_name() - state = load_step_state(branch) + state, _state_error = read_step_state(branch) - if not state: + if state is None: print("{}") sys.exit(0) diff --git a/tests/test_workflow_context_injector.py b/tests/test_workflow_context_injector.py index 16c41473..c6efcc4a 100644 --- a/tests/test_workflow_context_injector.py +++ b/tests/test_workflow_context_injector.py @@ -8,13 +8,17 @@ def _run_hook(tmp_project_dir: Path, stdin_payload: dict) -> tuple[int, str, str]: + return _run_hook_raw(tmp_project_dir, json.dumps(stdin_payload)) + + +def _run_hook_raw(tmp_project_dir: Path, stdin_payload: str) -> tuple[int, str, str]: hook_path = Path(".claude/hooks/workflow-context-injector.py") env = os.environ.copy() env["CLAUDE_PROJECT_DIR"] = str(tmp_project_dir) proc = subprocess.run( ["python3", str(hook_path)], - input=json.dumps(stdin_payload), + input=stdin_payload, text=True, capture_output=True, env=env, @@ -106,6 +110,195 @@ def test_skips_for_readonly_bash(tmp_path: Path) -> None: assert out == "{}" +def test_records_skipped_for_insignificant_bash_when_state_exists( + tmp_path: Path, branch_name: str +) -> None: + branch = branch_name + state_dir = tmp_path / ".map" / branch + state_dir.mkdir(parents=True, exist_ok=True) + (state_dir / "step_state.json").write_text( + json.dumps( + { + "current_step_id": "2.3", + "current_step_phase": "ACTOR", + "current_subtask_id": "ST-001", + } + ), + encoding="utf-8", + ) + + code, out, err = _run_hook( + tmp_path, + {"tool_name": "Bash", "tool_input": {"command": "ls"}}, + ) + + assert code == 0 + assert err == "" + assert out == "{}" + state = json.loads((state_dir / "step_state.json").read_text(encoding="utf-8")) + assert state["hook_injection"]["status"] == "skipped" + assert state["hook_injection"]["reason"] == "bash command not significant" + assert state["hook_injection"]["tool_name"] == "Bash" + assert state["hook_injection_counts"]["skipped"] == 1 + + +def test_records_malformed_hook_input_when_state_exists( + tmp_path: Path, branch_name: str +) -> None: + branch = branch_name + state_dir = tmp_path / ".map" / branch + state_dir.mkdir(parents=True, exist_ok=True) + (state_dir / "step_state.json").write_text( + json.dumps( + { + "current_step_id": "2.3", + "current_step_phase": "ACTOR", + } + ), + encoding="utf-8", + ) + + code, out, err = _run_hook_raw(tmp_path, "{") + + assert code == 0 + assert err == "" + assert out == "{}" + state = json.loads((state_dir / "step_state.json").read_text(encoding="utf-8")) + assert state["hook_injection"]["status"] == "skipped" + assert state["hook_injection"]["reason"] == "invalid hook input JSON" + assert state["hook_injection"]["tool_name"] == "unknown" + assert state["hook_injection_counts"]["skipped"] == 1 + + +def test_non_string_bash_command_remains_non_blocking( + tmp_path: Path, branch_name: str +) -> None: + branch = branch_name + state_dir = tmp_path / ".map" / branch + state_dir.mkdir(parents=True, exist_ok=True) + (state_dir / "step_state.json").write_text( + json.dumps( + { + "current_step_id": "2.3", + "current_step_phase": "ACTOR", + } + ), + encoding="utf-8", + ) + + code, out, err = _run_hook( + tmp_path, + {"tool_name": "Bash", "tool_input": {"command": ["pytest"]}}, + ) + + assert code == 0 + assert err == "" + assert out == "{}" + state = json.loads((state_dir / "step_state.json").read_text(encoding="utf-8")) + assert state["hook_injection"]["status"] == "skipped" + assert state["hook_injection"]["reason"] == "bash command is not a string" + assert state["hook_injection_counts"]["skipped"] == 1 + + +def test_records_unsupported_tool_when_state_exists( + tmp_path: Path, branch_name: str +) -> None: + branch = branch_name + state_dir = tmp_path / ".map" / branch + state_dir.mkdir(parents=True, exist_ok=True) + (state_dir / "step_state.json").write_text( + json.dumps( + { + "current_step_id": "2.3", + "current_step_phase": "ACTOR", + } + ), + encoding="utf-8", + ) + + code, out, err = _run_hook( + tmp_path, + {"tool_name": "Read", "tool_input": {"file_path": "x"}}, + ) + + assert code == 0 + assert err == "" + assert out == "{}" + state = json.loads((state_dir / "step_state.json").read_text(encoding="utf-8")) + assert state["hook_injection"]["status"] == "skipped" + assert state["hook_injection"]["reason"] == "tool not configured for workflow injection" + assert state["hook_injection"]["tool_name"] == "Read" + assert state["hook_injection_counts"]["skipped"] == 1 + + +def test_schema_invalid_step_state_fields_remain_non_blocking( + tmp_path: Path, branch_name: str +) -> None: + branch = branch_name + state_dir = tmp_path / ".map" / branch + state_dir.mkdir(parents=True, exist_ok=True) + (state_dir / "step_state.json").write_text( + json.dumps( + { + "current_step_id": 23, + "current_step_phase": ["ACTOR"], + "current_subtask_id": {"id": "ST-001"}, + "execution_mode": {"mode": "batch"}, + "subtask_sequence": "ST-001", + "execution_waves": {"wave": ["ST-001"]}, + "subtask_files_changed": ["src/example.py"], + } + ), + encoding="utf-8", + ) + + code, out, err = _run_hook( + tmp_path, {"tool_name": "Edit", "tool_input": {"file_path": "x"}} + ) + + assert code == 0 + assert err == "" + assert out == "{}" + state = json.loads((state_dir / "step_state.json").read_text(encoding="utf-8")) + assert state["hook_injection"]["status"] == "skipped" + assert state["hook_injection"]["reason"] == "no reminder formatted" + assert state["hook_injection_counts"]["skipped"] == 1 + + +def test_missing_step_state_remains_non_blocking_without_creating_state( + tmp_path: Path, branch_name: str +) -> None: + state_file = tmp_path / ".map" / branch_name / "step_state.json" + + code, out, err = _run_hook( + tmp_path, {"tool_name": "Edit", "tool_input": {"file_path": "x"}} + ) + + assert code == 0 + assert err == "" + assert out == "{}" + assert not state_file.exists() + + +def test_invalid_step_state_remains_non_blocking_without_clobbering_state( + tmp_path: Path, branch_name: str +) -> None: + branch = branch_name + state_dir = tmp_path / ".map" / branch + state_dir.mkdir(parents=True, exist_ok=True) + state_file = state_dir / "step_state.json" + state_file.write_text("{", encoding="utf-8") + + code, out, err = _run_hook( + tmp_path, {"tool_name": "Edit", "tool_input": {"file_path": "x"}} + ) + + assert code == 0 + assert err == "" + assert out == "{}" + assert state_file.read_text(encoding="utf-8") == "{" + + def test_injects_for_pytest_bash_when_step_state_exists( tmp_path: Path, branch_name: str ) -> None: