#!/usr/bin/env python3
"""SOP Orchestrator — drives claude -p through SOP steps autonomously.

Front-loads planning into a single Opus "outliner" call that produces a
structured JSON task tree, then uses deterministic code to select batches.
"""

import argparse
import difflib
import json
import os
import tempfile
import subprocess
import sys
import uuid
from datetime import datetime
from pathlib import Path
from typing import TypedDict

sys.path.insert(0, str(Path(__file__).resolve().parent))

class TaskNode(TypedDict, total=False):
    """One node in the SOP task tree produced by the outliner.

    Leaf nodes (empty ``children``) are the executable steps; parent nodes
    group sub-steps. All keys are optional (``total=False``).
    """
    id: str  # hierarchical ID, e.g. "1", "1.1", "1.1.2"
    description: str  # short summary of what the step does
    content: str  # self-contained instructions for a fresh worker
    line_start: int  # 1-indexed first SOP line for this step (local-file mode)
    line_end: int  # 1-indexed last SOP line, inclusive (local-file mode)
    done: bool  # True once completed (or pre-marked complete in the SOP)
    verify: str  # "builtin" | "independent" | "none"
    review_gate: bool  # True when human approval is required before proceeding
    notes: list[str]  # attempt history / implementation notes
    values: dict[str, str]  # discovered values (ports, URLs, paths, ...)
    attempt_count: int  # failures so far; 3 blocks the run
    # Quoted forward reference: without `from __future__ import annotations`,
    # an unquoted `list[TaskNode]` is evaluated while the class body executes,
    # before the name TaskNode is bound, raising NameError at import time.
    children: list["TaskNode"]


class SidecarState(TypedDict):
    """Persistent orchestrator state serialized to <run_dir>/state.json."""
    sop_source: str  # SOP file path or URL, exactly as given on the command line
    started_at: str  # ISO-8601 timestamp of when the run began
    tasks: list[TaskNode]  # full task tree produced by the outliner


class PlanResult(TypedDict, total=False):
    """Decision returned by plan_next_batch for one orchestrator iteration."""
    action: str  # "next" | "review" | "done" | "blocked"
    tasks: list[TaskNode]  # batch to execute ("next") or the gate task ("review")
    reason: str  # human-readable explanation, only set for "blocked"

# Process exit codes — consumed by whatever shell/CI wraps the orchestrator.
EXIT_DONE = 0  # SOP fully complete (or --one iteration finished)
EXIT_ERROR = 1  # setup failure or outliner produced unusable JSON
EXIT_REVIEW = 2  # user aborted at a review gate
EXIT_BLOCKED = 3  # repeated failures or max iterations reached
EXIT_DRY_RUN = 4  # --dry-run finished: tree printed, nothing executed

# Prompt template for the one-shot "outliner" call that decomposes the SOP
# into a JSON task tree. {content_field}/{content_rules} switch between
# local-file mode (line ranges) and URL mode (inline content). Literal JSON
# braces are doubled ({{ }}) because this template goes through str.format().
_OUTLINER_COMMON = """\
You are an outliner for an SOP orchestrator. Your job is to read the SOP and \
decompose it into a hierarchical JSON task tree.

SOP location: {sop_location}

{sop_inline}

Read the SOP carefully. For each step or section, create a TaskNode:

```json
{{
  "id": "1",
  "description": "Short summary of what to do",
  {content_field}
  "done": false,
  "verify": "builtin",
  "review_gate": false,
  "notes": [],
  "values": {{}},
  "attempt_count": 0,
  "children": []
}}
```

Rules:
- Use hierarchical IDs: "1", "1.1", "1.1.2", etc.
- Set `done: true` for steps already marked complete in the SOP (checkboxes checked, \
status labels showing done, strikethrough, etc.)
- For `verify`:
  - "builtin" if the step includes its own verification command/test/assertion
  - "independent" if the step has no built-in verification and needs a separate verifier
  - "none" for trivial steps (documentation updates, config changes with no runtime effect)
- Set `review_gate: true` for steps that explicitly require human approval before proceeding
{content_rules}
- Preserve document order faithfully
- Extract any existing values/notes from completed steps into the `values` and `notes` fields
- Use `children` for sub-steps within a section; leaf nodes are the actual executable tasks

Respond with ONLY a valid JSON array of top-level TaskNode objects on stdout. \
No markdown fences, no commentary, no file writes.
"""

# Local-file mode: the outliner reports line ranges instead of copying text;
# populate_content() later slices the SOP file into each task's `content`.
_CONTENT_FIELD_LOCAL = """\
"line_start": 10,
  "line_end": 25,"""

_CONTENT_RULES_LOCAL = """\
- Set `line_start` and `line_end` (1-indexed, inclusive) to the SOP line range \
containing the full instructions for this step. A fresh worker with no memory \
should be able to execute from just those lines.
- Leaf nodes MUST have line_start/line_end. Parent nodes may omit them."""

# URL mode: the SOP can't be sliced locally, so the outliner must copy the
# full instructions for each step into the `content` field itself.
_CONTENT_FIELD_URL = '"content": "Full instructions from the SOP for this step",'

_CONTENT_RULES_URL = """\
- Keep `content` self-contained — a fresh worker with no memory should be able to execute \
from just the content field"""

# Prompt for each worker batch. The worker writes its machine-readable report
# to {report_path} (a temp file) so streamed transcript output can't corrupt
# the JSON. Literal braces in the example report are doubled for str.format().
WORKER_PROMPT = """\
Ultrathink. You are a worker executing step(s) of an SOP. Complete the tasks described \
below in order. You have full access to the codebase, tools, and any files or \
resources referenced. If a task mentions a pattern file, Google Doc, or other \
reference, read it yourself before starting.

SOP source: {sop_source}
All work should target the project described by this SOP. If it's a file path, \
work in the directory containing it. Do NOT search for or work on other projects.

IMPORTANT: Do NOT modify the SOP file. Only modify project files (source code, \
configs, assets, etc.). Your changes will be recorded in the SOP by a separate \
process.

Tasks (complete each by its ID):
{tasks_formatted}

When done, write a JSON report to {report_path}:
{{
  "completed": ["1.1", "1.2"],
  "partial": {{"1.3": "reason why incomplete"}},
  "values": {{"1.1.port": "8443"}},
  "notes": {{"1.1": "implementation detail worth recording"}}
}}

Rules:
- List completed task IDs in "completed"
- For tasks you couldn't finish, put them in "partial" with the reason
- Record any discovered values (ports, URLs, paths, credentials) in "values"
- Add implementation notes worth preserving in "notes"
- Read files from disk to get exact values — do not rely on memory
- Only valid task IDs from the list above are accepted
"""

# Prompt for the read-only verifier pass, run only for tasks the outliner
# marked verify="independent" and the worker claims completed.
VERIFIER_PROMPT = """\
You are a verifier for an SOP orchestrator. A worker just completed one or \
more steps. Your job is to determine if the actual work was done successfully.

SOP source: {sop_source}

You have tool access. Spot-check the worker's claims: read files it says it \
created or modified, verify values it reports, check that builds or tests pass \
if relevant. Do NOT re-do the work — just verify it exists and looks right.

Be skeptical but efficient — check 2-3 key claims, not every line.
Do NOT modify the SOP file — your role is read-only verification.

Tasks that need verification:
{tasks_to_verify}

Worker report:
{report}

Respond with ONLY valid JSON:
{{"completed": true, "evidence": "what you checked and why you believe it's done"}}
or
{{"completed": false, "evidence": "what's missing or wrong"}}
"""

# Prompt for the "phase 2" updater that mirrors sidecar progress back into the
# SOP document itself (local file or remote doc) with surgical edits.
UPDATER_PROMPT = """\
You are an SOP updater. One or more steps were just completed. Update the SOP \
to reflect this.

The SOP lives at: {sop_location}

1. Read the document at that location.
2. Mark each completed step below as done (use whatever completion marker the \
document already uses — e.g. checkboxes, strikethrough, status labels).
3. Add the worker's summary as a brief note under each completed step.
4. Fill in any blank fields with the values reported by the worker.
5. Do NOT change any other steps or content.
6. Make surgical edits — do not rewrite the whole document.

Steps completed:
{completed_summary}

After updating, respond with ONLY: {{"updated": true}}
"""


def ts():
    """Return the current local time as a "[YYYY/MM/DD HH:MM:SS]" log stamp."""
    return f"[{datetime.now():%Y/%m/%d %H:%M:%S}]"


class Logger:
    """Append-only transcript of every prompt/response in a run directory.

    The file handle stays open for the whole run; call close() when done.
    """

    def __init__(self, directory: str):
        self.path = Path(directory) / "orchestrator.log"
        # Explicit encoding: the transcript contains non-ASCII characters
        # (⚠, →, em dashes), which would fail on non-UTF-8 platform locales.
        self.file = open(self.path, "w", encoding="utf-8")

    def log(self, role: str, direction: str, content: str):
        """Write one framed entry: who (role), which way (direction), payload."""
        # Named `stamp` rather than `ts` so it doesn't shadow the module-level
        # ts() helper.
        stamp = datetime.now().isoformat()
        self.file.write(f"\n{'='*60}\n")
        self.file.write(f"[{stamp}] {role} {direction}\n")
        self.file.write(f"{'='*60}\n")
        self.file.write(content + "\n")
        # Flush per entry so the log is readable while child processes run.
        self.file.flush()

    def close(self):
        """Close the underlying log file handle."""
        self.file.close()


def is_url(s):
    """Return True when *s* looks like an http(s) URL rather than a file path."""
    return s.startswith(("http://", "https://"))


def generate_unified_diff(old_content, new_content, path):
    """Return a unified diff between two text versions of the file at *path*."""
    diff_lines = difflib.unified_diff(
        old_content.splitlines(keepends=True),
        new_content.splitlines(keepends=True),
        fromfile=path,
        tofile=path,
    )
    return "".join(diff_lines)


# Module-level context-size tracking across calls (single-element lists so
# the values can be mutated without `global`). Used to detect mid-run
# context compaction from the streamed token counts.
_last_ctx = [0]
_last_ctx_role = [None]

def parse_stream_json_line(line, role):
    """Convert one `--output-format stream-json` line into display text.

    Returns a printable multi-line summary of assistant tool calls, text
    blocks, and token usage, or None when the line is not a displayable
    assistant message (non-JSON, or a non-"assistant" message type).
    """
    try:
        msg = json.loads(line)
    except json.JSONDecodeError:
        return None

    # NOTE(review): assumes any decoded line is a JSON object; a bare array
    # or scalar would raise AttributeError here — confirm the CLI never
    # emits those.
    if msg.get("type") != "assistant":
        return None

    message = msg.get("message", {})
    content = message.get("content", [])
    parts = []
    for block in content:
        if block.get("type") == "tool_use":
            name = block.get("name", "?")
            inp = block.get("input", {})
            # Pick the single most informative argument per tool for display.
            if name in ("Read", "Edit", "Write"):
                key_arg = inp.get("file_path", "")
            elif name == "Bash":
                key_arg = inp.get("command", "")[:80]
            elif name in ("Grep", "Glob"):
                key_arg = inp.get("pattern", "")
            elif name == "Task":
                key_arg = inp.get("description", "")
            else:
                key_arg = ""
            parts.append(f"{ts()} [{role}] {name} {key_arg}")
        elif block.get("type") == "text":
            text = block.get("text", "").strip()
            if text:
                for tline in text.split('\n'):
                    tline = tline.strip()
                    if tline:
                        parts.append(f'{ts()} [{role}] "{tline}"')
    usage = message.get("usage", {})
    # Total context used = fresh input + cache writes + cache reads.
    ctx = (usage.get("input_tokens", 0)
           + usage.get("cache_creation_input_tokens", 0)
           + usage.get("cache_read_input_tokens", 0))
    if ctx:
        # Percent of the assumed 200K-token window still available.
        pct = 100 * (1 - ctx / 200_000)
        parts.append(f"{ts()} [{role}] {ctx // 1000}K/200K tokens ({pct:.0f}% left)")
        # Reset the baseline when a different role starts streaming so one
        # role's context size isn't compared against another's.
        if _last_ctx_role[0] != role:
            _last_ctx[0] = 0
            _last_ctx_role[0] = role
        # A >20% drop for the same role is treated as context compaction.
        if _last_ctx[0] and ctx < _last_ctx[0] * 0.8:
            parts.append(f"{ts()} [{role}] ⚠ context compacted: {_last_ctx[0] // 1000}K → {ctx // 1000}K")
        _last_ctx[0] = ctx

    return '\n'.join(parts) if parts else None


def extract_result_text(line):
    """Return the "result" field of a stream-json result message, else None."""
    try:
        msg = json.loads(line)
    except json.JSONDecodeError:
        return None
    if msg.get("type") == "result":
        return msg.get("result", "")
    return None


def claude_call(prompt, verbose=False, timeout=None, allow_permissions=False,
                session_id=None, resume_id=None, logger=None, role="claude",
                model=None, effort=None):
    """Invoke `claude -p`, feeding *prompt* on stdin.

    Returns (result_text, returncode). With verbose=True the child runs in
    --output-format stream-json mode and tool calls / text are echoed live
    to stdout; otherwise stdout is captured in one blocking subprocess.run
    call. On timeout (non-verbose path only) returns ("", 1). The optional
    *logger* records the prompt, raw stream lines, and the final response.
    """
    cmd = ["claude", "-p"]
    if session_id:
        cmd += ["--session-id", session_id]
    if resume_id:
        cmd += ["--resume", resume_id]
    if model:
        cmd += ["--model", model]
    if effort:
        cmd += ["--effort", effort]
    if allow_permissions:
        cmd.append("--dangerously-skip-permissions")

    stream_json = verbose
    if stream_json:
        cmd += ["--output-format", "stream-json"]

    env = {**os.environ}
    # Presumably stops the child CLI from detecting a nested Claude Code
    # session — TODO confirm against the CLI docs.
    env.pop("CLAUDECODE", None)

    if logger:
        logger.log(role, "PROMPT", prompt)

    if verbose:
        proc = subprocess.Popen(
            cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT, text=True, env=env,
        )
        # NOTE(review): the full prompt is written before any stdout is read;
        # if the child fills the stdout pipe buffer while stdin is still being
        # written, this could deadlock on very large prompts — confirm.
        proc.stdin.write(prompt)
        proc.stdin.close()
        result_text = ""
        while True:
            line = proc.stdout.readline()
            if not line:
                break
            if logger:
                logger.log(role, "STREAM", line.rstrip())

            # The final "result" message carries the complete response text.
            rt = extract_result_text(line)
            if rt is not None:
                result_text = rt
                continue
            display = parse_stream_json_line(line, role)
            if display:
                print(display)
                sys.stdout.flush()

        # NOTE(review): timeout only bounds this wait after stdout hits EOF —
        # the streaming read loop above is unbounded.
        proc.wait(timeout=timeout)

        if logger:
            logger.log(role, "RESPONSE", result_text)

        return result_text, proc.returncode
    else:
        try:
            result = subprocess.run(
                cmd, input=prompt, capture_output=True, text=True,
                timeout=timeout, env=env,
            )
            if logger:
                logger.log(role, "RESPONSE", result.stdout)
            return result.stdout, result.returncode
        except subprocess.TimeoutExpired:
            print(f"\n{ts()} [orchestrator] Step timed out after {timeout}s", file=sys.stderr)
            return "", 1


def parse_json_response(text):
    """Parse JSON from model output, tolerating markdown code fences.

    Returns the decoded object, or None when the payload is not valid JSON.
    """
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (e.g. ```json); if there is no newline,
        # just strip the three backticks.
        _, sep, rest = cleaned.partition("\n")
        cleaned = rest if sep else cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        return None


# --- Task tree operations ---

def iter_leaves(tasks: list[TaskNode]) -> list[TaskNode]:
    """Return all leaf nodes (tasks with no children) in document order."""
    leaves: list[TaskNode] = []
    for node in tasks:
        kids = node.get("children", [])
        leaves.extend(iter_leaves(kids) if kids else [node])
    return leaves


def find_task_by_id(tasks: list[TaskNode], task_id: str) -> TaskNode | None:
    """Depth-first search of the task tree for the node with the given id."""
    for node in tasks:
        if node.get("id") == task_id:
            return node
        hit = find_task_by_id(node.get("children", []), task_id)
        if hit:
            return hit
    return None


def plan_next_batch(tasks: list[TaskNode], batch_size: int = 5) -> PlanResult:
    """Deterministically select the next batch of leaf tasks to execute.

    Returns "done" when nothing is pending, "blocked" on the first task with
    three failed attempts, "review" when a review gate is the next pending
    task, otherwise "next" with up to *batch_size* tasks in document order.
    """
    pending = [leaf for leaf in iter_leaves(tasks) if not leaf.get("done", False)]

    if not pending:
        return {"action": "done"}

    selected: list[TaskNode] = []
    for leaf in pending:
        if leaf.get("attempt_count", 0) >= 3:
            recent = "; ".join(leaf.get("notes", [])[-2:])
            return {
                "action": "blocked",
                "reason": f"Task {leaf['id']} failed 3 times: {recent}",
            }
        if leaf.get("review_gate", False):
            # A gate at the front is surfaced alone; otherwise the batch
            # stops just before it.
            if selected:
                break
            return {"action": "review", "tasks": [leaf]}
        selected.append(leaf)
        if len(selected) >= batch_size:
            break

    if not selected:
        return {"action": "done"}

    return {"action": "next", "tasks": selected}


def format_tasks_for_worker(tasks: list[TaskNode]) -> str:
    """Render a batch of tasks as the text block embedded in WORKER_PROMPT."""
    lines: list[str] = []
    for task in tasks:
        lines.append(f"[{task['id']}] {task['description']}")
        lines.append(task.get("content", ""))
        if task.get("notes"):
            lines.append("Previous attempt notes:")
            lines.extend(f"  - {note}" for note in task["notes"])
        if task.get("values"):
            lines.append("Known values:")
            lines.extend(f"  {k}: {v}" for k, v in task["values"].items())
        lines.append("")  # blank separator after each task
    return "\n".join(lines)


def update_sidecar(state: SidecarState, report: dict) -> list[str]:
    """Phase 1 updater: apply a worker report to the JSON sidecar state.

    Marks completed tasks done, records partial-failure reasons (bumping
    attempt_count), and merges reported values/notes into their tasks.
    Returns the IDs of tasks newly marked done.
    """
    newly_completed: list[str] = []

    for task_id in report.get("completed", []):
        task = find_task_by_id(state["tasks"], task_id)
        if task and not task.get("done", False):
            task["done"] = True
            newly_completed.append(task_id)

    for task_id, reason in report.get("partial", {}).items():
        task = find_task_by_id(state["tasks"], task_id)
        if task:
            task["attempt_count"] = task.get("attempt_count", 0) + 1
            task.setdefault("notes", []).append(reason)

    for key, value in report.get("values", {}).items():
        # Value keys look like "<task_id>.<field>"; everything before the
        # last dot is the owning task's ID.
        owner_id = key.rsplit(".", 1)[0] if "." in key else key
        task = find_task_by_id(state["tasks"], owner_id)
        if task:
            task.setdefault("values", {})[key] = value

    for task_id, note in report.get("notes", {}).items():
        task = find_task_by_id(state["tasks"], task_id)
        if task:
            task.setdefault("notes", []).append(note)

    return newly_completed


def mark_verification_failed(state: SidecarState, failed_ids: list[str], evidence: str):
    """Revert `done` on tasks whose completion the verifier rejected."""
    for failed_id in failed_ids:
        task = find_task_by_id(state["tasks"], failed_id)
        if not task:
            continue
        task["done"] = False
        task["attempt_count"] = task.get("attempt_count", 0) + 1
        task.setdefault("notes", []).append(f"Verification failed: {evidence}")


def build_completed_summary(state: SidecarState, completed_ids: list[str]) -> str:
    """Format completed tasks (latest note plus values) for UPDATER_PROMPT."""
    lines: list[str] = []
    for task_id in completed_ids:
        task = find_task_by_id(state["tasks"], task_id)
        if not task:
            continue
        lines.append(f"[{task_id}] {task['description']}")
        notes = task.get("notes", [])
        if notes:
            # Only the most recent note is worth mirroring into the SOP.
            lines.append(f"  Notes: {notes[-1]}")
        for k, v in task.get("values", {}).items():
            lines.append(f"  {k}: {v}")
    return "\n".join(lines)


def get_tasks_needing_verification(batch: list[TaskNode]) -> list[TaskNode]:
    """Return the tasks in *batch* flagged for independent verification."""
    needing = []
    for task in batch:
        if task.get("verify") == "independent":
            needing.append(task)
    return needing


# --- Main orchestrator ---

def _number_lines(text: str) -> str:
    return '\n'.join(f"  {i+1}: {line}" for i, line in enumerate(text.splitlines()))


def populate_content(tasks: list[TaskNode], sop_lines: list[str]):
    """Fill each task's `content` from its 1-indexed, inclusive line range.

    Recurses into children; nodes without both line_start and line_end are
    left untouched.
    """
    for task in tasks:
        start, end = task.get("line_start"), task.get("line_end")
        if start is not None and end is not None:
            task["content"] = "\n".join(sop_lines[start - 1:end])
        if task.get("children"):
            populate_content(task["children"], sop_lines)


def run_outliner(source: str, verbose: bool, logger: Logger) -> list[TaskNode]:
    """Run the one-shot outliner call and return the parsed task tree.

    For local files the SOP is inlined (line-numbered) into the prompt and
    tasks carry line ranges that are expanded via populate_content(); for
    URLs the model fetches the doc itself and tasks carry full content.
    Exits the process (EXIT_ERROR) when no usable JSON can be obtained.
    """
    url_mode = is_url(source)
    sop_inline = ""
    sop_lines: list[str] = []

    if url_mode:
        content_field = _CONTENT_FIELD_URL
        content_rules = _CONTENT_RULES_URL
    else:
        content_field = _CONTENT_FIELD_LOCAL
        content_rules = _CONTENT_RULES_LOCAL
        try:
            sop_content = Path(source).read_text()
            sop_lines = sop_content.splitlines()
            sop_inline = (
                "The SOP content is below (numbered for line references) — do not read it again.\n"
                "---\n" + _number_lines(sop_content) + "\n---\n"
            )
        except Exception:
            # Best-effort inlining: if the file can't be read here, fall back
            # to letting the model read it from disk itself.
            sop_inline = ""

    prompt = _OUTLINER_COMMON.format(
        sop_location=source, sop_inline=sop_inline,
        content_field=content_field, content_rules=content_rules,
    )

    raw, _ = claude_call(
        prompt, verbose=verbose,
        logger=logger, role="outliner",
    )
    tasks = parse_json_response(raw)

    # One retry with an explicit JSON-only reminder before giving up.
    if tasks is None:
        retry_prompt = prompt + "\n\nYour previous response was not valid JSON. Respond with ONLY a valid JSON array."
        raw, _ = claude_call(
            retry_prompt, verbose=verbose,
            logger=logger, role="outliner-retry",
        )
        tasks = parse_json_response(raw)

    if tasks is None:
        print(f"{ts()} [orchestrator] Outliner failed to produce valid JSON.", file=sys.stderr)
        sys.exit(EXIT_ERROR)

    # Tolerate a model that wraps the array in {"tasks": [...]}.
    if isinstance(tasks, dict) and "tasks" in tasks:
        tasks = tasks["tasks"]

    if not isinstance(tasks, list):
        print(f"{ts()} [orchestrator] Outliner returned non-array JSON.", file=sys.stderr)
        sys.exit(EXIT_ERROR)

    # Local mode: expand the reported line ranges into concrete task content.
    if not url_mode and sop_lines:
        populate_content(tasks, sop_lines)

    return tasks


def run(args):
    """Drive the full orchestration loop until the SOP is done or blocked.

    One outliner call builds the task tree; each iteration then selects a
    batch deterministically, runs a worker, optionally verifies, updates the
    JSON sidecar (phase 1), and asks an updater model to mirror progress into
    the SOP itself (phase 2). Exits the process with an EXIT_* code; this
    function never returns normally.
    """
    source = args.sop_file
    url_mode = is_url(source)

    if not url_mode:
        p = Path(source)
        if not p.exists():
            print(f"{ts()} [orchestrator] SOP file not found: {source}", file=sys.stderr)
            sys.exit(EXIT_ERROR)

    # Per-run directory holding the transcript log and the JSON sidecar.
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    run_dir = Path(f".sop.{stamp}")
    run_dir.mkdir(exist_ok=True)

    logger = Logger(str(run_dir))
    print(f"{ts()} [orchestrator] Run directory: {run_dir}")
    print(f"{ts()} [orchestrator] Log file: {logger.path}")

    # --- Outliner ---
    print(f"\n{'='*60}")
    print(f"{ts()} [orchestrator] Running outliner (Opus)...")
    print(f"{'='*60}")

    tasks = run_outliner(source, args.verbose, logger)
    state: SidecarState = {
        "sop_source": source,
        "started_at": datetime.now().isoformat(),
        "tasks": tasks,
    }
    state_path = run_dir / "state.json"
    state_path.write_text(json.dumps(state, indent=2))

    leaves = iter_leaves(tasks)
    leaf_count = len(leaves)
    done_count = len([t for t in leaves if t.get("done", False)])
    print(f"{ts()} [orchestrator] Task tree: {leaf_count} leaf tasks, {done_count} already done")

    # --- Main loop ---
    iteration = 0
    consecutive_failures = 0
    last_known_mtime = os.path.getmtime(source) if not url_mode else 0

    while iteration < args.max_iterations:
        iteration += 1

        # Drift check (external modification only, not our own updater writes)
        if not url_mode:
            try:
                mtime = os.path.getmtime(source)
                if mtime > last_known_mtime:
                    print(f"{ts()} [orchestrator] ⚠ SOP file modified externally — results may be stale")
                    last_known_mtime = mtime
            except Exception:
                pass

        print(f"\n{'='*60}")
        print(f"{ts()} [orchestrator] Iteration {iteration}")
        print(f"{'='*60}")

        # Deterministic batch selection — no model call involved here.
        plan = plan_next_batch(state["tasks"], batch_size=args.batch_size)
        action = plan["action"]
        print(f"{ts()} [orchestrator] Plan: {action}")
        logger.log("planner", "DECISION", json.dumps(plan, indent=2, default=str))

        if action == "done":
            print(f"\n{ts()} [orchestrator] SOP complete!")
            logger.close()
            sys.exit(EXIT_DONE)

        if action == "blocked":
            print(f"\n{ts()} [orchestrator] Blocked: {plan.get('reason', '(no reason)')}")
            logger.close()
            sys.exit(EXIT_BLOCKED)

        if action == "review":
            review_tasks = plan.get("tasks", [])
            desc = review_tasks[0]["description"] if review_tasks else "(unknown)"
            print(f"\n{ts()} [orchestrator] Review gate: {desc}")
            if args.no_review:
                print(f"{ts()} [orchestrator] --no-review: auto-approving")
            else:
                try:
                    input("[orchestrator] Press Enter to continue after review (Ctrl-C to abort)... ")
                except (KeyboardInterrupt, EOFError):
                    print()
                    logger.close()
                    sys.exit(EXIT_REVIEW)
            for rt in review_tasks:
                rt["done"] = True
            state_path.write_text(json.dumps(state, indent=2))
            # Update SOP for review gate
            completed_summary = build_completed_summary(state, [rt["id"] for rt in review_tasks])
            review_note = "Auto-approved (--no-review)" if args.no_review else "User reviewed and approved"
            updater_prompt = UPDATER_PROMPT.format(
                sop_location=source,
                completed_summary=f"{review_note}\n{completed_summary}",
            )
            claude_call(updater_prompt, verbose=args.verbose,
                        allow_permissions=True,
                        logger=logger, role="updater", model="haiku")
            print(f"{ts()} [orchestrator] SOP updated after review.")
            continue

        batch = plan["tasks"]
        batch_ids = [t["id"] for t in batch]
        n = len(batch)
        label = f"{n} task{'s' if n > 1 else ''}"
        print(f"{ts()} [orchestrator] Worker batch ({label}): {', '.join(batch_ids)}")

        # --- Worker ---
        # The worker reports back through a temp file rather than stdout so
        # the streamed transcript can't corrupt the JSON report.
        report_file = tempfile.NamedTemporaryFile(
            suffix='.json', prefix='sop-report-', delete=False
        )
        report_path = report_file.name
        report_file.close()

        tasks_formatted = format_tasks_for_worker(batch)
        worker_prompt = WORKER_PROMPT.format(
            sop_source=source,
            tasks_formatted=tasks_formatted,
            report_path=report_path,
        )
        session_id = str(uuid.uuid4())
        worker_result, worker_rc = claude_call(
            worker_prompt, verbose=True, timeout=args.timeout,
            allow_permissions=args.allow_permissions, session_id=session_id,
            logger=logger, role="worker",
        )

        # --- Read worker report ---
        report = None
        try:
            report_text = Path(report_path).read_text()
            logger.log("worker", "REPORT", report_text)
            report = json.loads(report_text)
        except FileNotFoundError:
            print(f"{ts()} [orchestrator] Worker did not write report file.", file=sys.stderr)
            logger.log("worker", "REPORT", "(missing)")
        except json.JSONDecodeError:
            print(f"{ts()} [orchestrator] Worker report is not valid JSON.", file=sys.stderr)
            logger.log("worker", "REPORT", f"(invalid JSON) {report_text}")
        finally:
            try:
                os.unlink(report_path)
            except OSError:
                pass

        if report is None:
            # Missing/invalid report: count the failure, note it on every
            # task in the batch, and retry (up to 3 consecutive times).
            consecutive_failures += 1
            if consecutive_failures >= 3:
                print(f"{ts()} [orchestrator] 3 consecutive failures — blocking.", file=sys.stderr)
                logger.close()
                sys.exit(EXIT_BLOCKED)
            for task in batch:
                task["attempt_count"] = task.get("attempt_count", 0) + 1
                task_notes = task.get("notes", [])
                task_notes.append("Worker failed to write a valid JSON report.")
                task["notes"] = task_notes
            state_path.write_text(json.dumps(state, indent=2))
            continue

        # Scope report to batch IDs only
        valid_ids = {t["id"] for t in batch}
        report["completed"] = [tid for tid in report.get("completed", []) if tid in valid_ids]
        report["partial"] = {tid: r for tid, r in report.get("partial", {}).items() if tid in valid_ids}
        value_task_ids = {k.rsplit(".", 1)[0] if "." in k else k for k in report.get("values", {}).keys()}
        unknown_ids = (set(report.get("notes", {}).keys()) | value_task_ids) - valid_ids
        if unknown_ids:
            print(f"{ts()} [orchestrator] Warning: worker reported unknown task IDs: {unknown_ids}")

        report_str = json.dumps(report, indent=2)

        # --- Verifier (conditional) ---
        # Only tasks marked verify="independent" that the worker claims
        # completed get a separate verification pass.
        failed_verification_ids: set[str] = set()
        verify_tasks = get_tasks_needing_verification(batch)
        if verify_tasks:
            completed_ids = set(report.get("completed", []))
            verify_tasks_in_completed = [t for t in verify_tasks if t["id"] in completed_ids]

            if verify_tasks_in_completed:
                print(f"{ts()} [orchestrator] Running verifier for {len(verify_tasks_in_completed)} task(s)...")
                tasks_to_verify = "\n".join(
                    f"[{t['id']}] {t['description']}" for t in verify_tasks_in_completed
                )
                verifier_prompt = VERIFIER_PROMPT.format(
                    sop_source=source,
                    tasks_to_verify=tasks_to_verify,
                    report=report_str,
                )
                verifier_output, _ = claude_call(
                    verifier_prompt, verbose=args.verbose,
                    logger=logger, role="verifier", model="haiku",
                )
                verdict = parse_json_response(verifier_output)
                if verdict is None:
                    verifier_output, _ = claude_call(
                        verifier_prompt + "\n\nRespond with ONLY valid JSON.",
                        verbose=args.verbose,
                        logger=logger, role="verifier-retry", model="haiku",
                    )
                    verdict = parse_json_response(verifier_output)

                if verdict and not verdict.get("completed", False):
                    evidence = verdict.get("evidence", "(no evidence)")
                    print(f"{ts()} [orchestrator] Verification failed: {evidence}")
                    failed_verification_ids = {t["id"] for t in verify_tasks_in_completed}
                    report["completed"] = [tid for tid in report["completed"]
                                           if tid not in failed_verification_ids]
                elif verdict:
                    print(f"{ts()} [orchestrator] Verified: {verdict.get('evidence', 'ok')}")

                logger.log("verifier", "DECISION",
                           json.dumps(verdict, indent=2) if verdict else "(parse failed)")

        # --- Phase 1: Code updates JSON sidecar (always) ---
        newly_completed = update_sidecar(state, report)
        if failed_verification_ids:
            # `verdict` is guaranteed bound here: failed_verification_ids is
            # only populated in the branch where a verdict was parsed.
            mark_verification_failed(state, list(failed_verification_ids),
                                     verdict.get("evidence", "(no evidence)"))
        state_path.write_text(json.dumps(state, indent=2))

        done_count = len([t for t in iter_leaves(state["tasks"]) if t.get("done", False)])
        total_count = len(iter_leaves(state["tasks"]))
        print(f"{ts()} [orchestrator] Progress: {done_count}/{total_count} tasks done")

        consecutive_failures = 0

        # --- Phase 2: LLM merges into SOP (only if tasks completed) ---
        if newly_completed:
            print(f"{ts()} [orchestrator] Updating SOP ({len(newly_completed)} tasks)...")
            completed_summary = build_completed_summary(state, newly_completed)

            if url_mode:
                updater_prompt = UPDATER_PROMPT.format(
                    sop_location=source,
                    completed_summary=completed_summary,
                )
                claude_call(updater_prompt, verbose=args.verbose,
                            allow_permissions=True,
                            logger=logger, role="updater", model="haiku")
                print(f"{ts()} [orchestrator] SOP updated at remote location.")
            else:
                # Local files: snapshot before/after so we can show a diff
                # and detect (then retry) a no-op updater.
                sop_before = Path(source).read_text()
                updater_prompt = UPDATER_PROMPT.format(
                    sop_location=source,
                    completed_summary=completed_summary,
                )
                claude_call(updater_prompt, verbose=args.verbose,
                            allow_permissions=True,
                            logger=logger, role="updater", model="haiku")
                sop_after = Path(source).read_text()
                diff = generate_unified_diff(sop_before, sop_after, source)
                if diff:
                    print(f"{ts()} [orchestrator] Updater diff:\n{diff}")
                    logger.log("updater", "DIFF", diff)
                else:
                    print(f"{ts()} [orchestrator] Warning: updater made no changes — retrying...")
                    retry_prompt = (
                        updater_prompt
                        + "\n\nCRITICAL: The previous update attempt made NO changes to the file. "
                        "You MUST mark the completed steps as done and add notes. "
                        "Read the file, find the steps, and use the Edit tool to update them."
                    )
                    claude_call(retry_prompt, verbose=args.verbose,
                                allow_permissions=True,
                                logger=logger, role="updater-retry", model="haiku")
                    sop_after_retry = Path(source).read_text()
                    diff_retry = generate_unified_diff(sop_before, sop_after_retry, source)
                    if diff_retry:
                        print(f"{ts()} [orchestrator] Retry diff:\n{diff_retry}")
                        logger.log("updater", "RETRY_DIFF", diff_retry)
                    else:
                        print(f"{ts()} [orchestrator] Error: updater retry also made no changes", file=sys.stderr)
                        logger.log("updater", "NO_DIFF", "Both attempts failed to update SOP")

                print(f"{ts()} [orchestrator] SOP updated on disk.")
                # Our own write — refresh the baseline so the drift check
                # doesn't flag it as an external modification.
                last_known_mtime = os.path.getmtime(source)

        if args.one:
            print(f"\n{ts()} [orchestrator] --one flag set. Exiting after one iteration.")
            logger.close()
            sys.exit(EXIT_DONE)

    print(f"\n{ts()} [orchestrator] Max iterations ({args.max_iterations}) reached.", file=sys.stderr)
    logger.close()
    sys.exit(EXIT_BLOCKED)


def _print_task_tree(tasks: list[TaskNode], indent: int = 0):
    for task in tasks:
        status = "✓" if task.get("done", False) else "○"
        gate = " [REVIEW GATE]" if task.get("review_gate", False) else ""
        verify = f" ({task.get('verify', 'none')})" if task.get("verify") else ""
        prefix = "  " * indent
        print(f"{prefix}{status} [{task['id']}] {task['description']}{verify}{gate}")
        children = task.get("children", [])
        if children:
            _print_task_tree(children, indent + 1)


def main():
    """CLI entry point: parse arguments, then run a dry-run preview or the loop."""
    parser = argparse.ArgumentParser(description="SOP Orchestrator")
    parser.add_argument("sop_file", help="Path to SOP file or URL (Google Doc, etc.)")
    parser.add_argument("--one", action="store_true", help="Run only one iteration")
    parser.add_argument("--quiet", action="store_true", help="Suppress child Claude output")
    parser.add_argument("--dry-run", action="store_true", help="Run outliner, print task tree, exit")
    parser.add_argument("--timeout", type=int, default=None, help="Per-step timeout in seconds")
    parser.add_argument("--max-iterations", type=int, default=50, help="Max loop iterations")
    parser.add_argument("--batch-size", type=int, default=5, help="Max tasks per worker batch (cap 10)")
    # Fix: the original `action="store_true", default=True` made this flag a
    # no-op — it was always True with no way to disable it. BooleanOptionalAction
    # keeps --allow-permissions and the True default (backward compatible) and
    # adds --no-allow-permissions to actually turn it off.
    parser.add_argument("--allow-permissions", action=argparse.BooleanOptionalAction, default=True,
                        help="Pass --dangerously-skip-permissions to workers")
    parser.add_argument("--no-review", action="store_true",
                        help="Auto-approve review gates without prompting")

    args = parser.parse_args()
    args.verbose = not args.quiet  # derived flag consumed by all child calls
    args.batch_size = min(args.batch_size, 10)  # enforce the documented cap

    if args.dry_run:
        source = args.sop_file
        url_mode = is_url(source)
        if not url_mode and not Path(source).exists():
            print(f"{ts()} [orchestrator] SOP file not found: {source}", file=sys.stderr)
            sys.exit(EXIT_ERROR)

        stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        run_dir = Path(f".sop.{stamp}")
        run_dir.mkdir(exist_ok=True)
        logger = Logger(str(run_dir))

        print(f"{ts()} [dry-run] Running outliner...\n")
        tasks = run_outliner(source, args.verbose, logger)
        print(f"\n{ts()} [dry-run] Task tree:")
        _print_task_tree(tasks)
        # Walk the tree once and reuse the leaf list for both counts.
        leaves = iter_leaves(tasks)
        leaf_count = len(leaves)
        done_count = len([t for t in leaves if t.get("done", False)])
        print(f"\n{ts()} [dry-run] {leaf_count} leaf tasks, {done_count} already done")
        logger.close()
        sys.exit(EXIT_DRY_RUN)

    run(args)


# Script entry point — no side effects when imported as a module.
if __name__ == "__main__":
    main()
