#!/usr/bin/env bash
# verify.sh — smoke-test the PR Review OpenEnv project without Docker or a live LLM.
set -euo pipefail

# Work from the directory that holds this script so relative paths resolve.
script_dir=$(dirname "$0")
cd "$script_dir"

# Running totals of passed and failed checks.
FAIL=0
PASS=0

# Run one check and record the outcome.
#
# Globals:   PASS, FAIL (incremented)
# Arguments: $1   - label printed next to the verdict
#            $2.. - command (with arguments) to execute; it inherits this
#                   function's stdin, so a here-doc can be fed to it
# Outputs:   "  PASS  <label>" or "  FAIL  <label>" on stdout; the command's
#            own stdout is passed through. The command's stderr is hidden
#            when it succeeds and replayed (indented) on stderr when it fails.
# Returns:   0 always; the verdict is carried by the counters.
_check() {
    local label=$1; shift
    local diagnostics status=0 line

    # Capture only stderr: fd 3 carries the command's stdout around the
    # command substitution, and is closed for the command itself.
    { diagnostics=$("$@" 2>&1 1>&3 3>&-); } 3>&1 || status=$?

    if (( status == 0 )); then
        printf "  PASS  %s\n" "$label"
        PASS=$((PASS + 1))
    else
        printf "  FAIL  %s\n" "$label"
        FAIL=$((FAIL + 1))
        # Show why the check failed instead of silently discarding it.
        if [[ -n "$diagnostics" ]]; then
            while IFS= read -r line; do
                printf '          %s\n' "$line" >&2
            done <<<"$diagnostics"
        fi
    fi
}

# Title line followed by one blank line.
printf '%s\n' "=== PR Review OpenEnv — verify.sh ===" ""

# ---------------------------------------------------------------------------
# 1. Required files
# ---------------------------------------------------------------------------
echo "[1] Required files"
# Each entry must exist as a regular file relative to the script directory.
required_files=(
    openenv.yaml
    Dockerfile
    requirements.txt
    README.md
    src/__init__.py
    src/api.py
    src/env.py
    src/models.py
    src/grader.py
    inference.py
)
for required in "${required_files[@]}"; do
    _check "$required" test -f "$required"
done

# ---------------------------------------------------------------------------
# 2. Scenario JSON files
# ---------------------------------------------------------------------------
echo ""
echo "[2] Scenario JSON files"
# Count matches with a glob array rather than parsing `ls` output; nullglob
# makes an unmatched pattern expand to zero entries instead of itself.
_check "at least 15 scenario files" bash -c \
    'shopt -s nullglob; scenarios=(data/scenarios/*.json); (( ${#scenarios[@]} >= 15 ))'
# Every scenario must be valid JSON and carry the keys asserted below.
_check "all JSON files parse cleanly" venv/bin/python - <<'EOF'
import json, glob
files = glob.glob("data/scenarios/*.json")
for p in files:
    with open(p) as f:
        d = json.load(f)
    for key in ("pr_title", "pr_description", "diff", "ground_truth"):
        assert key in d, f"Missing '{key}' in {p}"
    gt = d["ground_truth"]
    assert "bugs" in gt and "should_approve" in gt, f"Bad ground_truth in {p}"
print(f"  {len(files)} files OK")
EOF
# At least one scenario must have a truthy ground_truth.should_approve.
_check "at least one should_approve=true scenario" venv/bin/python - <<'EOF'
import json, glob

def should_approve(path):
    # Close each file promptly instead of leaking the handle.
    with open(path) as f:
        return json.load(f)["ground_truth"]["should_approve"]

clean = [p for p in glob.glob("data/scenarios/*.json") if should_approve(p)]
assert len(clean) >= 1, "No approvable scenario found"
EOF

# ---------------------------------------------------------------------------
# 3. Python imports (package style — no PYTHONPATH tricks)
# ---------------------------------------------------------------------------
echo ""
echo "[3] Python imports"
# Parallel arrays: label i is checked by running import statement i.
import_labels=(
    "src.grader imports"
    "src.models imports"
    "src.env imports"
    "src.api imports"
)
import_statements=(
    "from src.grader import grade"
    "from src.models import PRReviewAction, PRReviewObservation, PRReviewReward"
    "from src.env import PRReviewEnv"
    "import src.api"
)
for idx in "${!import_labels[@]}"; do
    _check "${import_labels[idx]}" venv/bin/python -c "${import_statements[idx]}"
done

# ---------------------------------------------------------------------------
# 4. Grader unit tests
# ---------------------------------------------------------------------------
echo ""
echo "[4] Grader"

# One comment containing every ground-truth keyword, verdict "reject".
_check "perfect score = 1.0" venv/bin/python - <<'EOF'
from src.grader import grade

ground_truth = {
    "bugs": [["off-by-one", "IndexError"], ["inner loop"]],
    "should_approve": False,
}
result = grade(ground_truth, ["off-by-one in inner loop causes IndexError"], "reject")
assert result["score"] == 1.0, result
EOF

# No comments and an "approve" verdict on a PR that should not be approved.
_check "zero score = 0.0" venv/bin/python - <<'EOF'
from src.grader import grade

ground_truth = {"bugs": [["null"]], "should_approve": False}
result = grade(ground_truth, [], "approve")
assert result["score"] == 0.0, result
EOF

# Bug-free PR, no comments, verdict "approve".
_check "clean PR approved = 1.0" venv/bin/python - <<'EOF'
from src.grader import grade

clean_pr = {"bugs": [], "should_approve": True}
result = grade(clean_pr, [], "approve")
assert result["score"] == 1.0, result
EOF

# The keyword "null" must not be counted as found inside "nullable".
_check "no false positive: null not in nullable" venv/bin/python - <<'EOF'
from src.grader import grade

ground_truth = {"bugs": [["null"]], "should_approve": False}
result = grade(ground_truth, ["nullable field"], "reject")
assert result["bugs_found"] == 0, result
EOF

# Rejecting an approvable PR must report the penalty and a score below 1.0.
_check "false rejection penalty applied" venv/bin/python - <<'EOF'
from src.grader import grade

clean_pr = {"bugs": [], "should_approve": True}
result = grade(clean_pr, [], "reject")
assert result["false_rejection_penalty"] == -0.2, result
assert result["score"] < 1.0, result
EOF

# ---------------------------------------------------------------------------
# 5. PRReviewEnv unit tests
# ---------------------------------------------------------------------------
echo ""
echo "[5] PRReviewEnv"

# A fresh reset yields a populated observation at step 0 that is not done.
_check "reset returns valid observation" venv/bin/python - <<'EOF'
from src.env import PRReviewEnv

env = PRReviewEnv(task="easy")
first_obs = env.reset()
assert first_obs.diff and first_obs.pr_title
assert first_obs.step_count == 0
assert not first_obs.done
EOF

# A comment action advances the step counter without ending the episode.
_check "comment step: not done, step_count incremented" venv/bin/python - <<'EOF'
from src.env import PRReviewEnv
from src.models import PRReviewAction

env = PRReviewEnv(task="easy")
env.reset()
obs, reward, done, _info = env.step(PRReviewAction(action_type="comment", body="test"))
assert not done
assert obs.step_count == 1
assert -1.0 <= reward.value <= 1.0
EOF

# An approve action ends the episode and reports a score in the info dict.
_check "approve step: done=True, score set" venv/bin/python - <<'EOF'
from src.env import PRReviewEnv
from src.models import PRReviewAction

env = PRReviewEnv(task="easy")
env.reset()
_obs, reward, done, info = env.step(PRReviewAction(action_type="approve"))
assert done
assert -0.3 <= reward.value <= 0.3
assert "score" in info
EOF

# A request_changes action also ends the episode.
_check "request_changes step: done=True" venv/bin/python - <<'EOF'
from src.env import PRReviewEnv
from src.models import PRReviewAction

env = PRReviewEnv(task="medium")
env.reset()
_obs, _reward, done, _info = env.step(PRReviewAction(action_type="request_changes"))
assert done
EOF

# Constructing the env with an unknown task name must raise ValueError.
_check "invalid task raises ValueError" venv/bin/python - <<'EOF'
from src.env import PRReviewEnv

try:
    PRReviewEnv(task="impossible")
except ValueError:
    pass
else:
    raise AssertionError
EOF

# Each tier must reset to an observation with a truthy scenario_id.
_check "all three task tiers have scenarios" venv/bin/python - <<'EOF'
from src.env import PRReviewEnv

for task in ("easy", "medium", "hard"):
    env = PRReviewEnv(task=task)
    observation = env.reset()
    assert observation.scenario_id, f"No scenario_id for task={task}"
EOF

# ---------------------------------------------------------------------------
# 6. FastAPI endpoints
# ---------------------------------------------------------------------------
echo ""
echo "[6] FastAPI endpoints"

# The root route answers with the fixed status payload.
_check "GET / returns ok" venv/bin/python - <<'EOF'
from fastapi.testclient import TestClient
from src.api import app

with TestClient(app) as client:
    response = client.get("/")
    assert response.json() == {"status": "ok"}
EOF

# /reset succeeds for every tier and returns a not-done observation.
_check "POST /reset returns observation" venv/bin/python - <<'EOF'
from fastapi.testclient import TestClient
from src.api import app

with TestClient(app) as client:
    for task in ("easy", "medium", "hard"):
        response = client.post(f"/reset?task={task}")
        assert response.status_code == 200, response.text
        obs = response.json()
        assert "diff" in obs
        assert "step_count" in obs
        assert not obs["done"]
EOF

# A comment step after reset leaves the episode open.
_check "POST /step comment keeps episode open" venv/bin/python - <<'EOF'
from fastapi.testclient import TestClient
from src.api import app

with TestClient(app) as client:
    client.post("/reset?task=easy")
    response = client.post("/step", json={"action_type": "comment", "body": "found a bug"})
    assert response.status_code == 200
    assert response.json()["done"] == False
EOF

# An approve step ends the episode with a reward value within [-0.3, 0.3].
_check "POST /step approve ends episode" venv/bin/python - <<'EOF'
from fastapi.testclient import TestClient
from src.api import app

with TestClient(app) as client:
    client.post("/reset?task=easy")
    response = client.post("/step", json={"action_type": "approve", "body": ""})
    assert response.status_code == 200
    payload = response.json()
    assert payload["done"] == True
    assert -0.3 <= payload["reward"]["value"] <= 0.3
EOF

# /state reflects the task chosen at reset and an unfinished episode.
_check "GET /state returns state" venv/bin/python - <<'EOF'
from fastapi.testclient import TestClient
from src.api import app

with TestClient(app) as client:
    client.post("/reset?task=hard")
    response = client.get("/state")
    assert response.status_code == 200
    state = response.json()
    assert state["task"] == "hard"
    assert state["done"] == False
EOF

# ---------------------------------------------------------------------------
# 7. openenv.yaml structure
# ---------------------------------------------------------------------------
echo ""
echo "[7] openenv.yaml"
# Top-level keys are matched at the start of a line; task ids anywhere in one.
for yaml_key in name version; do
    _check "has ${yaml_key} field" grep -q "^${yaml_key}:" openenv.yaml
done
for tier in easy medium hard; do
    _check "has ${tier} task" grep -q "id: ${tier}" openenv.yaml
done
for yaml_key in observation_space action_space reward_range; do
    _check "has ${yaml_key}" grep -q "^${yaml_key}:" openenv.yaml
done

# ---------------------------------------------------------------------------
# 8. inference.py structure (via grep — avoids needing live env vars)
# ---------------------------------------------------------------------------
echo ""
echo "[8] inference.py structure"
# Settings that must be assigned from os.environ["<NAME>"].
for env_name in MODEL_NAME HF_TOKEN; do
    _check "${env_name} from os.environ" \
        grep -q "${env_name}.*=.*os\.environ\[\"${env_name}\"\]" inference.py
done
# Logging helpers that must be defined.
for helper in log_start log_step log_end; do
    _check "${helper} defined" grep -q "def ${helper}" inference.py
done
# Each tier name must appear as a double-quoted string.
for tier in easy medium hard; do
    _check "runs ${tier} task" grep -q "\"${tier}\"" inference.py
done

# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
# Print the totals; the final test makes the script exit non-zero on any failure.
rule="======================================="
printf '\n'
printf '%s\n' "$rule"
printf "  PASSED: %d\n" "$PASS"
printf "  FAILED: %d\n" "$FAIL"
printf '%s\n' "$rule"
[[ "$FAIL" -eq 0 ]]