name: CI – Docker Build, Server Tests, Inference Smoke Test

# Run on pushes to main and on pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# Workflow-level environment; quoted so the version stays a string.
env:
  PYTHON_VERSION: "3.11"

jobs:
  # --------------------------------------------------------------------------
  # Job 1: Docker image builds successfully
  # --------------------------------------------------------------------------
  docker-build:
    # Builds the image from the repository Dockerfile, runs it with port 7860
    # published, and exercises the HTTP API (/health, /tasks, /reset, /grader,
    # /step) with curl. The container is removed at the end even on failure.
    name: Docker Build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Build Docker image
        run: docker build -t data-cleaning-env .

      - name: Start container
        run: |
          docker run -d --name env-server -p 7860:7860 data-cleaning-env
          echo "Waiting for server startup..."
          # Poll /health up to 30 times, 2s apart (roughly a 60s budget).
          ready=0
          for attempt in $(seq 1 30); do
            if curl -sf http://localhost:7860/health > /dev/null 2>&1; then
              echo "Server is up after ${attempt} attempt(s)"
              ready=1
              break
            fi
            sleep 2
          done
          # Fail in this step, with the container logs, instead of letting a
          # later step fail with an unrelated-looking curl/JSON error.
          if [ "$ready" -ne 1 ]; then
            echo "::error::Server did not become healthy within 60s"
            docker logs env-server || true
            exit 1
          fi

      - name: Health check
        run: |
          STATUS=$(curl -sf http://localhost:7860/health | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])")
          echo "Health: $STATUS"
          [ "$STATUS" = "ok" ] || exit 1

      - name: Test /tasks returns 4 tasks
        run: |
          COUNT=$(curl -sf http://localhost:7860/tasks | python3 -c "import sys,json; print(len(json.load(sys.stdin)['tasks']))")
          echo "Task count: $COUNT"
          [ "$COUNT" = "4" ] || exit 1

      - name: Test /reset for each task
        run: |
          # Every task must yield a non-empty episode id on reset.
          for task in easy medium hard expert; do
            RESP=$(curl -sf -X POST http://localhost:7860/reset \
              -H "Content-Type: application/json" -d "{\"task\":\"$task\"}")
            EID=$(echo "$RESP" | python3 -c "import sys,json; print(json.load(sys.stdin)['state']['episode_id'])")
            echo "$task: episode_id=$EID"
            [ -n "$EID" ] || exit 1
          done

      - name: Test /grader returns score in [0,1]
        run: |
          RESP=$(curl -sf -X POST http://localhost:7860/reset \
            -H "Content-Type: application/json" -d '{"task":"easy"}')
          EID=$(echo "$RESP" | python3 -c "import sys,json; print(json.load(sys.stdin)['state']['episode_id'])")
          GRADE=$(curl -sf -X POST http://localhost:7860/grader \
            -H "Content-Type: application/json" -d "{\"episode_id\":\"$EID\"}")
          echo "Grade response: $GRADE"
          # Pass the JSON on stdin rather than splicing it into the Python
          # source, where quotes or backslash escapes in the payload would
          # corrupt the script or change the parsed data.
          printf '%s' "$GRADE" | python3 -c "
          import json, sys
          d = json.load(sys.stdin)
          assert 0.0 <= d['score'] <= 1.0, f'Score out of range: {d[\"score\"]}'
          assert 'breakdown' in d, 'Missing breakdown'
          for k in ['completeness', 'consistency', 'accuracy', 'format']:
              assert k in d['breakdown'], f'Missing {k} in breakdown'
          print('Grader: OK')
          "

      - name: Test /step with action
        run: |
          RESP=$(curl -sf -X POST http://localhost:7860/reset \
            -H "Content-Type: application/json" -d '{"task":"easy"}')
          EID=$(echo "$RESP" | python3 -c "import sys,json; print(json.load(sys.stdin)['state']['episode_id'])")
          # Apply the action to the first column reported by the observation.
          COL=$(echo "$RESP" | python3 -c "import sys,json; print(json.load(sys.stdin)['observation']['columns'][0])")
          STEP=$(curl -sf -X POST http://localhost:7860/step \
            -H "Content-Type: application/json" \
            -d "{\"episode_id\":\"$EID\",\"action\":{\"action_type\":\"fill_missing\",\"column\":\"$COL\",\"strategy\":\"median\"}}")
          # JSON goes in on stdin for the same reason as in the grader step.
          printf '%s' "$STEP" | python3 -c "
          import json, sys
          d = json.load(sys.stdin)
          assert 'reward' in d, 'Missing reward'
          assert 'done' in d, 'Missing done'
          assert 'observation' in d, 'Missing observation'
          obs = d['observation']
          assert 'sample_rows' in obs, 'Missing sample_rows'
          assert 'action_history' in obs, 'Missing action_history'
          assert 'budget_remaining' in obs, 'Missing budget_remaining'
          print(f'Step: reward={d[\"reward\"]:.4f}, budget={obs[\"budget_remaining\"]}')
          "

      # Runs regardless of earlier failures so the container never lingers.
      - name: Cleanup
        if: always()
        run: docker rm -f env-server 2>/dev/null || true

  # --------------------------------------------------------------------------
  # Job 2: Unit tests + openenv validate (no Docker needed)
  # --------------------------------------------------------------------------
  unit-tests:
    # Runs the pytest suite, checks that a freshly built wheel imports from a
    # clean virtualenv, runs `openenv validate`, and asserts a few fields of
    # openenv.yaml.
    name: Unit Tests & Validation
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Set up Python
        run: uv python install ${{ env.PYTHON_VERSION }}

      # requirements.txt first, then the tooling used by the later steps.
      - name: Install dependencies
        run: |
          uv venv .venv
          uv pip install --python .venv/bin/python -r requirements.txt
          uv pip install --python .venv/bin/python openenv-core pytest

      - name: Install package in dev mode
        run: uv pip install --python .venv/bin/python -e .

      - name: Run pytest suite
        run: .venv/bin/python -m pytest tests/ -v

      # Build the wheel with the main venv, then install and import it from a
      # second venv that has nothing else installed by this job.
      - name: Wheel install smoke test
        run: |
          uv venv .venv-smoke
          uv pip install --python .venv/bin/python build
          .venv/bin/python -m build --wheel
          uv pip install --python .venv-smoke/bin/python dist/*.whl
          .venv-smoke/bin/python -c "
          import data_cleaning_env
          from data_cleaning_env.models import ActionType, Observation, CleaningAction
          from data_cleaning_env.grader import compute_quality_score
          from data_cleaning_env.client import DataCleaningEnvClient
          total = len(list(ActionType))
          assert total == 16, f'Expected 16 actions, got {total}'
          print(f'Wheel smoke test: OK ({total} actions)')
          "

      - name: openenv validate
        run: .venv/bin/openenv validate

      # Spot-check the manifest fields this workflow relies on.
      - name: YAML validation
        run: |
          .venv/bin/python -c "
          import yaml
          with open('openenv.yaml') as manifest:
              spec = yaml.safe_load(manifest)
          assert 'openenv' in spec['tags']
          assert spec['license'] == 'MIT'
          assert len(spec['tasks']) == 4
          print('YAML: OK')
          "

  # --------------------------------------------------------------------------
  # Job 3: Inference smoke test (heuristic mode, no LLM credits needed)
  # --------------------------------------------------------------------------
  inference-smoke:
    # Starts the server with uvicorn on port 8000, runs inference.py against
    # it, validates the [START]/[STEP]/[END] log line format, and checks the
    # /baseline endpoint.
    name: Inference Smoke Test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Set up Python
        run: uv python install ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          uv venv .venv
          uv pip install --python .venv/bin/python -r requirements.txt
          uv pip install --python .venv/bin/python -e .

      - name: Start server in background
        run: |
          .venv/bin/uvicorn data_cleaning_env.server.app:app --host 0.0.0.0 --port 8000 &
          echo "Waiting for startup..."
          # Poll /health up to 30 times, 2s apart (roughly a 60s budget).
          ready=0
          for attempt in $(seq 1 30); do
            if curl -sf http://localhost:8000/health > /dev/null 2>&1; then
              echo "Server up after ${attempt} attempt(s)"
              ready=1
              break
            fi
            sleep 2
          done
          # Fail here instead of letting later steps fail against a dead server.
          if [ "$ready" -ne 1 ]; then
            echo "::error::Server did not become healthy within 60s"
            exit 1
          fi

      - name: Run inference (heuristic mode)
        run: |
          # pipefail makes a failing inference.py fail the step even though
          # its output is piped through tee; tee keeps the output visible in
          # the job log whether or not the run succeeds.
          set -o pipefail
          LOG_FILE="${RUNNER_TEMP}/inference.log"
          export LOG_FILE
          .venv/bin/python inference.py --url http://localhost:8000 2>&1 | tee "$LOG_FILE"

          # Quoted heredoc: the shell performs no expansion, and the captured
          # log is read from a file instead of being pasted into Python source.
          python3 - <<'PY'
          import os, re

          with open(os.environ['LOG_FILE'], encoding='utf-8', errors='replace') as fh:
              output = fh.read()
          lines = [l for l in output.strip().split('\n') if l.startswith('[')]

          start_re = re.compile(r'^\[START\] task=\S+ env=\S+ model=\S+$')
          step_re = re.compile(r'^\[STEP\] step=\d+ action=.+ reward=-?\d+\.\d{2} done=(true|false) error=.+$')
          end_re = re.compile(r'^\[END\] success=(true|false) steps=\d+ score=\d+\.\d{3} rewards=[\d,.\-]+$')

          starts = [l for l in lines if start_re.match(l)]
          ends = [l for l in lines if end_re.match(l)]
          steps = [l for l in lines if step_re.match(l)]

          assert len(starts) == 4, f'Expected 4 START, got {len(starts)}: {starts}'
          assert len(ends) == 4, f'Expected 4 END, got {len(ends)}: {ends}'
          assert len(steps) > 0, 'No STEP lines found'

          print(f'Log format: {len(starts)} START, {len(steps)} STEP, {len(ends)} END -- COMPLIANT')
          PY

      - name: Run baseline
        run: |
          RESP=$(curl -sf -X POST http://localhost:8000/baseline)
          echo "Baseline: $RESP"
          # Pass the JSON on stdin rather than splicing it into the Python
          # source, where quotes or backslash escapes would break parsing.
          printf '%s' "$RESP" | python3 -c "
          import json, sys
          d = json.load(sys.stdin)
          scores = d['baseline_scores']
          assert len(scores) == 4, f'Expected 4 tasks, got {len(scores)}'
          for task, score in scores.items():
              assert 0.0 <= score <= 1.0, f'{task} score out of range: {score}'
          print('Baseline: OK')
          "