name: CI

# Run on pushes to main and on pull requests that target main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Least privilege: the job only needs to read the repository contents.
# Without this block GITHUB_TOKEN gets the repository's default scopes,
# which may include write access, while the job runs PR-supplied code.
permissions:
  contents: read

jobs:
  test:
    name: Tests + sample pipeline
    runs-on: ubuntu-latest
    # Fail fast if a download or the pipeline hangs; without this a stuck
    # job holds a runner for the 360-minute default. Deliberately generous:
    # tighten once the typical cold-cache run time is known.
    timeout-minutes: 45

    env:
      # Keep transformers torch-only; never pull TensorFlow/Flax into CI.
      USE_TF: "0"
      USE_FLAX: "0"
      TRANSFORMERS_NO_TF: "1"
      # Point every Hugging Face cache location at one directory inside the
      # workspace; the "Cache Hugging Face models" step saves/restores it.
      # NOTE(review): recent transformers releases deprecate
      # TRANSFORMERS_CACHE in favour of HF_HOME; confirm against the pinned
      # version before dropping it.
      HF_HOME: ${{ github.workspace }}/.hf_cache
      TRANSFORMERS_CACHE: ${{ github.workspace }}/.hf_cache
      SENTENCE_TRANSFORMERS_HOME: ${{ github.workspace }}/.hf_cache

    steps:
      - uses: actions/checkout@v4
        with:
          # No later step pushes or fetches, so do not leave the token in
          # .git/config where the code run by later steps (pip install -e .,
          # pytest, the pipeline) could read it.
          persist-credentials: false

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Dependency manifests passed to setup-python's pip caching.
          cache-dependency-path: |
            requirements.txt
            pyproject.toml
          cache: pip
          # Quoted so YAML keeps the string "3.10" rather than the float 3.1.
          python-version: "3.10"

      - name: Cache Hugging Face models
        uses: actions/cache@v4
        with:
          # Exact key; bump the trailing version to start a fresh cache.
          key: hf-${{ runner.os }}-minilm-deberta-v1
          # Prefix fallback used when the exact key has no entry.
          restore-keys: |
            hf-${{ runner.os }}-
          # The directory the job-level HF_HOME variable points at.
          path: ${{ env.HF_HOME }}

      - name: Install dependencies
        run: |
          # CPU-only wheel index: torch is installed from here before the
          # project requirements to avoid pulling the huge CUDA build.
          torch_cpu_index=https://download.pytorch.org/whl/cpu

          python -m pip install --upgrade pip
          pip install --index-url "${torch_cpu_index}" torch
          pip install -r requirements.txt
          # Editable install of the project itself.
          pip install -e .

      - name: Run unit tests
        # Quiet mode keeps the CI log short.
        run: |
          pytest -q

      - name: Run pipeline on sample data
        run: |
          # Arguments for the evaluate subcommand, one option per line.
          cli_args=(
            --input data/samples/custom_rag_examples.jsonl
            --config configs/default.yaml
            --output outputs/
            --no-progress
          )
          python -m citeguard.cli evaluate "${cli_args[@]}"

      - name: Generate figures
        # NOTE(review): presumably writes the PNGs that the verify step looks
        # for under outputs/figures/ -- confirm against the script.
        run: |
          python scripts/make_figures.py

      - name: Verify expected outputs exist
        run: |
          # Check every expected artifact and report each missing or empty
          # one by name. A bare `test -s` fails silently and stops at the
          # first problem, leaving nothing in the log to say which file
          # was absent.
          expected=(
            outputs/tables/claim_eval.csv
            outputs/tables/example_summary.csv
            outputs/tables/aggregate_metrics.json
            outputs/evidence_cards/evidence_cards.jsonl
          )
          for fig in fig_error_distribution fig_confusion_matrix \
                     fig_baseline_comparison fig_ablation \
                     fig_retrieval_vs_support fig_runtime; do
            expected+=("outputs/figures/${fig}.png")
          done

          missing=0
          for path in "${expected[@]}"; do
            # -s: the file exists and is non-empty.
            if [ ! -s "${path}" ]; then
              # Workflow command: surfaces as an error annotation on the run.
              echo "::error::Missing or empty expected output: ${path}"
              missing=1
            fi
          done

          if [ "${missing}" -ne 0 ]; then
            exit 1
          fi
          echo "All expected outputs present."

      - name: Upload outputs artifact
        uses: actions/upload-artifact@v4
        # Runs even when an earlier step failed, so whatever was produced
        # is still available for debugging.
        if: ${{ always() }}
        with:
          path: outputs/
          name: citeguard-outputs-py310
          # Days to keep the artifact.
          retention-days: 14