Spaces:

rohitsar567
/

InsuranceBot

Sleeping

App Files Files Community

InsuranceBot / .github /workflows /eval.yml

rohitsar567

Deploy v1 — single-Docker FastAPI + Next.js + RAG + voice + faithfulness

7863797 verified about 2 months ago

Raw

History Blame Contribute Delete

2.49 kB

	name: Gold-QA eval gate

	on:
	pull_request:
	branches: [main]
	push:
	branches: [main]
	workflow_dispatch:

	jobs:
	eval:
	runs-on: ubuntu-latest
	timeout-minutes: 25
	steps:
	- uses: actions/checkout@v4
	with:
	lfs: true

	- name: Set up Python 3.11
	uses: actions/setup-python@v5
	with:
	python-version: "3.11"
	cache: "pip"

	- name: Cache HuggingFace model
	uses: actions/cache@v4
	with:
	path: ~/.cache/huggingface
	key: hf-bge-small-${{ runner.os }}

	- name: Install dependencies
	run: \|
	python -m pip install --upgrade pip
	pip install -r requirements.txt

	- name: Pre-download embedding model
	run: python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('BAAI/bge-small-en-v1.5')"

	- name: Rebuild Chroma if missing
	run: \|
	if [ ! -f rag/vectors/chroma.sqlite3 ]; then
	python -m rag.ingest 2>&1 \| tail -30
	fi

	- name: Run eval on first 25 gold pairs
	env:
	SARVAM_API_KEY: ${{ secrets.SARVAM_API_KEY }}
	VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
	GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
	OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
	run: python -m eval.run --limit 25 2>&1 \| tee eval/ci_output.log

	- name: Enforce accuracy floor
	run: \|
	python <<'PY'
	import json, sys
	d = json.load(open("eval/results.json"))
	s = d.get("summary", {})
	factual = s.get("factual_accuracy", 0.0)
	citation = s.get("citation_accuracy", 0.0)
	print(f"Factual: {factual*100:.1f}%")
	print(f"Citation: {citation*100:.1f}%")
	floor_factual = 0.65
	floor_citation = 0.55
	if factual < floor_factual:
	print(f"FAIL — factual below floor {floor_factual*100:.0f}%")
	sys.exit(1)
	if citation < floor_citation:
	print(f"FAIL — citation below floor {floor_citation*100:.0f}%")
	sys.exit(1)
	print("PASS")
	PY

	- name: Upload eval results
	if: always()
	uses: actions/upload-artifact@v4
	with:
	name: eval-results-${{ github.run_number }}
	path: \|
	eval/results.md
	eval/results.json
	eval/ci_output.log
	retention-days: 30