Spaces:

rohitsar567
/

InsuranceBot

Sleeping

App Files Files Community

InsuranceBot / tools /git-hooks /pre-commit

rohitsar567

Deploy v1 — single-Docker FastAPI + Next.js + RAG + voice + faithfulness

d2e3503 verified about 2 months ago

Raw

History Blame Contribute Delete

1.82 kB

	#!/bin/sh
	# Local eval regression gate — runs before every commit.
	# Same outcome as the .github/workflows/eval.yml that GitHub blocked for PAT scope.
	#
	# Flow: skip unless the staged files touch eval-relevant paths, run a
	# 10-item eval, then block the commit when the factual accuracy recorded in
	# eval/results.json is below FLOOR.
	# Exit status: 0 = commit allowed (passed or skipped), 1 = commit blocked.

	# Only run on commits that stage something under backend/, rag/, eval/, or
	# the failure-modes doc. grep -q is enough: only existence of a match matters.
	if ! git diff --cached --name-only |
	  grep -qE '^(backend/|rag/|eval/|docs/04-failure-modes\.md)'; then
	  exit 0 # No relevant changes — skip eval
	fi

	echo "[pre-commit] running quick eval (limit 10) to catch regressions..."
	cd "$(git rev-parse --show-toplevel)" || exit 1

	if [ ! -f ".venv/bin/python" ]; then
	  echo "[pre-commit] no .venv — skipping eval"
	  exit 0
	fi

	if [ ! -f "eval/gold_qa.json" ]; then
	  echo "[pre-commit] no gold_qa.json — skipping eval"
	  exit 0
	fi

	# Log to a private, unpredictable temp file rather than a fixed /tmp name,
	# which could be a pre-planted symlink or owned by another user.
	EVAL_LOG=$(mktemp "${TMPDIR:-/tmp}/pre_commit_eval.XXXXXX") || {
	  echo "[pre-commit] could not create a temp log file — skipping eval"
	  exit 0
	}

	# Run eval; capture exit + parse accuracy
	.venv/bin/python -m eval.run --limit 10 > "$EVAL_LOG" 2>&1
	EVAL_EXIT=$?
	if [ "$EVAL_EXIT" -ne 0 ]; then
	  # A harness failure is deliberately non-blocking; keep the log for debugging.
	  echo "[pre-commit] eval failed to run (exit=$EVAL_EXIT). See $EVAL_LOG"
	  echo "[pre-commit] WARN: allowing commit but flagging this in the audit log."
	  exit 0
	fi
	rm -f -- "$EVAL_LOG" # the log is only useful when the run itself failed

	# Read summary.factual_accuracy from the results file. The program is fed on
	# stdin through a quoted here-document so the shell expands nothing in it;
	# "<<-" strips leading tabs, so the Python indentation below uses spaces.
	# Any read/parse/format problem is reported as 0.000, which fails the gate.
	ACCURACY=$(.venv/bin/python - <<-'PY'
	import json
	try:
	    with open('eval/results.json') as fh:
	        summary = json.load(fh).get('summary', {})
	    print(f"{summary.get('factual_accuracy', 0):.3f}")
	except Exception:
	    print('0.000')
	PY
	)
	# If the interpreter itself printed nothing, still show a number in the message.
	ACCURACY=${ACCURACY:-0.000}

	# Floor: 0.55 (lenient since gold Q&A is small; raise to 0.65 once eval set grows)
	FLOOR="0.55"
	# sh has no floating-point arithmetic; awk does the numeric comparison.
	RESULT=$(awk -v a="$ACCURACY" -v f="$FLOOR" 'BEGIN { print (a + 0 < f + 0) ? "FAIL" : "PASS" }')

	if [ "$RESULT" = "FAIL" ]; then
	  echo "[pre-commit] ❌ FAIL — factual accuracy $ACCURACY < floor $FLOOR"
	  echo "[pre-commit] See eval/results.md for the misses. Commit blocked."
	  echo "[pre-commit] To bypass anyway: git commit --no-verify"
	  exit 1
	fi
	echo "[pre-commit] ✓ PASS — factual accuracy $ACCURACY ≥ floor $FLOOR"
	exit 0