InsuranceBot / tools /git-hooks /pre-commit
rohitsar567's picture
Deploy v1 — single-Docker FastAPI + Next.js + RAG + voice + faithfulness
d2e3503 verified
Raw
History Blame Contribute Delete
1.82 kB
#!/bin/sh
# Local eval regression gate — runs before every commit.
# Same outcome as the .github/workflows/eval.yml that GitHub blocked for PAT scope.
#
# Exit status:
#   0  commit may proceed (gate passed, gate skipped, or the eval could not run)
#   1  commit blocked (accuracy below the floor, or the repo root is unreachable)

# Only run on commits that stage something under backend/, rag/ or eval/, or
# that touch docs/04-failure-modes.md. backend/persona.py needs no separate
# alternative: it is already matched by the backend/ prefix.
if ! git diff --cached --name-only \
  | grep -qE '^(backend/|rag/|eval/|docs/04-failure-modes\.md)'; then
  exit 0 # No relevant changes — skip eval
fi

echo "[pre-commit] running quick eval (limit 10) to catch regressions..."
# All paths below (.venv/, eval/) are relative to the repository root.
cd "$(git rev-parse --show-toplevel)" || exit 1

# Missing tooling or gold data skips the gate rather than blocking the commit.
if [ ! -f ".venv/bin/python" ]; then
  echo "[pre-commit] no .venv — skipping eval"
  exit 0
fi
if [ ! -f "eval/gold_qa.json" ]; then
  echo "[pre-commit] no gold_qa.json — skipping eval"
  exit 0
fi

# Write the eval output to a private, unpredictable temp file instead of a
# fixed name in /tmp (which is world-writable and open to symlink clobbering).
# Failing to create it is treated like any other "cannot run" case: skip.
EVAL_LOG=$(mktemp "${TMPDIR:-/tmp}/pre_commit_eval.XXXXXX") || {
  echo "[pre-commit] could not create a temp log file — skipping eval"
  exit 0
}

# Run eval; capture exit + parse accuracy
.venv/bin/python -m eval.run --limit 10 > "$EVAL_LOG" 2>&1
EVAL_EXIT=$?
if [ "$EVAL_EXIT" -ne 0 ]; then
  # The log is kept on this path so the failure can be inspected.
  echo "[pre-commit] eval failed to run (exit=$EVAL_EXIT). See $EVAL_LOG"
  # NOTE(review): nothing in this script writes an audit log — confirm where
  # this is supposed to be recorded.
  echo "[pre-commit] WARN: allowing commit but flagging this in the audit log."
  exit 0
fi
# The eval ran cleanly; its log is no longer needed.
rm -f -- "$EVAL_LOG"

# Read summary.factual_accuracy from eval/results.json, formatted to three
# decimals. Any failure to read or format it prints 0.000, which is below the
# floor, so an unreadable results file blocks the commit.
ACCURACY=$(.venv/bin/python -c "
import json
try:
    with open('eval/results.json') as fh:
        s = json.load(fh).get('summary', {})
    print(f\"{s.get('factual_accuracy', 0):.3f}\")
except Exception:
    print('0.000')
")

# Floor: 0.55 (lenient since gold Q&A is small; raise to 0.65 once eval set grows)
FLOOR="0.55"

# POSIX sh has no floating-point comparison, so delegate it to awk; "+ 0"
# forces a numeric (not string) comparison.
RESULT=$(awk -v a="$ACCURACY" -v f="$FLOOR" 'BEGIN { print (a + 0 < f + 0) ? "FAIL" : "PASS" }')
if [ "$RESULT" = "FAIL" ]; then
  echo "[pre-commit] ❌ FAIL — factual accuracy $ACCURACY < floor $FLOOR"
  echo "[pre-commit] See eval/results.md for the misses. Commit blocked."
  echo "[pre-commit] To bypass anyway: git commit --no-verify"
  exit 1
fi

echo "[pre-commit] ✓ PASS — factual accuracy $ACCURACY ≥ floor $FLOOR"
exit 0