# Sage / Makefile
# Author: vxa8502
# Last change: Split CI into quick and full modes (commit 1bb3f41)
.PHONY: all setup data data-validate eval eval-full eval-quick eval-summary demo demo-interview reset reset-eval reset-hard check-env qdrant-up qdrant-down qdrant-status eda serve serve-dev docker-build docker-run deploy-info deploy-health human-eval-workflow human-eval-generate human-eval human-eval-analyze human-eval-status fmt test lint typecheck ci ci-full info metrics-snapshot health load-test load-test-quick kaggle-test help
# NOTE(review): 'kaggle-test' is declared .PHONY and advertised in 'make help',
# but no rule for it exists in this file — add the target or drop the references.

# Bare `make` previously ran the first rule in the file (check-env), which is
# surprising; show the help text instead.
.DEFAULT_GOAL := help

# ---------------------------------------------------------------------------
# Configurable Variables (override: make demo QUERY="gaming mouse")
# ---------------------------------------------------------------------------
# Tooling resolved from the project virtualenv (created by `make setup`).
VENV_BIN := .venv/bin
PYTHON := $(VENV_BIN)/python
RUFF := $(VENV_BIN)/ruff
MYPY := $(VENV_BIN)/mypy
# Demo / evaluation knobs.
QUERY ?= wireless headphones with noise cancellation
TOP_K ?= 1
SAMPLES ?= 10
SEED ?= 42
# API / load-test knobs.
PORT ?= 8000
URL ?= https://vxa8502-sage.hf.space
REQUESTS ?= 50
# ---------------------------------------------------------------------------
# Environment Check
# ---------------------------------------------------------------------------
# Fails fast unless both external dependencies are available:
#   1. an LLM API key (ANTHROPIC_API_KEY or OPENAI_API_KEY, from the shell
#      environment or .env via python-dotenv)
#   2. a reachable Qdrant instance (via sage.adapters.vector_store.get_client)
# Uses bare `python` rather than $(PYTHON), matching the other pipeline
# targets in this file — presumably the venv is expected to be activated;
# TODO confirm that convention is intentional.
check-env:
	@echo "Checking environment..."
	@python -c "\
	import os; from dotenv import load_dotenv; load_dotenv(); \
	a = os.getenv('ANTHROPIC_API_KEY', ''); o = os.getenv('OPENAI_API_KEY', ''); \
	exit(0) if (a or o) else exit(1)" || \
	(echo "ERROR: Neither ANTHROPIC_API_KEY nor OPENAI_API_KEY is set (checked shell + .env)" && exit 1)
	@python -c "\
	from sage.adapters.vector_store import get_client; \
	c = get_client(); c.get_collections(); print('Qdrant OK')" 2>/dev/null || \
	(echo "ERROR: Cannot connect to Qdrant. Check QDRANT_URL in .env or run 'make qdrant-up' for local." && exit 1)
	@echo "Environment OK"
# ---------------------------------------------------------------------------
# Setup
# ---------------------------------------------------------------------------
# Create the virtualenv and install the package with all runtime extras.
# The venv is created with whatever `python` is on PATH; pip runs inside the
# venv via `activate` on the same recipe line (each recipe line is its own
# shell, so activation would not carry over to a following line).
setup:
	@echo "=== SETUP ==="
	python -m venv .venv
	. .venv/bin/activate && pip install -e ".[pipeline,api,anthropic,openai]"
	@echo ""
	@echo "Setup complete. Activate with: source .venv/bin/activate"
# ---------------------------------------------------------------------------
# Data Pipeline
# ---------------------------------------------------------------------------
# Download, filter, chunk, embed, index to Qdrant
# Verifies afterwards that the pipeline actually produced the split files.
data: check-env
	@echo "=== DATA PIPELINE ==="
	python scripts/pipeline.py
	@echo "Verifying outputs..."
	@test -d data/splits || (echo "FAIL: data/splits/ not created" && exit 1)
	@test -f data/splits/train.parquet || (echo "FAIL: train.parquet not created" && exit 1)
	@echo "Data pipeline complete"

# Validate data outputs exist and have expected structure:
#   - train/test parquet splits present
#   - train set larger than 1000 rows
#   - at least one data/embeddings_*.npy with embedding dimension 384
# No check-env prerequisite: validation only inspects local files.
data-validate:
	@echo "Validating data outputs..."
	@test -f data/splits/train.parquet || (echo "FAIL: train.parquet missing" && exit 1)
	@test -f data/splits/test.parquet || (echo "FAIL: test.parquet missing" && exit 1)
	@python -c "\
	import pandas as pd; import numpy as np; from pathlib import Path; \
	t = pd.read_parquet('data/splits/train.parquet'); \
	e = list(Path('data').glob('embeddings_*.npy')); \
	emb = np.load(e[0]) if e else None; \
	print(f'Train: {len(t):,} rows, {t.parent_asin.nunique():,} products'); \
	print(f'Embeddings: {emb.shape if emb is not None else \"not found\"}'); \
	assert len(t) > 1000, 'Train set too small'; \
	assert emb is not None and emb.shape[1] == 384, 'Embedding dimension mismatch'; \
	print('Validation passed')"

# Exploratory data analysis (queries production Qdrant)
# Figures land in assets/, stats in reports/ (both created if missing).
eda: check-env
	@echo "=== PRODUCTION EDA ==="
	@mkdir -p assets reports
	python scripts/eda.py
# ---------------------------------------------------------------------------
# Evaluation Suite (layered: quick → standard → complete)
# ---------------------------------------------------------------------------
# Quick: Fast iteration, no RAGAS (~1 min)
# - Primary retrieval metrics (NDCG, Hit@K, MRR)
# - Basic faithfulness (HHEM only, 5 samples)
# All steps chained with && in one shell, so the target aborts on the first
# failing step.
eval-quick: check-env
	@echo "=== QUICK EVALUATION ===" && \
	python scripts/build_natural_eval_dataset.py && \
	python scripts/evaluation.py --dataset eval_natural_queries.json --section primary && \
	python scripts/faithfulness.py --samples 5 && \
	echo "=== QUICK EVAL COMPLETE ==="

# Standard: Pre-commit validation (~5 min)
# - Primary retrieval metrics
# - Explanation tests (basic, gate, verify, cold-start)
# - Faithfulness (HHEM + RAGAS)
# - Spot checks
# SAMPLES (default 10) controls the faithfulness sample count.
eval: check-env
	@echo "=== EVALUATION SUITE ===" && \
	echo "" && \
	echo "--- [1/4] Retrieval metrics ---" && \
	python scripts/build_natural_eval_dataset.py && \
	python scripts/evaluation.py --dataset eval_natural_queries.json --section primary && \
	echo "" && \
	echo "--- [2/4] Explanation tests ---" && \
	python scripts/explanation.py --section basic && \
	python scripts/explanation.py --section gate && \
	python scripts/explanation.py --section verify && \
	python scripts/explanation.py --section cold && \
	echo "" && \
	echo "--- [3/4] Faithfulness (HHEM + RAGAS) ---" && \
	python scripts/faithfulness.py --samples $(SAMPLES) --ragas && \
	echo "" && \
	echo "--- [4/4] Sanity checks ---" && \
	python scripts/sanity_checks.py --section spot && \
	echo "" && \
	echo "=== EVALUATION COMPLETE ==="
# Complete: Full reproducible suite (~15 min automated)
# - EDA (production data stats + figures)
# - All retrieval metrics + ablations (aggregation, rating, K, weights)
# - Baseline comparison (Random, Popularity, ItemKNN)
# - All explanation tests
# - Faithfulness (HHEM + RAGAS)
# - Grounding delta (WITH vs WITHOUT evidence)
# Full reproducibility: complete automated eval + load test (~17 min)
# Human evaluation is a SEPARATE workflow (see: make human-eval-workflow)
# Run after: make reset-eval
# One && chain: any failing step aborts the whole suite. Step [9/10] tolerates
# missing annotations (|| fallback); step [10/10] load-tests $(URL), which
# defaults to the production deployment.
eval-full: check-env
	@echo "=== FULL REPRODUCIBLE EVALUATION ===" && \
	echo "" && \
	echo "--- [1/10] EDA (production data) ---" && \
	mkdir -p assets reports && \
	python scripts/eda.py && \
	echo "" && \
	echo "--- [2/10] Retrieval metrics + ablations ---" && \
	python scripts/build_natural_eval_dataset.py && \
	python scripts/evaluation.py --dataset eval_natural_queries.json --section all && \
	echo "" && \
	echo "--- [3/10] Baseline comparison ---" && \
	python scripts/evaluation.py --dataset eval_natural_queries.json --section primary --baselines && \
	echo "" && \
	echo "--- [4/10] Explanation tests ---" && \
	python scripts/explanation.py --section basic && \
	python scripts/explanation.py --section gate && \
	python scripts/explanation.py --section verify && \
	python scripts/explanation.py --section cold && \
	echo "" && \
	echo "--- [5/10] Faithfulness (HHEM + RAGAS) ---" && \
	python scripts/faithfulness.py --samples $(SAMPLES) --ragas && \
	echo "" && \
	echo "--- [6/10] Grounding delta experiment ---" && \
	python scripts/faithfulness.py --delta && \
	echo "" && \
	echo "--- [7/10] Failure analysis ---" && \
	python scripts/faithfulness.py --analyze && \
	python scripts/faithfulness.py --adjusted && \
	echo "" && \
	echo "--- [8/10] All sanity checks ---" && \
	python scripts/sanity_checks.py --section all && \
	echo "" && \
	echo "--- [9/10] Human eval analysis ---" && \
	(python scripts/human_eval.py --analyze 2>/dev/null || echo "  (skipped - no annotations found)") && \
	echo "" && \
	echo "--- [10/10] Load test ---" && \
	python scripts/load_test.py --url $(URL) --requests $(REQUESTS) --save && \
	echo "" && \
	python scripts/summary.py && \
	echo "" && \
	echo "=== AUTOMATED EVALUATION COMPLETE ===" && \
	echo "" && \
	echo "Results saved to: data/eval_results/" && \
	echo "  - eval_natural_queries_latest.json (NDCG, Hit@K, MRR)" && \
	echo "  - faithfulness_latest.json (HHEM, RAGAS)" && \
	echo "  - grounding_delta_latest.json (WITH vs WITHOUT evidence)" && \
	echo "  - load_test_latest.json (P99 latency)" && \
	echo "" && \
	echo "NEXT STEPS:" && \
	echo "  1. make human-eval-workflow  # ~1 hour manual annotation" && \
	echo "  2. make eval-summary         # view complete results"
# ---------------------------------------------------------------------------
# Demo
# ---------------------------------------------------------------------------
# Interactive recommendation with explanation
# Override via variables: make demo QUERY="gaming mouse" TOP_K=3
demo: check-env
	@echo "=== DEMO ==="
	python scripts/demo.py --query "$(QUERY)" --top-k $(TOP_K)

# Interview demo: 3 queries showcasing cache hit
# Query 3 repeats Query 1 verbatim so a cache hit can be demonstrated.
demo-interview: check-env
	@echo "=== SAGE INTERVIEW DEMO ==="
	@echo ""
	@echo "--- Query 1: Basic ---"
	python scripts/demo.py --query "wireless earbuds for running" --top-k 1
	@echo ""
	@echo "--- Query 2: Complex (retrieval depth) ---"
	python scripts/demo.py --query "noise cancelling headphones for office with long battery" --top-k 1
	@echo ""
	@echo "--- Query 3: Cache Hit (same as Query 1) ---"
	python scripts/demo.py --query "wireless earbuds for running" --top-k 1
	@echo ""
	@echo "=== Demo Complete ==="
# ---------------------------------------------------------------------------
# Full Pipeline
# ---------------------------------------------------------------------------
# Complete reproducible pipeline: data + full eval + demo.
# Stages run as explicit sequential sub-makes instead of a prerequisite list:
# prerequisite ordering is not guaranteed under `make -j`, and these stages
# must run strictly in order (Qdrant up -> index data -> evaluate -> demo).
# $(MAKE) (not bare `make`) propagates flags and the jobserver correctly.
all:
	$(MAKE) qdrant-up
	$(MAKE) data
	$(MAKE) eval-full
	$(MAKE) demo
	@echo "=== FULL PIPELINE COMPLETE ==="
# ---------------------------------------------------------------------------
# API
# ---------------------------------------------------------------------------
# Start the API server via the project entry point (sage.api.run decides
# host/port configuration itself).
serve: check-env
	@echo "=== SAGE API ==="
	python -m sage.api.run
# Dev server with auto-reload. Honours the Make variable: make serve-dev PORT=9000
# (Previously used shell $${PORT:-8000}, which reads the *environment* variable,
# so a PORT= override on the make command line was silently ignored; PORT ?= 8000
# already supplies the default.)
serve-dev: check-env
	@echo "=== SAGE API (dev) ==="
	uvicorn sage.api.app:create_app --factory --reload --port $(PORT)
# Build the production image from the repo Dockerfile.
docker-build:
	docker build -t sage:latest .
# Run the image locally: secrets from .env, listens on localhost:8000.
docker-run:
	docker run --rm -p 8000:8000 --env-file .env -e PORT=8000 sage:latest
# Print the manual HuggingFace Spaces deployment checklist.
deploy-info:
	@echo "DEPLOY TO HUGGING FACE SPACES:"
	@echo "  1. Push to GitHub"
	@echo "  2. Create Space at https://huggingface.co/spaces"
	@echo "  3. Set secrets: QDRANT_URL, QDRANT_API_KEY, ANTHROPIC_API_KEY"
	@echo "  4. Link GitHub repo (Settings -> Repository)"
	@echo ""
	@echo "Live: $(URL)"
# Probe the deployed /health endpoint; exits non-zero when unhealthy
# (unlike `make health`, which is informational only and never fails).
deploy-health:
	@curl -sf $(URL)/health | python -m json.tool 2>/dev/null || \
	(echo "Deployment not healthy at $(URL)" && exit 1)
# ---------------------------------------------------------------------------
# Human Evaluation (separate workflow from automated eval)
# ---------------------------------------------------------------------------
# Complete human eval workflow: generate → annotate → analyze
# Run this AFTER make eval-full completes
# Annotation is interactive and resumable (Ctrl+C, then `make human-eval`).
human-eval-workflow: check-env
	@echo "=== HUMAN EVALUATION WORKFLOW ===" && \
	echo "" && \
	echo "This is a separate ~1 hour manual process." && \
	echo "You can pause anytime with Ctrl+C and resume with 'make human-eval'" && \
	echo "" && \
	echo "--- Step 1/3: Generating 50 samples ---" && \
	python scripts/human_eval.py --generate --seed $(SEED) && \
	echo "" && \
	echo "--- Step 2/3: Interactive annotation ---" && \
	echo "Rate each sample 1-5 on: comprehension, trust, usefulness, satisfaction" && \
	echo "" && \
	python scripts/human_eval.py --annotate && \
	echo "" && \
	echo "--- Step 3/3: Computing results ---" && \
	python scripts/human_eval.py --analyze && \
	echo "" && \
	echo "=== HUMAN EVALUATION COMPLETE ===" && \
	echo "Results: data/eval_results/human_eval_latest.json" && \
	echo "" && \
	echo "Run 'make eval-summary' to see updated metrics."

# Generate samples only (non-blocking); SEED (default 42) fixes the sampling.
human-eval-generate: check-env
	@echo "=== GENERATING HUMAN EVAL SAMPLES ==="
	python scripts/human_eval.py --generate --seed $(SEED)

# Interactive annotation (can pause with Ctrl+C, resume anytime)
human-eval: check-env
	@echo "=== HUMAN EVALUATION ==="
	@echo "Pause anytime with Ctrl+C. Resume with 'make human-eval'"
	@echo ""
	python scripts/human_eval.py --annotate

# Compute results from annotations
human-eval-analyze: check-env
	@echo "=== HUMAN EVAL ANALYSIS ==="
	python scripts/human_eval.py --analyze

# Check annotation progress; degrades gracefully when nothing exists yet.
human-eval-status:
	@python scripts/human_eval.py --status 2>/dev/null || echo "No samples yet. Run: make human-eval-generate"
# ---------------------------------------------------------------------------
# Quality
# ---------------------------------------------------------------------------
# These targets call the venv tools directly ($(RUFF)/$(MYPY)/$(PYTHON)),
# so they work without activating the venv first.
fmt:
	$(RUFF) format sage/ scripts/ tests/
	$(RUFF) check --fix sage/ scripts/ tests/
lint:
	$(RUFF) check sage/ scripts/ tests/
	$(RUFF) format --check sage/ scripts/ tests/
typecheck:
	$(MYPY) sage/ --ignore-missing-imports
test:
	$(PYTHON) -m pytest tests/ -v
# Quick CI: uses existing venv (fast iteration)
ci: lint typecheck test
	@echo "CI checks passed"
# Full CI: fresh venv install + all checks (pre-commit validation)
# Recreates .venv from scratch; install and all checks are chained with &&
# in a single shell so any failing step aborts the run.
ci-full:
	rm -rf .venv
	python -m venv .venv
	. .venv/bin/activate && pip install -e ".[dev,api,anthropic,openai,pipeline]" && \
	$(RUFF) check sage/ scripts/ tests/ && \
	$(RUFF) format --check sage/ scripts/ tests/ && \
	$(MYPY) sage/ --ignore-missing-imports && \
	$(PYTHON) -m pytest tests/ -v
	@echo "Full CI passed (fresh venv)"
# ---------------------------------------------------------------------------
# Info & Metrics
# ---------------------------------------------------------------------------
# Print version, Python, embedding model, Qdrant URL and active LLM config.
info:
	@python -c "\
	import sys; from sage.config import EMBEDDING_MODEL, QDRANT_URL, LLM_PROVIDER, ANTHROPIC_MODEL, OPENAI_MODEL; \
	print('Sage v0.1.0'); \
	print(f'Python: {sys.version_info.major}.{sys.version_info.minor}'); \
	print(f'Embedding: {EMBEDDING_MODEL}'); \
	print(f'Qdrant: {QDRANT_URL}'); \
	print(f'LLM: {LLM_PROVIDER} ({ANTHROPIC_MODEL if LLM_PROVIDER == \"anthropic\" else OPENAI_MODEL})')"
# Comprehensive evaluation summary (handles missing human eval gracefully)
eval-summary:
	@python scripts/summary.py
# One-screen metrics digest assembled from the *_latest.json result files;
# each file is optional and falls back to 'n/a' when absent.
metrics-snapshot:
	@python -c "\
	import json; from pathlib import Path; \
	r = Path('data/eval_results'); \
	nq = json.load(open(r/'eval_natural_queries_latest.json', encoding='utf-8')) if (r/'eval_natural_queries_latest.json').exists() else {}; \
	faith = json.load(open(r/'faithfulness_latest.json', encoding='utf-8')) if (r/'faithfulness_latest.json').exists() else {}; \
	human = json.load(open(r/'human_eval_latest.json', encoding='utf-8')) if (r/'human_eval_latest.json').exists() else {}; \
	load = json.load(open(r/'load_test_latest.json', encoding='utf-8')) if (r/'load_test_latest.json').exists() else {}; \
	pm = nq.get('primary_metrics', {}); mm = faith.get('multi_metric', {}); \
	print('=== SAGE METRICS ==='); \
	print(f'NDCG@10: {pm.get(\"ndcg_at_10\", \"n/a\")}'); \
	print(f'Claim HHEM: {mm.get(\"claim_level_avg_score\", \"n/a\")}'); \
	print(f'Quote Verif: {mm.get(\"quote_verification_rate\", \"n/a\")}'); \
	print(f'Human Eval: {human.get(\"overall_helpfulness\", \"n/a\")}/5.0 (n={human.get(\"n_samples\", 0)})'); \
	print(f'P99 Latency: {load.get(\"p99_ms\", \"n/a\")}ms')"
# Local API health probe; informational only (never fails the make run),
# unlike `make deploy-health` which exits 1 on failure.
health:
	@curl -sf http://localhost:$(PORT)/health | python -m json.tool 2>/dev/null || \
	echo "API not running at localhost:$(PORT). Start with: make serve"
# ---------------------------------------------------------------------------
# Reset
# ---------------------------------------------------------------------------
# Clear processed data, keep raw download cache and Qdrant Cloud data
# After reset, run: make eval-full (full reproducible suite)
# Human annotations (human_eval_*.json) are deliberately preserved here.
reset:
	@echo "Clearing processed data..."
	rm -f data/reviews_prepared_*.parquet
	rm -f data/embeddings_*.npy
	rm -rf data/splits/
	rm -rf data/eval/
	rm -f data/eval_results/eval_*.json
	rm -f data/eval_results/faithfulness_*.json
	rm -f data/eval_results/failure_analysis_*.json
	rm -f data/eval_results/adjusted_faithfulness_*.json
	rm -f data/eval_results/grounding_delta_*.json
	rm -f data/eda_stats_*.json
	@echo "  (human_eval_*.json preserved — run 'make human-eval' to re-annotate)"
	rm -rf assets/*.png
	@echo "Done. Run 'make eval-full' to reproduce full evaluation suite."
	@echo "  (Use 'make reset-hard' to also clear Qdrant + raw cache)"
# Clear ALL local artifacts for pristine reproducibility (preserves Qdrant Cloud only)
# Use this for complete fresh eval run
# Extends `reset` (prerequisite): also drops human eval data, load test
# results, the raw download cache, and local Qdrant storage.
reset-eval: reset
	@echo "Clearing human eval and load test data..."
	rm -rf data/human_eval/
	rm -f data/eval_results/human_eval_*.json
	rm -f data/eval_results/load_test_*.json
	@echo "Clearing raw download cache..."
	rm -f data/reviews_[0-9]*.parquet
	rm -f data/reviews_full.parquet
	@echo "Clearing local Qdrant storage..."
	rm -rf data/qdrant_storage/
	@echo "Clearing any remaining eval results..."
	rm -rf data/eval_results/
	@echo "Ground zero. Ready for: make eval-full"
# ---------------------------------------------------------------------------
# Load Testing
# ---------------------------------------------------------------------------
# Run load test against production (or local with URL=http://localhost:8000)
# Target: P99 < 500ms
# REQUESTS (default 50) sets the request count; --save persists results.
load-test:
	@echo "=== LOAD TEST ==="
	python scripts/load_test.py --url $(URL) --requests $(REQUESTS) --save
# Quick load test (20 requests, no explanations - tests retrieval only)
# Note: unlike `load-test`, results are not saved (no --save flag).
load-test-quick:
	@echo "=== QUICK LOAD TEST (retrieval only) ==="
	python scripts/load_test.py --url $(URL) --requests 20 --no-explain
# Hard reset: remove EVERYTHING (ground zero for fresh start)
# Extends `reset`, then also deletes the remote Qdrant collection (best-effort:
# skipped with a message when Qdrant is unreachable), the raw download cache,
# local Qdrant storage, and human eval data.
# NOTE(review): this rule sits under the "Load Testing" section header —
# consider moving it next to reset/reset-eval.
reset-hard: reset
	@echo "Clearing Qdrant collection..."
	@python -c "\
	from sage.adapters.vector_store import get_client; \
	c = get_client(); c.delete_collection('sage_reviews'); \
	print('  Collection deleted')" 2>/dev/null || \
	echo "  Qdrant not reachable, skipping collection cleanup"
	@echo "Removing raw download cache..."
	rm -f data/reviews_[0-9]*.parquet
	rm -f data/reviews_full.parquet
	rm -rf data/qdrant_storage/
	@echo "Removing human eval data..."
	rm -rf data/human_eval/
	rm -f data/eval_results/human_eval_*.json
	@echo "Removing any remaining eval results..."
	rm -rf data/eval_results/
	@echo "Hard reset complete. Project at ground zero."
# ---------------------------------------------------------------------------
# Qdrant Management
# ---------------------------------------------------------------------------
# Start a local Qdrant container (idempotent): `docker run` creates it; if the
# container already exists, fall back to `docker start`; storage is persisted
# under data/qdrant_storage/. Then poll for up to ~10s until it answers.
qdrant-up:
	@echo "Starting Qdrant..."
	@docker info > /dev/null 2>&1 || \
	(echo "ERROR: Docker is not running. Start Docker Desktop first." && exit 1)
	@docker run -d --name qdrant -p 6333:6333 -p 6334:6334 \
	-v "$$(pwd)/data/qdrant_storage:/qdrant/storage" \
	qdrant/qdrant:latest 2>/dev/null || \
	docker start qdrant 2>/dev/null || true
	@echo "Waiting for Qdrant..."
	@for i in 1 2 3 4 5 6 7 8 9 10; do \
	python -c "from sage.adapters.vector_store import get_client; get_client().get_collections()" 2>/dev/null && break; \
	sleep 1; \
	done
	@python -c "\
	from sage.adapters.vector_store import get_client; from sage.config import QDRANT_URL; \
	get_client().get_collections(); print(f'Qdrant running at {QDRANT_URL}')" 2>/dev/null || \
	(echo "ERROR: Qdrant failed to start within 10 seconds" && exit 1)
# Stop and remove the local container; both steps are best-effort (|| true)
# so the target succeeds even when no container exists.
qdrant-down:
	@echo "Stopping Qdrant..."
	@docker stop qdrant 2>/dev/null || true
	@docker rm qdrant 2>/dev/null || true
	@echo "Qdrant stopped"
# Print collection info (name/size/etc.) or a short message when unreachable.
qdrant-status:
	@python -c "\
	from sage.adapters.vector_store import get_client, get_collection_info; \
	c = get_client(); info = get_collection_info(c); \
	[print(f'  {k}: {v}') for k, v in info.items()]" 2>/dev/null || \
	echo "Qdrant not reachable"
# ---------------------------------------------------------------------------
# Help
# ---------------------------------------------------------------------------
# Hand-maintained usage text; keep in sync with the targets and the variable
# defaults at the top of this file when either changes.
# NOTE(review): 'make kaggle-test' is advertised below but no such rule exists
# in this file — add the target or remove the line.
help:
	@echo "Sage - RAG Recommendation System"
	@echo ""
	@echo "QUICK START:"
	@echo "  make setup                   Create venv and install dependencies"
	@echo "  make data                    Load, chunk, embed, and index reviews"
	@echo "  make demo                    Run demo query (customizable: QUERY, TOP_K)"
	@echo "  make all                     Full pipeline (data + eval + demo + summary)"
	@echo ""
	@echo "DEMO:"
	@echo "  make demo                    Single recommendation with explanation"
	@echo "  make demo QUERY=\"gaming mouse\"  Custom query"
	@echo "  make demo-interview          3-query showcase (includes cache hit)"
	@echo ""
	@echo "INFO & METRICS:"
	@echo "  make info                    Show version, models, and URLs"
	@echo "  make eval-summary            Print comprehensive evaluation results"
	@echo "  make metrics-snapshot        Quick metrics display"
	@echo "  make health                  Check API health (requires running server)"
	@echo ""
	@echo "PIPELINE:"
	@echo "  make data                    Load, chunk, embed, and index reviews (local)"
	@echo "  make data-validate           Validate data outputs"
	@echo "  make eda                     Exploratory data analysis (queries Qdrant)"
	@echo "  make kaggle-test             Test Kaggle pipeline locally (100K subset)"
	@echo ""
	@echo "EVALUATION:"
	@echo "  make eval-quick              Quick iteration: NDCG + HHEM only (~1 min)"
	@echo "  make eval                    Standard: metrics + explanation + faithfulness (~5 min)"
	@echo "  make eval-full               Complete automated suite + load test (~17 min)"
	@echo "  make eval-summary            View comprehensive results (handles missing data)"
	@echo ""
	@echo "LOAD TESTING:"
	@echo "  make load-test               Run 50 requests against production (P99 target)"
	@echo "  make load-test URL=...       Test against custom URL"
	@echo "  make load-test-quick         20 requests, no explanations (retrieval only)"
	@echo ""
	@echo "API:"
	@echo "  make serve                   Start API server (PORT=8000)"
	@echo "  make serve-dev               Start API with auto-reload"
	@echo "  make docker-build            Build Docker image"
	@echo "  make docker-run              Run Docker container"
	@echo "  make deploy-info             Show HuggingFace Spaces deployment info"
	@echo "  make deploy-health           Check production deployment health"
	@echo ""
	@echo "HUMAN EVALUATION (separate workflow, ~1 hour):"
	@echo "  make human-eval-workflow     Complete workflow: generate → annotate → analyze"
	@echo "  make human-eval-status       Check annotation progress"
	@echo "  make human-eval-generate     Generate 50 eval samples (SEED=42)"
	@echo "  make human-eval              Rate samples interactively (Ctrl+C to pause)"
	@echo "  make human-eval-analyze      Compute results from ratings"
	@echo ""
	@echo "QUALITY:"
	@echo "  make fmt                     Auto-format code with ruff"
	@echo "  make lint                    Run ruff linter and formatter check"
	@echo "  make typecheck               Run mypy type checking"
	@echo "  make test                    Run unit tests"
	@echo "  make ci                      Quick CI: lint + typecheck + test (uses existing venv)"
	@echo "  make ci-full                 Full CI: fresh venv + all checks (pre-commit)"
	@echo ""
	@echo "QDRANT:"
	@echo "  make qdrant-up               Start Qdrant vector database (Docker)"
	@echo "  make qdrant-down             Stop Qdrant"
	@echo "  make qdrant-status           Check Qdrant status"
	@echo ""
	@echo "CLEANUP:"
	@echo "  make reset                   Clear eval data (preserves human_eval, raw cache, Qdrant)"
	@echo "  make reset-eval              Ground zero: clear ALL local artifacts (preserves Qdrant Cloud)"
	@echo "  make reset-hard              Nuclear: clear everything INCLUDING Qdrant collection"
	@echo ""
	@echo "VARIABLES:"
	@echo "  QUERY                        Demo query (default: wireless headphones...)"
	@echo "  TOP_K                        Number of results (default: 1)"
	@echo "  SAMPLES                      Faithfulness eval samples (default: 10)"
	@echo "  SEED                         Random seed for human eval (default: 42)"
	@echo "  PORT                         API port (default: 8000)"
	@echo "  URL                          Load test target (default: https://vxa8502-sage.hf.space)"
	@echo "  REQUESTS                     Load test request count (default: 50)"