# Every target named here is a command, not a file. Declaring them phony keeps
# a stray file of the same name from shadowing the target.
# NOTE(review): kaggle-test has no rule in the visible part of this file — confirm it exists.
.PHONY: all setup check-env help
.PHONY: data data-validate eda kaggle-test
.PHONY: eval eval-full eval-quick eval-summary
.PHONY: demo demo-interview
.PHONY: serve serve-dev docker-build docker-run deploy-info deploy-health
.PHONY: human-eval-workflow human-eval-generate human-eval human-eval-analyze human-eval-status
.PHONY: fmt test lint typecheck ci ci-full
.PHONY: info metrics-snapshot health
.PHONY: load-test load-test-quick
.PHONY: reset reset-eval reset-hard
.PHONY: qdrant-up qdrant-down qdrant-status
|
|
| |
| |
| |
|
|
# Tool paths inside the project virtualenv (used by fmt, lint, typecheck,
# test and ci-full).
VENV_BIN := .venv/bin
PYTHON   := $(VENV_BIN)/python
RUFF     := $(VENV_BIN)/ruff
MYPY     := $(VENV_BIN)/mypy

# User-tunable knobs. `?=` only assigns when the variable is not already set,
# so each can be overridden from the command line or the environment,
# e.g. `make demo TOP_K=3`.
QUERY    ?= wireless headphones with noise cancellation
TOP_K    ?= 1
SAMPLES  ?= 10
SEED     ?= 42
PORT     ?= 8000
URL      ?= https://vxa8502-sage.hf.space
REQUESTS ?= 50
|
|
| |
| |
| |
|
|
# Preflight used as a prerequisite by most targets. Fails unless
#   1. ANTHROPIC_API_KEY or OPENAI_API_KEY is non-empty after load_dotenv(), and
#   2. get_client().get_collections() succeeds against Qdrant.
# stderr of the Qdrant probe is discarded, so any failure there (including an
# import error) is reported as a connection problem.
check-env:
	@echo "Checking environment..."
	@python -c "\
	import os, sys; from dotenv import load_dotenv; load_dotenv(); \
	has_key = bool(os.getenv('ANTHROPIC_API_KEY', '') or os.getenv('OPENAI_API_KEY', '')); \
	sys.exit(0 if has_key else 1)" || \
		{ echo "ERROR: Neither ANTHROPIC_API_KEY nor OPENAI_API_KEY is set (checked shell + .env)"; exit 1; }
	@python -c "\
	from sage.adapters.vector_store import get_client; \
	get_client().get_collections(); print('Qdrant OK')" 2>/dev/null || \
		{ echo "ERROR: Cannot connect to Qdrant. Check QDRANT_URL in .env or run 'make qdrant-up' for local."; exit 1; }
	@echo "Environment OK"
|
|
| |
| |
| |
|
|
# Create .venv and install the package in editable mode with the pipeline,
# api, anthropic and openai extras. Activation and install share one recipe
# line because each recipe line runs in its own shell.
setup:
	@echo "=== SETUP ==="
	python -m venv .venv
	. .venv/bin/activate && pip install -e ".[pipeline,api,anthropic,openai]"
	@printf '\n%s\n' "Setup complete. Activate with: source .venv/bin/activate"
|
|
| |
| |
| |
|
|
| |
# Run scripts/pipeline.py, then confirm it produced data/splits/ and
# data/splits/train.parquet.
data: check-env
	@echo "=== DATA PIPELINE ==="
	python scripts/pipeline.py
	@echo "Verifying outputs..."
	@[ -d data/splits ] || { echo "FAIL: data/splits/ not created"; exit 1; }
	@[ -f data/splits/train.parquet ] || { echo "FAIL: train.parquet not created"; exit 1; }
	@echo "Data pipeline complete"
| |
# Validate data outputs exist and have expected structure.
# Checks, in order:
#   - data/splits/train.parquet and data/splits/test.parquet exist
#   - the train split has more than 1000 rows
#   - an embeddings_*.npy file exists under data/ (first in sorted order is
#     inspected, so the choice is deterministic when several are present)
#   - that array is 2-D with 384 columns
# A missing embeddings file and a wrong embedding shape are reported with
# separate messages.
data-validate:
	@echo "Validating data outputs..."
	@test -f data/splits/train.parquet || (echo "FAIL: train.parquet missing" && exit 1)
	@test -f data/splits/test.parquet || (echo "FAIL: test.parquet missing" && exit 1)
	@python -c "\
	import pandas as pd; import numpy as np; from pathlib import Path; \
	t = pd.read_parquet('data/splits/train.parquet'); \
	e = sorted(Path('data').glob('embeddings_*.npy')); \
	emb = np.load(e[0]) if e else None; \
	print(f'Train: {len(t):,} rows, {t.parent_asin.nunique():,} products'); \
	print(f'Embeddings: {emb.shape if emb is not None else \"not found\"}'); \
	assert len(t) > 1000, 'Train set too small'; \
	assert emb is not None, 'No data/embeddings_*.npy file found'; \
	assert emb.ndim == 2 and emb.shape[1] == 384, 'Embedding dimension mismatch'; \
	print('Validation passed')"
| |
# Exploratory data analysis (queries production Qdrant).
# Ensures the assets/ and reports/ directories exist before running the script.
eda: check-env
	@echo "=== PRODUCTION EDA ===" && mkdir -p assets reports
	python scripts/eda.py
| |
| # --------------------------------------------------------------------------- |
| # Evaluation Suite (layered: quick → standard → complete) |
| # --------------------------------------------------------------------------- |
| |
# Quick: Fast iteration, no RAGAS (~1 min)
#   - Primary retrieval metrics (NDCG, Hit@K, MRR)
#   - Basic faithfulness (HHEM only, 5 samples)
# Every step is its own silenced recipe line; make stops at the first one
# that exits non-zero.
eval-quick: check-env
	@echo "=== QUICK EVALUATION ==="
	@python scripts/build_natural_eval_dataset.py
	@python scripts/evaluation.py --dataset eval_natural_queries.json --section primary
	@python scripts/faithfulness.py --samples 5
	@echo "=== QUICK EVAL COMPLETE ==="
| |
# Standard: Pre-commit validation (~5 min)
#   [1/4] Primary retrieval metrics
#   [2/4] Explanation tests (basic, gate, verify, cold-start)
#   [3/4] Faithfulness (HHEM + RAGAS), SAMPLES samples
#   [4/4] Spot checks
# Every step is its own silenced recipe line; make stops at the first one
# that exits non-zero.
eval: check-env
	@echo "=== EVALUATION SUITE ==="
	@printf '\n%s\n' "--- [1/4] Retrieval metrics ---"
	@python scripts/build_natural_eval_dataset.py
	@python scripts/evaluation.py --dataset eval_natural_queries.json --section primary
	@printf '\n%s\n' "--- [2/4] Explanation tests ---"
	@python scripts/explanation.py --section basic
	@python scripts/explanation.py --section gate
	@python scripts/explanation.py --section verify
	@python scripts/explanation.py --section cold
	@printf '\n%s\n' "--- [3/4] Faithfulness (HHEM + RAGAS) ---"
	@python scripts/faithfulness.py --samples $(SAMPLES) --ragas
	@printf '\n%s\n' "--- [4/4] Sanity checks ---"
	@python scripts/sanity_checks.py --section spot
	@printf '\n%s\n' "=== EVALUATION COMPLETE ==="
| |
# Complete: full reproducible automated suite + load test (~17 min)
#   [1/10]  EDA (production data stats + figures)
#   [2/10]  All retrieval metrics + ablations (aggregation, rating, K, weights)
#   [3/10]  Baseline comparison (Random, Popularity, ItemKNN)
#   [4/10]  All explanation tests
#   [5/10]  Faithfulness (HHEM + RAGAS), SAMPLES samples
#   [6/10]  Grounding delta (WITH vs WITHOUT evidence)
#   [7/10]  Failure analysis + adjusted faithfulness
#   [8/10]  All sanity checks
#   [9/10]  Human eval analysis (best effort; a failure is reported as skipped)
#   [10/10] Load test against URL with REQUESTS requests, then scripts/summary.py
# Human evaluation is a SEPARATE workflow (see: make human-eval-workflow).
# Run after: make reset-eval
#
# Every step is its own silenced recipe line, so make stops at the first
# failing script and no single unterminated quote can break the whole chain.
eval-full: check-env
	@echo "=== FULL REPRODUCIBLE EVALUATION ==="
	@printf '\n%s\n' "--- [1/10] EDA (production data) ---"
	@mkdir -p assets reports
	@python scripts/eda.py
	@printf '\n%s\n' "--- [2/10] Retrieval metrics + ablations ---"
	@python scripts/build_natural_eval_dataset.py
	@python scripts/evaluation.py --dataset eval_natural_queries.json --section all
	@printf '\n%s\n' "--- [3/10] Baseline comparison ---"
	@python scripts/evaluation.py --dataset eval_natural_queries.json --section primary --baselines
	@printf '\n%s\n' "--- [4/10] Explanation tests ---"
	@python scripts/explanation.py --section basic
	@python scripts/explanation.py --section gate
	@python scripts/explanation.py --section verify
	@python scripts/explanation.py --section cold
	@printf '\n%s\n' "--- [5/10] Faithfulness (HHEM + RAGAS) ---"
	@python scripts/faithfulness.py --samples $(SAMPLES) --ragas
	@printf '\n%s\n' "--- [6/10] Grounding delta experiment ---"
	@python scripts/faithfulness.py --delta
	@printf '\n%s\n' "--- [7/10] Failure analysis ---"
	@python scripts/faithfulness.py --analyze
	@python scripts/faithfulness.py --adjusted
	@printf '\n%s\n' "--- [8/10] All sanity checks ---"
	@python scripts/sanity_checks.py --section all
	@printf '\n%s\n' "--- [9/10] Human eval analysis ---"
	@python scripts/human_eval.py --analyze 2>/dev/null || echo " (skipped - no annotations found)"
	@printf '\n%s\n' "--- [10/10] Load test ---"
	@python scripts/load_test.py --url $(URL) --requests $(REQUESTS) --save
	@echo ""
	@python scripts/summary.py
	@printf '%s\n' \
		"" \
		"=== AUTOMATED EVALUATION COMPLETE ===" \
		"" \
		"Results saved to: data/eval_results/" \
		" - eval_natural_queries_latest.json (NDCG, Hit@K, MRR)" \
		" - faithfulness_latest.json (HHEM, RAGAS)" \
		" - grounding_delta_latest.json (WITH vs WITHOUT evidence)" \
		" - load_test_latest.json (P99 latency)" \
		"" \
		"NEXT STEPS:" \
		" 1. make human-eval-workflow # separate ~1 hour manual process" \
		" 2. make eval-summary # view complete results"
|
|
| |
| |
| |
|
|
| |
# Run a single demo query. Override the query text with QUERY="..." and the
# number of results with TOP_K=N.
demo: check-env
	@echo "=== DEMO ==="
	python scripts/demo.py --query "$(QUERY)" --top-k $(TOP_K)
|
|
| |
# Three fixed demo queries in sequence. The third repeats the first, which the
# banner labels as a cache hit.
demo-interview: check-env
	@printf '%s\n\n%s\n' "=== SAGE INTERVIEW DEMO ===" "--- Query 1: Basic ---"
	python scripts/demo.py --query "wireless earbuds for running" --top-k 1
	@printf '\n%s\n' "--- Query 2: Complex (retrieval depth) ---"
	python scripts/demo.py --query "noise cancelling headphones for office with long battery" --top-k 1
	@printf '\n%s\n' "--- Query 3: Cache Hit (same as Query 1) ---"
	python scripts/demo.py --query "wireless earbuds for running" --top-k 1
	@printf '\n%s\n' "=== Demo Complete ==="
|
|
| |
| |
| |
|
|
| |
# Full pipeline, strictly in order: start Qdrant, build data, run the full
# evaluation, then the demo.
# Each stage is a sub-make rather than a prerequisite: prerequisites of one
# target may be built concurrently under `make -j`, which would let check-env
# or demo start before Qdrant is up or the data exists. $(MAKE) passes
# command-line variables and flags down to each stage.
all:
	$(MAKE) qdrant-up
	$(MAKE) data
	$(MAKE) eval-full
	$(MAKE) demo
	@echo "=== FULL PIPELINE COMPLETE ==="
|
|
| |
| |
| |
|
|
# Start the API through the sage.api.run module.
serve: check-env
	@echo "=== SAGE API ==="
	python -m sage.api.run

# Start the API under uvicorn with auto-reload, using the create_app factory.
# The port is read by the recipe shell from the PORT environment variable and
# falls back to 8000 when that is unset or empty.
serve-dev: check-env
	@echo "=== SAGE API (dev) ==="
	uvicorn sage.api.app:create_app --factory --reload --port $${PORT:-8000}
|
|
# Build the image from the Dockerfile in the current directory, tagged sage:latest.
docker-build:
	docker build -t sage:latest .

# Run sage:latest with variables from .env, removing the container on exit.
# The container is told to listen on 8000; the host side of the mapping follows
# PORT (default 8000), so `make docker-run PORT=9000` pairs with
# `make health PORT=9000`.
docker-run:
	docker run --rm -p $(PORT):8000 --env-file .env -e PORT=8000 sage:latest
|
|
# Print the manual Hugging Face Spaces deployment checklist and the live URL.
deploy-info:
	@printf '%s\n' \
		"DEPLOY TO HUGGING FACE SPACES:" \
		" 1. Push to GitHub" \
		" 2. Create Space at https://huggingface.co/spaces" \
		" 3. Set secrets: QDRANT_URL, QDRANT_API_KEY, ANTHROPIC_API_KEY" \
		" 4. Link GitHub repo (Settings -> Repository)" \
		"" \
		"Live: $(URL)"
|
|
# Fetch $(URL)/health and pretty-print the JSON. Fails the target when the
# pipeline's last stage (json.tool) exits non-zero, which includes the case
# where curl produced no output.
deploy-health:
	@curl -sf $(URL)/health | python -m json.tool 2>/dev/null || \
		{ echo "Deployment not healthy at $(URL)"; exit 1; }
|
|
| |
| |
| |
|
|
| |
| |
# End-to-end human evaluation: generate samples (seeded with SEED), annotate
# interactively, then analyze. Every step is its own silenced recipe line;
# make stops at the first one that exits non-zero.
human-eval-workflow: check-env
	@printf '%s\n' \
		"=== HUMAN EVALUATION WORKFLOW ===" \
		"" \
		"This is a separate ~1 hour manual process." \
		"You can pause anytime with Ctrl+C and resume with 'make human-eval'" \
		"" \
		"--- Step 1/3: Generating 50 samples ---"
	@python scripts/human_eval.py --generate --seed $(SEED)
	@printf '%s\n' \
		"" \
		"--- Step 2/3: Interactive annotation ---" \
		"Rate each sample 1-5 on: comprehension, trust, usefulness, satisfaction" \
		""
	@python scripts/human_eval.py --annotate
	@printf '\n%s\n' "--- Step 3/3: Computing results ---"
	@python scripts/human_eval.py --analyze
	@printf '%s\n' \
		"" \
		"=== HUMAN EVALUATION COMPLETE ===" \
		"Results: data/eval_results/human_eval_latest.json" \
		"" \
		"Run 'make eval-summary' to see updated metrics."
|
|
| |
# Step 1 of the human-eval workflow: generate samples, seeded with SEED.
human-eval-generate: check-env
	@echo "=== GENERATING HUMAN EVAL SAMPLES ==="
	python scripts/human_eval.py --generate --seed $(SEED)

# Step 2: interactive annotation session.
human-eval: check-env
	@printf '%s\n' "=== HUMAN EVALUATION ===" "Pause anytime with Ctrl+C. Resume with 'make human-eval'" ""
	python scripts/human_eval.py --annotate

# Step 3: compute results from the annotations.
human-eval-analyze: check-env
	@echo "=== HUMAN EVAL ANALYSIS ==="
	python scripts/human_eval.py --analyze

# Report annotation progress. Never fails: if the status call exits non-zero
# (its stderr is discarded), a hint is printed instead.
human-eval-status:
	@python scripts/human_eval.py --status 2>/dev/null || echo "No samples yet. Run: make human-eval-generate"
|
|
| |
| |
| |
|
|
# Rewrite files in place: ruff formatter first, then auto-fixable lint rules.
fmt:
	$(RUFF) format sage/ scripts/ tests/
	$(RUFF) check --fix sage/ scripts/ tests/

# Read-only counterpart of fmt: lint, then verify formatting without writing.
lint:
	$(RUFF) check sage/ scripts/ tests/
	$(RUFF) format --check sage/ scripts/ tests/

# Type-check the sage/ package only.
typecheck:
	$(MYPY) sage/ --ignore-missing-imports

# Run the test suite under tests/ with the virtualenv interpreter.
test:
	$(PYTHON) -m pytest tests/ -v
|
|
| |
# Quick CI against the existing virtualenv: lint, typecheck and test.
ci: lint typecheck test
	@echo "CI checks passed"

# Full CI from a clean slate: delete and recreate .venv, install the dev, api,
# anthropic, openai and pipeline extras, then run the same four checks as
# lint/typecheck/test in a single shell chain that stops at the first failure.
ci-full:
	rm -rf .venv
	python -m venv .venv
	. .venv/bin/activate && pip install -e ".[dev,api,anthropic,openai,pipeline]" && \
		$(RUFF) check sage/ scripts/ tests/ && \
		$(RUFF) format --check sage/ scripts/ tests/ && \
		$(MYPY) sage/ --ignore-missing-imports && \
		$(PYTHON) -m pytest tests/ -v
	@echo "Full CI passed (fresh venv)"
|
|
| |
| |
| |
|
|
# Print the version banner, interpreter major.minor, and the embedding model,
# Qdrant URL and LLM provider/model read from sage.config.
# Continuation lines inside the python -c string must start with exactly one
# tab: make strips it, and any extra whitespace would reach Python as an
# unexpected indent.
info:
	@python -c "\
	import sys; from sage.config import EMBEDDING_MODEL, QDRANT_URL, LLM_PROVIDER, ANTHROPIC_MODEL, OPENAI_MODEL; \
	py = sys.version_info; \
	model = ANTHROPIC_MODEL if LLM_PROVIDER == \"anthropic\" else OPENAI_MODEL; \
	print('Sage v0.1.0'); \
	print(f'Python: {py.major}.{py.minor}'); \
	print(f'Embedding: {EMBEDDING_MODEL}'); \
	print(f'Qdrant: {QDRANT_URL}'); \
	print(f'LLM: {LLM_PROVIDER} ({model})')"
|
|
| |
# Print the evaluation summary produced by scripts/summary.py (command not echoed).
eval-summary:
	@python scripts/summary.py
|
|
# Print a five-line metrics digest from the *_latest.json files under
# data/eval_results/. A result file that does not exist is treated as an empty
# dict, so its metrics print as "n/a" (and n=0 for human eval).
# Files are read via Path.read_text, which closes the handle, and one shared
# loader replaces the four repeated open() expressions.
# Continuation lines inside the python -c string must start with exactly one
# tab: make strips it, and any extra whitespace would reach Python as an
# unexpected indent.
metrics-snapshot:
	@python -c "\
	import json; from pathlib import Path; \
	r = Path('data/eval_results'); \
	load_json = lambda name: json.loads((r / name).read_text(encoding='utf-8')) if (r / name).exists() else {}; \
	nq = load_json('eval_natural_queries_latest.json'); \
	faith = load_json('faithfulness_latest.json'); \
	human = load_json('human_eval_latest.json'); \
	load = load_json('load_test_latest.json'); \
	pm = nq.get('primary_metrics', {}); mm = faith.get('multi_metric', {}); \
	print('=== SAGE METRICS ==='); \
	print(f'NDCG@10: {pm.get(\"ndcg_at_10\", \"n/a\")}'); \
	print(f'Claim HHEM: {mm.get(\"claim_level_avg_score\", \"n/a\")}'); \
	print(f'Quote Verif: {mm.get(\"quote_verification_rate\", \"n/a\")}'); \
	print(f'Human Eval: {human.get(\"overall_helpfulness\", \"n/a\")}/5.0 (n={human.get(\"n_samples\", 0)})'); \
	print(f'P99 Latency: {load.get(\"p99_ms\", \"n/a\")}ms')"
|
|
# Pretty-print the local API's /health response on PORT. Informational only:
# when the pipeline fails a hint is printed and the target still succeeds.
health:
	@curl -sf http://localhost:$(PORT)/health | python -m json.tool 2>/dev/null || \
		echo "API not running at localhost:$(PORT). Start with: make serve"
|
|
| |
| |
| |
|
|
| |
| |
# Remove processed data and automated eval results:
#   - prepared reviews, embeddings, splits and data/eval/
#   - eval_*, faithfulness_*, failure_analysis_*, adjusted_faithfulness_* and
#     grounding_delta_* under data/eval_results/
#   - EDA stats and PNG figures under assets/
# None of the patterns below match human_eval_*.json, and nothing here touches
# data/qdrant_storage/ or the raw reviews cache.
reset:
	@echo "Clearing processed data..."
	rm -f data/reviews_prepared_*.parquet
	rm -f data/embeddings_*.npy
	rm -rf data/splits/
	rm -rf data/eval/
	rm -f data/eval_results/eval_*.json
	rm -f data/eval_results/faithfulness_*.json
	rm -f data/eval_results/failure_analysis_*.json
	rm -f data/eval_results/adjusted_faithfulness_*.json
	rm -f data/eval_results/grounding_delta_*.json
	rm -f data/eda_stats_*.json
	@echo " (human_eval_*.json preserved — run 'make human-eval' to re-annotate)"
	rm -rf assets/*.png
	@printf '%s\n' \
		"Done. Run 'make eval-full' to reproduce full evaluation suite." \
		" (Use 'make reset-hard' to also clear Qdrant + raw cache)"
|
|
| |
| |
# Everything `reset` removes, plus human eval data, load test results, the raw
# download cache, local Qdrant storage and finally all of data/eval_results/.
# No Qdrant client call is made here, only local files are deleted.
reset-eval: reset
	@echo "Clearing human eval and load test data..."
	rm -rf data/human_eval/
	rm -f data/eval_results/human_eval_*.json
	rm -f data/eval_results/load_test_*.json
	@echo "Clearing raw download cache..."
	rm -f data/reviews_[0-9]*.parquet
	rm -f data/reviews_full.parquet
	@echo "Clearing local Qdrant storage..."
	rm -rf data/qdrant_storage/
	@echo "Clearing any remaining eval results..."
	rm -rf data/eval_results/
	@echo "Ground zero. Ready for: make eval-full"
|
|
| |
| |
| |
|
|
| |
| |
# Send REQUESTS requests to URL and save the results (--save).
load-test:
	@echo "=== LOAD TEST ==="
	python scripts/load_test.py --url $(URL) --requests $(REQUESTS) --save

# Fixed 20-request run with --no-explain; results are not saved.
load-test-quick:
	@echo "=== QUICK LOAD TEST (retrieval only) ==="
	python scripts/load_test.py --url $(URL) --requests 20 --no-explain
|
|
| |
# Everything `reset` removes, plus: delete the 'sage_reviews' Qdrant
# collection (best effort — any failure is reported as "not reachable" and
# skipped, stderr discarded), the raw download cache, local Qdrant storage,
# human eval data and all of data/eval_results/.
reset-hard: reset
	@echo "Clearing Qdrant collection..."
	@python -c "\
	from sage.adapters.vector_store import get_client; \
	get_client().delete_collection('sage_reviews'); \
	print(' Collection deleted')" 2>/dev/null || \
		echo " Qdrant not reachable, skipping collection cleanup"
	@echo "Removing raw download cache..."
	rm -f data/reviews_[0-9]*.parquet
	rm -f data/reviews_full.parquet
	rm -rf data/qdrant_storage/
	@echo "Removing human eval data..."
	rm -rf data/human_eval/
	rm -f data/eval_results/human_eval_*.json
	@echo "Removing any remaining eval results..."
	rm -rf data/eval_results/
	@echo "Hard reset complete. Project at ground zero."
|
|
| |
| |
| |
|
|
# Start a local Qdrant container named "qdrant" (ports 6333/6334, storage
# bind-mounted from data/qdrant_storage). If `docker run` fails — e.g. the
# container already exists — fall back to `docker start`; that step never
# fails the target. Then poll the client up to 10 times, one second apart,
# and run a final probe that fails the target if Qdrant still does not answer.
qdrant-up:
	@echo "Starting Qdrant..."
	@docker info > /dev/null 2>&1 || \
		{ echo "ERROR: Docker is not running. Start Docker Desktop first."; exit 1; }
	@docker run -d --name qdrant -p 6333:6333 -p 6334:6334 \
		-v "$$(pwd)/data/qdrant_storage:/qdrant/storage" \
		qdrant/qdrant:latest 2>/dev/null || \
		docker start qdrant 2>/dev/null || true
	@echo "Waiting for Qdrant..."
	@for attempt in 1 2 3 4 5 6 7 8 9 10; do \
		if python -c "from sage.adapters.vector_store import get_client; get_client().get_collections()" 2>/dev/null; then break; fi; \
		sleep 1; \
	done
	@python -c "\
	from sage.adapters.vector_store import get_client; from sage.config import QDRANT_URL; \
	get_client().get_collections(); print(f'Qdrant running at {QDRANT_URL}')" 2>/dev/null || \
		{ echo "ERROR: Qdrant failed to start within 10 seconds"; exit 1; }
|
|
# Stop and remove the "qdrant" container. Both docker calls are best effort
# (stderr discarded, failure ignored), so this succeeds even when no such
# container exists.
qdrant-down:
	@echo "Stopping Qdrant..."
	@docker stop qdrant 2>/dev/null || :
	@docker rm qdrant 2>/dev/null || :
	@echo "Qdrant stopped"
|
|
# Print each key/value pair returned by get_collection_info, one per line.
# Any failure (stderr discarded) is reported as "Qdrant not reachable" and the
# target still succeeds.
qdrant-status:
	@python -c "\
	import sys; from sage.adapters.vector_store import get_client, get_collection_info; \
	info = get_collection_info(get_client()); \
	sys.stdout.writelines(f' {k}: {v}\n' for k, v in info.items())" 2>/dev/null || \
		echo "Qdrant not reachable"
|
|
| |
| |
| |
|
|
# Print the command reference, one printf per section. Each quoted argument is
# one output line; an empty argument is a blank separator line.
help:
	@printf '%s\n' \
		"Sage - RAG Recommendation System" \
		""
	@printf '%s\n' \
		"QUICK START:" \
		" make setup Create venv and install dependencies" \
		" make data Load, chunk, embed, and index reviews" \
		" make demo Run demo query (customizable: QUERY, TOP_K)" \
		" make all Full pipeline (data + eval + demo + summary)" \
		""
	@printf '%s\n' \
		"DEMO:" \
		" make demo Single recommendation with explanation" \
		" make demo QUERY=\"gaming mouse\" Custom query" \
		" make demo-interview 3-query showcase (includes cache hit)" \
		""
	@printf '%s\n' \
		"INFO & METRICS:" \
		" make info Show version, models, and URLs" \
		" make eval-summary Print comprehensive evaluation results" \
		" make metrics-snapshot Quick metrics display" \
		" make health Check API health (requires running server)" \
		""
	@printf '%s\n' \
		"PIPELINE:" \
		" make data Load, chunk, embed, and index reviews (local)" \
		" make data-validate Validate data outputs" \
		" make eda Exploratory data analysis (queries Qdrant)" \
		" make kaggle-test Test Kaggle pipeline locally (100K subset)" \
		""
	@printf '%s\n' \
		"EVALUATION:" \
		" make eval-quick Quick iteration: NDCG + HHEM only (~1 min)" \
		" make eval Standard: metrics + explanation + faithfulness (~5 min)" \
		" make eval-full Complete automated suite + load test (~17 min)" \
		" make eval-summary View comprehensive results (handles missing data)" \
		""
	@printf '%s\n' \
		"LOAD TESTING:" \
		" make load-test Run 50 requests against production (P99 target)" \
		" make load-test URL=... Test against custom URL" \
		" make load-test-quick 20 requests, no explanations (retrieval only)" \
		""
	@printf '%s\n' \
		"API:" \
		" make serve Start API server (PORT=8000)" \
		" make serve-dev Start API with auto-reload" \
		" make docker-build Build Docker image" \
		" make docker-run Run Docker container" \
		" make deploy-info Show HuggingFace Spaces deployment info" \
		" make deploy-health Check production deployment health" \
		""
	@printf '%s\n' \
		"HUMAN EVALUATION (separate workflow, ~1 hour):" \
		" make human-eval-workflow Complete workflow: generate → annotate → analyze" \
		" make human-eval-status Check annotation progress" \
		" make human-eval-generate Generate 50 eval samples (SEED=42)" \
		" make human-eval Rate samples interactively (Ctrl+C to pause)" \
		" make human-eval-analyze Compute results from ratings" \
		""
	@printf '%s\n' \
		"QUALITY:" \
		" make fmt Auto-format code with ruff" \
		" make lint Run ruff linter and formatter check" \
		" make typecheck Run mypy type checking" \
		" make test Run unit tests" \
		" make ci Quick CI: lint + typecheck + test (uses existing venv)" \
		" make ci-full Full CI: fresh venv + all checks (pre-commit)" \
		""
	@printf '%s\n' \
		"QDRANT:" \
		" make qdrant-up Start Qdrant vector database (Docker)" \
		" make qdrant-down Stop Qdrant" \
		" make qdrant-status Check Qdrant status" \
		""
	@printf '%s\n' \
		"CLEANUP:" \
		" make reset Clear eval data (preserves human_eval, raw cache, Qdrant)" \
		" make reset-eval Ground zero: clear ALL local artifacts (preserves Qdrant Cloud)" \
		" make reset-hard Nuclear: clear everything INCLUDING Qdrant collection" \
		""
	@printf '%s\n' \
		"VARIABLES:" \
		" QUERY Demo query (default: wireless headphones...)" \
		" TOP_K Number of results (default: 1)" \
		" SAMPLES Faithfulness eval samples (default: 10)" \
		" SEED Random seed for human eval (default: 42)" \
		" PORT API port (default: 8000)" \
		" URL Load test target (default: https://vxa8502-sage.hf.space)" \
		" REQUESTS Load test request count (default: 50)"
|
|