Spaces:
Running
Running
# -*- coding: utf-8 -*-
"""
Three-Tier Source Attribution Test Script
==========================================
Tests that AI responses correctly attribute their source using the right
trailing emoji indicators:
    π Tier 1 - Verified plant database (plants_database.json)
    π Tier 2 - Verified RAG document (knowledge_chunks, similarity >=85%)
    β οΈ Tier 3 - AI-generated estimate (Cerebras LLM fallback)

NOTE: the indicator glyphs above appear to be mis-encoded in this copy of the
file (Tier 1 and Tier 2 show the same character). Restore the intended emoji
here and in the string literals below at the same time.

Usage:
    python tests/test_source_attribution.py
"""
import asyncio
import sys
from pathlib import Path

# Force UTF-8 output so emojis render correctly on all terminals.
# reconfigure() is only available on real text streams; replaced or missing
# stdout objects (some IDEs/notebooks, or None) are left untouched instead of
# crashing the script at import time.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")

# Add the directory above this file's folder to the import path so that the
# project imports used further down work when the script is run directly.
sys.path.insert(0, str(Path(__file__).parent.parent))

# Fixed sensor snapshot sent with every query so that runs are repeatable.
# NOTE(review): presumably temperature, relative humidity and light level;
# confirm the expected units with the AI engine.
MOCK_SENSORS = {"temp": 28.5, "rh": 70.0, "light": 15000}
# -----------------------------------------------------------------------------
# Test Cases
# -----------------------------------------------------------------------------
# Expected attribution markers, defined once so that a marker only ever has to
# be corrected in a single place.
# NOTE(review): these glyphs look mis-encoded in this copy of the file; confirm
# them against the markers the AI engine actually emits.
_EXPECT_VERIFIED_DB = "π"
_EXPECT_AI_GENERATED = "β οΈ"

# Each case is a dict with:
#   id             - short label shown in the report table
#   description    - human-readable summary of what is being checked
#   query          - text sent to the AI engine
#   expected_emoji - attribution marker the response must carry
# NOTE(review): there is no Tier 2 (verified RAG document) case in this list.
TEST_CASES = [
    # --- Tier 1: Deterministic DB (plant in plants_database.json) ---
    {
        "id": "T1-A",
        "description": "Known plant in DB - Indonesian query (tomat)",
        "query": "berapa suhu ideal tomat fase vegetatif?",
        "expected_emoji": _EXPECT_VERIFIED_DB,
    },
    {
        "id": "T1-B",
        "description": "Known plant in DB - English query (watermelon)",
        "query": "what are the parameters for growing watermelon seedling?",
        "expected_emoji": _EXPECT_VERIFIED_DB,
    },
    {
        "id": "T1-C",
        "description": "Known plant in DB - Indonesian (semangka)",
        "query": "apa parameter pertumbuhan semangka?",
        "expected_emoji": _EXPECT_VERIFIED_DB,
    },
    {
        "id": "T1-D",
        "description": "Known plant in DB - lettuce germination",
        "query": "suhu dan kelembaban untuk perkecambahan selada?",
        "expected_emoji": _EXPECT_VERIFIED_DB,
    },
    {
        "id": "T1-E",
        "description": "Chamber status - real-time sensor data",
        "query": "berapa suhu chamber sekarang?",
        "expected_emoji": _EXPECT_VERIFIED_DB,
    },
    # --- Tier 3: LLM Fallback (plant NOT in DB) ---
    {
        "id": "T3-A",
        "description": "Unknown plant - LLM fallback (durian)",
        "query": "apa parameter pertumbuhan durian?",
        "expected_emoji": _EXPECT_AI_GENERATED,
    },
    {
        "id": "T3-B",
        "description": "Unknown plant - LLM fallback (strawberry)",
        "query": "what temperature does strawberry need to grow?",
        "expected_emoji": _EXPECT_AI_GENERATED,
    },
    # --- General / Technical (always AI Generated) ---
    {
        "id": "T3-C",
        "description": "General plant question (no specific plant)",
        "query": "bagaimana cara mempercepat pertumbuhan tanaman secara umum?",
        "expected_emoji": _EXPECT_AI_GENERATED,
    },
    {
        "id": "T3-D",
        "description": "Technical IoT question",
        "query": "how does a DHT22 humidity sensor work?",
        "expected_emoji": _EXPECT_AI_GENERATED,
    },
    {
        "id": "T3-E",
        "description": "General knowledge question",
        "query": "what is photosynthesis?",
        "expected_emoji": _EXPECT_AI_GENERATED,
    },
]
# -----------------------------------------------------------------------------
# Helpers
# -----------------------------------------------------------------------------
| def detect_source_emoji(response_text: str) -> str | None: | |
| """ | |
| Detect the trailing source attribution emoji in the response. | |
| Checks the last 8 lines for Source attribution lines. | |
| """ | |
| lines = response_text.strip().split("\n") | |
| for line in reversed(lines[-8:]): | |
| if "π" in line and "Source" in line: | |
| return "π" | |
| if "π" in line and "Source" in line: | |
| return "π" | |
| if "β οΈ" in line and "Source" in line: | |
| return "β οΈ" | |
| # Fallback: scan full response for opener indicators | |
| if "π According" in response_text or "π Source" in response_text: | |
| return "π" | |
| if "π Source" in response_text: | |
| return "π" | |
| if "β οΈ Note" in response_text or "β οΈ Source" in response_text: | |
| return "β οΈ" | |
| return None | |
def truncate(text: str, n: int = 400) -> str:
    """
    Shorten ``text`` to at most ``n`` characters for display.

    Args:
        text: The string to shorten.
        n: Maximum number of characters kept from the start of ``text``.

    Returns:
        ``text`` unchanged when it is no longer than ``n`` characters;
        otherwise its first ``n`` characters followed by a truncation notice
        on a new line.
    """
    # Explicit guard instead of a one-line conditional expression, whose
    # result depends on `+` binding tighter than `if ... else`.
    if len(text) <= n:
        return text
    return text[:n] + "\n ...[truncated]"
# -----------------------------------------------------------------------------
# Main Test Runner
# -----------------------------------------------------------------------------
def _print_detailed_responses(results, width):
    """Print query, metadata and the (truncated) response for every result."""
    print("\n" + "=" * width)
    print(" DETAILED RESPONSES")
    print("=" * width)
    for r in results:
        case = r["case"]
        icon = "β " if r["passed"] else "β"
        print(f"\n{icon} [{case['id']}] {case['description']}")
        print(f" Query : {case['query']}")
        print(f" Query Type : {r['query_type']}")
        print(f" Data Source: {r['data_source']}")
        print(f" Detected : {r['detected']} (Expected: {case['expected_emoji']})")
        print(" Response :")
        for line in truncate(r["response"], 500).split("\n"):
            print(f" {line}")


def _print_summary(passed, failed, total, width):
    """Print the final pass/fail tally."""
    print("\n" + "=" * width)
    print(f" RESULTS: {passed}/{total} passed | {failed} failed")
    if failed == 0:
        print(" β All source attribution tests PASSED!")
    else:
        print(" β Some tests FAILED β check prompt attribution rules above.")
    print("=" * width + "\n")


async def run_tests():
    """
    Run every entry of TEST_CASES through the AI engine and print a report.

    Each case's query is sent to ``generate_context_aware_response`` together
    with MOCK_SENSORS. The attribution marker is extracted from the returned
    "response" text with ``detect_source_emoji`` and compared with the case's
    ``expected_emoji``. A case whose engine call or detection raises is
    reported as a failure, and the run continues with the next case.

    Three sections are printed: a one-row-per-case table, the detailed
    (truncated) responses, and a summary.

    Returns:
        bool: True when every case passed, False otherwise.
    """
    # Project-local import; relies on the sys.path entry added at module level.
    from app.ai_engine import generate_context_aware_response

    width = 72
    rule = "=" * width

    print("\n" + rule)
    print(" THREE-TIER SOURCE ATTRIBUTION TEST SUITE")
    print(rule)
    print(" π Tier 1: Verified DB | π Tier 2: Verified Doc (>=85%) | β οΈ Tier 3: AI Generated")
    print(rule)
    print(f"{'ID':<8} {'EXPECTED':<12} {'GOT':<12} {'STATUS':<8} DESCRIPTION")
    print("-" * width)

    results = []
    for case in TEST_CASES:
        # Only the engine call and marker detection are guarded, so a case is
        # recorded exactly once, either as an error or as a normal result.
        try:
            result = await generate_context_aware_response(
                query=case["query"],
                sensors=MOCK_SENSORS,
            )
            # `or ""` also covers an explicit None stored under "response".
            response_text = result.get("response") or ""
            detected = detect_source_emoji(response_text)
        except Exception as e:  # report boundary: any failure becomes a FAIL row
            print(f"{case['id']:<8} {'?':<12} {'ERROR':<12} β FAIL {case['description']}")
            print(f" Exception: {e}")
            results.append({
                "case": case,
                "response": f"ERROR: {e}",
                "detected": None,
                "data_source": "error",
                "query_type": "error",
                "passed": False,
            })
            continue

        ok = detected == case["expected_emoji"]
        status = "β PASS" if ok else "β FAIL"
        print(f"{case['id']:<8} {case['expected_emoji']:<12} {str(detected):<12} {status:<8} {case['description']}")
        results.append({
            "case": case,
            "response": response_text,
            "detected": detected,
            "data_source": result.get("data_source", "unknown"),
            "query_type": result.get("query_type", "unknown"),
            "passed": ok,
        })

    _print_detailed_responses(results, width)

    # Every case produced exactly one result, so the tallies follow from it.
    total = len(TEST_CASES)
    passed = sum(1 for r in results if r["passed"])
    failed = total - passed
    _print_summary(passed, failed, total, width)

    return failed == 0
if __name__ == "__main__":
    # run_tests() is a coroutine function, so it needs an event loop to run.
    outcome = asyncio.run(run_tests())
    # Exit non-zero only on an explicit False so shells and CI can see a
    # failed run; a None result keeps the default exit status of 0.
    if outcome is False:
        sys.exit(1)