# Path: test/tests/test_rag.py
# Committed by: Kirtan001
# Commit message: Fresh Start: Clean Repo without binaries
# Commit: ad06665
import sys
import os
import time
# Fix Windows Unicode printing issues by switching stdout to UTF-8.
# sys.stdout may be None or a replacement stream that has no reconfigure()
# method, so look the method up defensively instead of assuming it exists.
if sys.platform == "win32":
    _reconfigure = getattr(sys.stdout, "reconfigure", None)
    if callable(_reconfigure):
        _reconfigure(encoding="utf-8")

# Ensure the directory above this file (the one holding the ``src`` package
# imported below) is importable. Skip the append when it is already present so
# re-importing this module does not pile up duplicate entries.
_project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if _project_root not in sys.path:
    sys.path.append(_project_root)
from src.rag_engine import SatelliteRAG
# Default prompts exercised by run_tests(), grouped by the behaviour they probe.
_DEFAULT_QUESTIONS = (
    # --- 1. Specific Facts (Previously Failed) ---
    "What is the launch mass of Chang'e 5?",
    "Who operates the Fengyun satellites?",
    "List the instruments on ZY-3 01.",
    # --- 2. Inference & Lists ---
    "How many communication satellites are there? List examples.",
    "What categories of satellites does China have?",
    # --- 3. Comparisons ---
    "What is the difference between Gaofen 1 and Gaofen 2?",
    "Compare the orbits of Beidou-2 and Beidou-3 satellites.",
    # --- 4. Specific Details ---
    "When was the first Dong Fang Hong satellite launched?",
    "What is the purpose of the Shiyan 10 satellite?",
    "Which rocket launched the Tianwen-1 mission?",
    "What is the resolution of Gaofen 7?",
    # --- 5. Aggregation/Counts ---
    "List three Earth Observation satellites.",
    "Name two satellites launched in 2022.",
    # --- 6. Edge Cases / Negative Tests ---
    "Who is the CEO of SpaceX?",  # Should be unknown/unrelated to context
    "What is the price of a ticket to the moon?",  # Should be unknown
    # --- 7. Complex/Multi-hop ---
    "Describe the payload of the TanSat satellite.",
    "What is the lifetime of the Tiangong-2 space station?",
    "Which satellite uses the Phoenix-Eye-1 bus?",
    "Who manufactured the APStar 6D satellite?",
    "What is the COSPAR ID of Yaogan 30-01?",
)

# Substrings that mark a question as a negative test, i.e. one the engine is
# expected to decline rather than answer.
_OUT_OF_SCOPE_MARKERS = ("CEO", "price")

# Substring treated as "the engine declined to answer".
# NOTE(review): presumably the wording the engine's prompt asks the model to
# use -- confirm against src.rag_engine.
_REFUSAL_PHRASE = "I don't have"

# Maximum number of answer characters echoed to the console per question.
_PREVIEW_CHARS = 200


def _classify_answer(question, answer):
    """Return the status label for one question/answer pair.

    Negative-test questions pass only when the answer contains the refusal
    phrase; every other question passes only when it does not.
    """
    refused = _REFUSAL_PHRASE in answer
    out_of_scope = any(marker in question for marker in _OUT_OF_SCOPE_MARKERS)
    if out_of_scope:
        return "[PASS]" if refused else "[POTENTIAL FAIL] (Hallucination)"
    return "[POTENTIAL FAIL] (No Data)" if refused else "[PASS]"


def run_tests(engine=None, questions=None):
    """Run the question suite against a RAG engine and print a report.

    Args:
        engine: Object exposing ``query(question)`` that returns an
            ``(answer, docs)`` pair, where ``answer`` is a string and each
            item of ``docs`` has a ``metadata`` mapping. When ``None``, a
            ``SatelliteRAG`` instance is created.
        questions: Iterable of question strings. When ``None``, the built-in
            default suite is used.

    Returns:
        A list of ``(question, status, elapsed_seconds)`` tuples, one per
        question, in the order asked. The list is empty when the default
        engine could not be initialised. A question whose query raised is
        recorded with status ``"ERROR"`` and an elapsed time of ``0``.
    """
    if engine is None:
        print("Initializing SatelliteRAG Engine...")
        try:
            engine = SatelliteRAG()
        except Exception as e:
            print(f"[ERROR] Failed to initialize engine: {e}")
            return []
        print("[OK] Engine Initialized Successfully.")

    questions = list(_DEFAULT_QUESTIONS if questions is None else questions)
    total = len(questions)

    print(f"\n[START] Starting Test Suite: {total} Questions\n" + "=" * 50)
    results = []
    for i, q in enumerate(questions, 1):
        print(f"\n[Test {i}/{total}] Question: {q}")
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by system clock adjustments.
        start_time = time.perf_counter()
        try:
            answer, docs = engine.query(q)
            elapsed = time.perf_counter() - start_time
            print(f"[Time]: {elapsed:.2f}s")

            # Only add an ellipsis when the answer was actually cut short.
            preview = answer.strip()
            if len(preview) > _PREVIEW_CHARS:
                preview = preview[:_PREVIEW_CHARS] + "..."
            print(f"[Answer]: {preview}")
            print(f"[Sources]: {[d.metadata.get('name') for d in docs[:3]]}")

            # Basic validation logic (can be expanded)
            status = _classify_answer(q, answer)
            print(f"Status: {status}")
            results.append((q, status, elapsed))
        except Exception as e:
            # Deliberately broad: one failing query must not abort the run.
            print(f" ERROR: {e}")
            results.append((q, "ERROR", 0))

    print("\n" + "=" * 50)
    print("Test Summary")
    print("=" * 50)
    for q, status, elapsed in results:
        print(f"{status} | {elapsed:.2f}s | {q}")
    return results
# Script entry point: run the suite only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    run_tests()