import logging import os import sys from datetime import datetime # Adjust the path to import from the root directory sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from retrieval_manager import RetrievalManager # Configure logging logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') # --- Test Configuration --- DB_PATH = "./chroma_db" EMBEDDING_MODEL = 'BAAI/bge-large-en-v1.5' REPORT_DIR = "./logs" REPORT_FILE = os.path.join(REPORT_DIR, f"retrieval_evaluation_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.html") # The test set of queries from the retrieval plan EVALUATION_QUERIES = [ { "id": 1, "query": "What laptops do you have?", "expected_collections": ["products"], "notes": "Should return a variety of laptops from the products collection." }, { "id": 2, "query": "Do you have any Gaming laptops?", "expected_collections": ["products"], "notes": "Should return laptops with 'gaming' in their description or specs." }, { "id": 3, "query": "What Lightweight laptops do you have", "expected_collections": ["products"], "notes": "Pure semantic search. Should find laptops described as lightweight, portable, etc." }, { "id": 4, "query": "Budget camera under $300", "expected_collections": ["products"], "notes": "Filters by price (< 300) and performs semantic search for 'Budget camera'." }, { "id": 5, "query": "Share more details on SmartX ProPhone camera reviews", "expected_collections": ["reviews"], "notes": "Should retrieve reviews specifically for the 'SmartX ProPhone'." }, { "id": 6, "query": "What do customers say about battery life of TechPro Ultrabook?", "expected_collections": ["reviews"], "notes": "Semantic search on reviews. Should find reviews mentioning battery about TechPro Ultrabook." }, { "id": 7, "query": "What TV under $500 do you have?", "expected_collections": ["products"], "notes": "Filters by price (< 500) and performs semantic search for 'What TV do you have?'." }, { "id": 8, "query": "What Audio products do you have", "expected_collections": ["products"], "notes": "Should retrieve products from the 'Audio' category." }, { "id": 9, "query": "Customer complaints about Ultrabook", "expected_collections": ["reviews"], "notes": "Should find negative reviews (complaints) for products named 'Ultrabook'." }, { "id": 10, "query": "Compare GameSphere X and Y", "expected_collections": ["products", "reviews"], "notes": "Should retrieve specs for both products and potentially reviews comparing them." } ] def generate_report_header(): """Generates the header for the HTML report.""" header = f""" Retrieval Evaluation Report

Retrieval Evaluation Report

Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

Database Path: {DB_PATH}

Embedding Model: {EMBEDDING_MODEL}

How to Interpret the 'Dist.' (Distance) Value

The Dist. value represents the dissimilarity between the query and the retrieved document. A lower value is better, indicating higher semantic relevance.

""" return header def generate_report_footer(): """Generates the footer for the HTML report.""" return """

Query ID	Query	Expected	Retrieved	Pass/Fail	Notes

""" def run_evaluation(): """ Runs the full evaluation process: executes queries, prints results to console, and generates an HTML report. """ logger.info("--- Starting Retrieval Evaluation Script ---") # --- Pre-run Check --- if not os.path.exists(DB_PATH): logger.error(f"FATAL: ChromaDB path '{DB_PATH}' not found.") logger.error("Please run 'vector_db_manager.py' first to create and populate the database.") return # --- Initialize Manager --- try: retrieval_manager = RetrievalManager(db_path=DB_PATH, model_name=EMBEDDING_MODEL) except Exception as e: logger.error(f"Failed to initialize RetrievalManager: {e}", exc_info=True) return # --- Prepare Report --- os.makedirs(REPORT_DIR, exist_ok=True) report_content = generate_report_header() # --- Execute Queries --- for item in EVALUATION_QUERIES: query_id = item["id"] query = item["query"] print("\n" + "="*80) logger.info(f"Executing Query #{query_id}: '{query}'") print("="*80) search_results = retrieval_manager.search(query) retrieved_summary = [] for collection_name, results in search_results.items(): print(f"\n--- Results from '{collection_name}' collection ---") if results and results.get('documents') and results['documents'][0]: for i, doc_id in enumerate(results['ids'][0]): doc_text = results['documents'][0][i] distance = results['distances'][0][i] metadata = results['metadatas'][0][i] display_text = "" if collection_name == "products": product_name = metadata.get("product_name", "N/A") display_text = f"Product: {product_name}" elif collection_name == "reviews": # Take first 15 words of the review text words = doc_text.split() display_text = "Review: " + " ".join(words[:15]) + ("..." if len(words) > 15 else "") summary_item = f"

{collection_name}: {doc_id} - {display_text} (Dist: {distance:.4f})

" retrieved_summary.append(summary_item) print(f" - Result {i+1} (ID: {doc_id}, Distance: {distance:.4f})") print(f" Type: {collection_name}") print(f" Display Text: {display_text}") print(f" Metadata: {metadata}") print(f" Document: {doc_text[:150].strip()}...") else: print(" No results found in this collection.") # Append to HTML report retrieved_html = f"

{''.join(retrieved_summary)}" if retrieved_summary else "None" report_content += f""" {query_id} {query} {' & '.join(item['expected_collections'])} {retrieved_html} {item['notes']} """ report_content += generate_report_footer() # --- Save Report --- try: with open(REPORT_FILE, "w", encoding="utf-8") as f: f.write(report_content) logger.info(f"Successfully generated evaluation report: {REPORT_FILE}") except IOError as e: logger.error(f"Failed to write report file: {e}") logger.info("--- Retrieval Evaluation Script Finished ---") if __name__ == '__main__': run_evaluation()