File size: 4,167 Bytes
00eef43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""

Quick test script for RAG system

Run this to verify everything is working

"""

import os
import shutil
from pathlib import Path

from dotenv import load_dotenv
from openai import OpenAI

from evaluation import RAGEvaluator
from rag_system import RAGSystem, QueryExpander

# Load .env before constructing the client -- OpenAI() reads OPENAI_API_KEY
# from the environment at instantiation time. override=True lets .env values
# win over any variables already set in the shell.
load_dotenv(override=True)
openai_client = OpenAI()

print("="*60)
print("πŸ§ͺ RAG System Quick Test")
print("="*60)

# Test 1: Query Expansion -- verify the expander yields extra query variants.
print("\n1️⃣  Testing Query Expansion...")
try:
    original_query = "What are your skills?"
    variations = QueryExpander(openai_client).expand_query(
        original_query, num_variations=2
    )
    print(f"βœ“ Original: {original_query}")
    print(f"βœ“ Expanded to {len(variations)} queries")
    # variations[0] is the original query itself; list only the new ones.
    for idx, variant in enumerate(variations[1:], start=1):
        print(f"  {idx}. {variant}")
except Exception as e:
    print(f"βœ— Query expansion failed: {e}")

# Test 2: Document Loading -- build a tiny in-memory knowledge base.
# `rag_system` and `test_docs` stay module-level: Tests 3 and 4 reuse them.
print("\n2️⃣  Testing Document Loading...")
try:
    # Small corpus; chunk_size/overlap are deliberately tiny so every doc
    # still produces at least one chunk.
    test_docs = {
        "doc1": "I have experience with Python, JavaScript, and SQL. I've worked on ML projects.",
        "doc2": "My education includes a degree in Computer Science. I studied AI and databases.",
        "doc3": "I'm passionate about building scalable systems and working with data."
    }

    rag_system = RAGSystem(openai_client, data_dir="data_test")
    rag_system.load_knowledge_base(test_docs, chunk_size=100, overlap=20)
    print("βœ“ RAG system initialized")
    print(f"βœ“ Loaded {len(test_docs)} test documents")
except Exception as e:
    print(f"βœ— Document loading failed: {e}")
    # Later tests depend on rag_system, so abort here. raise SystemExit(1)
    # instead of exit(1): the exit() builtin comes from the site module and
    # is not guaranteed to exist (e.g. under `python -S`).
    raise SystemExit(1)

# Test 3: Retrieval Methods -- exercise each retrieval strategy on one query.
print("\n3️⃣  Testing Retrieval Methods...")
test_query = "What programming languages?"

for retrieval_method in ("bm25", "semantic", "hybrid", "hybrid_rerank"):
    try:
        hits = rag_system.retriever.retrieve(
            test_query, method=retrieval_method, top_k=2
        )
        print(f"βœ“ {retrieval_method:15s}: Retrieved {len(hits)} documents")
        if hits:
            print(f"  Top score: {hits[0]['retrieval_score']:.4f}")
    except Exception as e:
        # One failing strategy should not stop the remaining ones.
        print(f"βœ— {retrieval_method:15s}: Failed - {e}")

# Test 4: End-to-End RAG Query -- retrieval plus answer generation in one call.
print("\n4️⃣  Testing End-to-End RAG Query...")
try:
    rag_response = rag_system.query(
        "What programming languages do you know?",
        "You are answering questions about a person's professional background.",
        method="hybrid_rerank",
        top_k=3,
    )

    answer_text = rag_response['answer']
    print("βœ“ Query successful!")
    print(f"βœ“ Retrieved {len(rag_response['context'])} context documents")
    print(f"βœ“ Generated answer ({len(answer_text)} characters)")
    # Show only the first 200 characters so the console stays readable.
    print(f"\nAnswer preview:\n{answer_text[:200]}...")
except Exception as e:
    print(f"βœ— RAG query failed: {e}")

# Test 5: LLM-as-Judge -- score one document's relevance to one query.
print("\n5️⃣  Testing LLM-as-Judge...")
try:
    judge = RAGEvaluator(openai_client)

    verdict = judge.llm_as_judge_relevance(
        query="What are your programming skills?",
        document="I have experience with Python, JavaScript, and SQL.",
        context="Professional background",
    )

    print("βœ“ LLM judge evaluation successful")
    print(f"  Relevance score: {verdict['relevance_score']}/5")
    print(f"  Explanation: {verdict['explanation']}")
except Exception as e:
    print(f"βœ— LLM judge failed: {e}")

# Summary -- closing banner and pointers to the next steps.
separator = "=" * 60
print("\n" + separator)
print("βœ… All tests completed!")
print(separator)
print("\nπŸ’‘ Next steps:")
for step in (
    "  1. Add your linkedin.pdf to the me/ folder",
    "  2. Edit me/summary.txt with your information",
    "  3. Update NAME in app.py",
    "  4. Run: python app.py",
):
    print(step)
print("\nπŸ“Š For full evaluation:")
print("  jupyter notebook demo_and_evaluation.ipynb")
print(separator)

# Cleanup -- remove the throwaway "data_test" directory created by Test 2.
# (The mid-file `import shutil` was hoisted to the top-of-file import block,
# per PEP 8's imports-at-top convention.)
print("\n🧹 Cleaning up test data...")
if Path("data_test").exists():
    shutil.rmtree("data_test")
    print("βœ“ Test data cleaned up")