File size: 2,427 Bytes
cff57a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""Test script to debug query processing for 'Was kostet eine Namensänderung?'"""

import asyncio
import logging
import sys
from src.config import get_config
from src.indexing.indexer import DocumentIndexer
from src.pipeline.orchestrator import RAGOrchestrator

# Root logging setup: records at INFO and above are written to stdout with a
# timestamp / logger name / level prefix. DEBUG records are not shown at this
# level.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
_stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    handlers=[_stdout_handler],
)

# Explicitly set the retrieval-stage loggers to INFO as well.
for _logger_name in (
    'src.retrieval.hybrid_retriever',
    'src.retrieval.query_rewriter',
):
    logging.getLogger(_logger_name).setLevel(logging.INFO)


async def test_query():
    """Run the problematic query through the RAG pipeline and print a summary.

    Builds the config, the document indexer and the orchestrator, awaits
    ``orchestrator.process_query`` for the fixed German query, then prints
    the number of retrieved documents, the processing time, the detected
    intent, up to three retrieved documents and a preview of the email draft.

    Exceptions raised while processing the query or printing the summary are
    caught and printed together with their traceback; they are not re-raised.
    Exceptions raised while constructing the config, indexer or orchestrator
    propagate to the caller.
    """
    query = "Was kostet eine Namensänderung?"

    print("\n" + "="*80)
    print(f"Testing query: '{query}'")
    print("="*80 + "\n")

    # Load config
    config = get_config()

    # Initialize indexer (this connects to document store)
    indexer = DocumentIndexer(config)

    # Initialize orchestrator
    orchestrator = RAGOrchestrator(config, indexer)

    print(f"\n>>> Running query: '{query}'\n")

    try:
        result = await orchestrator.process_query(query)

        print("\n" + "="*80)
        print("RESULTS SUMMARY")
        print("="*80)
        print(f"Documents retrieved: {len(result.retrieved_docs)}")
        print(f"Processing time: {result.processing_time:.2f}s")
        print("\nIntent detected:")
        print(f"  - Action: {result.intent.action_type}")
        print(f"  - Topic: {result.intent.topic}")
        print(f"  - Language: {result.intent.language}")

        if result.retrieved_docs:
            print("\nTop 3 retrieved documents:")
            for i, doc in enumerate(result.retrieved_docs[:3], 1):
                # A missing, None or otherwise non-numeric score cannot be
                # rendered with the '.4f' spec; formatting it would raise and
                # abort the rest of the summary, so fall back to 'N/A'.
                try:
                    score_text = f"{doc.get('score'):.4f}"
                except (TypeError, ValueError):
                    score_text = "N/A"

                # Treat an explicit None for 'meta' / 'content' like a
                # missing key so the preview lines still print.
                meta = doc.get('meta') or {}
                content = doc.get('content') or ''

                print(f"\n  [{i}] Score: {score_text}")
                print(f"      Source: {meta.get('source_file', 'Unknown')}")
                print(f"      Preview: {content[:150]}...")

        print("\nEmail draft preview:")
        print(f"  Subject: {result.email_draft.subject}")
        print(f"  Body (first 200 chars): {result.email_draft.body[:200]}...")

    except Exception as e:
        # Top-level boundary of a debug script: report the failure with a
        # full traceback instead of letting the run die silently.
        print(f"\n❌ Error: {e}")
        import traceback
        traceback.print_exc()


# Run the async debug routine only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    asyncio.run(test_query())