File size: 3,928 Bytes
1660eeb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197d34d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
from dotenv import load_dotenv

# Load environment variables before Config is defined: the class body below
# reads its API keys and Qdrant host with os.getenv() when this module is
# imported, so this call has to run first.
# (Presumably this picks the values up from a local .env file, which is where
# Config.validate_config() tells users to set them - confirm against the
# python-dotenv version in use.)
load_dotenv()

class Config:
    """Central, class-level configuration for the Insurance Helper RAG application.

    Every setting is a class attribute, so no instance is needed: read values
    directly (``Config.TOP_K``) or through the ``get_*_config`` helpers, which
    bundle related settings into plain dictionaries.

    The three credential/endpoint attributes are read from the process
    environment once, when this class body executes. Call
    ``Config.validate_config()`` at startup to fail fast if any is missing.
    """

    # API keys / endpoint, read from the environment when the class is defined.
    # NOTE: two attribute names differ from the environment variable they are
    # loaded from: GEMINI_API_KEY <- GOOGLE_API_KEY, QDRANT_URL <- QDRANT_HOST.
    GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
    QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
    QDRANT_URL = os.getenv("QDRANT_HOST")

    # Gemini Model Configuration
    GEMINI_MODEL = "gemini-2.5-flash"
    GEMINI_TEMPERATURE = 0.2
    GEMINI_MAX_OUTPUT_TOKENS = 2048

    # Embedding Configuration
    # NOTE(review): gemini-embedding-001 produces larger vectors by default
    # unless the request asks for a reduced output dimensionality; the code
    # that calls the embedding API must request EMBEDDING_DIMENSION, otherwise
    # vectors will not fit a collection created with VECTOR_SIZE - confirm at
    # the call site.
    EMBEDDING_MODEL = "gemini-embedding-001"
    EMBEDDING_DIMENSION = 768

    # PDF Processing Configuration
    # NOTE(review): units (characters vs. tokens) depend on the text splitter
    # that consumes these values - confirm against the caller.
    CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 200

    # Chunking separators optimized for insurance documents, listed from the
    # coarsest boundary to the finest.
    SEPARATORS = [
        "\n\n",  # Paragraph breaks
        "\n",    # Line breaks
        ". ",    # Sentence breaks
        ", ",    # Clause breaks
        " ",     # Word breaks
        ""       # Character breaks
    ]

    # Qdrant Configuration
    COLLECTION_NAME = "insurance_documents"
    # Kept tied to the embedding size so the two cannot drift apart.
    VECTOR_SIZE = EMBEDDING_DIMENSION
    DISTANCE_METRIC = "Cosine"

    # Retrieval Configuration
    TOP_K = 5
    # NOTE(review): presumably the minimum similarity score a hit must reach
    # to be used - confirm against the retrieval code.
    SIMILARITY_THRESHOLD = 0.7

    # RAG Prompt Template; expects the placeholders {context} and {question}.
    RAG_PROMPT_TEMPLATE = """You are an expert insurance advisor helping users understand their insurance documents.
Use the following context from insurance documents to answer the user's question.
Provide clear, accurate information and explain insurance terms in simple language.

Context from insurance documents:
{context}

User Question: {question}

Instructions:
- Answer based primarily on the provided context
- If the context doesn't contain enough information, clearly state that
- Explain insurance jargon in plain language
- For add-on recommendations, compare coverage, costs, and benefits
- Highlight important exclusions or limitations
- Be precise about coverage details and conditions

Answer:"""

    # System Instructions for Gemini
    SYSTEM_INSTRUCTION = """You are an expert insurance advisor. Your role is to:
1. Help users understand complex insurance documents
2. Explain insurance terms and conditions in simple language
3. Provide recommendations for add-ons based on coverage gaps
4. Compare different coverage options clearly
5. Highlight important exclusions and limitations
6. Always prioritize accuracy and clarity

Remember: You provide informational guidance only, not professional insurance advice."""

    @classmethod
    def validate_config(cls) -> bool:
        """Check that every required credential/endpoint setting is present.

        Returns:
            True when all required settings hold a non-blank value.

        Raises:
            ValueError: If any required setting is unset, empty, or only
                whitespace. The message lists the *environment variable*
                names that must be set.
        """
        # Pair each value with the environment variable it is loaded from, not
        # with the attribute name: the message tells the user what to put in
        # .env, and GEMINI_API_KEY / QDRANT_URL are not the variables read.
        required = (
            ("GOOGLE_API_KEY", cls.GEMINI_API_KEY),
            ("QDRANT_API_KEY", cls.QDRANT_API_KEY),
            ("QDRANT_HOST", cls.QDRANT_URL),
        )

        # A whitespace-only value is as unusable as a missing one.
        missing = [
            env_name
            for env_name, value in required
            if not value or (isinstance(value, str) and not value.strip())
        ]

        if missing:
            raise ValueError(
                f"Missing required environment variables: {', '.join(missing)}\n"
                "Please set them in your .env file"
            )

        return True

    @classmethod
    def get_qdrant_config(cls) -> dict:
        """Return the Qdrant settings as a dictionary.

        Keys: ``url``, ``api_key``, ``collection_name``, ``vector_size``,
        ``distance``.
        """
        return {
            "url": cls.QDRANT_URL,
            "api_key": cls.QDRANT_API_KEY,
            "collection_name": cls.COLLECTION_NAME,
            "vector_size": cls.VECTOR_SIZE,
            "distance": cls.DISTANCE_METRIC,
        }

    @classmethod
    def get_chunking_config(cls) -> dict:
        """Return the text chunking settings as a dictionary.

        Keys: ``chunk_size``, ``chunk_overlap``, ``separators``.
        """
        return {
            "chunk_size": cls.CHUNK_SIZE,
            "chunk_overlap": cls.CHUNK_OVERLAP,
            # Hand out a copy so a caller that edits the list cannot change
            # the class-level SEPARATORS shared by everyone else.
            "separators": list(cls.SEPARATORS),
        }

    @classmethod
    def get_retrieval_config(cls) -> dict:
        """Return the retrieval settings as a dictionary.

        Keys: ``top_k``, ``similarity_threshold``.
        """
        return {
            "top_k": cls.TOP_K,
            "similarity_threshold": cls.SIMILARITY_THRESHOLD,
        }