Clocksp committed on
Commit
197d34d
·
verified ·
1 Parent(s): 28ca378

Upload config.py

Browse files
Files changed (1) hide show
  1. src/config.py +123 -0
src/config.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ # Load environment variables
5
+ load_dotenv()
6
+
7
class Config:
    """Central configuration for the Insurance Helper RAG application.

    Every setting is a class attribute, so the class is used directly and
    never instantiated. The three credential/endpoint attributes are read
    from the process environment once, when the class body executes; call
    ``validate_config()`` before using them to fail fast on missing values.
    """

    # API Keys
    # NOTE: two attribute names differ from the environment variables that
    # feed them (GEMINI_API_KEY <- GOOGLE_API_KEY, QDRANT_URL <- QDRANT_HOST).
    GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
    QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
    QDRANT_URL = os.getenv("QDRANT_HOST")

    # (environment variable, attribute) pairs that must be non-empty.
    # validate_config() reports the environment variable name, because that
    # is what the user has to put in the .env file.
    _REQUIRED_ENV_VARS = (
        ("GOOGLE_API_KEY", "GEMINI_API_KEY"),
        ("QDRANT_API_KEY", "QDRANT_API_KEY"),
        ("QDRANT_HOST", "QDRANT_URL"),
    )

    # Gemini Model Configuration
    GEMINI_MODEL = "gemini-2.5-flash"
    GEMINI_TEMPERATURE = 0.2
    GEMINI_MAX_OUTPUT_TOKENS = 2048

    # Embedding Configuration
    EMBEDDING_MODEL = "models/text-embedding-004"
    # Presumably the output size of EMBEDDING_MODEL - confirm if the model changes.
    EMBEDDING_DIMENSION = 768

    # PDF Processing Configuration
    CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 200

    # Chunking separators optimized for insurance documents
    SEPARATORS = [
        "\n\n",  # Paragraph breaks
        "\n",  # Line breaks
        ". ",  # Sentence breaks
        ", ",  # Clause breaks
        " ",  # Word breaks
        "",  # Character breaks
    ]

    # Qdrant Configuration
    COLLECTION_NAME = "insurance_documents"
    # Tied to the embedding dimension so the two cannot drift apart.
    VECTOR_SIZE = EMBEDDING_DIMENSION
    DISTANCE_METRIC = "Cosine"

    # Retrieval Configuration
    TOP_K = 5
    SIMILARITY_THRESHOLD = 0.7

    # RAG Prompt Template ({context} and {question} are format placeholders)
    RAG_PROMPT_TEMPLATE = """You are an expert insurance advisor helping users understand their insurance documents.
Use the following context from insurance documents to answer the user's question.
Provide clear, accurate information and explain insurance terms in simple language.

Context from insurance documents:
{context}

User Question: {question}

Instructions:
- Answer based primarily on the provided context
- If the context doesn't contain enough information, clearly state that
- Explain insurance jargon in plain language
- For add-on recommendations, compare coverage, costs, and benefits
- Highlight important exclusions or limitations
- Be precise about coverage details and conditions

Answer:"""

    # System Instructions for Gemini
    SYSTEM_INSTRUCTION = """You are an expert insurance advisor. Your role is to:
1. Help users understand complex insurance documents
2. Explain insurance terms and conditions in simple language
3. Provide recommendations for add-ons based on coverage gaps
4. Compare different coverage options clearly
5. Highlight important exclusions and limitations
6. Always prioritize accuracy and clarity

Remember: You provide informational guidance only, not professional insurance advice."""

    @classmethod
    def validate_config(cls) -> bool:
        """Check that every required credential/endpoint setting is present.

        A setting counts as missing when its attribute is ``None`` or empty.

        Returns:
            ``True`` when all required settings are present.

        Raises:
            ValueError: If any setting is missing. The message lists the
                environment variable names to set (for example
                ``GOOGLE_API_KEY``), not the attribute names.
        """
        missing = [
            env_name
            for env_name, attr_name in cls._REQUIRED_ENV_VARS
            if not getattr(cls, attr_name)
        ]

        if missing:
            raise ValueError(
                f"Missing required environment variables: {', '.join(missing)}\n"
                "Please set them in your .env file"
            )

        return True

    @classmethod
    def get_qdrant_config(cls) -> dict:
        """Return the Qdrant connection and collection settings as a dict."""
        return {
            "url": cls.QDRANT_URL,
            "api_key": cls.QDRANT_API_KEY,
            "collection_name": cls.COLLECTION_NAME,
            "vector_size": cls.VECTOR_SIZE,
            "distance": cls.DISTANCE_METRIC,
        }

    @classmethod
    def get_chunking_config(cls) -> dict:
        """Return the text-chunking settings as a dict.

        ``separators`` is a copy, so callers cannot mutate the shared
        class-level ``SEPARATORS`` list by accident.
        """
        return {
            "chunk_size": cls.CHUNK_SIZE,
            "chunk_overlap": cls.CHUNK_OVERLAP,
            "separators": list(cls.SEPARATORS),
        }

    @classmethod
    def get_retrieval_config(cls) -> dict:
        """Return the retrieval settings as a dict."""
        return {
            "top_k": cls.TOP_K,
            "similarity_threshold": cls.SIMILARITY_THRESHOLD,
        }