Clocksp committed on
Commit
1660eeb
·
verified ·
1 Parent(s): d0f35dc

Update src/config.py

Browse files
Files changed (1) hide show
  1. src/config.py +122 -122
src/config.py CHANGED
@@ -1,123 +1,123 @@
1
- import os
2
- from dotenv import load_dotenv
3
-
4
- # Load environment variables
5
- load_dotenv()
6
-
7
class Config:
    """Central configuration for the Insurance Helper RAG application.

    All settings are class attributes, so the class is used directly
    (``Config.TOP_K``) and is never instantiated in this file. The API keys
    and the Qdrant URL are read from the process environment once, when this
    class body is executed; everything else is a hard-coded constant.
    """

    # --- API keys / endpoints -------------------------------------------
    # NOTE: the attribute names differ from the environment variable names
    # for two of these (GEMINI_API_KEY <- GOOGLE_API_KEY,
    # QDRANT_URL <- QDRANT_HOST). Each is None when the variable is unset.
    GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
    QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
    QDRANT_URL = os.getenv("QDRANT_HOST")

    # --- Gemini model configuration -------------------------------------
    GEMINI_MODEL = "gemini-2.5-flash"
    GEMINI_TEMPERATURE = 0.2
    GEMINI_MAX_OUTPUT_TOKENS = 2048

    # --- Embedding configuration ----------------------------------------
    EMBEDDING_MODEL = "models/text-embedding-004"
    EMBEDDING_DIMENSION = 768

    # --- PDF processing configuration -----------------------------------
    CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 200

    # Chunking separators optimized for insurance documents, ordered from
    # the coarsest boundary to the finest.
    SEPARATORS = [
        "\n\n",  # Paragraph breaks
        "\n",    # Line breaks
        ". ",    # Sentence breaks
        ", ",    # Clause breaks
        " ",     # Word breaks
        "",      # Character breaks
    ]

    # --- Qdrant configuration -------------------------------------------
    COLLECTION_NAME = "insurance_documents"
    # Derived from the embedding dimension so the two cannot drift apart.
    VECTOR_SIZE = EMBEDDING_DIMENSION
    DISTANCE_METRIC = "Cosine"

    # --- Retrieval configuration ----------------------------------------
    TOP_K = 5
    # NOTE(review): presumably a minimum similarity score for retrieved
    # chunks; confirm against the retriever that consumes it.
    SIMILARITY_THRESHOLD = 0.7

    # --- RAG prompt template --------------------------------------------
    # Contains the ``{context}`` and ``{question}`` placeholders.
    RAG_PROMPT_TEMPLATE = """You are an expert insurance advisor helping users understand their insurance documents.
Use the following context from insurance documents to answer the user's question.
Provide clear, accurate information and explain insurance terms in simple language.

Context from insurance documents:
{context}

User Question: {question}

Instructions:
- Answer based primarily on the provided context
- If the context doesn't contain enough information, clearly state that
- Explain insurance jargon in plain language
- For add-on recommendations, compare coverage, costs, and benefits
- Highlight important exclusions or limitations
- Be precise about coverage details and conditions

Answer:"""

    # --- System instructions for Gemini ---------------------------------
    SYSTEM_INSTRUCTION = """You are an expert insurance advisor. Your role is to:
1. Help users understand complex insurance documents
2. Explain insurance terms and conditions in simple language
3. Provide recommendations for add-ons based on coverage gaps
4. Compare different coverage options clearly
5. Highlight important exclusions and limitations
6. Always prioritize accuracy and clarity

Remember: You provide informational guidance only, not professional insurance advice."""

    @classmethod
    def validate_config(cls) -> bool:
        """Check that every required environment-backed setting is present.

        Returns:
            True when all required settings are non-empty.

        Raises:
            ValueError: if any required setting is unset, empty, or only
                whitespace. The message lists the names of the environment
                variables to set.
        """
        # Pair each value with the name of the environment variable it is
        # read from (not the attribute name), so the error message tells the
        # user exactly what to put in their .env file.
        required = (
            ("GOOGLE_API_KEY", cls.GEMINI_API_KEY),
            ("QDRANT_API_KEY", cls.QDRANT_API_KEY),
            ("QDRANT_HOST", cls.QDRANT_URL),
        )

        missing = [
            env_name
            for env_name, value in required
            if not (value and str(value).strip())
        ]

        if missing:
            raise ValueError(
                f"Missing required environment variables: {', '.join(missing)}\n"
                "Please set them in your .env file"
            )

        return True

    @classmethod
    def get_qdrant_config(cls) -> dict:
        """Return the Qdrant settings as a new dictionary.

        Keys: ``url``, ``api_key``, ``collection_name``, ``vector_size``,
        ``distance``.
        """
        return {
            "url": cls.QDRANT_URL,
            "api_key": cls.QDRANT_API_KEY,
            "collection_name": cls.COLLECTION_NAME,
            "vector_size": cls.VECTOR_SIZE,
            "distance": cls.DISTANCE_METRIC,
        }

    @classmethod
    def get_chunking_config(cls) -> dict:
        """Return the text-chunking settings as a new dictionary.

        Keys: ``chunk_size``, ``chunk_overlap``, ``separators``.
        """
        return {
            "chunk_size": cls.CHUNK_SIZE,
            "chunk_overlap": cls.CHUNK_OVERLAP,
            # Hand out a copy so callers cannot mutate the shared class-level
            # list by accident.
            "separators": list(cls.SEPARATORS),
        }

    @classmethod
    def get_retrieval_config(cls) -> dict:
        """Return the retrieval settings as a new dictionary.

        Keys: ``top_k``, ``similarity_threshold``.
        """
        return {
            "top_k": cls.TOP_K,
            "similarity_threshold": cls.SIMILARITY_THRESHOLD,
        }
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ # Load environment variables
5
+ load_dotenv()
6
+
7
class Config:
    """Central configuration for the Insurance Helper RAG application.

    All settings are class attributes, so the class is used directly
    (``Config.TOP_K``) and is never instantiated in this file. The API keys
    and the Qdrant URL are read from the process environment once, when this
    class body is executed; everything else is a hard-coded constant.
    """

    # --- API keys / endpoints -------------------------------------------
    # NOTE: the attribute names differ from the environment variable names
    # for two of these (GEMINI_API_KEY <- GOOGLE_API_KEY,
    # QDRANT_URL <- QDRANT_HOST). Each is None when the variable is unset.
    GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
    QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
    QDRANT_URL = os.getenv("QDRANT_HOST")

    # --- Gemini model configuration -------------------------------------
    GEMINI_MODEL = "gemini-2.5-flash"
    GEMINI_TEMPERATURE = 0.2
    GEMINI_MAX_OUTPUT_TOKENS = 2048

    # --- Embedding configuration ----------------------------------------
    EMBEDDING_MODEL = "gemini-embedding-001"
    # NOTE(review): gemini-embedding-001 returns 3072-dimensional vectors by
    # default. Confirm that the embedding call requests 768 dimensions
    # (output_dimensionality=768); otherwise VECTOR_SIZE below will not match
    # the vectors being stored.
    EMBEDDING_DIMENSION = 768

    # --- PDF processing configuration -----------------------------------
    CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 200

    # Chunking separators optimized for insurance documents, ordered from
    # the coarsest boundary to the finest.
    SEPARATORS = [
        "\n\n",  # Paragraph breaks
        "\n",    # Line breaks
        ". ",    # Sentence breaks
        ", ",    # Clause breaks
        " ",     # Word breaks
        "",      # Character breaks
    ]

    # --- Qdrant configuration -------------------------------------------
    COLLECTION_NAME = "insurance_documents"
    # Derived from the embedding dimension so the two cannot drift apart.
    VECTOR_SIZE = EMBEDDING_DIMENSION
    DISTANCE_METRIC = "Cosine"

    # --- Retrieval configuration ----------------------------------------
    TOP_K = 5
    # NOTE(review): presumably a minimum similarity score for retrieved
    # chunks; confirm against the retriever that consumes it.
    SIMILARITY_THRESHOLD = 0.7

    # --- RAG prompt template --------------------------------------------
    # Contains the ``{context}`` and ``{question}`` placeholders.
    RAG_PROMPT_TEMPLATE = """You are an expert insurance advisor helping users understand their insurance documents.
Use the following context from insurance documents to answer the user's question.
Provide clear, accurate information and explain insurance terms in simple language.

Context from insurance documents:
{context}

User Question: {question}

Instructions:
- Answer based primarily on the provided context
- If the context doesn't contain enough information, clearly state that
- Explain insurance jargon in plain language
- For add-on recommendations, compare coverage, costs, and benefits
- Highlight important exclusions or limitations
- Be precise about coverage details and conditions

Answer:"""

    # --- System instructions for Gemini ---------------------------------
    SYSTEM_INSTRUCTION = """You are an expert insurance advisor. Your role is to:
1. Help users understand complex insurance documents
2. Explain insurance terms and conditions in simple language
3. Provide recommendations for add-ons based on coverage gaps
4. Compare different coverage options clearly
5. Highlight important exclusions and limitations
6. Always prioritize accuracy and clarity

Remember: You provide informational guidance only, not professional insurance advice."""

    @classmethod
    def validate_config(cls) -> bool:
        """Check that every required environment-backed setting is present.

        Returns:
            True when all required settings are non-empty.

        Raises:
            ValueError: if any required setting is unset, empty, or only
                whitespace. The message lists the names of the environment
                variables to set.
        """
        # Pair each value with the name of the environment variable it is
        # read from (not the attribute name), so the error message tells the
        # user exactly what to put in their .env file.
        required = (
            ("GOOGLE_API_KEY", cls.GEMINI_API_KEY),
            ("QDRANT_API_KEY", cls.QDRANT_API_KEY),
            ("QDRANT_HOST", cls.QDRANT_URL),
        )

        missing = [
            env_name
            for env_name, value in required
            if not (value and str(value).strip())
        ]

        if missing:
            raise ValueError(
                f"Missing required environment variables: {', '.join(missing)}\n"
                "Please set them in your .env file"
            )

        return True

    @classmethod
    def get_qdrant_config(cls) -> dict:
        """Return the Qdrant settings as a new dictionary.

        Keys: ``url``, ``api_key``, ``collection_name``, ``vector_size``,
        ``distance``.
        """
        return {
            "url": cls.QDRANT_URL,
            "api_key": cls.QDRANT_API_KEY,
            "collection_name": cls.COLLECTION_NAME,
            "vector_size": cls.VECTOR_SIZE,
            "distance": cls.DISTANCE_METRIC,
        }

    @classmethod
    def get_chunking_config(cls) -> dict:
        """Return the text-chunking settings as a new dictionary.

        Keys: ``chunk_size``, ``chunk_overlap``, ``separators``.
        """
        return {
            "chunk_size": cls.CHUNK_SIZE,
            "chunk_overlap": cls.CHUNK_OVERLAP,
            # Hand out a copy so callers cannot mutate the shared class-level
            # list by accident.
            "separators": list(cls.SEPARATORS),
        }

    @classmethod
    def get_retrieval_config(cls) -> dict:
        """Return the retrieval settings as a new dictionary.

        Keys: ``top_k``, ``similarity_threshold``.
        """
        return {
            "top_k": cls.TOP_K,
            "similarity_threshold": cls.SIMILARITY_THRESHOLD,
        }