ChAbhishek28 committed on
Commit
4a1bc78
·
1 Parent(s): 9341027

Add 89999999999999999999999999

Browse files
Files changed (2) hide show
  1. enhanced_websocket_handler.py +77 -9
  2. rag_service.py +271 -30
enhanced_websocket_handler.py CHANGED
@@ -27,15 +27,48 @@ hybrid_llm_service = HybridLLMService()
27
  logger = logging.getLogger("voicebot")
28
 
29
  def analyze_query_context(query: str) -> dict:
30
- """Analyze query to determine if it's document-related or general"""
31
  query_lower = query.lower()
32
 
33
- # Government document keywords
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  doc_keywords = [
35
  'pension', 'leave', 'allowance', 'da', 'dearness', 'procurement', 'tender',
36
  'medical', 'reimbursement', 'transfer', 'posting', 'promotion', 'service',
37
  'rules', 'policy', 'government', 'circular', 'notification', 'benefits',
38
- 'gratuity', 'provident fund', 'retirement', 'salary', 'pay commission'
 
 
39
  ]
40
 
41
  # General conversation keywords
@@ -45,14 +78,26 @@ def analyze_query_context(query: str) -> dict:
45
  'time', 'date', 'joke', 'story', 'song', 'recipe', 'movie'
46
  ]
47
 
48
- # Count matches
 
 
 
 
 
 
 
 
 
 
 
 
49
  doc_matches = sum(1 for kw in doc_keywords if kw in query_lower)
50
  general_matches = sum(1 for kw in general_keywords if kw in query_lower)
51
 
52
  # Determine query type
53
- if doc_matches > 0:
54
  query_type = "document_related"
55
- confidence = min(doc_matches * 0.3, 1.0)
56
  elif general_matches > 0:
57
  query_type = "general_conversation"
58
  confidence = min(general_matches * 0.4, 1.0)
@@ -67,7 +112,9 @@ def analyze_query_context(query: str) -> dict:
67
  "type": query_type,
68
  "confidence": confidence,
69
  "doc_keywords_found": doc_matches,
70
- "general_keywords_found": general_matches
 
 
71
  }
72
 
73
  async def generate_llm_fallback_response(user_message: str, query_context: dict) -> str:
@@ -76,12 +123,33 @@ async def generate_llm_fallback_response(user_message: str, query_context: dict)
76
  # Determine which LLM to use based on query complexity
77
  provider = hybrid_llm_service.choose_llm_provider(user_message)
78
 
79
- # Create appropriate system prompt based on query type
 
80
  if query_context.get("type") == "general_conversation":
81
  system_prompt = """You are a helpful assistant for a government document system.
82
  The user is asking a general question not related to government documents.
83
  Provide a friendly, helpful response and gently guide them to ask about government policies,
84
- pension rules, leave policies, or other administrative matters if they need official information."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  else:
86
  system_prompt = """You are an AI assistant for government document queries.
87
  The user asked about something that wasn't found in the document database.
 
27
  logger = logging.getLogger("voicebot")
28
 
29
  def analyze_query_context(query: str) -> dict:
30
+ """Analyze query to determine if it's document-related or general, and identify user role"""
31
  query_lower = query.lower()
32
 
33
+ # Role-specific keywords and queries
34
+ role_patterns = {
35
+ 'pension_beneficiary': [
36
+ 'pension eligibility', 'pension documents', 'pension application', 'retirement benefits',
37
+ 'pension calculation', 'pension amount', 'family pension', 'commutation',
38
+ 'gratuity eligibility', 'provident fund withdrawal', 'medical benefits after retirement',
39
+ 'pension certificate', 'life certificate', 'pension arrears', 'how to apply pension',
40
+ 'pension office', 'pension disbursement', 'pension inquiry', 'pension status'
41
+ ],
42
+ 'procurement_officer': [
43
+ 'tender process', 'bid submission', 'procurement thresholds', 'gem portal',
44
+ 'msme relaxation', 'vendor registration', 'procurement checklist', 'bid evaluation',
45
+ 'tender documents', 'procurement rules', 'bidding process', 'contract award',
46
+ 'procurement guidelines', 'tender notice', 'technical bid', 'financial bid',
47
+ 'procurement manual', 'vendor empanelment', 'tender committee'
48
+ ],
49
+ 'finance_staff': [
50
+ 'sanctioning authority', 'financial approval', 'budget allocation', 'expenditure sanction',
51
+ 'financial registers', 'audit compliance', 'treasury rules', 'payment authorization',
52
+ 'financial delegation', 'budget utilization', 'fund release', 'financial procedure',
53
+ 'accounting rules', 'financial reporting', 'expenditure control', 'financial audit',
54
+ 'cash book', 'voucher processing', 'financial clearance'
55
+ ],
56
+ 'leadership_policymaker': [
57
+ 'policy impact', 'scenario analysis', 'cost comparison', 'policy implementation',
58
+ 'evidence pack', 'policy evaluation', 'impact assessment', 'strategic planning',
59
+ 'policy formulation', 'comparative analysis', 'policy review', 'governance framework',
60
+ 'administrative reform', 'policy effectiveness', 'decision support', 'policy brief'
61
+ ]
62
+ }
63
+
64
+ # Government document keywords (expanded)
65
  doc_keywords = [
66
  'pension', 'leave', 'allowance', 'da', 'dearness', 'procurement', 'tender',
67
  'medical', 'reimbursement', 'transfer', 'posting', 'promotion', 'service',
68
  'rules', 'policy', 'government', 'circular', 'notification', 'benefits',
69
+ 'gratuity', 'provident fund', 'retirement', 'salary', 'pay commission',
70
+ 'eligibility', 'documents', 'application', 'process', 'approval', 'sanction',
71
+ 'audit', 'finance', 'budget', 'expenditure', 'treasury', 'guidelines'
72
  ]
73
 
74
  # General conversation keywords
 
78
  'time', 'date', 'joke', 'story', 'song', 'recipe', 'movie'
79
  ]
80
 
81
+ # Detect user role
82
+ detected_role = None
83
+ role_confidence = 0.0
84
+
85
+ for role, patterns in role_patterns.items():
86
+ role_matches = sum(1 for pattern in patterns if pattern in query_lower)
87
+ if role_matches > 0:
88
+ current_confidence = min(role_matches * 0.4, 1.0)
89
+ if current_confidence > role_confidence:
90
+ detected_role = role
91
+ role_confidence = current_confidence
92
+
93
+ # Count keyword matches (document-related and general-conversation)
94
  doc_matches = sum(1 for kw in doc_keywords if kw in query_lower)
95
  general_matches = sum(1 for kw in general_keywords if kw in query_lower)
96
 
97
  # Determine query type
98
+ if doc_matches > 0 or detected_role:
99
  query_type = "document_related"
100
+ confidence = max(min(doc_matches * 0.3, 1.0), role_confidence)
101
  elif general_matches > 0:
102
  query_type = "general_conversation"
103
  confidence = min(general_matches * 0.4, 1.0)
 
112
  "type": query_type,
113
  "confidence": confidence,
114
  "doc_keywords_found": doc_matches,
115
+ "general_keywords_found": general_matches,
116
+ "detected_role": detected_role,
117
+ "role_confidence": role_confidence
118
  }
119
 
120
  async def generate_llm_fallback_response(user_message: str, query_context: dict) -> str:
 
123
  # Determine which LLM to use based on query complexity
124
  provider = hybrid_llm_service.choose_llm_provider(user_message)
125
 
126
+ # Create role-aware system prompt
127
+ detected_role = query_context.get("detected_role")
128
  if query_context.get("type") == "general_conversation":
129
  system_prompt = """You are a helpful assistant for a government document system.
130
  The user is asking a general question not related to government documents.
131
  Provide a friendly, helpful response and gently guide them to ask about government policies,
132
+ pension rules, leave policies, procurement procedures, or other administrative matters if they need official information."""
133
+ elif detected_role == "pension_beneficiary":
134
+ system_prompt = """You are an AI assistant specializing in government pension and retirement benefits.
135
+ The user appears to be a pension beneficiary or claimant. Provide helpful information about pension eligibility,
136
+ application processes, required documents, and procedures. Always remind them to verify information with
137
+ the pension disbursing authority and consult official government sources for the most current rules."""
138
+ elif detected_role == "procurement_officer":
139
+ system_prompt = """You are an AI assistant specializing in government procurement procedures.
140
+ The user appears to be involved in procurement or bidding processes. Provide helpful information about
141
+ tender procedures, MSME benefits, GeM portal usage, and procurement guidelines. Always remind them to
142
+ follow current procurement rules and consult the latest government circulars."""
143
+ elif detected_role == "finance_staff":
144
+ system_prompt = """You are an AI assistant specializing in government financial procedures.
145
+ The user appears to be finance staff. Provide helpful information about sanctioning procedures,
146
+ budget management, audit compliance, and treasury rules. Always remind them to follow current
147
+ financial rules and consult with the accounts department for official procedures."""
148
+ elif detected_role == "leadership_policymaker":
149
+ system_prompt = """You are an AI assistant specializing in policy analysis and decision support.
150
+ The user appears to be in a leadership or policy-making role. Provide helpful information about
151
+ policy impact analysis, evidence-based decision making, and strategic planning. Always recommend
152
+ consulting with relevant departments and conducting proper stakeholder consultations."""
153
  else:
154
  system_prompt = """You are an AI assistant for government document queries.
155
  The user asked about something that wasn't found in the document database.
rag_service.py CHANGED
@@ -11,6 +11,149 @@ import asyncio
11
 
12
  logger = logging.getLogger("voicebot")
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # Fallback content for when database is empty
15
  FALLBACK_CONTENT = {
16
  "pension": """Pension is a regular payment made during a person's retirement from an investment fund. For government employees in India, pension includes:
@@ -47,7 +190,42 @@ DA 6% Increment Impact:
47
  - Monthly credit via NEFT
48
 
49
  3. Benefits include pension, gratuity, and provident fund
50
- 4. Enhanced benefits for teachers and staff"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  }
52
 
53
  def get_fallback_content(query: str) -> List[Dict[str, Any]]:
@@ -129,6 +307,22 @@ def get_fallback_content(query: str) -> List[Dict[str, Any]]:
129
  "Obtain necessary approvals",
130
  "Maintain leave records"
131
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  else:
133
  fallback_text = f"I understand you're asking about '{query}'. While I don't have specific documents loaded for this query, I can help with government policies, pension rules, allowances, and administrative procedures. Please try rephrasing your question or ask about specific government benefits."
134
  checklist = [
@@ -199,8 +393,40 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
199
  # Enhance query for better relevance based on category
200
  enhanced_query = query
201
 
202
- # Pension queries
203
- if "pension" in query_lower:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  if any(word in query_lower for word in ["changes", "impact", "rules"]):
205
  enhanced_query = f"{query} pension rules retirement benefits modifications"
206
  elif "calculation" in query_lower or "formula" in query_lower:
@@ -225,6 +451,18 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
225
  # Transfer queries
226
  elif any(word in query_lower for word in ["transfer", "posting"]):
227
  enhanced_query = f"{query} transfer posting policy rules"
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  logger.info(f"🔍 Enhanced query: '{enhanced_query}' (original: '{query}')")
230
 
@@ -265,8 +503,19 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
265
  # Calculate relevance score based on query intent
266
  relevance_score = getattr(doc, 'score', 0.5) # Base score
267
 
268
- # Define query categories and their keywords
269
  query_categories = {
 
 
 
 
 
 
 
 
 
 
 
270
  'pension': ['pension', 'retirement', 'gratuity', 'provident fund', 'gpf', 'cpf', 'superannuation'],
271
  'leave': ['leave', 'casual leave', 'earned leave', 'medical leave', 'maternity', 'paternity'],
272
  'allowance': ['allowance', 'dearness allowance', 'da', 'hra', 'house rent', 'travel allowance', 'increment'],
@@ -274,11 +523,22 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
274
  'medical': ['medical', 'health', 'treatment', 'reimbursement', 'cghs', 'hospital'],
275
  'transfer': ['transfer', 'posting', 'deputation', 'cadre'],
276
  'promotion': ['promotion', 'seniority', 'grade', 'advancement', 'career progression'],
277
- 'service': ['service', 'conduct', 'discipline', 'rules', 'regulation']
 
 
 
278
  }
279
 
280
- # Content categories - what each document type contains
281
  content_categories = {
 
 
 
 
 
 
 
 
282
  'pension': ['pension', 'retirement', 'gratuity', 'provident fund', 'superannuation', 'benefits'],
283
  'leave': ['leave', 'casual', 'earned', 'medical leave', 'maternity'],
284
  'allowance': ['allowance', 'dearness', 'house rent', 'travel', 'da', 'hra', 'increment'],
@@ -286,6 +546,9 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
286
  'medical': ['medical', 'health', 'cghs', 'reimbursement', 'treatment'],
287
  'transfer': ['transfer', 'posting', 'deputation', 'cadre'],
288
  'promotion': ['promotion', 'seniority', 'grade pay', 'advancement'],
 
 
 
289
  'service': ['service rules', 'conduct', 'discipline', 'misconduct']
290
  }
291
 
@@ -334,30 +597,8 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
334
  clause_text = doc.page_content
335
  # Simple extractive summary: first sentence or up to 2 lines
336
  summary = clause_text.split(". ")[0][:180] + ("..." if len(clause_text) > 180 else "")
337
- # Role-aware checklist logic (basic template)
338
- role_checklist = []
339
- query_lower = query.lower()
340
- if "pension" in query_lower:
341
- role_checklist = [
342
- "Check eligibility (service years, misconduct)",
343
- "Collect required documents (service book, ID, proof)",
344
- "Obtain approvals (sanctioning authority)",
345
- "Submit application to pension office"
346
- ]
347
- elif "procurement" in query_lower or "bid" in query_lower:
348
- role_checklist = [
349
- "Review procurement thresholds and MSME relaxations",
350
- "Prepare bid documents",
351
- "Complete registration and approvals",
352
- "Submit bid before deadline"
353
- ]
354
- elif "finance" in query_lower:
355
- role_checklist = [
356
- "Check sanctioning steps",
357
- "Update registers",
358
- "Obtain necessary approvals",
359
- "Notify stakeholders"
360
- ]
361
  results.append({
362
  "clause_text": clause_text,
363
  "summary": summary,
 
11
 
12
  logger = logging.getLogger("voicebot")
13
 
14
# Ordered (trigger terms, checklist) rules. Order is significant: the first
# rule with a matching term wins, so the specific role phrases are listed
# before the broad single-word categories at the end.
_ROLE_CHECKLIST_RULES = (
    # Pension Beneficiaries & Claimants
    (
        ('pension eligibility', 'pension documents', 'how to apply pension', 'pension application'),
        (
            "Verify service eligibility (minimum 10 years qualifying service)",
            "Gather required documents (service book, PPO, identity proof)",
            "Check for any departmental proceedings or vigilance cases",
            "Apply through proper channel 6 months before retirement",
            "Follow up with pension disbursing authority for processing",
        ),
    ),
    (
        ('family pension', 'widow pension', 'dependent pension'),
        (
            "Obtain death certificate and service documents of deceased employee",
            "Submit family pension application with nominee details",
            "Provide proof of relationship and dependency",
            "Get certificate from employer about last drawn salary",
            "Register with pension disbursing bank for regular payments",
        ),
    ),
    (
        ('pension calculation', 'pension amount'),
        (
            "Collect last pay certificate with basic pay and DA details",
            "Calculate qualifying service excluding breaks/suspensions",
            "Apply pension formula: (Last pay × service years) ÷ 70",
            "Check for minimum pension ceiling and DA applicability",
            "Verify commutation options if considering lump sum",
        ),
    ),
    # Procurement Officers & Bidders
    (
        ('tender process', 'bid submission', 'procurement threshold'),
        (
            "Verify procurement threshold limits and delegation of powers",
            "Check MSME purchase preference and price benefits applicable",
            "Ensure technical specifications are non-discriminatory",
            "Follow mandatory e-procurement process through GeM/portal",
            "Maintain proper documentation for audit trail",
        ),
    ),
    (
        ('msme relaxation', 'msme benefits'),
        (
            "Verify MSME registration certificate validity",
            "Apply 15% price preference for MSME quotes",
            "Check exemption from EMD (Earnest Money Deposit)",
            "Ensure MSME gets advance payment facility if applicable",
            "Follow tender splitting norms for MSME participation",
        ),
    ),
    (
        ('gem portal', 'vendor registration'),
        (
            "Complete vendor registration on Government e-Marketplace",
            "Upload all required business documents and certificates",
            "Get product/service catalog approved by GeM",
            "Maintain competitive pricing and service ratings",
            "Respond promptly to buyer inquiries and orders",
        ),
    ),
    # Finance Staff
    (
        ('sanctioning authority', 'financial approval', 'expenditure sanction'),
        (
            "Verify delegated financial powers and approval limits",
            "Check budget provision and availability of funds",
            "Ensure compliance with financial rules and procedures",
            "Obtain necessary pre-audit clearance if required",
            "Maintain proper accounting and audit trail",
        ),
    ),
    (
        ('budget allocation', 'fund release'),
        (
            "Verify budget allocation in approved estimates",
            "Check fund availability in treasury/bank account",
            "Ensure proper budget head classification",
            "Follow fund release schedule and priority guidelines",
            "Update budget utilization registers promptly",
        ),
    ),
    (
        ('audit compliance', 'financial audit'),
        (
            "Maintain all vouchers and supporting documents",
            "Ensure transactions are recorded in proper registers",
            "Respond to audit queries within stipulated time",
            "Implement audit recommendations and report compliance",
            "Conduct internal audit and review before external audit",
        ),
    ),
    # Leadership & Policymakers
    (
        ('policy impact', 'scenario analysis'),
        (
            "Gather baseline data and impact measurement parameters",
            "Conduct stakeholder consultation and feedback analysis",
            "Prepare cost-benefit analysis for different scenarios",
            "Assess implementation feasibility and resource requirements",
            "Develop monitoring and evaluation framework",
        ),
    ),
    (
        ('evidence pack', 'policy brief'),
        (
            "Compile relevant policy documents and legal framework",
            "Gather statistical data and trend analysis",
            "Include comparative analysis from other states/countries",
            "Prepare executive summary with key recommendations",
            "Ensure all sources are cited and verifiable",
        ),
    ),
    # General categories with enhanced checklists
    (
        ("pension",),
        (
            "Check eligibility criteria and service requirements",
            "Collect required documents (service book, PPO, ID proof)",
            "Obtain necessary approvals and clearances",
            "Submit application through proper channel",
            "Follow up with pension office for processing status",
        ),
    ),
    (
        ("procurement", "tender", "bid"),
        (
            "Review procurement guidelines and threshold limits",
            "Check MSME relaxations and price preferences",
            "Prepare comprehensive bid documents",
            "Ensure compliance with technical specifications",
            "Submit bid through approved e-procurement platform",
        ),
    ),
    (
        ("finance", "budget", "expenditure"),
        (
            "Verify financial delegation and approval limits",
            "Check budget provision and fund availability",
            "Ensure compliance with treasury and accounting rules",
            "Maintain proper documentation for audit",
            "Update financial registers and reports",
        ),
    ),
    (
        ("leave",),
        (
            "Check leave balance and entitlement",
            "Follow prescribed application procedure",
            "Obtain necessary approvals from competent authority",
            "Arrange work coverage during leave period",
            "Update attendance records upon return",
        ),
    ),
)

# Returned when no rule above matches the query.
_DEFAULT_ROLE_CHECKLIST = (
    "Review relevant policy guidelines and procedures",
    "Consult with appropriate authorities if needed",
    "Ensure compliance with applicable rules",
    "Maintain proper documentation",
    "Seek clarification for any doubts",
)


def generate_role_based_checklist(query: str, content: str) -> list:
    """Return a role-specific action checklist for *query*.

    The query is lower-cased and tested against an ordered table of trigger
    terms; the checklist of the first rule with a matching term is returned,
    or a generic checklist when nothing matches.

    Terms are matched at the start of a word rather than as raw substrings,
    so "pensions" and "bidding" still match "pension" and "bid", while
    "suspension" and "forbidden" no longer trigger the pension/procurement
    checklists.

    Args:
        query: The user's question. ``None`` or an empty string yields the
            generic checklist.
        content: Text of the matched document. Accepted for interface
            compatibility; it does not influence the result.

    Returns:
        A new list of checklist strings (safe for the caller to mutate).
    """
    # Local import: the file's top-level import block is outside this block.
    import re

    query_lower = (query or "").lower()

    def mentions(term: str) -> bool:
        # \b anchors the term to a word start; the tail is left open so
        # plurals and inflections ("thresholds", "bidding") still match.
        return re.search(r"\b" + re.escape(term), query_lower) is not None

    for terms, checklist in _ROLE_CHECKLIST_RULES:
        if any(mentions(term) for term in terms):
            return list(checklist)
    return list(_DEFAULT_ROLE_CHECKLIST)
156
+
157
  # Fallback content for when database is empty
158
  FALLBACK_CONTENT = {
159
  "pension": """Pension is a regular payment made during a person's retirement from an investment fund. For government employees in India, pension includes:
 
190
  - Monthly credit via NEFT
191
 
192
  3. Benefits include pension, gratuity, and provident fund
193
+ 4. Enhanced benefits for teachers and staff""",
194
+
195
+ "procurement": """Government Procurement Guidelines:
196
+
197
+ 1. Threshold Limits:
198
+ - Goods: ₹25,000 to ₹25 lakh (departmental purchase committee)
199
+ - Works: ₹1 lakh to ₹5 crore (various committees)
200
+ - Services: As per delegation of powers
201
+
202
+ 2. MSME Benefits:
203
+ - 15% price preference in competitive bids
204
+ - Exemption from EMD (Earnest Money Deposit)
205
+ - No tender fee for MSME enterprises
206
+ - Advance payment facility available
207
+
208
+ 3. GeM Portal Usage:
209
+ - Mandatory for central government purchases
210
+ - Direct purchase up to ₹5 lakh
211
+ - Rate contract for common items""",
212
+
213
+ "finance": """Financial Management Guidelines:
214
+
215
+ 1. Sanctioning Authority:
216
+ - As per delegation of financial powers
217
+ - Budget provision must be available
218
+ - Pre-audit clearance where required
219
+
220
+ 2. Documentation:
221
+ - All expenditure must have proper vouchers
222
+ - Budget registers to be maintained
223
+ - Audit trail for all transactions
224
+
225
+ 3. Treasury Rules:
226
+ - Follow prescribed payment procedures
227
+ - Maintain cash book and other registers
228
+ - Submit periodic returns and statements"""
229
  }
230
 
231
  def get_fallback_content(query: str) -> List[Dict[str, Any]]:
 
307
  "Obtain necessary approvals",
308
  "Maintain leave records"
309
  ]
310
+ elif any(word in query_lower for word in ["audit", "financial", "budget", "expenditure", "accounts"]):
311
+ fallback_text = f"Regarding your query about '{query}', government financial audits and accounts are typically maintained at departmental and central levels. Financial audits cover budget utilization, expenditure patterns, and compliance with financial rules. For specific audit reports, you would need to access official government finance portals or contact the concerned audit department."
312
+ checklist = [
313
+ "Contact Controller and Auditor General (CAG) office",
314
+ "Check government finance portals for audit reports",
315
+ "Request specific financial year audit documents",
316
+ "Verify with concerned department's accounts section"
317
+ ]
318
+ elif any(word in query_lower for word in ["training", "development", "skill", "course"]):
319
+ fallback_text = f"Regarding your query about '{query}', government training and development programs are designed to enhance employee capabilities. These include induction training, skill development courses, leadership programs, and specialized technical training through various government training institutes."
320
+ checklist = [
321
+ "Check available training programs in your department",
322
+ "Contact training institutes for course details",
323
+ "Apply for relevant skill development programs",
324
+ "Utilize online learning platforms like iGOT Karmayogi"
325
+ ]
326
  else:
327
  fallback_text = f"I understand you're asking about '{query}'. While I don't have specific documents loaded for this query, I can help with government policies, pension rules, allowances, and administrative procedures. Please try rephrasing your question or ask about specific government benefits."
328
  checklist = [
 
393
  # Enhance query for better relevance based on category
394
  enhanced_query = query
395
 
396
+ # Role-specific query enhancement
397
+
398
+ # Pension Beneficiary queries
399
+ if any(word in query_lower for word in ["pension eligibility", "pension documents", "how to apply pension", "pension certificate"]):
400
+ enhanced_query = f"{query} pension eligibility documents application process beneficiary requirements"
401
+ elif any(word in query_lower for word in ["family pension", "widow pension", "dependent pension"]):
402
+ enhanced_query = f"{query} family pension eligibility widow dependent benefits"
403
+ elif any(word in query_lower for word in ["pension calculation", "pension amount", "pension formula"]):
404
+ enhanced_query = f"{query} pension calculation formula amount computation service years"
405
+
406
+ # Procurement Officer queries
407
+ elif any(word in query_lower for word in ["tender process", "bid submission", "procurement threshold"]):
408
+ enhanced_query = f"{query} procurement tender bidding process thresholds guidelines"
409
+ elif any(word in query_lower for word in ["msme relaxation", "msme benefits", "small scale industry"]):
410
+ enhanced_query = f"{query} msme relaxation benefits procurement small scale industry"
411
+ elif any(word in query_lower for word in ["gem portal", "vendor registration", "vendor empanelment"]):
412
+ enhanced_query = f"{query} gem portal vendor registration empanelment process"
413
+
414
+ # Finance Staff queries
415
+ elif any(word in query_lower for word in ["sanctioning authority", "financial approval", "expenditure sanction"]):
416
+ enhanced_query = f"{query} sanctioning authority financial approval expenditure delegation"
417
+ elif any(word in query_lower for word in ["budget allocation", "fund release", "treasury"]):
418
+ enhanced_query = f"{query} budget allocation fund release treasury rules procedures"
419
+ elif any(word in query_lower for word in ["audit compliance", "financial audit", "audit report"]):
420
+ enhanced_query = f"{query} audit compliance financial audit reporting procedures"
421
+
422
+ # Leadership/Policymaker queries
423
+ elif any(word in query_lower for word in ["policy impact", "scenario analysis", "comparative analysis"]):
424
+ enhanced_query = f"{query} policy impact scenario analysis comparison evidence"
425
+ elif any(word in query_lower for word in ["evidence pack", "policy brief", "decision support"]):
426
+ enhanced_query = f"{query} evidence pack policy brief decision support documentation"
427
+
428
+ # General category queries
429
+ elif "pension" in query_lower:
430
  if any(word in query_lower for word in ["changes", "impact", "rules"]):
431
  enhanced_query = f"{query} pension rules retirement benefits modifications"
432
  elif "calculation" in query_lower or "formula" in query_lower:
 
451
  # Transfer queries
452
  elif any(word in query_lower for word in ["transfer", "posting"]):
453
  enhanced_query = f"{query} transfer posting policy rules"
454
+
455
+ # Audit and financial queries
456
+ elif any(word in query_lower for word in ["audit", "financial", "budget", "expenditure", "accounts", "finance"]):
457
+ enhanced_query = f"{query} audit financial budget expenditure accounts"
458
+
459
+ # Training and development queries
460
+ elif any(word in query_lower for word in ["training", "development", "course", "skill"]):
461
+ enhanced_query = f"{query} training development skill course capacity building"
462
+
463
+ # Salary and pay queries
464
+ elif any(word in query_lower for word in ["salary", "pay", "grade", "scale"]):
465
+ enhanced_query = f"{query} salary pay grade scale compensation"
466
 
467
  logger.info(f"🔍 Enhanced query: '{enhanced_query}' (original: '{query}')")
468
 
 
503
  # Calculate relevance score based on query intent
504
  relevance_score = getattr(doc, 'score', 0.5) # Base score
505
 
506
+ # Define query categories and their keywords (including role-specific)
507
  query_categories = {
508
+ 'pension_beneficiary': ['pension eligibility', 'pension documents', 'pension application', 'pension certificate',
509
+ 'family pension', 'pension calculation', 'how to apply pension', 'pension office',
510
+ 'life certificate', 'pension arrears', 'commutation', 'gratuity eligibility'],
511
+ 'procurement_officer': ['tender process', 'bid submission', 'procurement threshold', 'gem portal',
512
+ 'msme relaxation', 'vendor registration', 'procurement checklist', 'bid evaluation',
513
+ 'tender documents', 'bidding process', 'contract award', 'vendor empanelment'],
514
+ 'finance_staff': ['sanctioning authority', 'financial approval', 'budget allocation', 'expenditure sanction',
515
+ 'financial registers', 'audit compliance', 'treasury rules', 'payment authorization',
516
+ 'financial delegation', 'fund release', 'financial procedure', 'voucher processing'],
517
+ 'leadership_policymaker': ['policy impact', 'scenario analysis', 'evidence pack', 'policy brief',
518
+ 'comparative analysis', 'decision support', 'policy evaluation', 'impact assessment'],
519
  'pension': ['pension', 'retirement', 'gratuity', 'provident fund', 'gpf', 'cpf', 'superannuation'],
520
  'leave': ['leave', 'casual leave', 'earned leave', 'medical leave', 'maternity', 'paternity'],
521
  'allowance': ['allowance', 'dearness allowance', 'da', 'hra', 'house rent', 'travel allowance', 'increment'],
 
523
  'medical': ['medical', 'health', 'treatment', 'reimbursement', 'cghs', 'hospital'],
524
  'transfer': ['transfer', 'posting', 'deputation', 'cadre'],
525
  'promotion': ['promotion', 'seniority', 'grade', 'advancement', 'career progression'],
526
+ 'service': ['service', 'conduct', 'discipline', 'rules', 'regulation'],
527
+ 'audit': ['audit', 'financial', 'budget', 'expenditure', 'accounts', 'finance', 'yearly', 'annual'],
528
+ 'training': ['training', 'development', 'skill', 'course', 'capacity building', 'learning'],
529
+ 'salary': ['salary', 'pay', 'grade', 'scale', 'compensation', 'structure']
530
  }
531
 
532
+ # Content categories - what each document type contains (including role-specific)
533
  content_categories = {
534
+ 'pension_beneficiary': ['pension', 'retirement', 'eligibility', 'documents', 'application', 'certificate',
535
+ 'family pension', 'gratuity', 'commutation', 'beneficiary', 'process', 'office'],
536
+ 'procurement_officer': ['procurement', 'tender', 'bidding', 'contract', 'vendor', 'gem', 'msme',
537
+ 'threshold', 'process', 'evaluation', 'registration', 'checklist', 'guidelines'],
538
+ 'finance_staff': ['sanctioning', 'approval', 'budget', 'expenditure', 'treasury', 'audit',
539
+ 'financial', 'fund', 'payment', 'authorization', 'delegation', 'registers'],
540
+ 'leadership_policymaker': ['policy', 'impact', 'scenario', 'analysis', 'evidence', 'comparative',
541
+ 'evaluation', 'decision', 'strategic', 'implementation', 'assessment'],
542
  'pension': ['pension', 'retirement', 'gratuity', 'provident fund', 'superannuation', 'benefits'],
543
  'leave': ['leave', 'casual', 'earned', 'medical leave', 'maternity'],
544
  'allowance': ['allowance', 'dearness', 'house rent', 'travel', 'da', 'hra', 'increment'],
 
546
  'medical': ['medical', 'health', 'cghs', 'reimbursement', 'treatment'],
547
  'transfer': ['transfer', 'posting', 'deputation', 'cadre'],
548
  'promotion': ['promotion', 'seniority', 'grade pay', 'advancement'],
549
+ 'audit': ['audit', 'financial', 'budget', 'expenditure', 'accounts', 'finance', 'yearly'],
550
+ 'training': ['training', 'development', 'skill', 'course', 'capacity', 'learning'],
551
+ 'salary': ['salary', 'pay', 'grade', 'scale', 'compensation', 'structure'],
552
  'service': ['service rules', 'conduct', 'discipline', 'misconduct']
553
  }
554
 
 
597
  clause_text = doc.page_content
598
  # Simple extractive summary: first sentence or up to 2 lines
599
  summary = clause_text.split(". ")[0][:180] + ("..." if len(clause_text) > 180 else "")
600
+ # Enhanced role-aware checklist logic
601
+ role_checklist = generate_role_based_checklist(query, clause_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
602
  results.append({
603
  "clause_text": clause_text,
604
  "summary": summary,