Spaces:
Runtime error
Runtime error
Commit ·
4a1bc78
1
Parent(s): 9341027
Add 89999999999999999999999999
Browse files- enhanced_websocket_handler.py +77 -9
- rag_service.py +271 -30
enhanced_websocket_handler.py
CHANGED
|
@@ -27,15 +27,48 @@ hybrid_llm_service = HybridLLMService()
|
|
| 27 |
logger = logging.getLogger("voicebot")
|
| 28 |
|
| 29 |
def analyze_query_context(query: str) -> dict:
|
| 30 |
-
"""Analyze query to determine if it's document-related or general"""
|
| 31 |
query_lower = query.lower()
|
| 32 |
|
| 33 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
doc_keywords = [
|
| 35 |
'pension', 'leave', 'allowance', 'da', 'dearness', 'procurement', 'tender',
|
| 36 |
'medical', 'reimbursement', 'transfer', 'posting', 'promotion', 'service',
|
| 37 |
'rules', 'policy', 'government', 'circular', 'notification', 'benefits',
|
| 38 |
-
'gratuity', 'provident fund', 'retirement', 'salary', 'pay commission'
|
|
|
|
|
|
|
| 39 |
]
|
| 40 |
|
| 41 |
# General conversation keywords
|
|
@@ -45,14 +78,26 @@ def analyze_query_context(query: str) -> dict:
|
|
| 45 |
'time', 'date', 'joke', 'story', 'song', 'recipe', 'movie'
|
| 46 |
]
|
| 47 |
|
| 48 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
doc_matches = sum(1 for kw in doc_keywords if kw in query_lower)
|
| 50 |
general_matches = sum(1 for kw in general_keywords if kw in query_lower)
|
| 51 |
|
| 52 |
# Determine query type
|
| 53 |
-
if doc_matches > 0:
|
| 54 |
query_type = "document_related"
|
| 55 |
-
confidence = min(doc_matches * 0.3, 1.0)
|
| 56 |
elif general_matches > 0:
|
| 57 |
query_type = "general_conversation"
|
| 58 |
confidence = min(general_matches * 0.4, 1.0)
|
|
@@ -67,7 +112,9 @@ def analyze_query_context(query: str) -> dict:
|
|
| 67 |
"type": query_type,
|
| 68 |
"confidence": confidence,
|
| 69 |
"doc_keywords_found": doc_matches,
|
| 70 |
-
"general_keywords_found": general_matches
|
|
|
|
|
|
|
| 71 |
}
|
| 72 |
|
| 73 |
async def generate_llm_fallback_response(user_message: str, query_context: dict) -> str:
|
|
@@ -76,12 +123,33 @@ async def generate_llm_fallback_response(user_message: str, query_context: dict)
|
|
| 76 |
# Determine which LLM to use based on query complexity
|
| 77 |
provider = hybrid_llm_service.choose_llm_provider(user_message)
|
| 78 |
|
| 79 |
-
# Create
|
|
|
|
| 80 |
if query_context.get("type") == "general_conversation":
|
| 81 |
system_prompt = """You are a helpful assistant for a government document system.
|
| 82 |
The user is asking a general question not related to government documents.
|
| 83 |
Provide a friendly, helpful response and gently guide them to ask about government policies,
|
| 84 |
-
pension rules, leave policies, or other administrative matters if they need official information."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
else:
|
| 86 |
system_prompt = """You are an AI assistant for government document queries.
|
| 87 |
The user asked about something that wasn't found in the document database.
|
|
|
|
| 27 |
logger = logging.getLogger("voicebot")
|
| 28 |
|
| 29 |
def analyze_query_context(query: str) -> dict:
|
| 30 |
+
"""Analyze query to determine if it's document-related or general, and identify user role"""
|
| 31 |
query_lower = query.lower()
|
| 32 |
|
| 33 |
+
# Role-specific keywords and queries
|
| 34 |
+
role_patterns = {
|
| 35 |
+
'pension_beneficiary': [
|
| 36 |
+
'pension eligibility', 'pension documents', 'pension application', 'retirement benefits',
|
| 37 |
+
'pension calculation', 'pension amount', 'family pension', 'commutation',
|
| 38 |
+
'gratuity eligibility', 'provident fund withdrawal', 'medical benefits after retirement',
|
| 39 |
+
'pension certificate', 'life certificate', 'pension arrears', 'how to apply pension',
|
| 40 |
+
'pension office', 'pension disbursement', 'pension inquiry', 'pension status'
|
| 41 |
+
],
|
| 42 |
+
'procurement_officer': [
|
| 43 |
+
'tender process', 'bid submission', 'procurement thresholds', 'gem portal',
|
| 44 |
+
'msme relaxation', 'vendor registration', 'procurement checklist', 'bid evaluation',
|
| 45 |
+
'tender documents', 'procurement rules', 'bidding process', 'contract award',
|
| 46 |
+
'procurement guidelines', 'tender notice', 'technical bid', 'financial bid',
|
| 47 |
+
'procurement manual', 'vendor empanelment', 'tender committee'
|
| 48 |
+
],
|
| 49 |
+
'finance_staff': [
|
| 50 |
+
'sanctioning authority', 'financial approval', 'budget allocation', 'expenditure sanction',
|
| 51 |
+
'financial registers', 'audit compliance', 'treasury rules', 'payment authorization',
|
| 52 |
+
'financial delegation', 'budget utilization', 'fund release', 'financial procedure',
|
| 53 |
+
'accounting rules', 'financial reporting', 'expenditure control', 'financial audit',
|
| 54 |
+
'cash book', 'voucher processing', 'financial clearance'
|
| 55 |
+
],
|
| 56 |
+
'leadership_policymaker': [
|
| 57 |
+
'policy impact', 'scenario analysis', 'cost comparison', 'policy implementation',
|
| 58 |
+
'evidence pack', 'policy evaluation', 'impact assessment', 'strategic planning',
|
| 59 |
+
'policy formulation', 'comparative analysis', 'policy review', 'governance framework',
|
| 60 |
+
'administrative reform', 'policy effectiveness', 'decision support', 'policy brief'
|
| 61 |
+
]
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
# Government document keywords (expanded)
|
| 65 |
doc_keywords = [
|
| 66 |
'pension', 'leave', 'allowance', 'da', 'dearness', 'procurement', 'tender',
|
| 67 |
'medical', 'reimbursement', 'transfer', 'posting', 'promotion', 'service',
|
| 68 |
'rules', 'policy', 'government', 'circular', 'notification', 'benefits',
|
| 69 |
+
'gratuity', 'provident fund', 'retirement', 'salary', 'pay commission',
|
| 70 |
+
'eligibility', 'documents', 'application', 'process', 'approval', 'sanction',
|
| 71 |
+
'audit', 'finance', 'budget', 'expenditure', 'treasury', 'guidelines'
|
| 72 |
]
|
| 73 |
|
| 74 |
# General conversation keywords
|
|
|
|
| 78 |
'time', 'date', 'joke', 'story', 'song', 'recipe', 'movie'
|
| 79 |
]
|
| 80 |
|
| 81 |
+
# Detect user role
|
| 82 |
+
detected_role = None
|
| 83 |
+
role_confidence = 0.0
|
| 84 |
+
|
| 85 |
+
for role, patterns in role_patterns.items():
|
| 86 |
+
role_matches = sum(1 for pattern in patterns if pattern in query_lower)
|
| 87 |
+
if role_matches > 0:
|
| 88 |
+
current_confidence = min(role_matches * 0.4, 1.0)
|
| 89 |
+
if current_confidence > role_confidence:
|
| 90 |
+
detected_role = role
|
| 91 |
+
role_confidence = current_confidence
|
| 92 |
+
|
| 93 |
+
# Count general matches
|
| 94 |
doc_matches = sum(1 for kw in doc_keywords if kw in query_lower)
|
| 95 |
general_matches = sum(1 for kw in general_keywords if kw in query_lower)
|
| 96 |
|
| 97 |
# Determine query type
|
| 98 |
+
if doc_matches > 0 or detected_role:
|
| 99 |
query_type = "document_related"
|
| 100 |
+
confidence = max(min(doc_matches * 0.3, 1.0), role_confidence)
|
| 101 |
elif general_matches > 0:
|
| 102 |
query_type = "general_conversation"
|
| 103 |
confidence = min(general_matches * 0.4, 1.0)
|
|
|
|
| 112 |
"type": query_type,
|
| 113 |
"confidence": confidence,
|
| 114 |
"doc_keywords_found": doc_matches,
|
| 115 |
+
"general_keywords_found": general_matches,
|
| 116 |
+
"detected_role": detected_role,
|
| 117 |
+
"role_confidence": role_confidence
|
| 118 |
}
|
| 119 |
|
| 120 |
async def generate_llm_fallback_response(user_message: str, query_context: dict) -> str:
|
|
|
|
| 123 |
# Determine which LLM to use based on query complexity
|
| 124 |
provider = hybrid_llm_service.choose_llm_provider(user_message)
|
| 125 |
|
| 126 |
+
# Create role-aware system prompt
|
| 127 |
+
detected_role = query_context.get("detected_role")
|
| 128 |
if query_context.get("type") == "general_conversation":
|
| 129 |
system_prompt = """You are a helpful assistant for a government document system.
|
| 130 |
The user is asking a general question not related to government documents.
|
| 131 |
Provide a friendly, helpful response and gently guide them to ask about government policies,
|
| 132 |
+
pension rules, leave policies, procurement procedures, or other administrative matters if they need official information."""
|
| 133 |
+
elif detected_role == "pension_beneficiary":
|
| 134 |
+
system_prompt = """You are an AI assistant specializing in government pension and retirement benefits.
|
| 135 |
+
The user appears to be a pension beneficiary or claimant. Provide helpful information about pension eligibility,
|
| 136 |
+
application processes, required documents, and procedures. Always remind them to verify information with
|
| 137 |
+
the pension disbursing authority and consult official government sources for the most current rules."""
|
| 138 |
+
elif detected_role == "procurement_officer":
|
| 139 |
+
system_prompt = """You are an AI assistant specializing in government procurement procedures.
|
| 140 |
+
The user appears to be involved in procurement or bidding processes. Provide helpful information about
|
| 141 |
+
tender procedures, MSME benefits, GeM portal usage, and procurement guidelines. Always remind them to
|
| 142 |
+
follow current procurement rules and consult the latest government circulars."""
|
| 143 |
+
elif detected_role == "finance_staff":
|
| 144 |
+
system_prompt = """You are an AI assistant specializing in government financial procedures.
|
| 145 |
+
The user appears to be finance staff. Provide helpful information about sanctioning procedures,
|
| 146 |
+
budget management, audit compliance, and treasury rules. Always remind them to follow current
|
| 147 |
+
financial rules and consult with the accounts department for official procedures."""
|
| 148 |
+
elif detected_role == "leadership_policymaker":
|
| 149 |
+
system_prompt = """You are an AI assistant specializing in policy analysis and decision support.
|
| 150 |
+
The user appears to be in a leadership or policy-making role. Provide helpful information about
|
| 151 |
+
policy impact analysis, evidence-based decision making, and strategic planning. Always recommend
|
| 152 |
+
consulting with relevant departments and conducting proper stakeholder consultations."""
|
| 153 |
else:
|
| 154 |
system_prompt = """You are an AI assistant for government document queries.
|
| 155 |
The user asked about something that wasn't found in the document database.
|
rag_service.py
CHANGED
|
@@ -11,6 +11,149 @@ import asyncio
|
|
| 11 |
|
| 12 |
logger = logging.getLogger("voicebot")
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# Fallback content for when database is empty
|
| 15 |
FALLBACK_CONTENT = {
|
| 16 |
"pension": """Pension is a regular payment made during a person's retirement from an investment fund. For government employees in India, pension includes:
|
|
@@ -47,7 +190,42 @@ DA 6% Increment Impact:
|
|
| 47 |
- Monthly credit via NEFT
|
| 48 |
|
| 49 |
3. Benefits include pension, gratuity, and provident fund
|
| 50 |
-
4. Enhanced benefits for teachers and staff"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
}
|
| 52 |
|
| 53 |
def get_fallback_content(query: str) -> List[Dict[str, Any]]:
|
|
@@ -129,6 +307,22 @@ def get_fallback_content(query: str) -> List[Dict[str, Any]]:
|
|
| 129 |
"Obtain necessary approvals",
|
| 130 |
"Maintain leave records"
|
| 131 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
else:
|
| 133 |
fallback_text = f"I understand you're asking about '{query}'. While I don't have specific documents loaded for this query, I can help with government policies, pension rules, allowances, and administrative procedures. Please try rephrasing your question or ask about specific government benefits."
|
| 134 |
checklist = [
|
|
@@ -199,8 +393,40 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
|
|
| 199 |
# Enhance query for better relevance based on category
|
| 200 |
enhanced_query = query
|
| 201 |
|
| 202 |
-
#
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
if any(word in query_lower for word in ["changes", "impact", "rules"]):
|
| 205 |
enhanced_query = f"{query} pension rules retirement benefits modifications"
|
| 206 |
elif "calculation" in query_lower or "formula" in query_lower:
|
|
@@ -225,6 +451,18 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
|
|
| 225 |
# Transfer queries
|
| 226 |
elif any(word in query_lower for word in ["transfer", "posting"]):
|
| 227 |
enhanced_query = f"{query} transfer posting policy rules"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
logger.info(f"🔍 Enhanced query: '{enhanced_query}' (original: '{query}')")
|
| 230 |
|
|
@@ -265,8 +503,19 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
|
|
| 265 |
# Calculate relevance score based on query intent
|
| 266 |
relevance_score = getattr(doc, 'score', 0.5) # Base score
|
| 267 |
|
| 268 |
-
# Define query categories and their keywords
|
| 269 |
query_categories = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
'pension': ['pension', 'retirement', 'gratuity', 'provident fund', 'gpf', 'cpf', 'superannuation'],
|
| 271 |
'leave': ['leave', 'casual leave', 'earned leave', 'medical leave', 'maternity', 'paternity'],
|
| 272 |
'allowance': ['allowance', 'dearness allowance', 'da', 'hra', 'house rent', 'travel allowance', 'increment'],
|
|
@@ -274,11 +523,22 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
|
|
| 274 |
'medical': ['medical', 'health', 'treatment', 'reimbursement', 'cghs', 'hospital'],
|
| 275 |
'transfer': ['transfer', 'posting', 'deputation', 'cadre'],
|
| 276 |
'promotion': ['promotion', 'seniority', 'grade', 'advancement', 'career progression'],
|
| 277 |
-
'service': ['service', 'conduct', 'discipline', 'rules', 'regulation']
|
|
|
|
|
|
|
|
|
|
| 278 |
}
|
| 279 |
|
| 280 |
-
# Content categories - what each document type contains
|
| 281 |
content_categories = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
'pension': ['pension', 'retirement', 'gratuity', 'provident fund', 'superannuation', 'benefits'],
|
| 283 |
'leave': ['leave', 'casual', 'earned', 'medical leave', 'maternity'],
|
| 284 |
'allowance': ['allowance', 'dearness', 'house rent', 'travel', 'da', 'hra', 'increment'],
|
|
@@ -286,6 +546,9 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
|
|
| 286 |
'medical': ['medical', 'health', 'cghs', 'reimbursement', 'treatment'],
|
| 287 |
'transfer': ['transfer', 'posting', 'deputation', 'cadre'],
|
| 288 |
'promotion': ['promotion', 'seniority', 'grade pay', 'advancement'],
|
|
|
|
|
|
|
|
|
|
| 289 |
'service': ['service rules', 'conduct', 'discipline', 'misconduct']
|
| 290 |
}
|
| 291 |
|
|
@@ -334,30 +597,8 @@ async def search_documents_async(query: str, limit: int = 5) -> List[Dict[str, A
|
|
| 334 |
clause_text = doc.page_content
|
| 335 |
# Simple extractive summary: first sentence or up to 2 lines
|
| 336 |
summary = clause_text.split(". ")[0][:180] + ("..." if len(clause_text) > 180 else "")
|
| 337 |
-
#
|
| 338 |
-
role_checklist =
|
| 339 |
-
query_lower = query.lower()
|
| 340 |
-
if "pension" in query_lower:
|
| 341 |
-
role_checklist = [
|
| 342 |
-
"Check eligibility (service years, misconduct)",
|
| 343 |
-
"Collect required documents (service book, ID, proof)",
|
| 344 |
-
"Obtain approvals (sanctioning authority)",
|
| 345 |
-
"Submit application to pension office"
|
| 346 |
-
]
|
| 347 |
-
elif "procurement" in query_lower or "bid" in query_lower:
|
| 348 |
-
role_checklist = [
|
| 349 |
-
"Review procurement thresholds and MSME relaxations",
|
| 350 |
-
"Prepare bid documents",
|
| 351 |
-
"Complete registration and approvals",
|
| 352 |
-
"Submit bid before deadline"
|
| 353 |
-
]
|
| 354 |
-
elif "finance" in query_lower:
|
| 355 |
-
role_checklist = [
|
| 356 |
-
"Check sanctioning steps",
|
| 357 |
-
"Update registers",
|
| 358 |
-
"Obtain necessary approvals",
|
| 359 |
-
"Notify stakeholders"
|
| 360 |
-
]
|
| 361 |
results.append({
|
| 362 |
"clause_text": clause_text,
|
| 363 |
"summary": summary,
|
|
|
|
| 11 |
|
| 12 |
logger = logging.getLogger("voicebot")
|
| 13 |
|
| 14 |
+
# Ordered (trigger phrases, checklist) rules. The first rule with a matching
# trigger wins, so the specific role phrases are listed before the broad
# single-word categories that would otherwise shadow them.
_ROLE_CHECKLIST_RULES = (
    # Pension Beneficiaries & Claimants
    (
        ('pension eligibility', 'pension documents', 'how to apply pension', 'pension application'),
        (
            "Verify service eligibility (minimum 10 years qualifying service)",
            "Gather required documents (service book, PPO, identity proof)",
            "Check for any departmental proceedings or vigilance cases",
            "Apply through proper channel 6 months before retirement",
            "Follow up with pension disbursing authority for processing",
        ),
    ),
    (
        ('family pension', 'widow pension', 'dependent pension'),
        (
            "Obtain death certificate and service documents of deceased employee",
            "Submit family pension application with nominee details",
            "Provide proof of relationship and dependency",
            "Get certificate from employer about last drawn salary",
            "Register with pension disbursing bank for regular payments",
        ),
    ),
    (
        ('pension calculation', 'pension amount'),
        (
            "Collect last pay certificate with basic pay and DA details",
            "Calculate qualifying service excluding breaks/suspensions",
            "Apply pension formula: (Last pay × service years) ÷ 70",
            "Check for minimum pension ceiling and DA applicability",
            "Verify commutation options if considering lump sum",
        ),
    ),
    # Procurement Officers & Bidders
    (
        ('tender process', 'bid submission', 'procurement threshold'),
        (
            "Verify procurement threshold limits and delegation of powers",
            "Check MSME purchase preference and price benefits applicable",
            "Ensure technical specifications are non-discriminatory",
            "Follow mandatory e-procurement process through GeM/portal",
            "Maintain proper documentation for audit trail",
        ),
    ),
    (
        ('msme relaxation', 'msme benefits'),
        (
            "Verify MSME registration certificate validity",
            "Apply 15% price preference for MSME quotes",
            "Check exemption from EMD (Earnest Money Deposit)",
            "Ensure MSME gets advance payment facility if applicable",
            "Follow tender splitting norms for MSME participation",
        ),
    ),
    (
        ('gem portal', 'vendor registration'),
        (
            "Complete vendor registration on Government e-Marketplace",
            "Upload all required business documents and certificates",
            "Get product/service catalog approved by GeM",
            "Maintain competitive pricing and service ratings",
            "Respond promptly to buyer inquiries and orders",
        ),
    ),
    # Finance Staff
    (
        ('sanctioning authority', 'financial approval', 'expenditure sanction'),
        (
            "Verify delegated financial powers and approval limits",
            "Check budget provision and availability of funds",
            "Ensure compliance with financial rules and procedures",
            "Obtain necessary pre-audit clearance if required",
            "Maintain proper accounting and audit trail",
        ),
    ),
    (
        ('budget allocation', 'fund release'),
        (
            "Verify budget allocation in approved estimates",
            "Check fund availability in treasury/bank account",
            "Ensure proper budget head classification",
            "Follow fund release schedule and priority guidelines",
            "Update budget utilization registers promptly",
        ),
    ),
    (
        ('audit compliance', 'financial audit'),
        (
            "Maintain all vouchers and supporting documents",
            "Ensure transactions are recorded in proper registers",
            "Respond to audit queries within stipulated time",
            "Implement audit recommendations and report compliance",
            "Conduct internal audit and review before external audit",
        ),
    ),
    # Leadership & Policymakers
    (
        ('policy impact', 'scenario analysis'),
        (
            "Gather baseline data and impact measurement parameters",
            "Conduct stakeholder consultation and feedback analysis",
            "Prepare cost-benefit analysis for different scenarios",
            "Assess implementation feasibility and resource requirements",
            "Develop monitoring and evaluation framework",
        ),
    ),
    (
        ('evidence pack', 'policy brief'),
        (
            "Compile relevant policy documents and legal framework",
            "Gather statistical data and trend analysis",
            "Include comparative analysis from other states/countries",
            "Prepare executive summary with key recommendations",
            "Ensure all sources are cited and verifiable",
        ),
    ),
    # General categories with enhanced checklists
    (
        ("pension",),
        (
            "Check eligibility criteria and service requirements",
            "Collect required documents (service book, PPO, ID proof)",
            "Obtain necessary approvals and clearances",
            "Submit application through proper channel",
            "Follow up with pension office for processing status",
        ),
    ),
    (
        ("procurement", "tender", "bid"),
        (
            "Review procurement guidelines and threshold limits",
            "Check MSME relaxations and price preferences",
            "Prepare comprehensive bid documents",
            "Ensure compliance with technical specifications",
            "Submit bid through approved e-procurement platform",
        ),
    ),
    (
        ("finance", "budget", "expenditure"),
        (
            "Verify financial delegation and approval limits",
            "Check budget provision and fund availability",
            "Ensure compliance with treasury and accounting rules",
            "Maintain proper documentation for audit",
            "Update financial registers and reports",
        ),
    ),
    (
        ("leave",),
        (
            "Check leave balance and entitlement",
            "Follow prescribed application procedure",
            "Obtain necessary approvals from competent authority",
            "Arrange work coverage during leave period",
            "Update attendance records upon return",
        ),
    ),
)

# Returned when no rule above matches the query.
_DEFAULT_CHECKLIST = (
    "Review relevant policy guidelines and procedures",
    "Consult with appropriate authorities if needed",
    "Ensure compliance with applicable rules",
    "Maintain proper documentation",
    "Seek clarification for any doubts",
)


def generate_role_based_checklist(query: str, content: str) -> list:
    """Return the checklist whose trigger phrases first match the query.

    The query is lower-cased and tested against ``_ROLE_CHECKLIST_RULES`` in
    order: role-specific phrases first, then the broad single-word
    categories. A trigger matches only where it starts a word, so "bids",
    "bidding" and "pensioner" still match while "forbidden" and "suspension"
    do not.

    Args:
        query: The user's question. ``None`` or an empty string is treated
            as a query with no triggers.
        content: Accepted for interface compatibility; it does not influence
            the result.

    Returns:
        A new list of five checklist strings. The generic checklist is
        returned when nothing matches.
    """
    # Imported here because the module's import block is outside this block.
    import re

    query_lower = (query or "").lower()

    for triggers, checklist in _ROLE_CHECKLIST_RULES:
        # Anchor each trigger at a word start; the tail stays open so that
        # plurals and inflections ("tenders", "bidding") keep matching.
        pattern = r"\b(?:" + "|".join(re.escape(t) for t in triggers) + ")"
        if re.search(pattern, query_lower):
            # Copy so callers may mutate the result without touching the table.
            return list(checklist)

    return list(_DEFAULT_CHECKLIST)
|
| 156 |
+
|
| 157 |
# Fallback content for when database is empty
|
| 158 |
FALLBACK_CONTENT = {
|
| 159 |
"pension": """Pension is a regular payment made during a person's retirement from an investment fund. For government employees in India, pension includes:
|
|
|
|
| 190 |
- Monthly credit via NEFT
|
| 191 |
|
| 192 |
3. Benefits include pension, gratuity, and provident fund
|
| 193 |
+
4. Enhanced benefits for teachers and staff""",
|
| 194 |
+
|
| 195 |
+
"procurement": """Government Procurement Guidelines:
|
| 196 |
+
|
| 197 |
+
1. Threshold Limits:
|
| 198 |
+
- Goods: ₹25,000 to ₹25 lakh (departmental purchase committee)
|
| 199 |
+
- Works: ₹1 lakh to ₹5 crore (various committees)
|
| 200 |
+
- Services: As per delegation of powers
|
| 201 |
+
|
| 202 |
+
2. MSME Benefits:
|
| 203 |
+
- 15% price preference in competitive bids
|
| 204 |
+
- Exemption from EMD (Earnest Money Deposit)
|
| 205 |
+
- No tender fee for MSME enterprises
|
| 206 |
+
- Advance payment facility available
|
| 207 |
+
|
| 208 |
+
3. GeM Portal Usage:
|
| 209 |
+
- Mandatory for central government purchases
|
| 210 |
+
- Direct purchase up to ₹5 lakh
|
| 211 |
+
- Rate contract for common items""",
|
| 212 |
+
|
| 213 |
+
"finance": """Financial Management Guidelines:
|
| 214 |
+
|
| 215 |
+
1. Sanctioning Authority:
|
| 216 |
+
- As per delegation of financial powers
|
| 217 |
+
- Budget provision must be available
|
| 218 |
+
- Pre-audit clearance where required
|
| 219 |
+
|
| 220 |
+
2. Documentation:
|
| 221 |
+
- All expenditure must have proper vouchers
|
| 222 |
+
- Budget registers to be maintained
|
| 223 |
+
- Audit trail for all transactions
|
| 224 |
+
|
| 225 |
+
3. Treasury Rules:
|
| 226 |
+
- Follow prescribed payment procedures
|
| 227 |
+
- Maintain cash book and other registers
|
| 228 |
+
- Submit periodic returns and statements"""
|
| 229 |
}
|
| 230 |
|
| 231 |
def get_fallback_content(query: str) -> List[Dict[str, Any]]:
|
|
|
|
| 307 |
"Obtain necessary approvals",
|
| 308 |
"Maintain leave records"
|
| 309 |
]
|
| 310 |
+
elif any(word in query_lower for word in ["audit", "financial", "budget", "expenditure", "accounts"]):
|
| 311 |
+
fallback_text = f"Regarding your query about '{query}', government financial audits and accounts are typically maintained at departmental and central levels. Financial audits cover budget utilization, expenditure patterns, and compliance with financial rules. For specific audit reports, you would need to access official government finance portals or contact the concerned audit department."
|
| 312 |
+
checklist = [
|
| 313 |
+
"Contact Comptroller and Auditor General (CAG) office",
|
| 314 |
+
"Check government finance portals for audit reports",
|
| 315 |
+
"Request specific financial year audit documents",
|
| 316 |
+
"Verify with concerned department's accounts section"
|
| 317 |
+
]
|
| 318 |
+
elif any(word in query_lower for word in ["training", "development", "skill", "course"]):
|
| 319 |
+
fallback_text = f"Regarding your query about '{query}', government training and development programs are designed to enhance employee capabilities. These include induction training, skill development courses, leadership programs, and specialized technical training through various government training institutes."
|
| 320 |
+
checklist = [
|
| 321 |
+
"Check available training programs in your department",
|
| 322 |
+
"Contact training institutes for course details",
|
| 323 |
+
"Apply for relevant skill development programs",
|
| 324 |
+
"Utilize online learning platforms like iGOT Karmayogi"
|
| 325 |
+
]
|
| 326 |
else:
|
| 327 |
fallback_text = f"I understand you're asking about '{query}'. While I don't have specific documents loaded for this query, I can help with government policies, pension rules, allowances, and administrative procedures. Please try rephrasing your question or ask about specific government benefits."
|
| 328 |
checklist = [
|
|
|
|
| 393 |
# Enhance query for better relevance based on category
|
| 394 |
enhanced_query = query
|
| 395 |
|
| 396 |
+
# Role-specific query enhancement
|
| 397 |
+
|
| 398 |
+
# Pension Beneficiary queries
|
| 399 |
+
if any(word in query_lower for word in ["pension eligibility", "pension documents", "how to apply pension", "pension certificate"]):
|
| 400 |
+
enhanced_query = f"{query} pension eligibility documents application process beneficiary requirements"
|
| 401 |
+
elif any(word in query_lower for word in ["family pension", "widow pension", "dependent pension"]):
|
| 402 |
+
enhanced_query = f"{query} family pension eligibility widow dependent benefits"
|
| 403 |
+
elif any(word in query_lower for word in ["pension calculation", "pension amount", "pension formula"]):
|
| 404 |
+
enhanced_query = f"{query} pension calculation formula amount computation service years"
|
| 405 |
+
|
| 406 |
+
# Procurement Officer queries
|
| 407 |
+
elif any(word in query_lower for word in ["tender process", "bid submission", "procurement threshold"]):
|
| 408 |
+
enhanced_query = f"{query} procurement tender bidding process thresholds guidelines"
|
| 409 |
+
elif any(word in query_lower for word in ["msme relaxation", "msme benefits", "small scale industry"]):
|
| 410 |
+
enhanced_query = f"{query} msme relaxation benefits procurement small scale industry"
|
| 411 |
+
elif any(word in query_lower for word in ["gem portal", "vendor registration", "vendor empanelment"]):
|
| 412 |
+
enhanced_query = f"{query} gem portal vendor registration empanelment process"
|
| 413 |
+
|
| 414 |
+
# Finance Staff queries
|
| 415 |
+
elif any(word in query_lower for word in ["sanctioning authority", "financial approval", "expenditure sanction"]):
|
| 416 |
+
enhanced_query = f"{query} sanctioning authority financial approval expenditure delegation"
|
| 417 |
+
elif any(word in query_lower for word in ["budget allocation", "fund release", "treasury"]):
|
| 418 |
+
enhanced_query = f"{query} budget allocation fund release treasury rules procedures"
|
| 419 |
+
elif any(word in query_lower for word in ["audit compliance", "financial audit", "audit report"]):
|
| 420 |
+
enhanced_query = f"{query} audit compliance financial audit reporting procedures"
|
| 421 |
+
|
| 422 |
+
# Leadership/Policymaker queries
|
| 423 |
+
elif any(word in query_lower for word in ["policy impact", "scenario analysis", "comparative analysis"]):
|
| 424 |
+
enhanced_query = f"{query} policy impact scenario analysis comparison evidence"
|
| 425 |
+
elif any(word in query_lower for word in ["evidence pack", "policy brief", "decision support"]):
|
| 426 |
+
enhanced_query = f"{query} evidence pack policy brief decision support documentation"
|
| 427 |
+
|
| 428 |
+
# General category queries
|
| 429 |
+
elif "pension" in query_lower:
|
| 430 |
if any(word in query_lower for word in ["changes", "impact", "rules"]):
|
| 431 |
enhanced_query = f"{query} pension rules retirement benefits modifications"
|
| 432 |
elif "calculation" in query_lower or "formula" in query_lower:
|
|
|
|
| 451 |
# Transfer queries
|
| 452 |
elif any(word in query_lower for word in ["transfer", "posting"]):
|
| 453 |
enhanced_query = f"{query} transfer posting policy rules"
|
| 454 |
+
|
| 455 |
+
# Audit and financial queries
|
| 456 |
+
elif any(word in query_lower for word in ["audit", "financial", "budget", "expenditure", "accounts", "finance"]):
|
| 457 |
+
enhanced_query = f"{query} audit financial budget expenditure accounts"
|
| 458 |
+
|
| 459 |
+
# Training and development queries
|
| 460 |
+
elif any(word in query_lower for word in ["training", "development", "course", "skill"]):
|
| 461 |
+
enhanced_query = f"{query} training development skill course capacity building"
|
| 462 |
+
|
| 463 |
+
# Salary and pay queries
|
| 464 |
+
elif any(word in query_lower for word in ["salary", "pay", "grade", "scale"]):
|
| 465 |
+
enhanced_query = f"{query} salary pay grade scale compensation"
|
| 466 |
|
| 467 |
logger.info(f"🔍 Enhanced query: '{enhanced_query}' (original: '{query}')")
|
| 468 |
|
|
|
|
| 503 |
# Calculate relevance score based on query intent
|
| 504 |
relevance_score = getattr(doc, 'score', 0.5) # Base score
|
| 505 |
|
| 506 |
+
# Define query categories and their keywords (including role-specific)
|
| 507 |
query_categories = {
|
| 508 |
+
'pension_beneficiary': ['pension eligibility', 'pension documents', 'pension application', 'pension certificate',
|
| 509 |
+
'family pension', 'pension calculation', 'how to apply pension', 'pension office',
|
| 510 |
+
'life certificate', 'pension arrears', 'commutation', 'gratuity eligibility'],
|
| 511 |
+
'procurement_officer': ['tender process', 'bid submission', 'procurement threshold', 'gem portal',
|
| 512 |
+
'msme relaxation', 'vendor registration', 'procurement checklist', 'bid evaluation',
|
| 513 |
+
'tender documents', 'bidding process', 'contract award', 'vendor empanelment'],
|
| 514 |
+
'finance_staff': ['sanctioning authority', 'financial approval', 'budget allocation', 'expenditure sanction',
|
| 515 |
+
'financial registers', 'audit compliance', 'treasury rules', 'payment authorization',
|
| 516 |
+
'financial delegation', 'fund release', 'financial procedure', 'voucher processing'],
|
| 517 |
+
'leadership_policymaker': ['policy impact', 'scenario analysis', 'evidence pack', 'policy brief',
|
| 518 |
+
'comparative analysis', 'decision support', 'policy evaluation', 'impact assessment'],
|
| 519 |
'pension': ['pension', 'retirement', 'gratuity', 'provident fund', 'gpf', 'cpf', 'superannuation'],
|
| 520 |
'leave': ['leave', 'casual leave', 'earned leave', 'medical leave', 'maternity', 'paternity'],
|
| 521 |
'allowance': ['allowance', 'dearness allowance', 'da', 'hra', 'house rent', 'travel allowance', 'increment'],
|
|
|
|
| 523 |
'medical': ['medical', 'health', 'treatment', 'reimbursement', 'cghs', 'hospital'],
|
| 524 |
'transfer': ['transfer', 'posting', 'deputation', 'cadre'],
|
| 525 |
'promotion': ['promotion', 'seniority', 'grade', 'advancement', 'career progression'],
|
| 526 |
+
'service': ['service', 'conduct', 'discipline', 'rules', 'regulation'],
|
| 527 |
+
'audit': ['audit', 'financial', 'budget', 'expenditure', 'accounts', 'finance', 'yearly', 'annual'],
|
| 528 |
+
'training': ['training', 'development', 'skill', 'course', 'capacity building', 'learning'],
|
| 529 |
+
'salary': ['salary', 'pay', 'grade', 'scale', 'compensation', 'structure']
|
| 530 |
}
|
| 531 |
|
| 532 |
+
# Content categories - what each document type contains (including role-specific)
|
| 533 |
content_categories = {
|
| 534 |
+
'pension_beneficiary': ['pension', 'retirement', 'eligibility', 'documents', 'application', 'certificate',
|
| 535 |
+
'family pension', 'gratuity', 'commutation', 'beneficiary', 'process', 'office'],
|
| 536 |
+
'procurement_officer': ['procurement', 'tender', 'bidding', 'contract', 'vendor', 'gem', 'msme',
|
| 537 |
+
'threshold', 'process', 'evaluation', 'registration', 'checklist', 'guidelines'],
|
| 538 |
+
'finance_staff': ['sanctioning', 'approval', 'budget', 'expenditure', 'treasury', 'audit',
|
| 539 |
+
'financial', 'fund', 'payment', 'authorization', 'delegation', 'registers'],
|
| 540 |
+
'leadership_policymaker': ['policy', 'impact', 'scenario', 'analysis', 'evidence', 'comparative',
|
| 541 |
+
'evaluation', 'decision', 'strategic', 'implementation', 'assessment'],
|
| 542 |
'pension': ['pension', 'retirement', 'gratuity', 'provident fund', 'superannuation', 'benefits'],
|
| 543 |
'leave': ['leave', 'casual', 'earned', 'medical leave', 'maternity'],
|
| 544 |
'allowance': ['allowance', 'dearness', 'house rent', 'travel', 'da', 'hra', 'increment'],
|
|
|
|
| 546 |
'medical': ['medical', 'health', 'cghs', 'reimbursement', 'treatment'],
|
| 547 |
'transfer': ['transfer', 'posting', 'deputation', 'cadre'],
|
| 548 |
'promotion': ['promotion', 'seniority', 'grade pay', 'advancement'],
|
| 549 |
+
'audit': ['audit', 'financial', 'budget', 'expenditure', 'accounts', 'finance', 'yearly'],
|
| 550 |
+
'training': ['training', 'development', 'skill', 'course', 'capacity', 'learning'],
|
| 551 |
+
'salary': ['salary', 'pay', 'grade', 'scale', 'compensation', 'structure'],
|
| 552 |
'service': ['service rules', 'conduct', 'discipline', 'misconduct']
|
| 553 |
}
|
| 554 |
|
|
|
|
| 597 |
clause_text = doc.page_content
|
| 598 |
# Simple extractive summary: first sentence, capped at 180 characters ("..." is appended whenever the full clause text is longer than 180 characters)
|
| 599 |
summary = clause_text.split(". ")[0][:180] + ("..." if len(clause_text) > 180 else "")
|
| 600 |
+
# Enhanced role-aware checklist logic
|
| 601 |
+
role_checklist = generate_role_based_checklist(query, clause_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 602 |
results.append({
|
| 603 |
"clause_text": clause_text,
|
| 604 |
"summary": summary,
|