Spaces:
Running
Running
Update chat/rag_engine.py
Browse filesimprovements done on rag_engine.py
- chat/rag_engine.py +720 -255
chat/rag_engine.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
rag_engine.py - Production-Ready Medical RAG Engine
|
| 3 |
-
Updated with
|
| 4 |
"""
|
| 5 |
|
| 6 |
from typing import List, Dict, Any, Optional, Tuple
|
|
@@ -385,65 +385,218 @@ except ImportError:
|
|
| 385 |
|
| 386 |
|
| 387 |
# ============================================================================
|
| 388 |
-
#
|
| 389 |
# ============================================================================
|
| 390 |
|
| 391 |
-
class
|
| 392 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
|
| 394 |
@staticmethod
|
| 395 |
-
def
|
| 396 |
-
|
| 397 |
-
"""Create
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
|
| 409 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
|
| 411 |
-
|
| 412 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
if guideline_info:
|
| 414 |
if guideline_info.get('guidelines_found'):
|
| 415 |
-
|
| 416 |
if guideline_info.get('critical_missing'):
|
| 417 |
-
|
| 418 |
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
DOMAIN: {domain}
|
| 423 |
-
PAPERS ANALYZED: {papers_count}
|
| 424 |
-
{guideline_context}
|
| 425 |
-
**Perform Evidence-Based Analysis:**
|
| 426 |
-
1. **Evidence Assessment:**
|
| 427 |
-
- What types of studies exist on this topic?
|
| 428 |
-
- What is the quality and strength of evidence?
|
| 429 |
-
- Are there systematic reviews or clinical trials?
|
| 430 |
-
- How does the evidence align with current guidelines?
|
| 431 |
-
2. **Domain-Specific Analysis:**
|
| 432 |
-
- How does this apply specifically to {domain}?
|
| 433 |
-
- What are the standard approaches in this field?
|
| 434 |
-
- What innovations or recent advances exist?
|
| 435 |
-
- How do findings compare to guideline recommendations?
|
| 436 |
-
3. **Critical Evaluation:**
|
| 437 |
-
- What are the strengths of current evidence?
|
| 438 |
-
- What limitations or knowledge gaps exist?
|
| 439 |
-
- Are there any controversies or alternative views?
|
| 440 |
-
- How complete is guideline coverage?
|
| 441 |
-
4. **Practical Implications:**
|
| 442 |
-
- What are the actionable insights for {user_context}?
|
| 443 |
-
- What are the recommendations or next steps?
|
| 444 |
-
- How should this evidence be applied in practice?
|
| 445 |
-
- What guideline considerations are important?
|
| 446 |
-
Provide a comprehensive, evidence-based answer that synthesizes medical knowledge with practical implications and guideline awareness."""
|
| 447 |
|
| 448 |
|
| 449 |
# ============================================================================
|
|
@@ -1162,7 +1315,7 @@ class EnhancedRAGEngine:
|
|
| 1162 |
print(f"⚠️ LLM not available - using fallback mode: {e}")
|
| 1163 |
self.llm = None
|
| 1164 |
|
| 1165 |
-
self.
|
| 1166 |
self.ranker = PaperRanker()
|
| 1167 |
self.confidence_scorer = ConfidenceScorer()
|
| 1168 |
self.context_detector = UserContextDetector()
|
|
@@ -1204,6 +1357,7 @@ class EnhancedRAGEngine:
|
|
| 1204 |
else:
|
| 1205 |
print(" 📄 Real paper fetching: DISABLED (using demo papers)")
|
| 1206 |
print(" 📋 Guideline detection: ENABLED")
|
|
|
|
| 1207 |
|
| 1208 |
def answer_research_question(self,
|
| 1209 |
query: str,
|
|
@@ -1212,8 +1366,10 @@ class EnhancedRAGEngine:
|
|
| 1212 |
use_memory: bool = True,
|
| 1213 |
user_context: str = "auto",
|
| 1214 |
use_fallback: bool = False,
|
|
|
|
|
|
|
| 1215 |
**kwargs) -> Dict[str, Any]:
|
| 1216 |
-
"""Answer medical research questions with
|
| 1217 |
|
| 1218 |
start_time = time.time()
|
| 1219 |
self.metrics['total_queries'] += 1
|
|
@@ -1221,23 +1377,35 @@ class EnhancedRAGEngine:
|
|
| 1221 |
|
| 1222 |
print(f"\n🔍 Processing query: '{query}'")
|
| 1223 |
print(f" Domain: {domain}")
|
|
|
|
| 1224 |
print(f" Max papers: {max_papers}")
|
| 1225 |
print(f" Real-time search: {self.use_real_time}")
|
| 1226 |
|
| 1227 |
try:
|
| 1228 |
-
# Auto-detect user context if needed
|
| 1229 |
if user_context == "auto":
|
| 1230 |
user_context = self.context_detector.detect_context(query, domain)
|
| 1231 |
|
| 1232 |
self.metrics['user_contexts'][user_context] += 1
|
| 1233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1234 |
# Retrieve papers using MedicalResearchEngine
|
| 1235 |
print("📚 Retrieving relevant papers...")
|
| 1236 |
papers = self._retrieve_real_papers(query, domain, max_papers, use_fallback)
|
| 1237 |
|
| 1238 |
if not papers:
|
| 1239 |
print("⚠️ No papers found, creating fallback response...")
|
| 1240 |
-
return self._create_no_results_response(query, domain,
|
| 1241 |
|
| 1242 |
# Detect guideline citations
|
| 1243 |
print("📋 Detecting guideline citations...")
|
|
@@ -1254,7 +1422,7 @@ class EnhancedRAGEngine:
|
|
| 1254 |
})
|
| 1255 |
|
| 1256 |
# Rank papers
|
| 1257 |
-
ranked_papers = self.ranker.rank_papers(papers, query, domain,
|
| 1258 |
print(f"📊 Papers found: {len(ranked_papers)}")
|
| 1259 |
|
| 1260 |
# Track paper sources
|
|
@@ -1274,29 +1442,29 @@ class EnhancedRAGEngine:
|
|
| 1274 |
|
| 1275 |
# Calculate confidence with guideline consideration
|
| 1276 |
confidence = self.confidence_scorer.calculate_confidence(
|
| 1277 |
-
ranked_papers, query, "summary",
|
| 1278 |
)
|
| 1279 |
|
| 1280 |
-
# Generate analysis using
|
| 1281 |
-
print("🧠 Generating
|
| 1282 |
-
analysis = self.
|
| 1283 |
-
query, domain,
|
| 1284 |
)
|
| 1285 |
|
| 1286 |
-
# Generate clinical bottom line with
|
| 1287 |
-
bottom_line = self.
|
| 1288 |
-
query, domain,
|
| 1289 |
)
|
| 1290 |
|
| 1291 |
# Synthesize final answer
|
| 1292 |
-
final_answer = self.
|
| 1293 |
-
query, domain,
|
| 1294 |
bottom_line, confidence, guideline_info
|
| 1295 |
)
|
| 1296 |
|
| 1297 |
# Update memory
|
| 1298 |
if use_memory and self.memory:
|
| 1299 |
-
self._update_memory(query, final_answer, domain,
|
| 1300 |
|
| 1301 |
# Update metrics
|
| 1302 |
response_time = time.time() - start_time
|
|
@@ -1317,7 +1485,300 @@ class EnhancedRAGEngine:
|
|
| 1317 |
print(f"❌ Error in research analysis: {e}")
|
| 1318 |
import traceback
|
| 1319 |
traceback.print_exc()
|
| 1320 |
-
return self._create_error_response(query, domain,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1321 |
|
| 1322 |
def _retrieve_real_papers(self, query: str, domain: str, max_papers: int,
|
| 1323 |
use_fallback: bool = False) -> List[Dict]:
|
|
@@ -1515,53 +1976,51 @@ class EnhancedRAGEngine:
|
|
| 1515 |
|
| 1516 |
return papers
|
| 1517 |
|
| 1518 |
-
def
|
| 1519 |
-
|
| 1520 |
-
|
|
|
|
| 1521 |
|
| 1522 |
if not self.llm:
|
| 1523 |
-
return self.
|
| 1524 |
|
| 1525 |
-
# Create
|
| 1526 |
-
prompt = self.
|
| 1527 |
-
query, domain,
|
| 1528 |
)
|
| 1529 |
|
| 1530 |
-
# Add paper information
|
| 1531 |
-
|
| 1532 |
-
|
| 1533 |
-
|
| 1534 |
-
|
|
|
|
|
|
|
| 1535 |
|
| 1536 |
# Add demo paper disclaimer if any demo papers
|
| 1537 |
demo_count = sum(1 for p in papers if p.get('is_demo', False))
|
| 1538 |
if demo_count > 0:
|
| 1539 |
-
prompt += f"\n\nNote: {demo_count}
|
| 1540 |
-
|
| 1541 |
-
# Add guideline details if available
|
| 1542 |
-
if guideline_info:
|
| 1543 |
-
if guideline_info.get('guidelines_found'):
|
| 1544 |
-
prompt += f"\n\nGuideline Context: Papers reference {len(guideline_info['guidelines_found'])} major guidelines."
|
| 1545 |
-
if guideline_info.get('critical_missing'):
|
| 1546 |
-
missing_guidelines = ', '.join(guideline_info['critical_missing'][:3])
|
| 1547 |
-
prompt += f"\nGuideline Gap: Missing explicit citations for {missing_guidelines} guidelines."
|
| 1548 |
-
|
| 1549 |
-
full_prompt = f"{prompt}\n\n**Relevant Papers:**\n{paper_info}\n\n**Analysis:**"
|
| 1550 |
|
| 1551 |
try:
|
|
|
|
|
|
|
|
|
|
| 1552 |
analysis = self.llm.generate(
|
| 1553 |
-
|
| 1554 |
-
system_message=
|
| 1555 |
-
max_tokens=
|
| 1556 |
)
|
| 1557 |
return analysis
|
| 1558 |
except Exception as e:
|
| 1559 |
-
print(f"⚠️ LLM analysis failed: {e}")
|
| 1560 |
-
return self.
|
| 1561 |
|
| 1562 |
-
def
|
| 1563 |
-
|
| 1564 |
"""Create fallback analysis when LLM is unavailable"""
|
|
|
|
|
|
|
| 1565 |
if CONFIG_AVAILABLE:
|
| 1566 |
try:
|
| 1567 |
domain_name = get_domain_display_name(domain)
|
|
@@ -1574,12 +2033,10 @@ class EnhancedRAGEngine:
|
|
| 1574 |
real_papers = [p for p in papers if not p.get('is_demo', False)]
|
| 1575 |
demo_papers = [p for p in papers if p.get('is_demo', False)]
|
| 1576 |
|
| 1577 |
-
|
| 1578 |
-
|
| 1579 |
-
analysis = f"""**Evidence-Based Analysis for {domain_name}**
|
| 1580 |
**Query:** {query}
|
| 1581 |
-
**
|
| 1582 |
-
**
|
| 1583 |
|
| 1584 |
# Add guideline information
|
| 1585 |
if guideline_info:
|
|
@@ -1589,39 +2046,44 @@ class EnhancedRAGEngine:
|
|
| 1589 |
analysis += f"\n**Guideline Gaps:** Missing explicit citations for {', '.join(guideline_info['critical_missing'][:3])}"
|
| 1590 |
|
| 1591 |
analysis += f"""
|
| 1592 |
-
**Key
|
| 1593 |
-
Based on analysis of {len(papers)} relevant
|
| 1594 |
-
1. **Current Evidence:** The literature shows evolving understanding of this topic within {domain_name}. Recent studies have contributed significantly to our knowledge base."""
|
| 1595 |
|
| 1596 |
-
|
| 1597 |
-
|
| 1598 |
-
|
| 1599 |
-
|
| 1600 |
-
analysis += f"\n2. **Clinical Context:** Findings should be interpreted within established clinical frameworks and guidelines."
|
| 1601 |
|
| 1602 |
-
|
| 1603 |
-
|
| 1604 |
-
|
| 1605 |
-
|
| 1606 |
-
|
| 1607 |
-
|
| 1608 |
-
|
| 1609 |
-
-
|
| 1610 |
-
-
|
| 1611 |
-
|
| 1612 |
-
|
| 1613 |
-
-
|
| 1614 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1615 |
|
| 1616 |
if demo_papers:
|
| 1617 |
-
analysis += f"\n\n*
|
| 1618 |
|
| 1619 |
return analysis
|
| 1620 |
|
| 1621 |
-
def
|
| 1622 |
-
|
| 1623 |
-
|
| 1624 |
-
"""Generate
|
|
|
|
|
|
|
| 1625 |
if CONFIG_AVAILABLE:
|
| 1626 |
try:
|
| 1627 |
domain_name = get_domain_display_name(domain)
|
|
@@ -1630,41 +2092,43 @@ Based on analysis of {len(papers)} relevant papers, several key points emerge:
|
|
| 1630 |
else:
|
| 1631 |
domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
|
| 1632 |
|
| 1633 |
-
bottom_line = f"""**
|
| 1634 |
-
Based on
|
| 1635 |
|
| 1636 |
# Add guideline-specific bottom line
|
| 1637 |
if guideline_info:
|
| 1638 |
if guideline_info.get('guidelines_found'):
|
| 1639 |
-
bottom_line += f"\n\n**Guideline Context:** {len(guideline_info['guidelines_found'])} major guidelines referenced
|
| 1640 |
|
| 1641 |
if guideline_info.get('critical_missing'):
|
| 1642 |
-
missing_list = ', '.join(guideline_info['critical_missing'][:
|
| 1643 |
-
bottom_line += f"\n**
|
| 1644 |
|
| 1645 |
coverage = guideline_info.get('coverage_percentage', 0)
|
| 1646 |
if coverage < 50:
|
| 1647 |
-
bottom_line += f"\n**Evidence
|
| 1648 |
|
| 1649 |
bottom_line += f"""
|
| 1650 |
-
**
|
| 1651 |
-
-
|
| 1652 |
-
-
|
| 1653 |
-
-
|
| 1654 |
-
- {"
|
| 1655 |
-
-
|
| 1656 |
|
| 1657 |
if papers_count > real_papers_count:
|
| 1658 |
-
bottom_line += f"\n\n*Note: Includes {papers_count - real_papers_count} illustrative examples for
|
| 1659 |
|
| 1660 |
return bottom_line
|
| 1661 |
|
| 1662 |
-
def
|
| 1663 |
-
|
| 1664 |
-
|
| 1665 |
-
|
| 1666 |
-
"""Synthesize final answer with
|
| 1667 |
|
|
|
|
|
|
|
| 1668 |
if CONFIG_AVAILABLE:
|
| 1669 |
try:
|
| 1670 |
domain_info = {
|
|
@@ -1685,18 +2149,13 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
|
|
| 1685 |
'description': f'Research in {domain.replace("_", " ")}'
|
| 1686 |
})
|
| 1687 |
|
| 1688 |
-
context_info = USER_CONTEXT_INFO.get(user_context, {
|
| 1689 |
-
'name': user_context.title(),
|
| 1690 |
-
'icon': '👤'
|
| 1691 |
-
})
|
| 1692 |
-
|
| 1693 |
# Count real vs demo papers
|
| 1694 |
real_papers = [p for p in papers if not p.get('is_demo', False)]
|
| 1695 |
demo_papers = [p for p in papers if p.get('is_demo', False)]
|
| 1696 |
|
| 1697 |
-
# Format paper citations
|
| 1698 |
paper_citations = []
|
| 1699 |
-
for i, paper in enumerate(papers[:
|
| 1700 |
title = paper.get('title', 'Untitled')
|
| 1701 |
authors = paper.get('authors', [])
|
| 1702 |
year = paper.get('publication_date', '').split('-')[0] if paper.get('publication_date') else ''
|
|
@@ -1705,11 +2164,6 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
|
|
| 1705 |
is_demo = paper.get('is_demo', False)
|
| 1706 |
is_preprint = paper.get('is_preprint', False)
|
| 1707 |
|
| 1708 |
-
# Check if paper mentions guidelines
|
| 1709 |
-
text = f"{title} {paper.get('abstract', '')}".lower()
|
| 1710 |
-
has_guidelines = any(keyword in text for keyword in ['guideline', 'recommendation', 'consensus',
|
| 1711 |
-
'ada', 'aha', 'acc', 'esc', 'idsa', 'ats'])
|
| 1712 |
-
|
| 1713 |
# Format authors
|
| 1714 |
if authors and isinstance(authors, list) and len(authors) > 0:
|
| 1715 |
if len(authors) == 1:
|
|
@@ -1726,23 +2180,19 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
|
|
| 1726 |
|
| 1727 |
demo_indicator = "📄 " if is_demo else ""
|
| 1728 |
preprint_indicator = "⚡ " if is_preprint else ""
|
| 1729 |
-
guideline_indicator = "📋 " if has_guidelines else ""
|
| 1730 |
|
| 1731 |
if author_str and year:
|
| 1732 |
-
citation += f"\n {demo_indicator}{preprint_indicator}
|
| 1733 |
elif author_str:
|
| 1734 |
-
citation += f"\n {demo_indicator}{preprint_indicator}
|
| 1735 |
else:
|
| 1736 |
-
citation += f"\n {demo_indicator}{preprint_indicator}
|
| 1737 |
|
| 1738 |
if journal:
|
| 1739 |
citation += f"\n Journal: {journal}"
|
| 1740 |
elif source and source != 'unknown':
|
| 1741 |
citation += f"\n Source: {source}"
|
| 1742 |
|
| 1743 |
-
if has_guidelines:
|
| 1744 |
-
citation += f"\n *References clinical guidelines*"
|
| 1745 |
-
|
| 1746 |
paper_citations.append(citation)
|
| 1747 |
|
| 1748 |
# Build guideline summary section
|
|
@@ -1753,13 +2203,6 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
|
|
| 1753 |
if guideline_info.get('guidelines_found'):
|
| 1754 |
guideline_summary += f"**✅ Guidelines Referenced:** {', '.join(guideline_info['guidelines_found'])}\n\n"
|
| 1755 |
|
| 1756 |
-
# Show papers that mention guidelines
|
| 1757 |
-
if guideline_info.get('papers_with_guidelines'):
|
| 1758 |
-
guideline_summary += "**Papers Citing Guidelines:**\n"
|
| 1759 |
-
for paper_info in guideline_info['papers_with_guidelines'][:3]:
|
| 1760 |
-
guideline_summary += f"- {paper_info['title']} ({', '.join(paper_info['guidelines'][:2])})\n"
|
| 1761 |
-
guideline_summary += "\n"
|
| 1762 |
-
|
| 1763 |
if guideline_info.get('critical_missing'):
|
| 1764 |
missing_list = ', '.join(guideline_info['critical_missing'])
|
| 1765 |
guideline_summary += f"**⚠️ Missing Guideline Citations:** {missing_list}\n\n"
|
|
@@ -1767,11 +2210,11 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
|
|
| 1767 |
guideline_summary += f"**Coverage Score:** {guideline_info.get('coverage_percentage', 0)}%\n\n"
|
| 1768 |
|
| 1769 |
# Build answer
|
| 1770 |
-
answer = f"""# 🔬 **
|
|
|
|
| 1771 |
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
|
| 1772 |
-
**User Context:** {context_info['name']} {context_info.get('icon', '')}
|
| 1773 |
**Evidence Confidence:** {confidence['level']} ({confidence['overall_score']}/100)
|
| 1774 |
-
**
|
| 1775 |
---
|
| 1776 |
## 📋 **Executive Summary**
|
| 1777 |
{bottom_line}
|
|
@@ -1780,22 +2223,22 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
|
|
| 1780 |
{analysis}
|
| 1781 |
---
|
| 1782 |
## 📊 **Supporting Evidence**
|
| 1783 |
-
{chr(10).join(paper_citations)}
|
| 1784 |
---
|
| 1785 |
-
## 🎯 **Key Takeaways**
|
| 1786 |
-
1.
|
| 1787 |
2. Domain-specific considerations for {domain_info['name'].lower()}
|
| 1788 |
-
3.
|
| 1789 |
-
4. {"
|
| 1790 |
-
*Analysis performed
|
| 1791 |
*Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M")}*"""
|
| 1792 |
|
| 1793 |
return {
|
| 1794 |
"query": query,
|
| 1795 |
"domain": domain,
|
| 1796 |
"domain_info": domain_info,
|
| 1797 |
-
"user_context":
|
| 1798 |
-
"user_context_info":
|
| 1799 |
"answer": answer,
|
| 1800 |
"analysis": analysis,
|
| 1801 |
"bottom_line": bottom_line,
|
|
@@ -1804,27 +2247,27 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
|
|
| 1804 |
"demo_papers_used": len(demo_papers),
|
| 1805 |
"confidence_score": confidence,
|
| 1806 |
"guideline_info": guideline_info,
|
| 1807 |
-
"reasoning_method": "
|
| 1808 |
"real_time_search": self.use_real_time,
|
| 1809 |
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
|
| 1810 |
"metrics": {
|
| 1811 |
'response_time': time.time(),
|
| 1812 |
'papers_analyzed': len(papers),
|
| 1813 |
'domain': domain,
|
| 1814 |
-
'user_context':
|
| 1815 |
}
|
| 1816 |
}
|
| 1817 |
|
| 1818 |
def _update_memory(self, query: str, response: Dict[str, Any], domain: str,
|
| 1819 |
-
|
| 1820 |
-
"""Update conversation memory with
|
| 1821 |
if not self.memory:
|
| 1822 |
return
|
| 1823 |
|
| 1824 |
memory_data = {
|
| 1825 |
'query': query,
|
| 1826 |
'domain': domain,
|
| 1827 |
-
'
|
| 1828 |
'papers_used': len(papers),
|
| 1829 |
'real_papers': sum(1 for p in papers if not p.get('is_demo', False)),
|
| 1830 |
'demo_papers': sum(1 for p in papers if p.get('is_demo', False)),
|
|
@@ -1844,8 +2287,10 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
|
|
| 1844 |
metadata=memory_data
|
| 1845 |
)
|
| 1846 |
|
| 1847 |
-
def _create_no_results_response(self, query: str, domain: str,
|
| 1848 |
"""Create response when no papers are found"""
|
|
|
|
|
|
|
| 1849 |
if CONFIG_AVAILABLE:
|
| 1850 |
try:
|
| 1851 |
domain_info = {
|
|
@@ -1864,29 +2309,38 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
|
|
| 1864 |
})
|
| 1865 |
|
| 1866 |
answer = f"""# 🔍 **Limited Research Found**
|
| 1867 |
-
|
|
|
|
| 1868 |
**Domain:** {domain_info['name']}
|
| 1869 |
-
|
| 1870 |
-
**
|
|
|
|
|
|
|
| 1871 |
1. Try broadening your search terms
|
| 1872 |
-
2. Consider related {domain_info['name']}
|
| 1873 |
-
3. Check spelling of
|
| 1874 |
4. Try a more general domain selection
|
| 1875 |
-
|
| 1876 |
-
-
|
| 1877 |
-
-
|
| 1878 |
-
-
|
| 1879 |
-
|
| 1880 |
-
-
|
| 1881 |
-
|
| 1882 |
-
|
| 1883 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1884 |
|
| 1885 |
return {
|
| 1886 |
"query": query,
|
| 1887 |
"domain": domain,
|
| 1888 |
"domain_info": domain_info,
|
| 1889 |
-
"user_context":
|
|
|
|
| 1890 |
"answer": answer,
|
| 1891 |
"papers_used": 0,
|
| 1892 |
"real_papers_used": 0,
|
|
@@ -1900,8 +2354,10 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
|
|
| 1900 |
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE
|
| 1901 |
}
|
| 1902 |
|
| 1903 |
-
def _create_error_response(self, query: str, domain: str,
|
| 1904 |
"""Create error response"""
|
|
|
|
|
|
|
| 1905 |
if CONFIG_AVAILABLE:
|
| 1906 |
try:
|
| 1907 |
domain_info = {
|
|
@@ -1920,26 +2376,33 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
|
|
| 1920 |
})
|
| 1921 |
|
| 1922 |
answer = f"""# 🚨 **Analysis Error**
|
| 1923 |
-
|
|
|
|
| 1924 |
**Domain:** {domain_info['name']}
|
| 1925 |
-
|
|
|
|
| 1926 |
**Error:** {error}
|
| 1927 |
-
|
|
|
|
| 1928 |
1. Check your internet connection
|
| 1929 |
-
2. Try a simpler query
|
| 1930 |
-
3. Verify domain selection
|
| 1931 |
4. Contact support if problem persists
|
| 1932 |
-
|
| 1933 |
-
-
|
| 1934 |
-
-
|
| 1935 |
-
-
|
| 1936 |
-
|
|
|
|
|
|
|
|
|
|
| 1937 |
|
| 1938 |
return {
|
| 1939 |
"query": query,
|
| 1940 |
"domain": domain,
|
| 1941 |
"domain_info": domain_info,
|
| 1942 |
-
"user_context":
|
|
|
|
| 1943 |
"answer": answer,
|
| 1944 |
"papers_used": 0,
|
| 1945 |
"real_papers_used": 0,
|
|
@@ -2033,7 +2496,7 @@ Please try again or reformulate your question."""
|
|
| 2033 |
}
|
| 2034 |
|
| 2035 |
def get_engine_status(self) -> Dict[str, Any]:
|
| 2036 |
-
"""Get engine status with
|
| 2037 |
# Calculate average guideline coverage
|
| 2038 |
avg_guideline_coverage = 0
|
| 2039 |
if self.metrics['guideline_coverage']:
|
|
@@ -2042,12 +2505,13 @@ Please try again or reformulate your question."""
|
|
| 2042 |
|
| 2043 |
return {
|
| 2044 |
"engine_name": "Medical Research RAG Engine",
|
| 2045 |
-
"version": "2.
|
| 2046 |
"model": self.model if hasattr(self, 'model') else "Unknown",
|
| 2047 |
-
"features": ["
|
| 2048 |
-
"confidence_scoring", "guideline_detection", "
|
| 2049 |
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
|
| 2050 |
"real_time_search": self.use_real_time,
|
|
|
|
| 2051 |
"guideline_databases": len(GuidelineDetector.GUIDELINE_DATABASES),
|
| 2052 |
"metrics": {
|
| 2053 |
"total_queries": self.metrics['total_queries'],
|
|
@@ -2059,7 +2523,7 @@ Please try again or reformulate your question."""
|
|
| 2059 |
"demo_papers_used": self.metrics['demo_papers_used']
|
| 2060 |
},
|
| 2061 |
"domains_supported": len(DOMAIN_INFO),
|
| 2062 |
-
"
|
| 2063 |
}
|
| 2064 |
|
| 2065 |
def clear_memory(self):
|
|
@@ -2075,74 +2539,75 @@ Please try again or reformulate your question."""
|
|
| 2075 |
# TEST FUNCTION
|
| 2076 |
# ============================================================================
|
| 2077 |
|
| 2078 |
-
def
|
| 2079 |
-
"""Test the medical RAG engine with
|
| 2080 |
print("\n" + "=" * 60)
|
| 2081 |
-
print("🧪 TESTING
|
| 2082 |
print("=" * 60)
|
| 2083 |
|
| 2084 |
try:
|
| 2085 |
# Initialize engine
|
| 2086 |
engine = EnhancedRAGEngine(
|
| 2087 |
-
session_id="
|
| 2088 |
model="gpt-oss-120b",
|
| 2089 |
-
use_real_time=
|
| 2090 |
)
|
| 2091 |
|
| 2092 |
-
# Test queries with different
|
| 2093 |
test_cases = [
|
| 2094 |
{
|
| 2095 |
-
"query": "
|
| 2096 |
-
"domain": "
|
| 2097 |
-
"
|
| 2098 |
},
|
| 2099 |
{
|
| 2100 |
-
"query": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2101 |
"domain": "endocrinology",
|
| 2102 |
-
"
|
| 2103 |
},
|
| 2104 |
{
|
| 2105 |
-
"query": "
|
| 2106 |
-
"domain": "
|
| 2107 |
-
"
|
| 2108 |
}
|
| 2109 |
]
|
| 2110 |
|
| 2111 |
-
for i, test_case in enumerate(test_cases
|
| 2112 |
print(f"\n📝 Test Case {i}:")
|
| 2113 |
print(f" Query: '{test_case['query']}'")
|
| 2114 |
print(f" Domain: {test_case['domain']}")
|
| 2115 |
-
print(f"
|
| 2116 |
|
| 2117 |
# Process query
|
| 2118 |
response = engine.answer_research_question(
|
| 2119 |
query=test_case['query'],
|
| 2120 |
domain=test_case['domain'],
|
| 2121 |
-
max_papers=
|
| 2122 |
-
|
| 2123 |
use_fallback=True
|
| 2124 |
)
|
| 2125 |
|
| 2126 |
if response and 'error' not in response:
|
| 2127 |
print(f"\n✅ Test Successful!")
|
|
|
|
| 2128 |
print(f" Papers used: {response.get('papers_used', 0)}")
|
| 2129 |
-
print(f" Real papers: {response.get('real_papers_used', 0)}")
|
| 2130 |
print(f" Confidence: {response.get('confidence_score', {}).get('overall_score', 0)}/100")
|
| 2131 |
|
| 2132 |
-
# Check
|
| 2133 |
-
|
| 2134 |
-
|
| 2135 |
-
print(f" Guidelines found: {len(guideline_info.get('guidelines_found', []))}")
|
| 2136 |
-
if guideline_info.get('critical_missing'):
|
| 2137 |
-
print(f" Missing guidelines: {', '.join(guideline_info['critical_missing'][:3])}")
|
| 2138 |
-
print(f" Guideline coverage: {guideline_info.get('coverage_percentage', 0)}%")
|
| 2139 |
|
| 2140 |
# Show engine status
|
| 2141 |
status = engine.get_engine_status()
|
| 2142 |
print(f"\n🔧 Engine Status:")
|
| 2143 |
-
print(f"
|
| 2144 |
-
print(f"
|
| 2145 |
-
print(f"
|
| 2146 |
print(f" Total queries: {status['metrics']['total_queries']}")
|
| 2147 |
|
| 2148 |
return True
|
|
@@ -2156,15 +2621,15 @@ def test_medical_rag_engine():
|
|
| 2156 |
|
| 2157 |
if __name__ == "__main__":
|
| 2158 |
# Run test
|
| 2159 |
-
test_result =
|
| 2160 |
|
| 2161 |
if test_result:
|
| 2162 |
print(f"\n{'=' * 60}")
|
| 2163 |
-
print("🎉
|
| 2164 |
-
print("
|
| 2165 |
-
print("
|
|
|
|
| 2166 |
print(" Guideline detection: ✓")
|
| 2167 |
-
print(" Guideline gap analysis: ✓")
|
| 2168 |
print(f"{'=' * 60}")
|
| 2169 |
else:
|
| 2170 |
print("\n❌ Engine test failed")
|
|
|
|
| 1 |
"""
|
| 2 |
rag_engine.py - Production-Ready Medical RAG Engine
|
| 3 |
+
Updated with role-based response handling and improved simple query detection
|
| 4 |
"""
|
| 5 |
|
| 6 |
from typing import List, Dict, Any, Optional, Tuple
|
|
|
|
| 385 |
|
| 386 |
|
| 387 |
# ============================================================================
|
| 388 |
+
# ROLE-BASED REASONING FOR MEDICAL RESEARCH
|
| 389 |
# ============================================================================
|
| 390 |
|
| 391 |
+
class RoleBasedReasoning:
|
| 392 |
+
"""Role-based reasoning technique focused on domain-agnostic, role-appropriate responses"""
|
| 393 |
+
|
| 394 |
+
# Role definitions with domain-agnostic prompts
|
| 395 |
+
ROLE_SYSTEM_PROMPTS = {
|
| 396 |
+
'patient': {
|
| 397 |
+
'name': 'Patient',
|
| 398 |
+
'icon': '🩺',
|
| 399 |
+
'prompt': '''You are helping a patient understand information. Use simple, clear, reassuring language.
|
| 400 |
+
- Focus on practical implications and what they need to know
|
| 401 |
+
- Avoid complex terminology or jargon
|
| 402 |
+
- Emphasize safety and when to seek professional help
|
| 403 |
+
- Be compassionate and supportive
|
| 404 |
+
- Do not provide diagnoses or specific medical advice
|
| 405 |
+
- Explain concepts in everyday terms'''
|
| 406 |
+
},
|
| 407 |
+
'student': {
|
| 408 |
+
'name': 'Student',
|
| 409 |
+
'icon': '🎓',
|
| 410 |
+
'prompt': '''You are teaching a student. Focus on educational value and understanding.
|
| 411 |
+
- Explain foundational concepts and definitions
|
| 412 |
+
- Provide examples and analogies
|
| 413 |
+
- Encourage critical thinking and questions
|
| 414 |
+
- Structure information logically
|
| 415 |
+
- Connect to broader knowledge areas
|
| 416 |
+
- Mention learning resources when helpful'''
|
| 417 |
+
},
|
| 418 |
+
'clinician': {
|
| 419 |
+
'name': 'Clinician',
|
| 420 |
+
'icon': '👨⚕️',
|
| 421 |
+
'prompt': '''You are assisting a healthcare professional. Be concise, actionable, and evidence-based.
|
| 422 |
+
- Focus on practical implications and decision-making
|
| 423 |
+
- Reference guidelines and evidence levels when relevant
|
| 424 |
+
- Consider workflow and implementation
|
| 425 |
+
- Be precise but efficient with time
|
| 426 |
+
- Address risks and benefits clearly
|
| 427 |
+
- Maintain professional tone'''
|
| 428 |
+
},
|
| 429 |
+
'doctor': {
|
| 430 |
+
'name': 'Doctor',
|
| 431 |
+
'icon': '⚕️',
|
| 432 |
+
'prompt': '''You are assisting a physician. Use appropriate terminology and clinical reasoning.
|
| 433 |
+
- Focus on differential diagnosis, treatment options, and management
|
| 434 |
+
- Reference current standards of care and guidelines
|
| 435 |
+
- Consider patient factors and comorbidities
|
| 436 |
+
- Discuss evidence quality and limitations
|
| 437 |
+
- Be thorough but organized
|
| 438 |
+
- Maintain clinical accuracy'''
|
| 439 |
+
},
|
| 440 |
+
'researcher': {
|
| 441 |
+
'name': 'Researcher',
|
| 442 |
+
'icon': '🔬',
|
| 443 |
+
'prompt': '''You are assisting a research scientist. Focus on methodology and evidence.
|
| 444 |
+
- Discuss study designs, methods, and limitations
|
| 445 |
+
- Analyze evidence quality and gaps
|
| 446 |
+
- Consider statistical significance and clinical relevance
|
| 447 |
+
- Reference current literature and trends
|
| 448 |
+
- Discuss implications for future research
|
| 449 |
+
- Maintain scientific rigor'''
|
| 450 |
+
},
|
| 451 |
+
'professor': {
|
| 452 |
+
'name': 'Professor',
|
| 453 |
+
'icon': '📚',
|
| 454 |
+
'prompt': '''You are assisting an academic educator. Focus on knowledge synthesis and pedagogy.
|
| 455 |
+
- Provide comprehensive overviews with context
|
| 456 |
+
- Compare theories, methods, and findings
|
| 457 |
+
- Discuss historical development and future directions
|
| 458 |
+
- Emphasize critical evaluation and synthesis
|
| 459 |
+
- Connect to broader academic discourse
|
| 460 |
+
- Support teaching and learning objectives'''
|
| 461 |
+
},
|
| 462 |
+
'pharmacist': {
|
| 463 |
+
'name': 'Pharmacist',
|
| 464 |
+
'icon': '💊',
|
| 465 |
+
'prompt': '''You are assisting a pharmacy professional. Focus on medications and safety.
|
| 466 |
+
- Discuss drug mechanisms, interactions, and pharmacokinetics
|
| 467 |
+
- Emphasize safety profiles and monitoring
|
| 468 |
+
- Consider dosing, administration, and compliance
|
| 469 |
+
- Address patient counseling points
|
| 470 |
+
- Reference formularies and guidelines
|
| 471 |
+
- Maintain focus on medication optimization'''
|
| 472 |
+
},
|
| 473 |
+
'general': {
|
| 474 |
+
'name': 'General User',
|
| 475 |
+
'icon': '👤',
|
| 476 |
+
'prompt': '''You are assisting a general user. Provide balanced, accessible information.
|
| 477 |
+
- Adjust complexity based on the query
|
| 478 |
+
- Be helpful and informative without overwhelming
|
| 479 |
+
- Provide context and practical implications
|
| 480 |
+
- Use clear language with minimal jargon
|
| 481 |
+
- Consider diverse backgrounds and knowledge levels
|
| 482 |
+
- Maintain neutral, objective tone'''
|
| 483 |
+
}
|
| 484 |
+
}
|
| 485 |
|
| 486 |
@staticmethod
def create_role_prompt(query: str, domain: str, role: str,
                       papers_count: int = 0, guideline_info: Optional[Dict] = None) -> str:
    """Create role-appropriate prompt with domain-agnostic focus.

    Args:
        query: The user's raw query text.
        domain: Domain label interpolated into the prompt as context.
        role: Key into ``ROLE_SYSTEM_PROMPTS``; unknown roles fall back to
            the ``'general'`` entry.
        papers_count: Number of sources being analysed; a "RESEARCH CONTEXT"
            line is appended when it is positive.
        guideline_info: Optional dict; the keys read here are
            ``'guidelines_found'`` and ``'critical_missing'`` (both joined
            with ``', '``, so they are presumably lists of strings).

    Returns:
        The prompt text. Literal greetings get a short "reply with a
        greeting" prompt; everything else gets the full role prompt.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Simple query detection - greetings and basic questions
    simple_queries = ('hi', 'hello', 'hey', 'greetings', 'good morning', 'good afternoon',
                      'good evening', 'how are you', "what's up", 'sup')
    query_lower = query.lower().strip()

    is_greeting = query_lower in simple_queries
    is_short = len(query.split()) <= 2
    # A short query that arrives together with papers or guideline data is a
    # terse research question (e.g. "diabetes treatment"), not small talk.
    # Sending it down the greeting path would discard that research context.
    has_research_context = papers_count > 0 or bool(guideline_info)

    if is_greeting or (is_short and not has_research_context):
        # (intro, instruction, example) per role. The example is wrapped in
        # double quotes when rendered; assembling the text from parts avoids
        # ending a triple-double-quoted literal with a quote character,
        # which does not parse.
        clinical = (
            "You are assisting a healthcare professional.",
            "Respond with a professional greeting appropriate for clinical setting.",
            "Hello. I'm ready to assist with evidence-based medical information. How can I help you today?",
        )
        academic = (
            "You are assisting an academic professional.",
            "Respond with a scholarly greeting that invites research questions.",
            "Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?",
        )
        greeting_parts = {
            'patient': (
                "You are helping a patient. Use warm, reassuring tone.",
                "Respond with a friendly greeting and invitation to ask questions. Keep it brief and welcoming.",
                "Hello! I'm here to help answer your health questions in simple, clear terms. What would you like to know?",
            ),
            'student': (
                "You are teaching a student.",
                "Respond with an encouraging greeting that invites learning questions.",
                "Hi there! I'm here to help you learn about medical topics. What are you curious about today?",
            ),
            'clinician': clinical,
            'doctor': clinical,
            'researcher': academic,
            'professor': academic,
            'pharmacist': (
                "You are assisting a pharmacy professional.",
                "Respond with a professional greeting focused on medication information.",
                "Hello. I can help with medication-related questions and information. How can I assist you today?",
            ),
            'general': (
                "You are assisting a general user.",
                "Respond with a friendly, welcoming greeting.",
                "Hello! I'm your Medical Research Assistant. I can help with evidence-based information across various specialties. How can I assist you today?",
            ),
        }
        intro, instruction, example = greeting_parts.get(role, greeting_parts['general'])
        return f'{intro}\n\nQuery: {query}\n\n{instruction}\nExample: "{example}"'

    # For substantive queries, use role-appropriate analysis
    role_prompt = role_info['prompt']
    role_label = role_info['name'].lower()

    # Domain-agnostic instruction
    domain_agnostic = "\n".join([
        "DOMAIN-AGNOSTIC APPROACH:",
        "- This system can answer questions from ANY domain (tech, finance, health, education, general)",
        "- Adapt your expertise to the query domain naturally",
        "- Do NOT force medical framing on non-medical questions",
        "- Only emphasize citations/guidelines when the query domain and role demand it",
        "- Use appropriate terminology for the query's domain",
    ])

    if role in ('clinician', 'doctor', 'researcher'):
        evidence_line = 'Reference evidence/guidelines when domain-appropriate'
    else:
        evidence_line = 'Mention evidence when helpful, not required'
    safety_line = 'Include appropriate disclaimers' if role == 'patient' else 'Maintain professional standards'

    # Build comprehensive prompt
    prompt = "\n".join([
        f"ROLE: {role_info['name']} {role_info['icon']}",
        role_prompt,
        "",
        domain_agnostic,
        "",
        f"QUERY: {query}",
        f"QUERY DOMAIN CONTEXT: {domain} (adapt your response appropriately)",
        "",
        "RESPONSE GUIDELINES:",
        "1. **Role-Appropriate Depth:**",
        f"- {role}: Adjust response complexity for {role_label} needs",
        "",
        "2. **Terminology Level:**",
        f"- Use language appropriate for {role_label} understanding",
        "",
        "3. **Evidence Awareness:**",
        f"- {evidence_line}",
        "",
        "4. **Safety & Practicality:**",
        f"- {safety_line}",
        "",
        "5. **Response Structure:**",
        f"- Organize information logically for {role_label} understanding",
        "- Prioritize most relevant information first",
        "- Keep response focused and actionable",
    ])

    # Add research context if we have papers
    if papers_count > 0:
        prompt += f"\n\nRESEARCH CONTEXT: Analyzing {papers_count} relevant sources"

    # Add guideline context if available
    if guideline_info:
        if guideline_info.get('guidelines_found'):
            prompt += f"\nGUIDELINES REFERENCED: {', '.join(guideline_info['guidelines_found'])}"
        if guideline_info.get('critical_missing'):
            # Only the first two gaps are listed to keep the prompt short.
            prompt += f"\nGUIDELINE GAPS: Missing {', '.join(guideline_info['critical_missing'][:2])}"

    prompt += f"\n\nPlease provide a {role_label}-appropriate response to: {query}"

    return prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 600 |
|
| 601 |
|
| 602 |
# ============================================================================
|
|
|
|
| 1315 |
print(f"⚠️ LLM not available - using fallback mode: {e}")
|
| 1316 |
self.llm = None
|
| 1317 |
|
| 1318 |
+
self.role_reasoning = RoleBasedReasoning() # NEW: Role-based reasoning
|
| 1319 |
self.ranker = PaperRanker()
|
| 1320 |
self.confidence_scorer = ConfidenceScorer()
|
| 1321 |
self.context_detector = UserContextDetector()
|
|
|
|
| 1357 |
else:
|
| 1358 |
print(" 📄 Real paper fetching: DISABLED (using demo papers)")
|
| 1359 |
print(" 📋 Guideline detection: ENABLED")
|
| 1360 |
+
print(" 👤 Role-based responses: ENABLED")
|
| 1361 |
|
| 1362 |
def answer_research_question(self,
|
| 1363 |
query: str,
|
|
|
|
| 1366 |
use_memory: bool = True,
|
| 1367 |
user_context: str = "auto",
|
| 1368 |
use_fallback: bool = False,
|
| 1369 |
+
role: str = "general", # NEW: Explicit role parameter
|
| 1370 |
+
role_system_prompt: str = None, # NEW: Custom role prompt from frontend
|
| 1371 |
**kwargs) -> Dict[str, Any]:
|
| 1372 |
+
"""Answer medical research questions with role-based reasoning"""
|
| 1373 |
|
| 1374 |
start_time = time.time()
|
| 1375 |
self.metrics['total_queries'] += 1
|
|
|
|
| 1377 |
|
| 1378 |
print(f"\n🔍 Processing query: '{query}'")
|
| 1379 |
print(f" Domain: {domain}")
|
| 1380 |
+
print(f" Role: {role}")
|
| 1381 |
print(f" Max papers: {max_papers}")
|
| 1382 |
print(f" Real-time search: {self.use_real_time}")
|
| 1383 |
|
| 1384 |
try:
|
| 1385 |
+
# Auto-detect user context if needed (backward compatibility)
|
| 1386 |
if user_context == "auto":
|
| 1387 |
user_context = self.context_detector.detect_context(query, domain)
|
| 1388 |
|
| 1389 |
self.metrics['user_contexts'][user_context] += 1
|
| 1390 |
|
| 1391 |
+
# NEW: Check for simple queries first (greetings, basic questions)
|
| 1392 |
+
simple_response = self._handle_simple_query(query, domain, role)
|
| 1393 |
+
if simple_response:
|
| 1394 |
+
return simple_response
|
| 1395 |
+
|
| 1396 |
+
# Check if query requires research analysis
|
| 1397 |
+
requires_research = self._requires_research_analysis(query)
|
| 1398 |
+
if not requires_research:
|
| 1399 |
+
# For non-research queries, provide direct role-appropriate response
|
| 1400 |
+
return self._handle_direct_query(query, domain, role)
|
| 1401 |
+
|
| 1402 |
# Retrieve papers using MedicalResearchEngine
|
| 1403 |
print("📚 Retrieving relevant papers...")
|
| 1404 |
papers = self._retrieve_real_papers(query, domain, max_papers, use_fallback)
|
| 1405 |
|
| 1406 |
if not papers:
|
| 1407 |
print("⚠️ No papers found, creating fallback response...")
|
| 1408 |
+
return self._create_no_results_response(query, domain, role)
|
| 1409 |
|
| 1410 |
# Detect guideline citations
|
| 1411 |
print("📋 Detecting guideline citations...")
|
|
|
|
| 1422 |
})
|
| 1423 |
|
| 1424 |
# Rank papers
|
| 1425 |
+
ranked_papers = self.ranker.rank_papers(papers, query, domain, role)
|
| 1426 |
print(f"📊 Papers found: {len(ranked_papers)}")
|
| 1427 |
|
| 1428 |
# Track paper sources
|
|
|
|
| 1442 |
|
| 1443 |
# Calculate confidence with guideline consideration
|
| 1444 |
confidence = self.confidence_scorer.calculate_confidence(
|
| 1445 |
+
ranked_papers, query, "summary", role, domain, guideline_info
|
| 1446 |
)
|
| 1447 |
|
| 1448 |
+
# Generate analysis using role-based reasoning
|
| 1449 |
+
print("🧠 Generating role-based analysis...")
|
| 1450 |
+
analysis = self._generate_role_based_analysis(
|
| 1451 |
+
query, domain, role, ranked_papers, guideline_info, role_system_prompt
|
| 1452 |
)
|
| 1453 |
|
| 1454 |
+
# Generate clinical bottom line with role awareness
|
| 1455 |
+
bottom_line = self._generate_role_bottom_line(
|
| 1456 |
+
query, domain, role, len(ranked_papers), real_papers, guideline_info
|
| 1457 |
)
|
| 1458 |
|
| 1459 |
# Synthesize final answer
|
| 1460 |
+
final_answer = self._synthesize_role_answer(
|
| 1461 |
+
query, domain, role, analysis, ranked_papers,
|
| 1462 |
bottom_line, confidence, guideline_info
|
| 1463 |
)
|
| 1464 |
|
| 1465 |
# Update memory
|
| 1466 |
if use_memory and self.memory:
|
| 1467 |
+
self._update_memory(query, final_answer, domain, role, ranked_papers, guideline_info)
|
| 1468 |
|
| 1469 |
# Update metrics
|
| 1470 |
response_time = time.time() - start_time
|
|
|
|
| 1485 |
print(f"❌ Error in research analysis: {e}")
|
| 1486 |
import traceback
|
| 1487 |
traceback.print_exc()
|
| 1488 |
+
return self._create_error_response(query, domain, role, str(e))
|
| 1489 |
+
|
| 1490 |
+
def _handle_simple_query(self, query: str, domain: str, role: str) -> Optional[Dict[str, Any]]:
|
| 1491 |
+
"""Handle simple queries like greetings with role-appropriate responses"""
|
| 1492 |
+
query_lower = query.lower().strip()
|
| 1493 |
+
|
| 1494 |
+
# Simple greetings
|
| 1495 |
+
simple_greetings = ['hi', 'hello', 'hey', 'greetings', 'good morning',
|
| 1496 |
+
'good afternoon', 'good evening', 'howdy']
|
| 1497 |
+
|
| 1498 |
+
if query_lower in simple_greetings:
|
| 1499 |
+
print(" 👋 Detected simple greeting")
|
| 1500 |
+
return self._create_greeting_response(query, domain, role)
|
| 1501 |
+
|
| 1502 |
+
# Very short queries (1-2 words) that aren't research questions
|
| 1503 |
+
if len(query.split()) <= 2 and not self._looks_like_research_query(query):
|
| 1504 |
+
print(" 💬 Detected simple query")
|
| 1505 |
+
return self._create_simple_response(query, domain, role)
|
| 1506 |
+
|
| 1507 |
+
return None
|
| 1508 |
+
|
| 1509 |
+
def _looks_like_research_query(self, query: str) -> bool:
|
| 1510 |
+
"""Check if query looks like a research question"""
|
| 1511 |
+
query_lower = query.lower()
|
| 1512 |
+
|
| 1513 |
+
# Research question indicators
|
| 1514 |
+
research_indicators = [
|
| 1515 |
+
'compare', 'difference', 'similar', 'contrast', 'analyze', 'analysis',
|
| 1516 |
+
'study', 'research', 'evidence', 'paper', 'article', 'trial', 'clinical',
|
| 1517 |
+
'method', 'approach', 'technique', 'treatment', 'therapy', 'diagnosis',
|
| 1518 |
+
'prognosis', 'outcome', 'efficacy', 'effectiveness', 'safety', 'risk',
|
| 1519 |
+
'benefit', 'recommendation', 'guideline', 'standard', 'protocol'
|
| 1520 |
+
]
|
| 1521 |
+
|
| 1522 |
+
# Check if query contains research indicators
|
| 1523 |
+
for indicator in research_indicators:
|
| 1524 |
+
if indicator in query_lower:
|
| 1525 |
+
return True
|
| 1526 |
+
|
| 1527 |
+
# Check question words
|
| 1528 |
+
question_words = ['what', 'why', 'how', 'when', 'where', 'which', 'who']
|
| 1529 |
+
if any(query_lower.startswith(word) for word in question_words):
|
| 1530 |
+
# Check if it's a complex question (more than basic)
|
| 1531 |
+
if len(query.split()) > 3:
|
| 1532 |
+
return True
|
| 1533 |
+
|
| 1534 |
+
return False
|
| 1535 |
+
|
| 1536 |
+
def _requires_research_analysis(self, query: str) -> bool:
|
| 1537 |
+
"""Determine if query requires full research analysis"""
|
| 1538 |
+
query_lower = query.lower().strip()
|
| 1539 |
+
|
| 1540 |
+
# Definitely simple queries
|
| 1541 |
+
simple_patterns = [
|
| 1542 |
+
r'^hi$', r'^hello$', r'^hey$', r'^greetings$',
|
| 1543 |
+
r'^good morning$', r'^good afternoon$', r'^good evening$',
|
| 1544 |
+
r'^how are you$', r"^what's up$", r'^sup$',
|
| 1545 |
+
r'^thanks$', r'^thank you$', r'^bye$', r'^goodbye$'
|
| 1546 |
+
]
|
| 1547 |
+
|
| 1548 |
+
for pattern in simple_patterns:
|
| 1549 |
+
if re.match(pattern, query_lower):
|
| 1550 |
+
return False
|
| 1551 |
+
|
| 1552 |
+
# Check if it's a substantive question
|
| 1553 |
+
if len(query.split()) <= 2 and not self._looks_like_research_query(query):
|
| 1554 |
+
return False
|
| 1555 |
+
|
| 1556 |
+
return True
|
| 1557 |
+
|
| 1558 |
+
def _create_greeting_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Create role-appropriate greeting response.

    Builds the response dict for a plain greeting: no papers, a fixed
    confidence block, and ``reasoning_method`` set to ``"greeting"``.

    Args:
        query: The greeting text as typed by the user (echoed back).
        domain: Domain key used to look up display name and icon.
        role: Role key; unknown roles fall back to the ``'general'`` entries.

    Returns:
        Response dict containing the markdown ``answer`` plus metadata.
    """
    started = time.time()
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Role-specific greetings
    greetings = {
        'patient': "👋 Hello! I'm here to help you understand health topics in simple, clear terms. What would you like to know?",
        'student': "👋 Hi there! I'm here to help you learn about medical topics. What are you curious about today?",
        'clinician': "👋 Hello. I'm ready to assist with evidence-based medical information. How can I help you today?",
        'doctor': "👋 Hello, doctor. I'm available to discuss clinical questions and evidence. What would you like to explore?",
        'researcher': "👋 Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?",
        'professor': "👋 Hello. I can assist with academic discussions and evidence synthesis. What topic interests you?",
        'pharmacist': "👋 Hello. I can help with medication-related questions and information. How can I assist you today?",
        'general': "👋 Hello! I'm your Medical Research Assistant. I can help with evidence-based information across various specialties. How can I assist you today?",
    }
    greeting = greetings.get(role, greetings['general'])

    # Prefer the config helper for the display name; if it is unavailable or
    # fails, fall back to DOMAIN_INFO with a title-cased default.
    domain_info = None
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️'),
            }
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except:`` so
            # KeyboardInterrupt/SystemExit are not swallowed.
            domain_info = None
    if domain_info is None:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️',
        })

    answer = (
        f"# {greeting}\n"
        "\n"
        f"**Role:** {role_info['name']} {role_info['icon']}\n"
        f"**Domain:** {domain_info['name']} {domain_info.get('icon', '')}\n"
        "\n"
        f"Feel free to ask me anything! I'll provide information tailored to your needs as a {role_info['name'].lower()}."
    )

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": greeting,
        "bottom_line": greeting,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 95.0,
            'level': 'HIGH 🟢',
            'explanation': 'Simple greeting response'
        },
        "guideline_info": None,
        "reasoning_method": "greeting",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # Elapsed seconds for building this response (previously the raw
            # epoch timestamp was stored here).
            'response_time': time.time() - started,
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
|
| 1628 |
+
|
| 1629 |
+
def _create_simple_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Create role-appropriate response for simple queries.

    Used for very short queries that are not research questions: replies
    with a role-specific request for more detail instead of running the
    paper pipeline.

    Args:
        query: The user's query, quoted back inside the reply.
        domain: Domain key used to look up display name and icon.
        role: Role key; unknown roles fall back to the ``'general'`` entries.

    Returns:
        Response dict with ``reasoning_method`` set to ``"simple_response"``.
    """
    started = time.time()
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Generate simple, direct response
    simple_responses = {
        'patient': f"I'd be happy to help with '{query}'. Could you tell me a bit more about what you're looking for?",
        'student': f"That's an interesting topic! To help you best, could you provide more details about what you'd like to know regarding '{query}'?",
        'clinician': f"Regarding '{query}', I can provide evidence-based information. Please share more specifics about your clinical question.",
        'doctor': f"For '{query}', I can offer medical information. Could you elaborate on the clinical context or specific aspects you're interested in?",
        'researcher': f"On the topic of '{query}', I can discuss research perspectives. What specific aspect would you like to explore?",
        'professor': f"Regarding '{query}', I can provide academic perspectives. What particular angle or detail would you like to discuss?",
        'pharmacist': f"About '{query}', I can offer medication-related information. Could you specify what you'd like to know?",
        'general': f"I can help with information about '{query}'. Could you provide more details about what specifically you're interested in?",
    }
    response = simple_responses.get(role, simple_responses['general'])

    # Prefer the config helper for the display name; if it is unavailable or
    # fails, fall back to DOMAIN_INFO with a title-cased default.
    domain_info = None
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️'),
            }
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except:`` so
            # KeyboardInterrupt/SystemExit are not swallowed.
            domain_info = None
    if domain_info is None:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️',
        })

    answer = (
        "# 💬 **Response**\n"
        "\n"
        f"**Role:** {role_info['name']} {role_info['icon']}\n"
        f"**Domain:** {domain_info['name']} {domain_info.get('icon', '')}\n"
        "\n"
        f"{response}\n"
        "\n"
        "*Tip: For more detailed information, try asking a more specific question!*"
    )

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": response,
        "bottom_line": response,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 85.0,
            'level': 'HIGH 🟢',
            'explanation': 'Simple query response'
        },
        "guideline_info": None,
        "reasoning_method": "simple_response",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # Elapsed seconds for building this response (previously the raw
            # epoch timestamp was stored here).
            'response_time': time.time() - started,
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
|
| 1701 |
+
|
| 1702 |
+
def _handle_direct_query(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Handle direct queries without research papers.

    Asks the LLM (when ``self.llm`` is set) for a role-appropriate reply
    using a prompt built with zero papers and no guideline info; otherwise,
    or on any LLM failure, uses a canned clarification request.

    Args:
        query: The user's query.
        domain: Domain key used for the prompt and for display metadata.
        role: Role key; unknown roles fall back to the ``'general'`` entry.

    Returns:
        Response dict with ``reasoning_method`` set to ``"direct_response"``.
    """
    started = time.time()
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Use LLM for direct response if available
    if self.llm:
        try:
            prompt = self.role_reasoning.create_role_prompt(query, domain, role, 0, None)

            raw_reply = self.llm.generate(
                prompt,
                system_message=f"You are assisting a {role_info['name'].lower()}. Provide helpful, accurate information.",
                max_tokens=1000
            )

            # Clean up response; a None reply is treated like an empty one
            # rather than relying on an AttributeError from .strip().
            response = (raw_reply or "").strip()
            if not response:
                response = f"I'd be happy to help with '{query}'. Could you provide more details about what specifically you're looking for?"

        except Exception as e:
            print(f"⚠️ LLM direct response failed: {e}")
            response = f"I can help with information about '{query}'. Please feel free to ask more specific questions!"
    else:
        response = f"I'd be happy to discuss '{query}'. What specific aspect would you like to know more about?"

    # Prefer the config helper for the display name; if it is unavailable or
    # fails, fall back to DOMAIN_INFO with a title-cased default.
    domain_info = None
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️'),
            }
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except:`` so
            # KeyboardInterrupt/SystemExit are not swallowed.
            domain_info = None
    if domain_info is None:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️',
        })

    answer = (
        "# 💬 **Response**\n"
        "\n"
        f"**Role:** {role_info['name']} {role_info['icon']}\n"
        f"**Domain:** {domain_info['name']} {domain_info.get('icon', '')}\n"
        "\n"
        f"{response}\n"
        "\n"
        "*Note: This is a direct response. For evidence-based research analysis with papers, please ask a more specific research question.*"
    )

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": response,
        "bottom_line": response,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 80.0,
            'level': 'HIGH 🟢',
            'explanation': 'Direct query response without papers'
        },
        "guideline_info": None,
        "reasoning_method": "direct_response",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # Elapsed seconds including the LLM call (previously the raw
            # epoch timestamp was stored here).
            'response_time': time.time() - started,
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
|
| 1782 |
|
| 1783 |
def _retrieve_real_papers(self, query: str, domain: str, max_papers: int,
|
| 1784 |
use_fallback: bool = False) -> List[Dict]:
|
|
|
|
| 1976 |
|
| 1977 |
return papers
|
| 1978 |
|
| 1979 |
+
def _generate_role_based_analysis(self, query: str, domain: str, role: str,
|
| 1980 |
+
papers: List[Dict], guideline_info: Dict = None,
|
| 1981 |
+
custom_role_prompt: str = None) -> str:
|
| 1982 |
+
"""Generate role-based analysis using LLM if available"""
|
| 1983 |
|
| 1984 |
if not self.llm:
|
| 1985 |
+
return self._create_fallback_role_analysis(query, domain, role, papers, guideline_info)
|
| 1986 |
|
| 1987 |
+
# Create role-based prompt
|
| 1988 |
+
prompt = self.role_reasoning.create_role_prompt(
|
| 1989 |
+
query, domain, role, len(papers), guideline_info
|
| 1990 |
)
|
| 1991 |
|
| 1992 |
+
# Add paper information for research context
|
| 1993 |
+
if papers:
|
| 1994 |
+
paper_info = "\n".join([
|
| 1995 |
+
f"{i + 1}. {p.get('title', 'Untitled')} ({p.get('source', 'Unknown')})"
|
| 1996 |
+
for i, p in enumerate(papers[:3])
|
| 1997 |
+
])
|
| 1998 |
+
prompt += f"\n\n**Relevant Sources:**\n{paper_info}"
|
| 1999 |
|
| 2000 |
# Add demo paper disclaimer if any demo papers
|
| 2001 |
demo_count = sum(1 for p in papers if p.get('is_demo', False))
|
| 2002 |
if demo_count > 0:
|
| 2003 |
+
prompt += f"\n\nNote: {demo_count} illustrative examples included for context."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2004 |
|
| 2005 |
try:
|
| 2006 |
+
# Use custom role prompt if provided, otherwise use default
|
| 2007 |
+
system_message = custom_role_prompt if custom_role_prompt else f"You are assisting a {role}. Provide helpful, accurate information."
|
| 2008 |
+
|
| 2009 |
analysis = self.llm.generate(
|
| 2010 |
+
prompt,
|
| 2011 |
+
system_message=system_message,
|
| 2012 |
+
max_tokens=2000
|
| 2013 |
)
|
| 2014 |
return analysis
|
| 2015 |
except Exception as e:
|
| 2016 |
+
print(f"⚠️ LLM role-based analysis failed: {e}")
|
| 2017 |
+
return self._create_fallback_role_analysis(query, domain, role, papers, guideline_info)
|
| 2018 |
|
| 2019 |
+
def _create_fallback_role_analysis(self, query: str, domain: str, role: str,
|
| 2020 |
+
papers: List[Dict], guideline_info: Dict = None) -> str:
|
| 2021 |
"""Create fallback analysis when LLM is unavailable"""
|
| 2022 |
+
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
|
| 2023 |
+
|
| 2024 |
if CONFIG_AVAILABLE:
|
| 2025 |
try:
|
| 2026 |
domain_name = get_domain_display_name(domain)
|
|
|
|
| 2033 |
real_papers = [p for p in papers if not p.get('is_demo', False)]
|
| 2034 |
demo_papers = [p for p in papers if p.get('is_demo', False)]
|
| 2035 |
|
| 2036 |
+
analysis = f"""**{role_info['name']}-Focused Analysis**
|
|
|
|
|
|
|
| 2037 |
**Query:** {query}
|
| 2038 |
+
**Domain Context:** {domain_name}
|
| 2039 |
+
**Role Perspective:** {role_info['name']}"""
|
| 2040 |
|
| 2041 |
# Add guideline information
|
| 2042 |
if guideline_info:
|
|
|
|
| 2046 |
analysis += f"\n**Guideline Gaps:** Missing explicit citations for {', '.join(guideline_info['critical_missing'][:3])}"
|
| 2047 |
|
| 2048 |
analysis += f"""
|
| 2049 |
+
**Key Information for {role_info['name']}:**
|
| 2050 |
+
Based on analysis of {len(papers)} relevant sources ({len(real_papers)} real, {len(demo_papers)} illustrative):
|
|
|
|
| 2051 |
|
| 2052 |
+
1. **{role_info['name']}-Relevant Insights:**
|
| 2053 |
+
- Information tailored to {role_info['name'].lower()} needs and perspective
|
| 2054 |
+
- Practical implications for {role_info['name'].lower()} context
|
| 2055 |
+
- Actionable takeaways appropriate for this role
|
|
|
|
| 2056 |
|
| 2057 |
+
2. **Domain Context:**
|
| 2058 |
+
- Considerations specific to {domain_name}
|
| 2059 |
+
- Relevant standards and approaches in this field
|
| 2060 |
+
- Important context for application
|
| 2061 |
+
|
| 2062 |
+
3. **Evidence Considerations:**
|
| 2063 |
+
- {len(papers)} sources analyzed
|
| 2064 |
+
- Quality and relevance assessed for {role_info['name'].lower()} needs
|
| 2065 |
+
- {"Guideline awareness as noted above" if guideline_info else "Standard evidence considerations"}
|
| 2066 |
+
|
| 2067 |
+
**Recommendations for {role_info['name']}:**
|
| 2068 |
+
- Apply information within {role_info['name'].lower()} role context
|
| 2069 |
+
- Consider individual circumstances and specific needs
|
| 2070 |
+
- {"Consult referenced guidelines as appropriate" if guideline_info and guideline_info.get('guidelines_found') else "Reference standard practices"}
|
| 2071 |
+
- Seek additional information for specific cases
|
| 2072 |
+
- Integrate with professional judgment and experience
|
| 2073 |
+
|
| 2074 |
+
*Note: This analysis is tailored for {role_info['name'].lower()} perspective. For other perspectives, different considerations may apply.*"""
|
| 2075 |
|
| 2076 |
if demo_papers:
|
| 2077 |
+
analysis += f"\n\n*Includes {len(demo_papers)} illustrative examples for comprehensive analysis.*"
|
| 2078 |
|
| 2079 |
return analysis
|
| 2080 |
|
| 2081 |
+
def _generate_role_bottom_line(self, query: str, domain: str, role: str,
|
| 2082 |
+
papers_count: int, real_papers_count: int,
|
| 2083 |
+
guideline_info: Dict = None) -> str:
|
| 2084 |
+
"""Generate role-appropriate bottom line"""
|
| 2085 |
+
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
|
| 2086 |
+
|
| 2087 |
if CONFIG_AVAILABLE:
|
| 2088 |
try:
|
| 2089 |
domain_name = get_domain_display_name(domain)
|
|
|
|
| 2092 |
else:
|
| 2093 |
domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
|
| 2094 |
|
| 2095 |
+
bottom_line = f"""**Bottom Line for {role_info['name']}:**
|
| 2096 |
+
Based on {papers_count} sources in {domain_name} ({real_papers_count} real sources), here are the key takeaways for {role_info['name'].lower()} perspective."""
|
| 2097 |
|
| 2098 |
# Add guideline-specific bottom line
|
| 2099 |
if guideline_info:
|
| 2100 |
if guideline_info.get('guidelines_found'):
|
| 2101 |
+
bottom_line += f"\n\n**Guideline Context:** {len(guideline_info['guidelines_found'])} major guidelines referenced."
|
| 2102 |
|
| 2103 |
if guideline_info.get('critical_missing'):
|
| 2104 |
+
missing_list = ', '.join(guideline_info['critical_missing'][:2])
|
| 2105 |
+
bottom_line += f"\n**Consider:** Missing explicit guideline citations for {missing_list}."
|
| 2106 |
|
| 2107 |
coverage = guideline_info.get('coverage_percentage', 0)
|
| 2108 |
if coverage < 50:
|
| 2109 |
+
bottom_line += f"\n**Evidence Note:** Guideline coverage is limited."
|
| 2110 |
|
| 2111 |
bottom_line += f"""
|
| 2112 |
+
**{role_info['name']}-Specific Considerations:**
|
| 2113 |
+
- Information tailored to {role_info['name'].lower()} role and needs
|
| 2114 |
+
- Practical application within {role_info['name'].lower()} context
|
| 2115 |
+
- Integration with {role_info['name'].lower()} knowledge and experience
|
| 2116 |
+
- {"Guideline-aware decision making" if guideline_info else "Evidence-informed approach"}
|
| 2117 |
+
- Consideration of specific circumstances and constraints"""
|
| 2118 |
|
| 2119 |
if papers_count > real_papers_count:
|
| 2120 |
+
bottom_line += f"\n\n*Note: Includes {papers_count - real_papers_count} illustrative examples for context.*"
|
| 2121 |
|
| 2122 |
return bottom_line
|
| 2123 |
|
| 2124 |
+
def _synthesize_role_answer(self, query: str, domain: str, role: str,
|
| 2125 |
+
analysis: str, papers: List[Dict],
|
| 2126 |
+
bottom_line: str, confidence: Dict[str, Any],
|
| 2127 |
+
guideline_info: Dict = None) -> Dict[str, Any]:
|
| 2128 |
+
"""Synthesize final answer with role information"""
|
| 2129 |
|
| 2130 |
+
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
|
| 2131 |
+
|
| 2132 |
if CONFIG_AVAILABLE:
|
| 2133 |
try:
|
| 2134 |
domain_info = {
|
|
|
|
| 2149 |
'description': f'Research in {domain.replace("_", " ")}'
|
| 2150 |
})
|
| 2151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2152 |
# Count real vs demo papers
|
| 2153 |
real_papers = [p for p in papers if not p.get('is_demo', False)]
|
| 2154 |
demo_papers = [p for p in papers if p.get('is_demo', False)]
|
| 2155 |
|
| 2156 |
+
# Format paper citations
|
| 2157 |
paper_citations = []
|
| 2158 |
+
for i, paper in enumerate(papers[:5], 1):
|
| 2159 |
title = paper.get('title', 'Untitled')
|
| 2160 |
authors = paper.get('authors', [])
|
| 2161 |
year = paper.get('publication_date', '').split('-')[0] if paper.get('publication_date') else ''
|
|
|
|
| 2164 |
is_demo = paper.get('is_demo', False)
|
| 2165 |
is_preprint = paper.get('is_preprint', False)
|
| 2166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2167 |
# Format authors
|
| 2168 |
if authors and isinstance(authors, list) and len(authors) > 0:
|
| 2169 |
if len(authors) == 1:
|
|
|
|
| 2180 |
|
| 2181 |
demo_indicator = "📄 " if is_demo else ""
|
| 2182 |
preprint_indicator = "⚡ " if is_preprint else ""
|
|
|
|
| 2183 |
|
| 2184 |
if author_str and year:
|
| 2185 |
+
citation += f"\n {demo_indicator}{preprint_indicator}*{author_str} ({year})*"
|
| 2186 |
elif author_str:
|
| 2187 |
+
citation += f"\n {demo_indicator}{preprint_indicator}*{author_str}*"
|
| 2188 |
else:
|
| 2189 |
+
citation += f"\n {demo_indicator}{preprint_indicator}*Unknown authors*"
|
| 2190 |
|
| 2191 |
if journal:
|
| 2192 |
citation += f"\n Journal: {journal}"
|
| 2193 |
elif source and source != 'unknown':
|
| 2194 |
citation += f"\n Source: {source}"
|
| 2195 |
|
|
|
|
|
|
|
|
|
|
| 2196 |
paper_citations.append(citation)
|
| 2197 |
|
| 2198 |
# Build guideline summary section
|
|
|
|
| 2203 |
if guideline_info.get('guidelines_found'):
|
| 2204 |
guideline_summary += f"**✅ Guidelines Referenced:** {', '.join(guideline_info['guidelines_found'])}\n\n"
|
| 2205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2206 |
if guideline_info.get('critical_missing'):
|
| 2207 |
missing_list = ', '.join(guideline_info['critical_missing'])
|
| 2208 |
guideline_summary += f"**⚠️ Missing Guideline Citations:** {missing_list}\n\n"
|
|
|
|
| 2210 |
guideline_summary += f"**Coverage Score:** {guideline_info.get('coverage_percentage', 0)}%\n\n"
|
| 2211 |
|
| 2212 |
# Build answer
|
| 2213 |
+
answer = f"""# 🔬 **{role_info['name']}-Focused Analysis**
|
| 2214 |
+
**Role:** {role_info['name']} {role_info['icon']}
|
| 2215 |
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
|
|
|
|
| 2216 |
**Evidence Confidence:** {confidence['level']} ({confidence['overall_score']}/100)
|
| 2217 |
+
**Sources Analyzed:** {len(papers)} ({len(real_papers)} real, {len(demo_papers)} illustrative)
|
| 2218 |
---
|
| 2219 |
## 📋 **Executive Summary**
|
| 2220 |
{bottom_line}
|
|
|
|
| 2223 |
{analysis}
|
| 2224 |
---
|
| 2225 |
## 📊 **Supporting Evidence**
|
| 2226 |
+
{chr(10).join(paper_citations) if paper_citations else "*No papers cited for this simple query*"}
|
| 2227 |
---
|
| 2228 |
+
## 🎯 **Key Takeaways for {role_info['name']}**
|
| 2229 |
+
1. Role-appropriate information and insights
|
| 2230 |
2. Domain-specific considerations for {domain_info['name'].lower()}
|
| 2231 |
+
3. Practical implications tailored to {role_info['name'].lower()} needs
|
| 2232 |
+
4. {"Guideline-aware recommendations" if guideline_info else "Evidence-informed approach"}
|
| 2233 |
+
*Analysis performed with {role_info['name'].lower()}-focused reasoning*
|
| 2234 |
*Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M")}*"""
|
| 2235 |
|
| 2236 |
return {
|
| 2237 |
"query": query,
|
| 2238 |
"domain": domain,
|
| 2239 |
"domain_info": domain_info,
|
| 2240 |
+
"user_context": role,
|
| 2241 |
+
"user_context_info": role_info,
|
| 2242 |
"answer": answer,
|
| 2243 |
"analysis": analysis,
|
| 2244 |
"bottom_line": bottom_line,
|
|
|
|
| 2247 |
"demo_papers_used": len(demo_papers),
|
| 2248 |
"confidence_score": confidence,
|
| 2249 |
"guideline_info": guideline_info,
|
| 2250 |
+
"reasoning_method": "role_based",
|
| 2251 |
"real_time_search": self.use_real_time,
|
| 2252 |
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
|
| 2253 |
"metrics": {
|
| 2254 |
'response_time': time.time(),
|
| 2255 |
'papers_analyzed': len(papers),
|
| 2256 |
'domain': domain,
|
| 2257 |
+
'user_context': role
|
| 2258 |
}
|
| 2259 |
}
|
| 2260 |
|
| 2261 |
def _update_memory(self, query: str, response: Dict[str, Any], domain: str,
|
| 2262 |
+
role: str, papers: List[Dict], guideline_info: Dict = None):
|
| 2263 |
+
"""Update conversation memory with role info"""
|
| 2264 |
if not self.memory:
|
| 2265 |
return
|
| 2266 |
|
| 2267 |
memory_data = {
|
| 2268 |
'query': query,
|
| 2269 |
'domain': domain,
|
| 2270 |
+
'role': role,
|
| 2271 |
'papers_used': len(papers),
|
| 2272 |
'real_papers': sum(1 for p in papers if not p.get('is_demo', False)),
|
| 2273 |
'demo_papers': sum(1 for p in papers if p.get('is_demo', False)),
|
|
|
|
| 2287 |
metadata=memory_data
|
| 2288 |
)
|
| 2289 |
|
| 2290 |
+
def _create_no_results_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
|
| 2291 |
"""Create response when no papers are found"""
|
| 2292 |
+
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
|
| 2293 |
+
|
| 2294 |
if CONFIG_AVAILABLE:
|
| 2295 |
try:
|
| 2296 |
domain_info = {
|
|
|
|
| 2309 |
})
|
| 2310 |
|
| 2311 |
answer = f"""# 🔍 **Limited Research Found**
|
| 2312 |
+
|
| 2313 |
+
**Role:** {role_info['name']} {role_info['icon']}
|
| 2314 |
**Domain:** {domain_info['name']}
|
| 2315 |
+
|
| 2316 |
+
**Query:** {query}
|
| 2317 |
+
|
| 2318 |
+
**Suggestions for {role_info['name']}:**
|
| 2319 |
1. Try broadening your search terms
|
| 2320 |
+
2. Consider related topics in {domain_info['name']}
|
| 2321 |
+
3. Check spelling of technical terms
|
| 2322 |
4. Try a more general domain selection
|
| 2323 |
+
|
| 2324 |
+
**For Role-Appropriate Information:**
|
| 2325 |
+
- Ask more general questions about the topic
|
| 2326 |
+
- Request explanations of concepts
|
| 2327 |
+
- Inquire about standard approaches or practices
|
| 2328 |
+
- Seek practical guidance rather than specific research
|
| 2329 |
+
|
| 2330 |
+
**Example {role_info['name'].lower()}-appropriate queries:**
|
| 2331 |
+
- "Basic explanation of [topic] for {role_info['name'].lower()}"
|
| 2332 |
+
- "Standard approaches to [issue]"
|
| 2333 |
+
- "Practical guidance for [situation]"
|
| 2334 |
+
- "Key concepts about [subject]"
|
| 2335 |
+
|
| 2336 |
+
*Note: Some specialized topics may have limited published research. I can still provide general information and guidance tailored to your role.*"""
|
| 2337 |
|
| 2338 |
return {
|
| 2339 |
"query": query,
|
| 2340 |
"domain": domain,
|
| 2341 |
"domain_info": domain_info,
|
| 2342 |
+
"user_context": role,
|
| 2343 |
+
"user_context_info": role_info,
|
| 2344 |
"answer": answer,
|
| 2345 |
"papers_used": 0,
|
| 2346 |
"real_papers_used": 0,
|
|
|
|
| 2354 |
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE
|
| 2355 |
}
|
| 2356 |
|
| 2357 |
+
def _create_error_response(self, query: str, domain: str, role: str, error: str) -> Dict[str, Any]:
|
| 2358 |
"""Create error response"""
|
| 2359 |
+
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
|
| 2360 |
+
|
| 2361 |
if CONFIG_AVAILABLE:
|
| 2362 |
try:
|
| 2363 |
domain_info = {
|
|
|
|
| 2376 |
})
|
| 2377 |
|
| 2378 |
answer = f"""# 🚨 **Analysis Error**
|
| 2379 |
+
|
| 2380 |
+
**Role:** {role_info['name']} {role_info['icon']}
|
| 2381 |
**Domain:** {domain_info['name']}
|
| 2382 |
+
|
| 2383 |
+
**Query:** {query}
|
| 2384 |
**Error:** {error}
|
| 2385 |
+
|
| 2386 |
+
**Troubleshooting for {role_info['name']}:**
|
| 2387 |
1. Check your internet connection
|
| 2388 |
+
2. Try a simpler query or rephrase
|
| 2389 |
+
3. Verify domain selection is appropriate
|
| 2390 |
4. Contact support if problem persists
|
| 2391 |
+
|
| 2392 |
+
**For Role-Appropriate Alternatives:**
|
| 2393 |
+
- Ask a simpler version of your question
|
| 2394 |
+
- Request general information instead of specific research
|
| 2395 |
+
- Try breaking complex questions into smaller parts
|
| 2396 |
+
- Use more common terminology
|
| 2397 |
+
|
| 2398 |
+
Please try again or reformulate your question for {role_info['name'].lower()}-appropriate assistance."""
|
| 2399 |
|
| 2400 |
return {
|
| 2401 |
"query": query,
|
| 2402 |
"domain": domain,
|
| 2403 |
"domain_info": domain_info,
|
| 2404 |
+
"user_context": role,
|
| 2405 |
+
"user_context_info": role_info,
|
| 2406 |
"answer": answer,
|
| 2407 |
"papers_used": 0,
|
| 2408 |
"real_papers_used": 0,
|
|
|
|
| 2496 |
}
|
| 2497 |
|
| 2498 |
def get_engine_status(self) -> Dict[str, Any]:
|
| 2499 |
+
"""Get engine status with role metrics"""
|
| 2500 |
# Calculate average guideline coverage
|
| 2501 |
avg_guideline_coverage = 0
|
| 2502 |
if self.metrics['guideline_coverage']:
|
|
|
|
| 2505 |
|
| 2506 |
return {
|
| 2507 |
"engine_name": "Medical Research RAG Engine",
|
| 2508 |
+
"version": "2.2.0",
|
| 2509 |
"model": self.model if hasattr(self, 'model') else "Unknown",
|
| 2510 |
+
"features": ["role_based_reasoning", "real_paper_fetching",
|
| 2511 |
+
"confidence_scoring", "guideline_detection", "simple_query_handling"],
|
| 2512 |
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
|
| 2513 |
"real_time_search": self.use_real_time,
|
| 2514 |
+
"roles_supported": list(RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.keys()),
|
| 2515 |
"guideline_databases": len(GuidelineDetector.GUIDELINE_DATABASES),
|
| 2516 |
"metrics": {
|
| 2517 |
"total_queries": self.metrics['total_queries'],
|
|
|
|
| 2523 |
"demo_papers_used": self.metrics['demo_papers_used']
|
| 2524 |
},
|
| 2525 |
"domains_supported": len(DOMAIN_INFO),
|
| 2526 |
+
"simple_query_handling": "ENABLED"
|
| 2527 |
}
|
| 2528 |
|
| 2529 |
def clear_memory(self):
|
|
|
|
| 2539 |
# TEST FUNCTION
|
| 2540 |
# ============================================================================
|
| 2541 |
|
| 2542 |
+
def test_role_based_rag_engine():
|
| 2543 |
+
"""Test the medical RAG engine with role-based responses"""
|
| 2544 |
print("\n" + "=" * 60)
|
| 2545 |
+
print("🧪 TESTING ROLE-BASED RAG ENGINE")
|
| 2546 |
print("=" * 60)
|
| 2547 |
|
| 2548 |
try:
|
| 2549 |
# Initialize engine
|
| 2550 |
engine = EnhancedRAGEngine(
|
| 2551 |
+
session_id="role_test",
|
| 2552 |
model="gpt-oss-120b",
|
| 2553 |
+
use_real_time=False # Disable real-time for faster testing
|
| 2554 |
)
|
| 2555 |
|
| 2556 |
+
# Test queries with different roles
|
| 2557 |
test_cases = [
|
| 2558 |
{
|
| 2559 |
+
"query": "hi",
|
| 2560 |
+
"domain": "general_medical",
|
| 2561 |
+
"role": "patient"
|
| 2562 |
},
|
| 2563 |
{
|
| 2564 |
+
"query": "hello",
|
| 2565 |
+
"domain": "cardiology",
|
| 2566 |
+
"role": "doctor"
|
| 2567 |
+
},
|
| 2568 |
+
{
|
| 2569 |
+
"query": "hey",
|
| 2570 |
"domain": "endocrinology",
|
| 2571 |
+
"role": "student"
|
| 2572 |
},
|
| 2573 |
{
|
| 2574 |
+
"query": "Compare first-line antibiotics for community-acquired pneumonia",
|
| 2575 |
+
"domain": "infectious_disease",
|
| 2576 |
+
"role": "clinician"
|
| 2577 |
}
|
| 2578 |
]
|
| 2579 |
|
| 2580 |
+
for i, test_case in enumerate(test_cases, 1):
|
| 2581 |
print(f"\n📝 Test Case {i}:")
|
| 2582 |
print(f" Query: '{test_case['query']}'")
|
| 2583 |
print(f" Domain: {test_case['domain']}")
|
| 2584 |
+
print(f" Role: {test_case['role']}")
|
| 2585 |
|
| 2586 |
# Process query
|
| 2587 |
response = engine.answer_research_question(
|
| 2588 |
query=test_case['query'],
|
| 2589 |
domain=test_case['domain'],
|
| 2590 |
+
max_papers=5,
|
| 2591 |
+
role=test_case['role'],
|
| 2592 |
use_fallback=True
|
| 2593 |
)
|
| 2594 |
|
| 2595 |
if response and 'error' not in response:
|
| 2596 |
print(f"\n✅ Test Successful!")
|
| 2597 |
+
print(f" Response type: {response.get('reasoning_method', 'unknown')}")
|
| 2598 |
print(f" Papers used: {response.get('papers_used', 0)}")
|
|
|
|
| 2599 |
print(f" Confidence: {response.get('confidence_score', {}).get('overall_score', 0)}/100")
|
| 2600 |
|
| 2601 |
+
# Check if it's a simple response
|
| 2602 |
+
if response.get('reasoning_method') in ['greeting', 'simple_response', 'direct_response']:
|
| 2603 |
+
print(f" ⭐ Simple query handled appropriately!")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2604 |
|
| 2605 |
# Show engine status
|
| 2606 |
status = engine.get_engine_status()
|
| 2607 |
print(f"\n🔧 Engine Status:")
|
| 2608 |
+
print(f" Role-based responses: ENABLED")
|
| 2609 |
+
print(f" Simple query handling: ENABLED")
|
| 2610 |
+
print(f" Roles supported: {len(status['roles_supported'])}")
|
| 2611 |
print(f" Total queries: {status['metrics']['total_queries']}")
|
| 2612 |
|
| 2613 |
return True
|
|
|
|
| 2621 |
|
| 2622 |
if __name__ == "__main__":
    # Script entry point: run the role-based smoke test, print a summary
    # banner on success, and signal failure through the process exit status.
    test_result = test_role_based_rag_engine()

    if test_result:
        print(f"\n{'=' * 60}")
        print("🎉 ROLE-BASED RAG ENGINE TEST COMPLETE!")
        print(" Role-based reasoning: ✓")
        print(" Simple query handling: ✓")
        print(" Domain-agnostic approach: ✓")
        print(" Guideline detection: ✓")
        print(f"{'=' * 60}")
    else:
        print("\n❌ Engine test failed")
        # Exit non-zero so shells and CI pipelines can detect the failure;
        # previously the script exited with status 0 regardless of outcome.
        raise SystemExit(1)
|